Example n. 1
0
    def _create_mapping_stats_gmap_report(self):
        """
        Build a Mapping Stats gmap report from the gmap test data directory,
        write it as JSON into the output dir, and return the deserialized
        report.
        """
        from pbreports.report.mapping_stats import to_report
        from test_pbreports_report_mapping_stats import _GMAP_DATA_DIR as datadir

        log.info('Creating Mapping Stats gmap report using datadir {d}'.format(
            d=datadir))

        def _to_p(file_name):
            # Resolve a file name relative to the gmap test data dir.
            return os.path.join(datadir, file_name)

        input_fofn = _to_p('input.fofn')
        filtered_regions_fofn = _to_p('filtered_regions.fofn')
        filtered_summary = _to_p('filtered_summary.csv')
        aligned_reads_cmph5 = _to_p('aligned_reads.cmp.h5')

        bas_files = fofn_to_files(input_fofn)
        region_files = fofn_to_files(filtered_regions_fofn)

        report_json = 'mapping_report.json'
        output_json = os.path.join(self._output_dir, 'mapping_report.json')

        report = to_report(bas_files,
                           region_files,
                           aligned_reads_cmph5,
                           report_json,
                           self._output_dir,
                           filter_summary_csv=filtered_summary,
                           mode="external")

        # NOTE(review): pformat's return value is discarded, as in the
        # original code — presumably intended for debug logging; confirm.
        pformat(report.to_dict())
        report.write_json(output_json)

        return self._deserialize_report(output_json)
def main(argv=sys.argv):
    """Main point of entry"""
    parser = get_parser()
    # the first arg with be the exe name
    args = parser.parse_args(argv[1:])

    movie_files = fofn_to_files(args.fofn)

    # Route logging: verbose setup in debug mode, otherwise swallow records.
    if args.debug:
        setup_log(log, level=logging.DEBUG)
    else:
        log.addHandler(logging.NullHandler())

    log.debug(args)

    started_at = time.time()
    try:
        rcode = run(args.cmp_h5, movie_files, args.output_csv,
                    external_mode=args.external)
    except Exception as e:
        # Any failure maps to a nonzero exit code; log with traceback.
        rcode = -1
        log.error(e, exc_info=True)
        sys.stderr.write(str(e) + "\n")

    run_time = time.time() - started_at

    _d = dict(f=os.path.basename(__file__), x=rcode, s=run_time, v=__version__)
    log.info(
        "completed {f} v{v} with exit code {x} in {s:.2f} sec.".format(**_d))
    return rcode
Example n. 3
0
def fofn_to_chunks(fofn):
    """Create one PipelineChunk per file listed in the FOFN.

    Chunk ids are "chunk-0", "chunk-1", ... in file order; each chunk
    carries its file path under the standard FOFN chunk key.
    """
    return [
        PipelineChunk("chunk-{i}".format(i=index),
                      **{Constants.CHUNK_KEY_FOFN: file_name})
        for index, file_name in enumerate(fofn_to_files(fofn))
    ]
Example n. 4
0
def _args_chunk_fofn(args):
    """Split the input FOFN into grouped chunk FOFNs and write a chunk report.

    Reads ``args.input_fofn``, groups the listed files into at most
    ``args.max_total_chunks`` chunks under ``args.output_dir``, and records
    them in ``args.chunk_report_json``. Always returns 0 (success exit code).
    """
    fofn_files = fofn_to_files(args.input_fofn)
    log.info("read in fofn with {n} files.".format(n=len(fofn_files)))
    chunks = CU.write_grouped_fofn_chunks(fofn_files, args.max_total_chunks,
                                          args.output_dir,
                                          args.chunk_report_json)
    log.debug("Converted {x} Fofn into {n} chunks. Write chunks to {f}".format(
        n=len(chunks), f=args.chunk_report_json, x=len(fofn_files)))
    return 0
Example n. 5
0
def fofn_to_chunks(fofn):
    """Create one PipelineChunk per file listed in the FOFN.

    :param fofn: path to a file-of-file-names
    :return: list of PipelineChunk with ids "chunk-0", "chunk-1", ...
    """
    files = fofn_to_files(fofn)
    chunks = []
    for i, f in enumerate(files):
        chunk_id = "chunk-{i}".format(i=i)
        # Each chunk carries a single file path under the standard FOFN key.
        _d = {Constants.CHUNK_KEY_FOFN: f}
        p = PipelineChunk(chunk_id, **_d)
        chunks.append(p)
    return chunks
Example n. 6
0
    def _create_reads_of_insert_report(self):
        """
        Build a Reads of Insert report from the test data dir, write it as
        JSON into the output dir, and return the deserialized report.
        """
        from pbreports.report.reads_of_insert import to_report
        from test_pbreports_report_reads_of_insert import _DATA_DIR as datadir

        log.info('Creating reads_of_insert report using datadir {d}'.format(
            d=datadir))

        fofn_path = os.path.join(datadir, 'reads_of_insert.fofn')
        bas_files = fofn_to_files(fofn_path)

        report = to_report(bas_files, self._output_dir)
        # NOTE(review): pformat's return value is discarded, as in the
        # original code — presumably intended for debug logging; confirm.
        pformat(report.to_dict())

        output_json = os.path.join(self._output_dir, 'reads_of_insert.json')
        report.write_json(output_json)

        return self._deserialize_report(output_json)
Example n. 7
0
    def _create_overview_report(self):
        """
        Creates and returns an Overview report.

        Builds the report from the bas files listed in the test data dir's
        input.fofn, writes it as 'overview.json' in the output dir, then
        returns the deserialized report.
        """
        from pbreports.report.overview import run
        from test_pbreports_report_overview import _DATA_DIR as datadir
        log.info(
            'Creating Overview report using datadir {d}'.format(d=datadir))

        input_name = 'input.fofn'
        input_fofn = os.path.join(datadir, input_name)
        bas_files = fofn_to_files(input_fofn)
        report_json = 'overview.json'
        output_json = os.path.join(self._output_dir, report_json)
        report = run(bas_files)
        # NOTE(review): pformat's return value is discarded — presumably it
        # was meant to be logged; confirm intent before removing this line.
        pformat(report.to_dict())
        report.write_json(output_json)

        return self._deserialize_report(output_json)
Example n. 8
0
def gather_fofn(input_files, output_file, skip_empty=True):
    """
    Merge several FOFN files into a single FOFN.

    This should be better spec'ed and impose a tighter constraint on the FOFN

    :param input_files: List of file paths (each a FOFN to merge)
    :param output_file: File Path of the merged FOFN to write
    :param skip_empty: Ignore empty (blank) file-name entries

    :return: Output file

    :rtype: str
    """

    all_files = []
    for input_file in input_files:
        file_names = fofn_to_files(input_file)
        if skip_empty:
            # Fix: skip_empty was documented but previously never used.
            # Drop blank entries so they don't produce empty lines in the
            # merged output. Well-formed FOFNs are unaffected.
            file_names = [f for f in file_names if f and f.strip()]
        all_files.extend(file_names)

    with open(output_file, 'w') as f:
        f.write("\n".join(all_files))

    return output_file
Example n. 9
0
def gather_fofn(input_files, output_file, skip_empty=True):
    """
    Merge several FOFN files into one newline-joined FOFN.

    This should be better spec'ed and impose a tighter constraint on the FOFN

    :param input_files: List of file paths
    :param output_file: File Path
    :param skip_empty: Ignore empty files
        NOTE(review): this flag is accepted but currently unused — confirm
        the intended semantics before wiring it up.

    :return: Output file

    :rtype: str
    """
    merged = []
    for fofn_path in input_files:
        merged.extend(fofn_to_files(fofn_path))

    with open(output_file, 'w') as out:
        out.write("\n".join(merged))

    return output_file
Example n. 10
0
def nchunk_fofn(input_file, max_chunks):
    """Return the chunk count for a FOFN: max_chunks, capped by the number
    of files listed."""
    n_files = len(fofn_to_files(input_file))
    return min(n_files, max_chunks)
Example n. 11
0
def _fofn_to_metadata(path):
    """Build DatasetMetadata for a FOFN; the file count is passed for both
    constructor arguments."""
    n_files = len(fofn_to_files(path))
    return DatasetMetadata(n_files, n_files)
Example n. 12
0
def _fofn_to_metadata(path):
    """Build DatasetMetadata for a FOFN; the file count is used for both
    constructor arguments (presumably count and total — confirm against
    DatasetMetadata's signature)."""
    files = fofn_to_files(path)
    return DatasetMetadata(len(files), len(files))
Example n. 13
0
def nchunk_fofn(input_file, max_chunks):
    """Return how many chunks to create for the FOFN: at most max_chunks,
    and never more than the number of files listed."""
    input_files = fofn_to_files(input_file)
    nchunks = min(len(input_files), max_chunks)
    return nchunks
Example n. 14
0
def _args_chunk_fofn(args):
    """Split the input FOFN into grouped chunk FOFNs and write a chunk
    report; returns 0 on success."""
    input_files = fofn_to_files(args.input_fofn)
    log.info("read in fofn with {n} files.".format(n=len(input_files)))
    chunks = CU.write_grouped_fofn_chunks(input_files,
                                          args.max_total_chunks,
                                          args.output_dir,
                                          args.chunk_report_json)
    log.debug("Converted {x} Fofn into {n} chunks. Write chunks to {f}".format(
        n=len(chunks), f=args.chunk_report_json, x=len(input_files)))
    return 0