Ejemplo n.º 1
0
def main(argv):
    """Main function to process mapping for each read using functions in prepare_mapping_funcs"""
    args = parser.parse_args()
    print("Running prepare_mapping using flip-flop remapping")

    # Refuse to clobber an existing output unless --overwrite was given.
    if not args.overwrite and os.path.exists(args.output):
        print("Cowardly refusing to overwrite {}".format(args.output))
        sys.exit(1)

    # Iterator over every read we want to process.
    fast5_reads = fast5utils.iterate_fast5_reads(
        args.input_folder,
        limit=args.limit,
        strand_list=args.input_strand_list)

    # Fixed keyword arguments shared by every worker invocation.
    kwargs = helpers.get_kwargs(
        args, ['alphabet', 'collapse_alphabet', 'device'])
    kwargs['per_read_params_dict'] = (
        prepare_mapping_funcs.get_per_read_params_dict_from_tsv(
            args.input_per_read_params))
    kwargs['references'] = helpers.fasta_file_to_dict(args.references)
    kwargs['model'] = helpers.load_model(args.model)

    # remaps a single read using flip-flip network
    worker_function = prepare_mapping_funcs.oneread_remap

    results = imap_mp(worker_function,
                      fast5_reads,
                      threads=args.jobs,
                      fix_kwargs=kwargs,
                      unordered=True)

    # results is an iterable of dicts; each dict is the set of return
    # values from a single read.
    prepare_mapping_funcs.generate_output_from_results(results, args)
Ejemplo n.º 2
0
def write_signal_mappings(sig_map_q, sig_map_conn, ref_out_info, aux_failed_q):
    """Getter process target: write per-read signal mappings to file.

    Drains ``sig_map_q`` until the worker processes report completion on
    ``sig_map_conn`` and the queue is empty, optionally shifting each
    mapping by ``ref_out_info.sig_map_offset``, and streams the results
    through ``prepare_mapping_funcs.generate_output_from_results``.  Any
    exception is reported on ``aux_failed_q`` as a
    ``("SigMapProcessingError", message, traceback)`` tuple instead of
    propagating.
    """
    def apply_sig_map_offset(read_mapping):
        """Apply signal mapping shift to center coarse mappings to a registered
        signal based mapping.

        NOTE(review): assumes ``read_mapping[0]`` is a dict containing
        "Ref_to_signal" and "Reference" sliceable entries -- confirm
        against the producer side.  Mutates that dict in place and
        returns ``read_mapping``.
        """
        if (
            ref_out_info.sig_map_offset is not None
            and ref_out_info.sig_map_offset != 0
        ):
            if ref_out_info.sig_map_offset > 0:
                # clip beginning of signal mapping and end of reference to
                # shift signal assignments to the left
                read_mapping[0]["Ref_to_signal"] = read_mapping[0][
                    "Ref_to_signal"
                ][ref_out_info.sig_map_offset :]
                read_mapping[0]["Reference"] = read_mapping[0]["Reference"][
                    : -ref_out_info.sig_map_offset
                ]
            else:
                # clip end of signal mapping and beginning of reference to
                # shift signal assignments to the right
                # (offset is negative here, so each slice trims
                # abs(offset) elements from the relevant end)
                read_mapping[0]["Ref_to_signal"] = read_mapping[0][
                    "Ref_to_signal"
                ][: ref_out_info.sig_map_offset]
                read_mapping[0]["Reference"] = read_mapping[0]["Reference"][
                    -ref_out_info.sig_map_offset :
                ]
        return read_mapping

    def iter_mappings():
        """Yield offset-adjusted mappings until workers finish and queue drains."""
        workers_active = True
        LOGGER.debug("GetterInitComplete")
        # Keep polling while workers may still produce or items remain queued.
        while workers_active or not sig_map_q.empty():
            try:
                read_mapping = sig_map_q.get(timeout=0.1)
                yield apply_sig_map_offset(read_mapping)
            except queue.Empty:
                # A message on the connection signals that all workers finished.
                if sig_map_conn.poll():
                    workers_active = False

    try:
        LOGGER.debug("GetterStarting")
        prepare_mapping_funcs.generate_output_from_results(
            iter_mappings(),
            mh.get_megalodon_fn(ref_out_info.out_dir, mh.SIG_MAP_NAME),
            ref_out_info.alphabet_info,
            verbose=False,
        )
        LOGGER.debug("GetterClosing")
    except Exception as e:
        # Report the failure to the coordinating process rather than raise.
        aux_failed_q.put(
            ("SigMapProcessingError", str(e), traceback.format_exc())
        )
Ejemplo n.º 3
0
def write_signal_mappings(sig_map_q, sig_map_conn, sig_map_fn, alphabet_info):
    """Drain per-read signal mappings from a queue and write them out.

    Pulls mappings off ``sig_map_q`` until the workers announce completion
    on ``sig_map_conn``, flushes whatever is still queued, and streams the
    whole sequence to ``generate_output_from_results``.
    """
    def iter_mappings():
        # Main phase: block briefly on the queue; a message on the pipe
        # means the producers have finished.
        finished = False
        while not finished:
            try:
                yield sig_map_q.get(block=True, timeout=0.01)
            except queue.Empty:
                finished = sig_map_conn.poll()

        # Drain phase: flush any mappings still sitting in the queue.
        while not sig_map_q.empty():
            yield sig_map_q.get(block=False)

    prepare_mapping_funcs.generate_output_from_results(
        iter_mappings(), sig_map_fn, alphabet_info, verbose=False)
Ejemplo n.º 4
0
def write_signal_mappings(sig_map_q, sig_map_conn, ref_out_info, aux_failed_q):
    """Getter process: stream signal mappings from a queue into the output.

    Consumes per-read mappings from ``sig_map_q`` until the workers report
    completion on ``sig_map_conn`` and the queue is empty, then writes them
    via ``generate_output_from_results``.  Failures are forwarded on
    ``aux_failed_q`` rather than raised.
    """
    def iter_mappings():
        still_running = True
        LOGGER.debug('GetterInitComplete')
        # Keep consuming while workers are alive or items remain queued.
        while still_running or not sig_map_q.empty():
            try:
                yield sig_map_q.get(timeout=0.1)
            except queue.Empty:
                # A message on the pipe signals that all workers finished.
                if sig_map_conn.poll():
                    still_running = False

    try:
        LOGGER.debug('GetterStarting')
        prepare_mapping_funcs.generate_output_from_results(
            iter_mappings(),
            mh.get_megalodon_fn(ref_out_info.out_dir, mh.SIG_MAP_NAME),
            ref_out_info.alphabet_info,
            verbose=False)
        LOGGER.debug('GetterClosing')
    except Exception as e:
        aux_failed_q.put(
            ('SigMapProcessingError', str(e), traceback.format_exc()))
Ejemplo n.º 5
0
def main():
    """Main function to process mapping for each read using functions in prepare_mapping_funcs"""
    args = parser.parse_args()
    print("Running prepare_mapping using flip-flop remapping")

    # Refuse to clobber an existing output unless --overwrite was given.
    if not args.overwrite:
        if os.path.exists(args.output):
            print("Cowardly refusing to overwrite {}".format(args.output))
            sys.exit(1)

    # Create alphabet and check for consistency.
    # Each element of args.mod is (mod_base, canonical_base, mod_name).
    modified_bases = [elt[0] for elt in args.mod]
    canonical_bases = [elt[1] for elt in args.mod]
    # Validate with explicit checks rather than assert: asserts are
    # stripped when Python runs with -O, which would silently disable
    # this input validation.
    for b in modified_bases:
        if len(b) != 1:
            sys.exit(
                "Modified bases must be a single character, got {}".format(b))
        if b in args.alphabet:
            sys.exit(
                "Modified base must not be a canonical base, got {}".format(b))
    for b in canonical_bases:
        if len(b) != 1:
            sys.exit(
                "Canonical coding for modified bases must be a single "
                "character, got {}".format(b))
        if b not in args.alphabet:
            sys.exit(
                "Canonical coding for modified base must be a canonical "
                "base, got {}".format(b))
    full_alphabet = args.alphabet + ''.join(modified_bases)
    flat_alphabet = args.alphabet + ''.join(canonical_bases)
    modification_names = [elt[2] for elt in args.mod]

    alphabet_info = alphabet.AlphabetInfo(full_alphabet,
                                          flat_alphabet,
                                          modification_names,
                                          do_reorder=True)

    print("Converting references to labels using {}".format(
        str(alphabet_info)))

    # Make an iterator that yields all the reads we're interested in.
    fast5_reads = fast5utils.iterate_fast5_reads(
        args.input_folder,
        limit=args.limit,
        strand_list=args.input_strand_list,
        recursive=args.recursive)

    # Set up fixed arguments (kwargs) shared by every worker invocation.
    kwargs = {
        'per_read_params_dict':
            prepare_mapping_funcs.get_per_read_params_dict_from_tsv(
                args.input_per_read_params),
        'model': helpers.load_model(args.model),
        'alphabet_info': alphabet_info,
        'max_read_length': args.max_read_length,
        'localpen': args.localpen,
    }

    # remaps a single read using flip-flip network
    workerFunction = prepare_mapping_funcs.oneread_remap

    def iter_jobs():
        # Load references lazily so the (possibly large) fasta is only
        # read when the job iterator is first consumed.
        references = bio.fasta_file_to_dict(args.references,
                                            alphabet=full_alphabet)
        for fn, read_id in fast5_reads:
            yield fn, read_id, references.get(read_id, None)

    if args.limit is not None:
        # Aim for roughly two chunks per worker, clipped to a sane range.
        chunksize = int(np.clip(args.limit // (2 * args.jobs), 1, 50))
    else:
        chunksize = 50

    results = imap_mp(workerFunction,
                      iter_jobs(),
                      threads=args.jobs,
                      fix_kwargs=kwargs,
                      unordered=True,
                      chunksize=chunksize)

    # results is an iterable of dicts; each dict is the set of return
    # values from a single read.
    prepare_mapping_funcs.generate_output_from_results(results, args.output,
                                                       alphabet_info)