Example #1
0
def main():
    """Run ``squiggle_match.worker`` over fast5 reads and write a text report.

    Command-line options come from ``get_parser()``.  Results are written to
    the handle returned by ``helpers.open_file_or_stdout(args.output)``:
    for every read a ``#<read_id> <score>`` header line, followed by one
    tab-separated line per (signal value, path position) pair containing the
    read id, sample index, signal value, path position, the base at that
    position and the first three columns of the squiggle row at that position.
    """
    args = get_parser().parse_args()

    # Names of the parsed options forwarded to the worker as keyword arguments
    forwarded_options = ['back_prob', 'localpen', 'minscore', 'trim']

    model = helpers.load_model(args.model)

    reads = fast5utils.iterate_fast5_reads(
        args.read_dir,
        limit=args.limit,
        strand_list=args.input_strand_list,
        recursive=args.recursive)

    header_fmt = '#{} {}\n'
    row_fmt = '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'

    with helpers.open_file_or_stdout(args.output) as out:
        matches = imap_mp(
            squiggle_match.worker,
            reads,
            threads=args.jobs,
            fix_kwargs=helpers.get_kwargs(args, forwarded_options),
            unordered=True,
            init=squiggle_match.init_worker,
            initargs=[model, args.references])

        for match in matches:
            # The worker may return None for a read; nothing is written then
            if match is not None:
                read_id, sig, score, path, squiggle, raw_bases = match
                bases = raw_bases.decode('ascii')
                out.write(header_fmt.format(read_id, score))
                for idx, (sample, pos) in enumerate(zip(sig, path)):
                    out.write(row_fmt.format(
                        read_id, idx, sample, pos, bases[pos],
                        squiggle[pos, 0], squiggle[pos, 1], squiggle[pos, 2]))
Example #2
0
def main(argv):
    """Remap every selected read and hand the results to the output writer.

    Per-read work is done by ``prepare_mapping_funcs.oneread_remap`` and the
    output is produced by
    ``prepare_mapping_funcs.generate_output_from_results``.

    Note that ``argv`` is accepted but not used: options are read by
    ``parser.parse_args()`` with no arguments.

    Exits with status 1 when the output path already exists and
    ``args.overwrite`` is not set.
    """
    args = parser.parse_args()
    print("Running prepare_mapping using flip-flop remapping")

    # Never clobber an existing output unless explicitly allowed
    if not args.overwrite and os.path.exists(args.output):
        print("Cowardly refusing to overwrite {}".format(args.output))
        sys.exit(1)

    # Iterator over all the reads we are interested in
    reads = fast5utils.iterate_fast5_reads(
        args.input_folder, limit=args.limit,
        strand_list=args.input_strand_list)

    # Keyword arguments shared by every call of the per-read worker
    worker_kwargs = helpers.get_kwargs(
        args, ['alphabet', 'collapse_alphabet', 'device'])
    per_read_params = prepare_mapping_funcs.get_per_read_params_dict_from_tsv(
        args.input_per_read_params)
    worker_kwargs['per_read_params_dict'] = per_read_params
    worker_kwargs['references'] = helpers.fasta_file_to_dict(args.references)
    worker_kwargs['model'] = helpers.load_model(args.model)

    # The worker remaps a single read using the flip-flop network.
    # The result is an iterable of dicts, one dict of return values per read.
    remapped = imap_mp(
        prepare_mapping_funcs.oneread_remap, reads,
        threads=args.jobs, fix_kwargs=worker_kwargs, unordered=True)

    prepare_mapping_funcs.generate_output_from_results(remapped, args)
def main():
    """Remap each read with a modified-base alphabet and write the results.

    Options are read from the module-level ``parser``.  ``args.mod`` is
    indexed as ``elt[0]`` (modified base), ``elt[1]`` (canonical base it is
    coded as) and ``elt[2]`` (modification name).  These are validated against
    ``args.alphabet`` and used to build an ``alphabet.AlphabetInfo``; each
    read is then remapped by ``prepare_mapping_funcs.oneread_remap`` and the
    results are passed to
    ``prepare_mapping_funcs.generate_output_from_results``.

    Exits with status 1 when the output path already exists and
    ``args.overwrite`` is not set.

    Raises:
        ValueError: if a modified base or its canonical coding is not a
            single character, if a modified base is already in
            ``args.alphabet``, or if a canonical coding is not in
            ``args.alphabet``.  (Explicit raises are used instead of
            ``assert`` so the checks still run under ``python -O``.)
    """
    args = parser.parse_args()
    print("Running prepare_mapping using flip-flop remapping")

    if not args.overwrite and os.path.exists(args.output):
        print("Cowardly refusing to overwrite {}".format(args.output))
        sys.exit(1)

    # Create alphabet and check for consistency
    modified_bases = [elt[0] for elt in args.mod]
    canonical_bases = [elt[1] for elt in args.mod]
    for b in modified_bases:
        if len(b) != 1:
            raise ValueError(
                "Modified bases must be a single character, got {}".format(b))
        if b in args.alphabet:
            raise ValueError(
                "Modified base must not be a canonical base, got {}".format(b))
    for b in canonical_bases:
        if len(b) != 1:
            raise ValueError(
                "Canonical coding for modified bases must be a single "
                "character, got {}".format(b))
        if b not in args.alphabet:
            raise ValueError(
                "Canonical coding for modified base must be a canonical "
                "base, got {}".format(b))

    # full_alphabet appends the modified symbols themselves; flat_alphabet
    # appends, position for position, the canonical symbol each one codes for
    full_alphabet = args.alphabet + ''.join(modified_bases)
    flat_alphabet = args.alphabet + ''.join(canonical_bases)
    modification_names = [elt[2] for elt in args.mod]

    alphabet_info = alphabet.AlphabetInfo(full_alphabet,
                                          flat_alphabet,
                                          modification_names,
                                          do_reorder=True)

    print("Converting references to labels using {}".format(
        str(alphabet_info)))

    # Make an iterator that yields all the reads we're interested in.
    fast5_reads = fast5utils.iterate_fast5_reads(
        args.input_folder,
        limit=args.limit,
        strand_list=args.input_strand_list,
        recursive=args.recursive)

    # Set up arguments (kwargs) for the worker function for each read
    kwargs = helpers.get_kwargs(args, ['device'])
    kwargs['per_read_params_dict'] = (
        prepare_mapping_funcs.get_per_read_params_dict_from_tsv(
            args.input_per_read_params))
    kwargs['references'] = helpers.fasta_file_to_dict(args.references,
                                                      alphabet=full_alphabet)
    kwargs['model'] = helpers.load_model(args.model)
    kwargs['alphabet_info'] = alphabet_info

    # remaps a single read using flip-flop network
    workerFunction = prepare_mapping_funcs.oneread_remap

    results = imap_mp(workerFunction,
                      fast5_reads,
                      threads=args.jobs,
                      fix_kwargs=kwargs,
                      unordered=True,
                      chunksize=50)

    # results is an iterable of dicts
    # each dict is a set of return values from a single read
    prepare_mapping_funcs.generate_output_from_results(results, args.output,
                                                       alphabet_info)
Example #4
0
                    action=FileExists,
                    help='Directory for fast5 reads')

if __name__ == '__main__':
    # Script entry point: run squiggle_match.worker over the fast5 reads in
    # args.read_dir and print the results to stdout as tab-separated text.
    args = parser.parse_args()

    # Names of the parsed options forwarded to the worker as keyword arguments
    worker_kwarg_names = ['back_prob', 'localpen', 'minscore', 'trim']

    model = helpers.load_model(args.model)

    fast5_reads = fast5utils.iterate_fast5_reads(
        args.read_dir, limit=args.limit, strand_list=args.input_strand_list)

    for res in imap_mp(squiggle_match.worker,
                       fast5_reads,
                       threads=args.jobs,
                       fix_kwargs=helpers.get_kwargs(args, worker_kwarg_names),
                       unordered=True,
                       init=squiggle_match.init_worker,
                       initargs=[model, args.references]):
        # The worker may return None for a read; nothing is printed then
        if res is None:
            continue
        read_id, sig, score, path, squiggle, bases = res
        bases = bases.decode('ascii')
        print('#{} {}'.format(read_id, score))
        for i, (s, p) in enumerate(zip(sig, path)):
            # Eight placeholders for eight values.  The previous template had
            # only seven, and str.format() silently ignores surplus
            # positional arguments, so squiggle[p, 2] was never printed.
            print('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
                read_id, i, s, p, bases[p],
                squiggle[p, 0], squiggle[p, 1], squiggle[p, 2]))