def main():
    """Match each fast5 read against its reference squiggle and write one
    row per signal sample to the output file (or stdout)."""
    args = get_parser().parse_args()
    worker_kwarg_names = ['back_prob', 'localpen', 'minscore', 'trim']

    model = helpers.load_model(args.model)

    # Iterator over all the reads we're interested in
    fast5_reads = fast5utils.iterate_fast5_reads(
        args.read_dir, limit=args.limit,
        strand_list=args.input_strand_list, recursive=args.recursive)

    with helpers.open_file_or_stdout(args.output) as fh:
        for res in imap_mp(
                squiggle_match.worker, fast5_reads, threads=args.jobs,
                fix_kwargs=helpers.get_kwargs(args, worker_kwarg_names),
                unordered=True, init=squiggle_match.init_worker,
                initargs=[model, args.references]):
            if res is None:
                continue
            read_id, sig, score, path, squiggle, bases = res
            bases = bases.decode('ascii')
            # Per-read header line, then one row per signal sample
            fh.write('#{} {}\n'.format(read_id, score))
            for i, (s, p) in enumerate(zip(sig, path)):
                fh.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                    read_id, i, s, p, bases[p], squiggle[p, 0],
                    squiggle[p, 1], squiggle[p, 2]))
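# Hedged illustration, not part of the script above: the rows written by
# main() form a simple TSV that can be read back for inspection. The column
# names and the filename passed in are assumptions inferred from the write
# format (read id, sample index, raw signal value, path position, base, and
# three squiggle parameters); squiggle[p, 0..2] are left as generic columns.
def read_squiggle_match_tsv(path):
    import pandas as pd
    columns = ['read_id', 'sample', 'signal', 'pos', 'base',
               'sq_param0', 'sq_param1', 'sq_param2']
    # '#<read_id> <score>' header lines are skipped via comment='#'
    return pd.read_csv(path, sep='\t', comment='#',
                       header=None, names=columns)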
def main():
    """Main function to process mapping for each read using functions in
    prepare_mapping_funcs"""
    args = parser.parse_args()
    print("Running prepare_mapping using flip-flop remapping")

    if not args.overwrite:
        if os.path.exists(args.output):
            print("Cowardly refusing to overwrite {}".format(args.output))
            sys.exit(1)

    # Make an iterator that yields all the reads we're interested in.
    fast5_reads = fast5utils.iterate_fast5_reads(
        args.input_folder, limit=args.limit,
        strand_list=args.input_strand_list)

    # Set up arguments (kwargs) for the worker function for each read
    kwargs = helpers.get_kwargs(
        args, ['alphabet', 'collapse_alphabet', 'device'])
    kwargs['per_read_params_dict'] = \
        prepare_mapping_funcs.get_per_read_params_dict_from_tsv(
            args.input_per_read_params)
    kwargs['references'] = helpers.fasta_file_to_dict(args.references)
    kwargs['model'] = helpers.load_model(args.model)

    # Remaps a single read using the flip-flop network
    workerFunction = prepare_mapping_funcs.oneread_remap

    results = imap_mp(workerFunction, fast5_reads, threads=args.jobs,
                      fix_kwargs=kwargs, unordered=True)

    # results is an iterable of dicts;
    # each dict is a set of return values from a single read
    prepare_mapping_funcs.generate_output_from_results(results, args)
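# Hedged note on the imap_mp call above (an assumption about its behaviour,
# not the library's documented contract): with a single worker it acts
# roughly like the serial generator below, applying the fixed kwargs to each
# read tuple yielded by fast5_reads; with threads > 1 the same calls are
# spread over worker processes and results may arrive out of order.
#
#   results = (prepare_mapping_funcs.oneread_remap(read, **kwargs)
#              for read in fast5_reads)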
def main():
    """Main function to process mapping for each read using functions in
    prepare_mapping_funcs"""
    args = parser.parse_args()
    print("Running prepare_mapping using flip-flop remapping")

    if not args.overwrite:
        if os.path.exists(args.output):
            print("Cowardly refusing to overwrite {}".format(args.output))
            sys.exit(1)

    # Create alphabet and check for consistency
    modified_bases = [elt[0] for elt in args.mod]
    canonical_bases = [elt[1] for elt in args.mod]
    for b in modified_bases:
        assert len(b) == 1, (
            "Modified bases must be a single character, got {}".format(b))
        assert b not in args.alphabet, (
            "Modified base must not be a canonical base, got {}".format(b))
    for b in canonical_bases:
        assert len(b) == 1, (
            "Canonical coding for modified bases must be a single "
            "character, got {}".format(b))
        assert b in args.alphabet, (
            "Canonical coding for modified base must be a canonical base, "
            "got {}".format(b))
    full_alphabet = args.alphabet + ''.join(modified_bases)
    flat_alphabet = args.alphabet + ''.join(canonical_bases)
    modification_names = [elt[2] for elt in args.mod]

    alphabet_info = alphabet.AlphabetInfo(full_alphabet, flat_alphabet,
                                          modification_names,
                                          do_reorder=True)

    print("Converting references to labels using {}".format(
        str(alphabet_info)))

    # Make an iterator that yields all the reads we're interested in.
    fast5_reads = fast5utils.iterate_fast5_reads(
        args.input_folder, limit=args.limit,
        strand_list=args.input_strand_list, recursive=args.recursive)

    # Set up arguments (kwargs) for the worker function for each read
    kwargs = helpers.get_kwargs(args, ['device'])
    kwargs['per_read_params_dict'] = \
        prepare_mapping_funcs.get_per_read_params_dict_from_tsv(
            args.input_per_read_params)
    kwargs['references'] = helpers.fasta_file_to_dict(
        args.references, alphabet=full_alphabet)
    kwargs['model'] = helpers.load_model(args.model)
    kwargs['alphabet_info'] = alphabet_info

    # Remaps a single read using the flip-flop network
    workerFunction = prepare_mapping_funcs.oneread_remap

    results = imap_mp(workerFunction, fast5_reads, threads=args.jobs,
                      fix_kwargs=kwargs, unordered=True, chunksize=50)

    # results is an iterable of dicts;
    # each dict is a set of return values from a single read
    prepare_mapping_funcs.generate_output_from_results(results, args.output,
                                                       alphabet_info)
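# Hedged illustration of the --mod handling above: each entry of args.mod is
# consumed as a (modified base, canonical base, name) triple. The letters and
# modification names below are made-up examples, not values the script
# requires.
def example_mod_alphabets():
    mods = [('Z', 'C', '5mC'), ('Y', 'A', '6mA')]  # hypothetical --mod entries
    canonical = 'ACGT'
    full_alphabet = canonical + ''.join(m[0] for m in mods)  # 'ACGTZY'
    flat_alphabet = canonical + ''.join(m[1] for m in mods)  # 'ACGTCA'
    modification_names = [m[2] for m in mods]  # ['5mC', '6mA']
    return full_alphabet, flat_alphabet, modification_names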
parser.add_argument('read_dir', action=FileExists,
                    help='Directory for fast5 reads')


if __name__ == '__main__':
    args = parser.parse_args()
    worker_kwarg_names = ['back_prob', 'localpen', 'minscore', 'trim']

    model = helpers.load_model(args.model)

    # Iterator over all the reads we're interested in
    fast5_reads = fast5utils.iterate_fast5_reads(
        args.read_dir, limit=args.limit,
        strand_list=args.input_strand_list)

    for res in imap_mp(squiggle_match.worker, fast5_reads,
                       threads=args.jobs,
                       fix_kwargs=helpers.get_kwargs(args, worker_kwarg_names),
                       unordered=True, init=squiggle_match.init_worker,
                       initargs=[model, args.references]):
        if res is None:
            continue
        read_id, sig, score, path, squiggle, bases = res
        bases = bases.decode('ascii')
        # Per-read header line, then one row per signal sample
        print('#{} {}'.format(read_id, score))
        for i, (s, p) in enumerate(zip(sig, path)):
            print('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
                read_id, i, s, p, bases[p], squiggle[p, 0],
                squiggle[p, 1], squiggle[p, 2]))