def open_pyguppy_backend(args):
    """Open a pyguppy basecalling backend and log basic model information.

    Args:
        args: argparse.Namespace with at least ``output_directory`` and
            ``processes`` attributes. ``do_not_use_guppy_server`` is forced
            to ``False`` here so the guppy server backend is used.

    Note:
        This demonstrates the open/prep/disconnect/close lifecycle; use
        ``model_info.iter_basecalled_reads`` between prep and disconnect to
        obtain basecalls.
    """
    args.do_not_use_guppy_server = False
    # Guppy writes its server logs into output_directory; creating it up
    # front. Failure just means the directory already exists.
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        LOGGER.warning(
            "Guppy logs output directory exists. Potentially overwriting guppy "
            "logs."
        )
    backend_params = backends.parse_backend_params(args)
    model_info = None
    try:
        model_info = backends.ModelInfo(backend_params, args.processes)
        # if spawning multiple workers run this inside newly spawned processes
        model_info.prep_model_worker()
        LOGGER.info(model_info.get_alphabet_str())
        LOGGER.info(
            "Model structure:\n\tStride: {}\n\tState size: {}".format(
                model_info.stride, model_info.output_size
            )
        )
        # use model_info.iter_basecalled_reads to basecall reads and return
        # relevant signal anchored information.
        model_info.client.disconnect()
    finally:
        # ensure guppy server is closed in finally block
        if model_info is not None:
            model_info.close()
def _main(args):
    """Basecall reads via a guppy backend and score them against a reference.

    Args:
        args: argparse.Namespace from this script's parser; guppy-loading
            attributes not exposed as CLI options are injected here.

    Fix: the original emitted ``LOGGER.warning`` *before*
    ``logging.init_logger`` configured the logging system, so the
    directory-exists warning was never routed through the configured
    handlers. The mkdir attempt still happens first (init_logger writes into
    that directory), but the warning is deferred until after logger init.
    """
    logs_dir_existed = False
    try:
        mh.mkdir(args.guppy_logs_output_directory, False)
    except mh.MegaError:
        logs_dir_existed = True
    logging.init_logger(args.guppy_logs_output_directory)
    if logs_dir_existed:
        LOGGER.warning(
            "Guppy logs output directory exists. Potentially overwriting "
            "guppy logs.")

    # add required attributes for loading guppy, but not valid options for
    # this script.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    args.outputs = [mh.PR_VAR_NAME]

    LOGGER.info("Loading model.")
    backend_params = backends.parse_backend_params(args)
    with backends.ModelInfo(backend_params, args.processes) as model_info:
        LOGGER.info("Loading reference.")
        aligner = mappy.Aligner(
            str(args.reference), preset=str("map-ont"), best_n=1)
        process_all_reads(
            args.fast5s_dir,
            not args.not_recursive,
            args.num_reads,
            args.read_ids_filename,
            model_info,
            aligner,
            args.processes,
            args.output,
            args.suppress_progress,
            args.compute_false_reference_scores,
        )
def main():
    """CLI entry point: basecall reads and score against a reference.

    Progress and warnings are written directly to stderr (this variant
    predates the module-level logger).
    """
    args = get_parser().parse_args()

    # These attributes are required by the guppy backend loader but are not
    # exposed as options of this script.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory

    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        sys.stderr.write(
            '***** WARNING ***** Guppy logs output directory exists. '
            'Potentially overwriting guppy logs.\n')

    sys.stderr.write('Loading model.\n')
    model_params = backends.parse_backend_params(args)
    with backends.ModelInfo(model_params, args.processes) as model_info:
        sys.stderr.write('Loading reference.\n')
        ref_aligner = mapping.alignerPlus(
            str(args.reference), preset=str('map-ont'), best_n=1)
        process_all_reads(
            args.fast5s_dir, args.num_reads, args.read_ids_filename,
            model_info, ref_aligner, args.processes, args.output,
            args.suppress_progress, args.compute_false_reference_scores)
def _main(args): logging.init_logger(log_fn=args.log_filename, quiet=args.quiet) # add required attributes for loading guppy, but not valid options for # this script. args.do_not_use_guppy_server = False args.output_directory = args.guppy_logs_output_directory try: mh.mkdir(args.output_directory, False) except mh.MegaError: LOGGER.warning( "Guppy logs output directory exists. Potentially overwriting " + "guppy logs." ) args = add_trim_guppy_none(args) args.outputs = [mh.PR_MOD_NAME] # make edge_buffer >= context_bases to simplify processing if args.edge_buffer < args.mod_context_bases: LOGGER.warning( "[--edge-buffer] less than [--mod-context-bases]. Setting " + "[--edge-buffer] to value from [--mod-context-bases]" ) args.edge_buffer = args.mod_context_bases LOGGER.info("Loading model.") backend_params = backends.parse_backend_params(args) with backends.ModelInfo(backend_params, args.processes) as model_info: check_map_sig_alphabet(model_info, args.mapped_signal_file) motifs = parse_motifs(args.motif, model_info, args.modified_bases_set) can_labs, mod_labs = extract_label_conversions(model_info) can_post_indices = model_info.can_indices.astype(np.uintp) all_mod_llrs, all_can_llrs = compute_diff_scores( args.mapped_signal_file, model_info, args.mod_context_bases, args.edge_buffer, args.num_reads, motifs, can_labs, mod_labs, can_post_indices, ) mod_summary = [ ( mod, len(all_mod_llrs[mod]) if mod in all_mod_llrs else 0, len(all_can_llrs[mod]) if mod in all_can_llrs else 0, ) for mod in set(all_mod_llrs).union(all_can_llrs) ] LOGGER.info( "Data summary:\n\tmod\tmod_N\tcan_N\n" + "\n".join("\t" + "\t".join(map(str, x)) for x in mod_summary) ) output_mods_data(all_mod_llrs, all_can_llrs, args.out_filename)
def main():
    """Aggregate per-read variant/modified-base statistics into final outputs.

    Fixes over the original:
      - ``aligner`` was referenced unconditionally below even though it was
        only assigned when ``--reference`` was given, raising ``NameError``
        otherwise; it is now initialized to ``None`` and guarded.
      - removed a redundant nested ``args.reference is not None`` re-check.
      - dropped the unused ``index_var_fn`` binding and trailing bare return.
    """
    args = get_parser().parse_args()

    log_suffix = ('aggregation' if args.output_suffix is None
                  else 'aggregation.' + args.output_suffix)
    logging.init_logger(args.output_directory, out_suffix=log_suffix)
    logger = logging.get_logger()

    mod_agg_info = mods.AGG_INFO(
        mods.BIN_THRESH_NAME, args.mod_binary_threshold)
    mod_names = []
    if mh.MOD_NAME in args.outputs:
        logger.info('Loading model.')
        mod_names = backends.ModelInfo(
            mh.get_model_fn(args.taiyaki_model_filename)).mod_long_names

    # the aligner is only available when a reference was provided
    aligner = None
    if args.reference is not None:
        logger.info('Loading reference.')
        aligner = mapping.alignerPlus(
            str(args.reference), preset=str('map-ont'), best_n=1)
        aligner.add_ref_lens()

    valid_read_ids = None
    if args.read_ids_filename is not None:
        with open(args.read_ids_filename) as read_ids_fp:
            valid_read_ids = set(line.strip() for line in read_ids_fp)

    aggregate.aggregate_stats(
        args.outputs, args.output_directory, args.processes,
        args.write_vcf_log_probs, args.heterozygous_factors,
        snps.HAPLIOD_MODE if args.haploid else snps.DIPLOID_MODE,
        mod_names, mod_agg_info, args.write_mod_log_probs,
        args.mod_output_formats, args.suppress_progress,
        aligner.ref_names_and_lens if aligner is not None else None,
        valid_read_ids, args.output_suffix)

    # note reference is required in order to annotate contigs for VCF writing
    if mh.SNP_NAME in args.outputs and args.reference is not None:
        logger.info('Sorting output variant file')
        variant_fn = mh.add_fn_suffix(
            mh.get_megalodon_fn(args.output_directory, mh.SNP_NAME),
            args.output_suffix)
        sort_variant_fn = mh.add_fn_suffix(variant_fn, 'sorted')
        snps.sort_variants(variant_fn, sort_variant_fn)
        logger.info('Indexing output variant file')
        snps.index_variants(sort_variant_fn)
def main():
    """CLI entry point: basecall reads with a taiyaki model and score them
    against the supplied reference."""
    args = get_parser().parse_args()

    sys.stderr.write('Loading model.\n')
    model = backends.ModelInfo(
        args.taiyaki_model_filename, args.devices, args.processes,
        args.chunk_size, args.chunk_overlap, args.max_concurrent_chunks)

    sys.stderr.write('Loading reference.\n')
    ref_aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)

    process_all_reads(
        args.fast5s_dir, args.num_reads, args.read_ids_filename, model,
        ref_aligner, args.processes, args.output, args.suppress_progress,
        args.compute_false_reference_scores)
def _main(args): logging.init_logger() # set args that are not relevant to alphabet args.devices = None # set guppy args args.guppy_server_port = None args.guppy_timeout = mh.DEFAULT_GUPPY_TIMEOUT args.output_directory = args.guppy_logs_output_directory # set taiyaki args args.chunk_size = 1000 args.chunk_overlap = 100 args.max_concurrent_chunks = 200 try: mh.mkdir(args.output_directory, False) except mh.MegaError: LOGGER.warning( 'Guppy logs output directory exists. Potentially overwriting ' + 'guppy logs.') backend_params = backends.parse_backend_params(args) with backends.ModelInfo(backend_params, 1) as model_info: if model_info.is_cat_mod: can_bs = [ can_b for mod_b, _ in model_info.mod_long_names for can_b, can_mod_bs in model_info.can_base_mods.items() if mod_b in can_mod_bs ] LOGGER.info( ('Model contains canonical alphabet {} and modified ' + 'bases {}.').format( model_info.can_alphabet, '; '.join('{}={} (alt to {})'.format(mod_b, mln, can_b) for (mod_b, mln), can_b in zip( model_info.mod_long_names, can_bs)))) else: LOGGER.info('Model contains canonical alphabet {}.'.format( model_info.can_alphabet))
def _main(args):
    """Load a model via the guppy/taiyaki backend and print its alphabet.

    Args:
        args: argparse.Namespace; backend attributes that are not options of
            this script are injected with defaults below.

    Fixes over the original:
      - ``mh.mkdir(args.output_directory, …)`` was called *before*
        ``args.output_directory`` was assigned from
        ``args.guppy_logs_output_directory`` (AttributeError risk; the
        sibling alphabet script assigns first).
      - the directory-exists warning was emitted before
        ``logging.init_logger`` configured logging, so it was lost.
    """
    logging.init_logger(args.log_directory)
    # set args that are not relevant to alphabet
    args.devices = None
    # set guppy args
    args.guppy_server_port = None
    args.guppy_timeout = mh.DEFAULT_GUPPY_TIMEOUT
    args.output_directory = args.guppy_logs_output_directory
    # set taiyaki args
    args.chunk_size = 1000
    args.chunk_overlap = 100
    args.max_concurrent_chunks = 200
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        LOGGER.warning(
            'Guppy logs output directory exists. Potentially overwriting '
            'guppy logs.')
    backend_params = backends.parse_backend_params(args)
    with backends.ModelInfo(backend_params, 1) as model_info:
        LOGGER.info(model_info.get_alphabet_str())
def _main():
    """Megalodon main entry point: basecall/map reads, call variants and
    modified bases, then post-process (sort/index/aggregate) the outputs.

    Fix: the 'Indexing output variant file' log message string was broken
    across a physical line wrap in the original (a syntax error as written);
    it is rejoined here. All other logic is unchanged.
    """
    args = get_parser().parse_args()

    mkdir(args.output_directory, args.overwrite)
    logging.init_logger(args.output_directory)
    logger = logging.get_logger()
    logger.debug('Command: """' + ' '.join(sys.argv) + '"""')

    if _DO_PROFILE:
        args = profile_validation(args)

    args, pr_ref_filts = parse_pr_ref_output(args)
    tai_model_fn = mh.get_model_fn(args.taiyaki_model_filename)
    model_info = backends.ModelInfo(
        tai_model_fn, args.devices, args.processes, args.chunk_size,
        args.chunk_overlap, args.max_concurrent_chunks)
    args, mods_info = mods_validation(args, model_info)
    aligner = aligner_validation(args)
    args, snps_data = snps_validation(
        args, model_info.is_cat_mod, model_info.output_size, aligner)

    process_all_reads(
        args.fast5s_dir, not args.not_recursive, args.num_reads,
        args.read_ids_filename, model_info, args.outputs,
        args.output_directory, args.basecalls_format, aligner, snps_data,
        args.processes, args.verbose_read_progress, args.suppress_progress,
        mods_info, args.database_safety, args.edge_buffer, pr_ref_filts)

    if mh.MAP_NAME in args.outputs:
        logger.info('Spawning process to sort mappings')
        map_p = post_process_mapping(
            args.output_directory, aligner.out_fmt, aligner.ref_fn)

    if mh.WHATSHAP_MAP_NAME in args.outputs:
        logger.info('Spawning process to sort whatshap mappings')
        whatshap_sort_fn, whatshap_p = post_process_whatshap(
            args.output_directory, aligner.out_fmt, aligner.ref_fn)

    if mh.SNP_NAME in args.outputs or mh.MOD_NAME in args.outputs:
        post_process_aggregate(
            mods_info, args.outputs, args.mod_binary_threshold,
            args.output_directory, args.processes, args.write_vcf_log_probs,
            args.heterozygous_factors, snps_data, args.write_mod_log_probs,
            args.suppress_progress, aligner.ref_names_and_lens)

    if mh.SNP_NAME in args.outputs:
        logger.info('Sorting output variant file')
        variant_fn = mh.get_megalodon_fn(args.output_directory, mh.SNP_NAME)
        sort_variant_fn = mh.add_fn_suffix(variant_fn, 'sorted')
        snps.sort_variants(variant_fn, sort_variant_fn)
        logger.info('Indexing output variant file')
        index_variant_fn = snps.index_variants(sort_variant_fn)

    if mh.WHATSHAP_MAP_NAME in args.outputs:
        # NOTE(review): variant_fn/index_variant_fn are bound only in the
        # SNP branch above; requesting whatshap output without SNP output
        # would raise NameError here — confirm SNP output is enforced
        # upstream when whatshap mappings are requested.
        if whatshap_p.is_alive():
            logger.info('Waiting for whatshap mappings sort')
            while whatshap_p.is_alive():
                sleep(0.1)
        logger.info(
            snps.get_whatshap_command(
                index_variant_fn, whatshap_sort_fn,
                mh.add_fn_suffix(variant_fn, 'phased')))

    if mh.MAP_NAME in args.outputs:
        if map_p.is_alive():
            logger.info('Waiting for mappings sort')
            while map_p.is_alive():
                sleep(0.1)

    return