def main():
    """Script entry point: load the model and reference, then call out to
    process all reads with the parsed command-line arguments."""
    args = get_parser().parse_args()
    # The guppy backend loader expects these attributes even though this
    # script does not expose them as options.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        sys.stderr.write(
            '***** WARNING ***** Guppy logs output directory exists. ' +
            'Potentially overwriting guppy logs.\n')

    sys.stderr.write('Loading model.\n')
    params = backends.parse_backend_params(args)
    with backends.ModelInfo(params, args.processes) as model_info:
        sys.stderr.write('Loading reference.\n')
        ref_aligner = mapping.alignerPlus(
            str(args.reference), preset=str('map-ont'), best_n=1)
        process_all_reads(
            args.fast5s_dir, args.num_reads, args.read_ids_filename,
            model_info, ref_aligner, args.processes, args.output,
            args.suppress_progress, args.compute_false_reference_scores)
def aligner_validation(args):
    """Validate alignment-related arguments and construct an aligner.

    Returns a configured ``mapping.alignerPlus`` when any requested output
    requires reference alignment; returns ``None`` otherwise (logging a
    warning if a reference was supplied but will go unused). Exits the
    process when a required reference is missing or invalid.
    """
    logger = logging.get_logger()
    align_outputs = mh.ALIGN_OUTPUTS.intersection(args.outputs)

    if len(align_outputs) == 0:
        # No alignment-dependent outputs requested; nothing to build.
        if args.reference is not None:
            logger.warning(
                '[--reference] provided, but no [--outputs] requiring ' +
                'alignment was requested. Argument will be ignored.')
        return None

    if args.reference is None:
        logger.error(
            ('Output(s) requiring reference alignment requested ({}), ' +
             'but --reference not provided.').format(
                 ', '.join(mh.ALIGN_OUTPUTS.intersection(args.outputs))))
        sys.exit(1)

    logger.info('Loading reference.')
    if not (os.path.exists(args.reference) and
            os.path.isfile(args.reference)):
        logger.error('Provided reference file does not exist or is ' +
                     'not a file.')
        sys.exit(1)

    aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)
    aligner.out_fmt = args.mappings_format
    aligner.ref_fn = mh.resolve_path(args.reference)
    aligner.add_ref_lens()
    # Fail fast if the mappings output file cannot be opened for writing.
    mapping.test_open_alignment_out_file(
        args.output_directory, aligner.out_fmt,
        aligner.ref_names_and_lens, aligner.ref_fn)
    return aligner
def _main(args):
    """Initialize logging, load the model and reference, then process all
    reads per the supplied arguments."""
    logging.init_logger()
    # The guppy backend loader expects these attributes even though this
    # script does not expose them as options.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        LOGGER.warning(
            'Guppy logs output directory exists. Potentially overwriting ' +
            'guppy logs.')

    LOGGER.info('Loading model.')
    params = backends.parse_backend_params(args)
    with backends.ModelInfo(params, args.processes) as model_info:
        LOGGER.info('Loading reference.')
        ref_aligner = mapping.alignerPlus(
            str(args.reference), preset=str('map-ont'), best_n=1)
        process_all_reads(
            args.fast5s_dir, not args.not_recursive, args.num_reads,
            args.read_ids_filename, model_info, ref_aligner,
            args.processes, args.output, args.suppress_progress,
            args.compute_false_reference_scores)
def _main(args):
    """Atomize variants from an input VCF against a reference.

    Loads the reference and input VCF, writes a new VCF containing one
    record per atomized variant (fetched per-contig over the full contig
    sequence), then indexes the output file.
    """
    logging.init_logger()
    LOGGER.info('Loading reference')
    aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)
    aligner.add_ref_lens()
    LOGGER.info('Loading variants')
    # keep_var_fp_open so per-contig fetches below can reuse the open index
    var_data = variants.VarData(
        args.in_vcf, args.max_indel_size, keep_var_fp_open=True,
        aligner=aligner)
    contigs = var_data.variants_idx.header.contigs.values()
    LOGGER.info('Atomizing variants')
    with open(args.out_vcf, 'w') as out_vars:
        # Emit the full VCF header first: fixed header lines, one contig
        # line per input contig, then MI/command/fields lines.
        out_vars.write('\n'.join(HEADER + [
            CONTIG_HEADER_LINE.format(ctg.name, ctg.length)
            for ctg in contigs] + [
                variants.CONTEXT_BASE_MI_LINE,
                COMMAND_HEADER_LINE.format(' '.join(sys.argv)),
                FIELDS_LINE]) + '\n')
        for ctg in contigs:
            chrm_seq = aligner.seq(ctg.name)
            if len(chrm_seq) != ctg.length:
                # NOTE(review): placeholders are filled as (name, ref_len,
                # vcf_len) although the wording mentions lengths first —
                # confirm the message reads as intended.
                LOGGER.warning((
                    'Mismatched contig lengths ({}) between ' +
                    'reference ({}) and input VCF ({})').format(
                        ctg.name, len(chrm_seq), ctg.length))
            # Fetch variants over the entire contig (start=0, end=len).
            map_pos = mapping.MAP_POS(
                chrm=ctg.name, strand=None, start=0, end=len(chrm_seq),
                q_trim_start=None, q_trim_end=None)
            for var in var_data.fetch_read_variants(
                    map_pos, mh.seq_to_int(chrm_seq)):
                # ref_start is 0-based; VCF POS is 1-based.
                out_vars.write(RECORD_LINE.format(
                    chrm=ctg.name, pos=var.ref_start + 1, rid=var.id,
                    ref=var.ref, alts=','.join(var.alts),
                    info=variants.HAS_CONTEXT_BASE_TAG
                    if var.has_context_base else '.'))
    LOGGER.info('Indexing output variant file')
    variants.index_variants(args.out_vcf)
def main():
    """Aggregate per-read statistics into final variant/modified-base
    outputs, then sort and index the output variant file when applicable.

    Bug fix: ``aligner`` was previously assigned only inside the
    ``args.reference is not None`` branch but dereferenced unconditionally
    when calling ``aggregate.aggregate_stats``, raising ``NameError`` when
    ``--reference`` was omitted. ``aligner`` is now initialized to ``None``
    and ``None`` is passed for the reference names/lengths in that case.
    The previously duplicated ``args.reference is not None`` guard is also
    merged into a single branch.
    """
    args = get_parser().parse_args()
    log_suffix = ('aggregation' if args.output_suffix is None
                  else 'aggregation.' + args.output_suffix)
    logging.init_logger(args.output_directory, out_suffix=log_suffix)
    logger = logging.get_logger()
    mod_agg_info = mods.AGG_INFO(
        mods.BIN_THRESH_NAME, args.mod_binary_threshold)
    mod_names = []
    if mh.MOD_NAME in args.outputs:
        logger.info('Loading model.')
        mod_names = backends.ModelInfo(mh.get_model_fn(
            args.taiyaki_model_filename)).mod_long_names
    aligner = None
    if args.reference is not None:
        logger.info('Loading reference.')
        aligner = mapping.alignerPlus(
            str(args.reference), preset=str('map-ont'), best_n=1)
        aligner.add_ref_lens()
    valid_read_ids = None
    if args.read_ids_filename is not None:
        with open(args.read_ids_filename) as read_ids_fp:
            valid_read_ids = set(line.strip() for line in read_ids_fp)
    aggregate.aggregate_stats(
        args.outputs, args.output_directory, args.processes,
        args.write_vcf_log_probs, args.heterozygous_factors,
        snps.HAPLIOD_MODE if args.haploid else snps.DIPLOID_MODE,
        mod_names, mod_agg_info, args.write_mod_log_probs,
        args.mod_output_formats, args.suppress_progress,
        None if aligner is None else aligner.ref_names_and_lens,
        valid_read_ids, args.output_suffix)
    # note reference is required in order to annotate contigs for VCF writing
    if mh.SNP_NAME in args.outputs and args.reference is not None:
        logger.info('Sorting output variant file')
        variant_fn = mh.add_fn_suffix(
            mh.get_megalodon_fn(args.output_directory, mh.SNP_NAME),
            args.output_suffix)
        sort_variant_fn = mh.add_fn_suffix(variant_fn, 'sorted')
        snps.sort_variants(variant_fn, sort_variant_fn)
        logger.info('Indexing output variant file')
        index_var_fn = snps.index_variants(sort_variant_fn)
    return
def main():
    """Script entry point: build the taiyaki model backend and reference
    aligner, then process all reads per the parsed arguments."""
    args = get_parser().parse_args()

    sys.stderr.write('Loading model.\n')
    model_info = backends.ModelInfo(
        args.taiyaki_model_filename, args.devices, args.processes,
        args.chunk_size, args.chunk_overlap, args.max_concurrent_chunks)

    sys.stderr.write('Loading reference.\n')
    ref_aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)

    process_all_reads(
        args.fast5s_dir, args.num_reads, args.read_ids_filename,
        model_info, ref_aligner, args.processes, args.output,
        args.suppress_progress, args.compute_false_reference_scores)
    return