def main():
    """Parse command line options, load the model and reference, then
    hand everything to ``process_all_reads``.

    A warning is written to stderr when creating the Guppy logs output
    directory raises ``mh.MegaError``; processing continues regardless.
    """
    args = get_parser().parse_args()
    # The backend loader reads these attributes, but they are not valid
    # options for this script, so set them by hand.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        sys.stderr.write(
            '***** WARNING ***** Guppy logs output directory exists. '
            'Potentially overwriting guppy logs.\n')

    sys.stderr.write('Loading model.\n')
    model_context = backends.ModelInfo(
        backends.parse_backend_params(args), args.processes)
    with model_context as model_info:
        sys.stderr.write('Loading reference.\n')
        aligner_kwargs = {'preset': str('map-ont'), 'best_n': 1}
        aligner = mapping.alignerPlus(str(args.reference), **aligner_kwargs)

        process_all_reads(
            args.fast5s_dir,
            args.num_reads,
            args.read_ids_filename,
            model_info,
            aligner,
            args.processes,
            args.output,
            args.suppress_progress,
            args.compute_false_reference_scores)
예제 #2
0
def aligner_validation(args):
    """Create the reference aligner if any requested output needs one.

    Args:
        args: Parsed command line arguments. Reads ``outputs``,
            ``reference``, ``mappings_format`` and ``output_directory``.

    Returns:
        A ``mapping.alignerPlus`` instance with ``out_fmt`` and ``ref_fn``
        set and reference lengths added, or ``None`` when none of the
        requested outputs is in ``mh.ALIGN_OUTPUTS``.

    Exits the process with status 1 when alignment is required but no
    reference was given, or the given reference is not an existing file.
    """
    logger = logging.get_logger()
    align_outputs = mh.ALIGN_OUTPUTS.intersection(args.outputs)

    # Nothing requested needs alignment: only warn about an unused reference.
    if not align_outputs:
        if args.reference is not None:
            logger.warning(
                '[--reference] provided, but no [--outputs] requiring '
                'alignment was requested. Argument will be ignored.')
        return None

    if args.reference is None:
        logger.error(
            ('Output(s) requiring reference alignment requested ({}), '
             'but --reference not provided.').format(
                 ', '.join(align_outputs)))
        sys.exit(1)

    logger.info('Loading reference.')
    # isfile() is already False for a path that does not exist.
    if not os.path.isfile(args.reference):
        logger.error('Provided reference file does not exist or is '
                     'not a file.')
        sys.exit(1)

    aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)
    aligner.out_fmt = args.mappings_format
    aligner.ref_fn = mh.resolve_path(args.reference)
    aligner.add_ref_lens()
    # Check the alignment output file can be opened before any reads
    # are processed.
    mapping.test_open_alignment_out_file(
        args.output_directory,
        aligner.out_fmt,
        aligner.ref_names_and_lens,
        aligner.ref_fn)
    return aligner
def _main(args):
    """Load the model and reference, then hand everything to
    ``process_all_reads``.

    Args:
        args: Parsed command line arguments for this script.

    A warning is logged when creating the Guppy logs output directory
    raises ``mh.MegaError``; processing continues regardless.
    """
    logging.init_logger()
    # The backend loader reads these attributes, but they are not valid
    # options for this script, so set them by hand.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        LOGGER.warning(
            'Guppy logs output directory exists. Potentially overwriting '
            'guppy logs.')

    LOGGER.info('Loading model.')
    model_context = backends.ModelInfo(
        backends.parse_backend_params(args), args.processes)
    with model_context as model_info:
        LOGGER.info('Loading reference.')
        aligner_kwargs = {'preset': str('map-ont'), 'best_n': 1}
        aligner = mapping.alignerPlus(str(args.reference), **aligner_kwargs)

        # The option is stored negated; process_all_reads takes the
        # positive "recursive" flag.
        recursive = not args.not_recursive
        process_all_reads(
            args.fast5s_dir,
            recursive,
            args.num_reads,
            args.read_ids_filename,
            model_info,
            aligner,
            args.processes,
            args.output,
            args.suppress_progress,
            args.compute_false_reference_scores)
예제 #4
0
def _main(args):
    """Write the variants from ``args.in_vcf`` to ``args.out_vcf`` and
    index the result.

    Args:
        args: Parsed command line arguments. Reads ``reference``,
            ``in_vcf``, ``max_indel_size`` and ``out_vcf``.

    The output holds a header (including one line per contig of the input
    VCF header) followed by one record for every variant yielded by
    ``var_data.fetch_read_variants`` over each full contig.
    """
    logging.init_logger()
    LOGGER.info('Loading reference')
    aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)
    aligner.add_ref_lens()

    LOGGER.info('Loading variants')
    var_data = variants.VarData(
        args.in_vcf,
        args.max_indel_size,
        keep_var_fp_open=True,
        aligner=aligner)
    contigs = var_data.variants_idx.header.contigs.values()

    LOGGER.info('Atomizing variants')
    with open(args.out_vcf, 'w') as out_vars:
        contig_lines = [
            CONTIG_HEADER_LINE.format(ctg.name, ctg.length)
            for ctg in contigs]
        trailing_lines = [
            variants.CONTEXT_BASE_MI_LINE,
            COMMAND_HEADER_LINE.format(' '.join(sys.argv)),
            FIELDS_LINE]
        header_lines = HEADER + contig_lines + trailing_lines
        out_vars.write('\n'.join(header_lines) + '\n')

        for ctg in contigs:
            chrm_seq = aligner.seq(ctg.name)
            ref_len = len(chrm_seq)
            if ref_len != ctg.length:
                LOGGER.warning(
                    ('Mismatched contig lengths ({}) between '
                     'reference ({}) and input VCF ({})').format(
                         ctg.name, ref_len, ctg.length))
            # Span the whole reference contig so every variant on it is
            # fetched.
            map_pos = mapping.MAP_POS(
                chrm=ctg.name,
                strand=None,
                start=0,
                end=ref_len,
                q_trim_start=None,
                q_trim_end=None)
            contig_vars = var_data.fetch_read_variants(
                map_pos, mh.seq_to_int(chrm_seq))
            for var in contig_vars:
                if var.has_context_base:
                    info_field = variants.HAS_CONTEXT_BASE_TAG
                else:
                    info_field = '.'
                record = RECORD_LINE.format(
                    chrm=ctg.name,
                    # ref_start is shifted by one for the written position.
                    pos=var.ref_start + 1,
                    rid=var.id,
                    ref=var.ref,
                    alts=','.join(var.alts),
                    info=info_field)
                out_vars.write(record)

    LOGGER.info('Indexing output variant file')
    variants.index_variants(args.out_vcf)
예제 #5
0
def main():
    """Parse command line options and run ``aggregate.aggregate_stats``.

    When variant output was requested and a reference was supplied, the
    resulting variant file is also sorted and indexed.
    """
    args = get_parser().parse_args()
    if args.output_suffix is None:
        log_suffix = 'aggregation'
    else:
        log_suffix = 'aggregation.' + args.output_suffix
    logging.init_logger(args.output_directory, out_suffix=log_suffix)
    logger = logging.get_logger()

    mod_agg_info = mods.AGG_INFO(
        mods.BIN_THRESH_NAME, args.mod_binary_threshold)
    mod_names = []
    if mh.MOD_NAME in args.outputs:
        logger.info('Loading model.')
        model_fn = mh.get_model_fn(args.taiyaki_model_filename)
        mod_names = backends.ModelInfo(model_fn).mod_long_names

    has_reference = args.reference is not None
    if has_reference:
        logger.info('Loading reference.')
    # The aligner is built even without a reference because
    # ``aligner.ref_names_and_lens`` is read unconditionally below.
    aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)
    if has_reference:
        aligner.add_ref_lens()

    valid_read_ids = None
    if args.read_ids_filename is not None:
        with open(args.read_ids_filename) as read_ids_fp:
            valid_read_ids = {line.strip() for line in read_ids_fp}

    ploidy_mode = snps.HAPLIOD_MODE if args.haploid else snps.DIPLOID_MODE
    aggregate.aggregate_stats(
        args.outputs,
        args.output_directory,
        args.processes,
        args.write_vcf_log_probs,
        args.heterozygous_factors,
        ploidy_mode,
        mod_names,
        mod_agg_info,
        args.write_mod_log_probs,
        args.mod_output_formats,
        args.suppress_progress,
        aligner.ref_names_and_lens,
        valid_read_ids,
        args.output_suffix)

    # note reference is required in order to annotate contigs for VCF writing
    if mh.SNP_NAME in args.outputs and has_reference:
        logger.info('Sorting output variant file')
        variant_fn = mh.add_fn_suffix(
            mh.get_megalodon_fn(args.output_directory, mh.SNP_NAME),
            args.output_suffix)
        sort_variant_fn = mh.add_fn_suffix(variant_fn, 'sorted')
        snps.sort_variants(variant_fn, sort_variant_fn)
        logger.info('Indexing output variant file')
        snps.index_variants(sort_variant_fn)
def main():
    """Parse command line options, load the model and reference, then
    hand everything to ``process_all_reads``.
    """
    args = get_parser().parse_args()

    sys.stderr.write('Loading model.\n')
    model_info = backends.ModelInfo(
        args.taiyaki_model_filename,
        args.devices,
        args.processes,
        args.chunk_size,
        args.chunk_overlap,
        args.max_concurrent_chunks)

    sys.stderr.write('Loading reference.\n')
    aligner_kwargs = {'preset': str('map-ont'), 'best_n': 1}
    aligner = mapping.alignerPlus(str(args.reference), **aligner_kwargs)

    process_all_reads(
        args.fast5s_dir,
        args.num_reads,
        args.read_ids_filename,
        model_info,
        aligner,
        args.processes,
        args.output,
        args.suppress_progress,
        args.compute_false_reference_scores)