Ejemplo n.º 1
0
def _main(args):
    """Aggregate per-read statistics and post-process the variant output.

    Initialises logging inside ``args.megalodon_directory``, builds the
    modified-base aggregation settings selected by
    ``args.mod_aggregate_method``, runs ``aggregate.aggregate_stats`` and,
    when ``mh.VAR_NAME`` is among ``args.outputs``, sorts and indexes the
    variant file.

    Args:
        args: Parsed command line arguments. Attributes read here include
            ``output_suffix``, ``megalodon_directory``,
            ``mod_aggregate_method``, ``mod_binary_threshold``,
            ``read_ids_filename``, ``outputs`` and the aggregation options
            forwarded to ``aggregate.aggregate_stats``.

    Raises:
        ValueError: If ``args.mod_aggregate_method`` is neither
            ``mh.MOD_EM_NAME`` nor ``mh.MOD_BIN_THRESH_NAME``.
    """
    log_suffix = ('aggregation' if args.output_suffix is None else
                  'aggregation.' + args.output_suffix)
    logging.init_logger(args.megalodon_directory, out_suffix=log_suffix)
    LOGGER.debug('Command: """' + ' '.join(sys.argv) + '"""')

    if args.mod_aggregate_method == mh.MOD_EM_NAME:
        mod_agg_info = mods.AGG_INFO(mh.MOD_EM_NAME, None)
    elif args.mod_aggregate_method == mh.MOD_BIN_THRESH_NAME:
        mod_agg_info = mods.AGG_INFO(
            mh.MOD_BIN_THRESH_NAME, args.mod_binary_threshold)
    else:
        # Without this branch ``mod_agg_info`` would be left unbound and the
        # aggregate_stats call below would fail with an UnboundLocalError
        # that does not mention the offending option.
        raise ValueError(
            'Unsupported modified base aggregation method: {}'.format(
                args.mod_aggregate_method))
    valid_read_ids = mh.parse_read_ids(args.read_ids_filename)
    aggregate.aggregate_stats(
        args.outputs, args.megalodon_directory, args.processes,
        args.write_vcf_log_probs, args.heterozygous_factors,
        variants.HAPLIOD_MODE if args.haploid else variants.DIPLOID_MODE,
        mod_agg_info, args.write_mod_log_probs, args.mod_output_formats,
        args.suppress_progress, valid_read_ids, args.output_suffix,
        args.aggregate_batch_size)

    if mh.VAR_NAME in args.outputs:
        LOGGER.info('Sorting output variant file')
        variant_fn = mh.add_fn_suffix(
            mh.get_megalodon_fn(args.megalodon_directory, mh.VAR_NAME),
            args.output_suffix)
        # The sorted copy is written next to the unsorted file and is the
        # one that gets indexed.
        sort_variant_fn = mh.add_fn_suffix(variant_fn, 'sorted')
        variants.sort_variants(variant_fn, sort_variant_fn)
        LOGGER.info('Indexing output variant file')
        variants.index_variants(sort_variant_fn)
Ejemplo n.º 2
0
def main():
    """Command line entry point: aggregate per-read statistics.

    Parses the command line, sets up logging in the output directory,
    optionally loads modified base names from the model and reference
    lengths into the aligner, runs ``aggregate.aggregate_stats`` and then
    sorts and indexes the variant file when SNP output was requested and a
    reference was supplied.
    """
    args = get_parser().parse_args()
    if args.output_suffix is None:
        log_suffix = 'aggregation'
    else:
        log_suffix = 'aggregation.' + args.output_suffix
    logging.init_logger(args.output_directory, out_suffix=log_suffix)
    logger = logging.get_logger()
    have_reference = args.reference is not None

    mod_agg_info = mods.AGG_INFO(
        mods.BIN_THRESH_NAME, args.mod_binary_threshold)

    # Modified base names are only looked up when mod output is requested.
    if mh.MOD_NAME in args.outputs:
        logger.info('Loading model.')
        model_fn = mh.get_model_fn(args.taiyaki_model_filename)
        mod_names = backends.ModelInfo(model_fn).mod_long_names
    else:
        mod_names = []

    # The aligner object is built even without a reference; only the
    # reference lengths are conditional.
    if have_reference:
        logger.info('Loading reference.')
    aligner = mapping.alignerPlus(
        str(args.reference), preset=str('map-ont'), best_n=1)
    if have_reference:
        aligner.add_ref_lens()

    # Optional whitelist of read ids, one per line.
    if args.read_ids_filename is None:
        valid_read_ids = None
    else:
        with open(args.read_ids_filename) as read_ids_fp:
            valid_read_ids = {raw_id.strip() for raw_id in read_ids_fp}

    ploidy_mode = snps.HAPLIOD_MODE if args.haploid else snps.DIPLOID_MODE
    aggregate.aggregate_stats(
        args.outputs,
        args.output_directory,
        args.processes,
        args.write_vcf_log_probs,
        args.heterozygous_factors,
        ploidy_mode,
        mod_names,
        mod_agg_info,
        args.write_mod_log_probs,
        args.mod_output_formats,
        args.suppress_progress,
        aligner.ref_names_and_lens,
        valid_read_ids,
        args.output_suffix)

    # note reference is required in order to annotate contigs for VCF writing
    if have_reference and mh.SNP_NAME in args.outputs:
        logger.info('Sorting output variant file')
        unsorted_fn = mh.add_fn_suffix(
            mh.get_megalodon_fn(args.output_directory, mh.SNP_NAME),
            args.output_suffix)
        sorted_fn = mh.add_fn_suffix(unsorted_fn, 'sorted')
        snps.sort_variants(unsorted_fn, sorted_fn)
        logger.info('Indexing output variant file')
        snps.index_variants(sorted_fn)
Ejemplo n.º 3
0
def index_variants(variant_fn):
    """Create a tabix index for a VCF file, sorting it first if needed.

    Indexing is first attempted on ``variant_fn`` itself. If that raises
    ``OSError``, a sorted copy is written to the path given by
    ``mh.add_fn_suffix(variant_fn, 'sorted')`` and that copy is indexed
    instead.

    Args:
        variant_fn: Path of the variant file to index.

    Returns:
        The value returned by ``pysam.tabix_index`` for the file that was
        indexed.
    """
    tabix_kwargs = {'force': True, 'preset': 'vcf', 'keep_original': True}
    try:
        index_result = pysam.tabix_index(variant_fn, **tabix_kwargs)
    except OSError:
        # file likely not sorted; retry on a sorted copy
        sorted_fn = mh.add_fn_suffix(variant_fn, 'sorted')
        sort_variants(variant_fn, sorted_fn)
        index_result = pysam.tabix_index(sorted_fn, **tabix_kwargs)
    return index_result
Ejemplo n.º 4
0
def _main():
    """Command line entry point: process reads and post-process outputs.

    Steps, in order: parse arguments, create the output directory, set up
    logging, validate options, process all reads, start the mapping and
    whatshap-mapping post-processing, aggregate SNP/mod statistics, sort and
    index the variant file, and finally wait for the post-processing
    started earlier to finish.
    """
    args = get_parser().parse_args()

    mkdir(args.output_directory, args.overwrite)
    logging.init_logger(args.output_directory)
    logger = logging.get_logger()
    logger.debug('Command: """' + ' '.join(sys.argv) + '"""')

    def wait_for(proc, message):
        # Poll until ``proc`` reports it is no longer alive; the message is
        # only logged when there is actually something to wait for.
        if not proc.is_alive():
            return
        logger.info(message)
        while proc.is_alive():
            sleep(0.1)

    if _DO_PROFILE:
        args = profile_validation(args)

    # Each validation step may hand back an updated ``args``.
    args, pr_ref_filts = parse_pr_ref_output(args)
    model_fn = mh.get_model_fn(args.taiyaki_model_filename)
    model_info = backends.ModelInfo(
        model_fn, args.devices, args.processes, args.chunk_size,
        args.chunk_overlap, args.max_concurrent_chunks)
    args, mods_info = mods_validation(args, model_info)
    aligner = aligner_validation(args)
    args, snps_data = snps_validation(
        args, model_info.is_cat_mod, model_info.output_size, aligner)

    outputs = args.outputs
    out_dir = args.output_directory
    recursive = not args.not_recursive

    process_all_reads(
        args.fast5s_dir, recursive, args.num_reads, args.read_ids_filename,
        model_info, outputs, out_dir, args.basecalls_format, aligner,
        snps_data, args.processes, args.verbose_read_progress,
        args.suppress_progress, mods_info, args.database_safety,
        args.edge_buffer, pr_ref_filts)

    # Start the sorting work first so it can proceed while aggregation runs.
    if mh.MAP_NAME in outputs:
        logger.info('Spawning process to sort mappings')
        map_proc = post_process_mapping(
            out_dir, aligner.out_fmt, aligner.ref_fn)

    if mh.WHATSHAP_MAP_NAME in outputs:
        logger.info('Spawning process to sort whatshap mappings')
        whatshap_sorted_fn, whatshap_proc = post_process_whatshap(
            out_dir, aligner.out_fmt, aligner.ref_fn)

    if mh.SNP_NAME in outputs or mh.MOD_NAME in outputs:
        post_process_aggregate(
            mods_info, outputs, args.mod_binary_threshold, out_dir,
            args.processes, args.write_vcf_log_probs,
            args.heterozygous_factors, snps_data, args.write_mod_log_probs,
            args.suppress_progress, aligner.ref_names_and_lens)

    if mh.SNP_NAME in outputs:
        logger.info('Sorting output variant file')
        var_fn = mh.get_megalodon_fn(out_dir, mh.SNP_NAME)
        sorted_var_fn = mh.add_fn_suffix(var_fn, 'sorted')
        snps.sort_variants(var_fn, sorted_var_fn)
        logger.info('Indexing output variant file')
        indexed_var_fn = snps.index_variants(sorted_var_fn)

    if mh.WHATSHAP_MAP_NAME in outputs:
        wait_for(whatshap_proc, 'Waiting for whatshap mappings sort')
        # NOTE(review): ``indexed_var_fn`` and ``var_fn`` are only bound in
        # the SNP branch above; presumably argument validation guarantees
        # SNP output whenever whatshap mappings are requested - confirm.
        phased_fn = mh.add_fn_suffix(var_fn, 'phased')
        logger.info(
            snps.get_whatshap_command(
                indexed_var_fn, whatshap_sorted_fn, phased_fn))

    if mh.MAP_NAME in outputs:
        wait_for(map_proc, 'Waiting for mappings sort')