def process_test_vcf(orig_vcf, test_vcf, output, conf):
    """
    Compare a test VCF against an original VCF and write a JSON report.

    :param orig_vcf: Path to the reference (original) VCF file
    :param test_vcf: Path to the VCF file under test
    :param output: Destination for the JSON report
    :param conf: Configuration object used to load extra components
    """
    # Core normalizers/comparators first, then any configured extras override them.
    norms = core_norms.get_normalizers()
    comps = core_comps.get_comparators()
    norms.update(load_components(conf, 'normalizers', 'get_normalizers'))
    comps.update(load_components(conf, 'comparators', 'get_comparators'))

    # No variant callers are needed for a pure VCF-vs-VCF comparison.
    reporter = JsonReporter(output)
    bp.VariantProcessor({}, norms, comps, reporter, conf).compare_test_vcf(orig_vcf, test_vcf)
def process_vcf(vcf, gt_default, conf, output, callers, fqs=None, snp_info=None, single_batch=False, keep_tmpdir=False, read_depth=250):
    """
    Perform analyses for each variant in the VCF file.

    :param vcf: Path to vcf file containing variants to process
    :param gt_default: Default genotype policy forwarded to batch processing
    :param conf: Configuration object
    :param output: Destination for the JSON report
    :param callers: Optional list of caller names to restrict to; None/empty means use all available callers
    :param fqs: Optional list of fastq paths (converted to absolute paths before use)
    :param snp_info: Optional extra SNP info forwarded to batch processing
    :param single_batch: Assume all variants in VCF are part of one batch and process them all simultaneously
    :param keep_tmpdir: Preserve tmpdirs created (otherwise delete them, unless they are flagged)
    :param read_depth: Read depth forwarded to batch processing
    :raises KeyError: If a name in ``callers`` has no registered variant caller
    """
    # Core components first, then any configured extras override them.
    variant_callers = core_callers.get_callers()
    variant_callers.update(load_components(conf, 'callers', 'get_callers'))
    normalizers = core_norms.get_normalizers()
    normalizers.update(load_components(conf, 'normalizers', 'get_normalizers'))
    comparators = core_comps.get_comparators()
    comparators.update(load_components(conf, 'comparators', 'get_comparators'))

    if callers:
        # Restrict to the requested subset, failing fast on the first unknown name.
        for caller in callers:
            if caller not in variant_callers:
                raise KeyError('No variant caller ' + caller + ' found in callers')
        variant_callers = {caller: variant_callers[caller] for caller in callers}

    if fqs is not None:
        fqs = [os.path.abspath(fq) for fq in fqs]

    processor = bp.VariantProcessor(variant_callers, normalizers, comparators, JsonReporter(output), conf)
    logging.info("Processing variants in file %s", vcf)

    # Common stem for tmpdir names, e.g. "sample.vcf.gz" -> "sample".
    base = util.strip_extensions(os.path.basename(vcf), ['vcf', 'gz'])
    if single_batch:
        logging.info("Processing all variants as one batch")
        processor.process_batch(vcf, '{}-tmp'.format(base), gt_default,
                                ex_snp=snp_info, keep_tmpdir=keep_tmpdir,
                                read_depth=read_depth, reads=fqs)
    else:
        batches = util.batch_variants(vcf, max_batch_size=1000, min_safe_dist=2000)
        for batchnum, batch_vcf in enumerate(batches, 1):
            logging.info('Processing batch #%s of %s', batchnum, len(batches))
            tmp_dir = '{}-batch{:03d}-tmp'.format(base, batchnum)
            try:
                processor.process_batch(batch_vcf, tmp_dir, gt_default,
                                        ex_snp=snp_info, keep_tmpdir=keep_tmpdir,
                                        read_depth=read_depth, reads=fqs)
            finally:
                # Remove the per-batch VCF even if processing raises, so a
                # failed run does not leave temporary batch files behind.
                os.remove(batch_vcf)