def gen_reads(vcf, dest_vcf, dest_fq_prefix, ex_snp, gt_policy, read_depth, conf):
    """
    Generate fastqs for the given set of input variants. This code is fired when the user
    supplies the --generate-fqs arg, and closely mimics the fastq generation code in
    VariantProcessor.
    :param vcf: Path to the VCF file containing the variants to simulate
    :param dest_vcf: Destination filename for final VCF (may be gzipped)
    :param dest_fq_prefix: Destination prefix for fastq files
    :param ex_snp: Info for extra SNP addition
    :param gt_policy: Policy describing genotype (hets, homs, from file, etc.)
    :param read_depth: Depth of simulated read coverage
    :param conf: Configuration object
    """
    # First, make sure there aren't variants that are too close to process independently...
    batches = util.batch_variants(vcf, max_batch_size=1e9)
    if len(list(batches)) > 1:
        raise ValueError('The VCF file ' + vcf + ' contains variants that are too close to '
                         'include in a single set of fastqs; please ensure no two variants '
                         'are within 2kb of each other')

    # 'variants' rather than 'vars', which shadows the builtin
    variants = list(pysam.VariantFile(vcf))
    variant_sets = bp.create_variant_sets(variants, ex_snp, gt_policy,
                                          pysam.FastaFile(conf.get('main', 'ref_genome')))
    allvars = []
    for vset in variant_sets:
        allvars.extend(vset['vars'])

    # cmp-style sort works under Python 2; under Python 3 wrap the comparator
    # with functools.cmp_to_key: sorted(allvars, key=functools.cmp_to_key(util.variant_comp))
    variant_batch = sorted(allvars, cmp=util.variant_comp)
    final_vcf = util.write_vcf(variant_batch, dest_vcf, conf)
    logging.info("Writing full VCF to " + final_vcf)

    reads = bam_simulation.gen_alt_fq(conf.get('main', 'ref_genome'), variant_sets,
                                      read_depth, dest_prefix=dest_fq_prefix)
    logging.info("Writing fastqs to " + reads[0] + ", " + reads[1])
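
# Usage sketch (hypothetical, not called anywhere in the module): generate a pair
# of fastqs for a small VCF. The [main] / ref_genome config option mirrors the
# conf.get('main', 'ref_genome') calls above; the file paths, 100x depth, and the
# None values for ex_snp / gt_policy (assumed to mean "no extra SNP" and "take
# genotypes from the file") are illustrative assumptions only.
def _example_gen_reads(conf):
    gen_reads('example.vcf', 'example-final.vcf.gz', 'example_reads',
              ex_snp=None, gt_policy=None, read_depth=100, conf=conf)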
def process_vcf(vcf, gt_default, conf, output, callers, fqs=None, snp_info=None,
                single_batch=False, keep_tmpdir=False, read_depth=250):
    """
    Perform analyses for each variant in the VCF file.
    :param vcf: Path to vcf file containing variants to process
    :param gt_default: Default genotype policy, passed through to process_batch
    :param conf: Configuration object
    :param output: Path of the JSON results file written by the JsonReporter
    :param callers: Names of variant callers to use; None or empty means all available callers
    :param fqs: Optional fastq files containing reads to use (passed to process_batch as reads)
    :param snp_info: Info for extra SNP addition
    :param single_batch: Assume all variants in VCF are part of one batch and process them all simultaneously
    :param keep_tmpdir: Preserve tmpdirs created (otherwise delete them, unless they are flagged)
    :param read_depth: Depth of simulated read coverage
    """
    variant_callers = core_callers.get_callers()
    #variant_callers.update(load_components(conf, 'callers', 'get_callers'))
    normalizers = core_norms.get_normalizers()
    #normalizers.update(load_components(conf, 'normalizers', 'get_normalizers'))
    comparators = core_comps.get_comparators()
    #comparators.update(load_components(conf, 'comparators', 'get_comparators'))

    # Restrict to the requested callers, if any were named
    if callers is not None and len(callers) > 0:
        callers_to_use = {}
        for caller in callers:
            if caller not in variant_callers:
                raise KeyError('No variant caller ' + caller + ' found in available callers')
            callers_to_use[caller] = variant_callers[caller]
        variant_callers = callers_to_use

    if fqs is not None:
        # Convert fastq paths to absolute paths so they stay valid inside tmpdirs
        fqs = [os.path.abspath(fq) for fq in fqs]

    processor = bp.VariantProcessor(variant_callers, normalizers, comparators,
                                    JsonReporter(output))

    logging.info("Processing variants in file " + vcf)
    if single_batch:
        logging.info("Processing all variants as one batch")
        processor.process_batch(vcf, vcf.replace(".vcf", "-tmpfiles"), conf, gt_default,
                                ex_snp=snp_info, keep_tmpdir=keep_tmpdir,
                                read_depth=read_depth, reads=fqs)
    else:
        batches = util.batch_variants(vcf, max_batch_size=1000, min_safe_dist=2000)
        for batchnum, batch_vcf in enumerate(batches):
            logging.info("Processing batch #" + str(batchnum + 1) + " of " + str(len(batches)))
            processor.process_batch(batch_vcf, vcf.replace(".vcf", "-tmpfiles"), conf,
                                    gt_default, ex_snp=snp_info, keep_tmpdir=keep_tmpdir,
                                    read_depth=read_depth, reads=fqs)
            os.remove(batch_vcf)
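
# Usage sketch (hypothetical, not called anywhere in the module): run the full
# batched analysis with every available caller. The output path and depth are
# illustrative; if a caller list is supplied its names must match keys returned
# by core_callers.get_callers(), and gt_default=None is assumed to mean "use the
# genotypes given in the VCF".
def _example_process_vcf(conf):
    process_vcf('example.vcf', gt_default=None, conf=conf,
                output='example-results.json', callers=None,
                read_depth=100, single_batch=False)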