Beispiel #1
0
def gen_reads(vcf, dest_vcf, dest_fq_prefix, ex_snp, gt_policy, read_depth, conf):
    """
    Generate fastqs for the given set of input variants. This code is fired when the user supplies the --generate-fqs
    arg, and closely mimics the fastq generation code in VariantProcessor
    :param vcf: Path to the input VCF file holding the variants to simulate
    :param dest_vcf: Destination filename for final VCF (may be gzipped)
    :param dest_fq_prefix: Destination prefix for fastq files
    :param ex_snp: Info for extra SNP addition
    :param gt_policy: Policy describing genotype (hets, homs, from file, etc.)
    :param read_depth: Read depth, passed through unchanged to the fastq generator
    :param conf: Configuration object; must provide the 'ref_genome' option in section 'main'
    :raises ValueError: If the input variants split into more than one batch
    """
    # Imported locally so this function does not depend on a module-level import.
    import functools

    #First, make sure there aren't variants that are too close to process independently...
    batches = util.batch_variants(vcf, max_batch_size=1e9)
    if len(list(batches))>1:
        raise ValueError('The VCF file ' + vcf + ' contains variants that are too close to include in a single set of fastqs, please ensure no two variants are within 2kb of each other')

    # 'variants' rather than 'vars', so the builtin vars() is not shadowed.
    variants = list(pysam.VariantFile(vcf))
    ref_genome = conf.get('main', 'ref_genome')
    variant_sets = bp.create_variant_sets(variants, ex_snp, gt_policy, pysam.FastaFile(ref_genome))

    # Flatten every set's variants into one list so they can be written as a single VCF.
    allvars = [var for vset in variant_sets for var in vset['vars']]

    # sorted(..., cmp=...) only exists on Python 2; cmp_to_key adapts the same
    # comparison function to the key= protocol and works on 2.7 and 3.x alike.
    variant_batch = sorted(allvars, key=functools.cmp_to_key(util.variant_comp))
    final_vcf = util.write_vcf(variant_batch, dest_vcf, conf)
    logging.info("Writing full VCF to " + final_vcf)
    reads = bam_simulation.gen_alt_fq(ref_genome, variant_sets, read_depth, dest_prefix=dest_fq_prefix)
    logging.info("Writing fastqs to " + reads[0] + ", " + reads[1])