def gen_reads(vcf, dest_vcf, dest_fq_prefix, ex_snp, gt_policy, read_depth, conf):
    """
    Generate fastqs for the given set of input variants.

    This code is fired when the user supplies the --generate-fqs arg, and closely
    mimics the fastq generation code in VariantProcessor.

    :param vcf: Path of the input VCF file holding the variants
    :param dest_vcf: Destination filename for final VCF (may be gzipped)
    :param dest_fq_prefix: Destination prefix for fastq files
    :param ex_snp: Info for extra SNP addition
    :param gt_policy: Policy describing genotype (hets, homs, from file, etc.)
    :param read_depth: Read depth, forwarded unchanged to bam_simulation.gen_alt_fq
    :param conf: Configuration object; the 'ref_genome' option of section 'main' is read from it
    :raises ValueError: If the variants in ``vcf`` fall into more than one batch
    """
    # Function-scope import so this block does not depend on the file's import header.
    from functools import cmp_to_key

    # First, make sure there aren't variants that are too close to process independently...
    batches = util.batch_variants(vcf, max_batch_size=1e9)
    if len(list(batches)) > 1:
        raise ValueError('The VCF file ' + vcf + ' contains variants that are too close to include in a single set of fastqs, please ensure no two variants are within 2kb of each other')

    ref_path = conf.get('main', 'ref_genome')
    input_variants = list(pysam.VariantFile(vcf))
    variant_sets = bp.create_variant_sets(input_variants, ex_snp, gt_policy, pysam.FastaFile(ref_path))

    # Flatten every set's variants into one list so they can be written as a single VCF.
    allvars = []
    for vset in variant_sets:
        allvars.extend(vset['vars'])

    # sorted(..., cmp=...) only exists on Python 2; cmp_to_key yields the same
    # ordering and works on both Python 2.7 and Python 3.
    variant_batch = sorted(allvars, key=cmp_to_key(util.variant_comp))

    final_vcf = util.write_vcf(variant_batch, dest_vcf, conf)
    logging.info("Writing full VCF to " + final_vcf)

    reads = bam_simulation.gen_alt_fq(ref_path, variant_sets, read_depth, dest_prefix=dest_fq_prefix)
    logging.info("Writing fastqs to " + reads[0] + ", " + reads[1])
def create_input_vcf(self, raw_vars, ex_snp, gt_policy):
    """
    Build variant sets from the raw variants and write them, sorted, to "test_input.vcf".

    :param raw_vars: Variants handed to create_variant_sets
    :param ex_snp: Extra-SNP argument handed to create_variant_sets
    :param gt_policy: Genotype policy argument handed to create_variant_sets
    :return: Tuple of (value returned by util.write_vcf, the variant sets)
    """
    # Function-scope import so this block does not depend on the file's import header.
    from functools import cmp_to_key

    ref_path = self.conf.get('main', 'ref_genome')
    variant_sets = create_variant_sets(raw_vars, ex_snp, gt_policy, pysam.FastaFile(ref_path))

    # Flatten every set's variants into one list so they can be written as a single VCF.
    allvars = []
    for vset in variant_sets:
        allvars.extend(vset['vars'])

    # sorted(..., cmp=...) only exists on Python 2; cmp_to_key yields the same
    # ordering and works on both Python 2.7 and Python 3.
    variant_batch = sorted(allvars, key=cmp_to_key(util.variant_comp))

    orig_vcf = util.write_vcf(variant_batch, "test_input.vcf", self.conf)
    return orig_vcf, variant_sets