def test_2_vcf_exclusion(self):
    """Run sample exclusion on the combined VCF file.

    Loads the sample post-processing configuration from the test data
    directory, removes an output file left by an earlier run if one
    exists, and calls ``vcfutils.exclude_samples`` for sample ``S1``.
    """
    genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
    yaml_path = os.path.join(self.data_dir, "automated", "post_process-sample.yaml")
    config = load_config(yaml_path)
    excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
    samples_to_drop = ["S1"]
    # Start from a clean slate: drop any output from a previous run.
    if os.path.exists(excluded_vcf):
        os.remove(excluded_vcf)
    vcfutils.exclude_samples(self.combo_file, excluded_vcf, samples_to_drop,
                             genome_fasta, config)
def test_2_vcf_exclusion(self):
    """Run sample exclusion on the combined VCF file.

    The configuration comes from the post-process YAML prepared for a
    work directory, with its ``algorithm`` section replaced by an empty
    dictionary. A pre-existing output file is removed before
    ``vcfutils.exclude_samples`` is called for sample ``S1``.
    """
    genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
    with make_workdir() as workdir:
        yaml_path = get_post_process_yaml(self.automated_dir, workdir)
        config = load_config(yaml_path)
        config["algorithm"] = {}
    excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
    samples_to_drop = ["S1"]
    # Remove output from an earlier run, if any, before the call.
    if os.path.exists(excluded_vcf):
        os.remove(excluded_vcf)
    vcfutils.exclude_samples(self.combo_file, excluded_vcf, samples_to_drop,
                             genome_fasta, config)
def test_2_vcf_exclusion(self):
    """Exercise ``vcfutils.exclude_samples`` on the combined VCF file.

    Reads the post-process YAML located via ``self.data_dir`` and a work
    directory, blanks its ``algorithm`` section, deletes a stale output
    file if present, and then requests exclusion of sample ``S1``.
    """
    reference = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
    with make_workdir() as workdir:
        config = load_config(get_post_process_yaml(self.data_dir, workdir))
        config["algorithm"] = {}
    target = utils.append_stem(self.combo_file, "-exclude")
    drop = ["S1"]
    # A leftover result from a previous run is deleted first.
    if os.path.exists(target):
        os.remove(target)
    vcfutils.exclude_samples(self.combo_file, target, drop, reference, config)
def test_2_vcf_exclusion(self):
    """Run sample exclusion on ``S1_S2-combined.vcf``.

    The output path is the input path with ``-exclude`` inserted before
    the extension. An existing output file is removed before
    ``vcfutils.exclude_samples`` is called for sample ``S1``.
    """
    combined_vcf = os.path.join(self.data_dir, "variants", "S1_S2-combined.vcf")
    genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
    yaml_path = os.path.join(self.data_dir, "automated", "post_process-sample.yaml")
    config = load_config(yaml_path)
    stem, extension = os.path.splitext(combined_vcf)
    excluded_vcf = "%s-exclude%s" % (stem, extension)
    samples_to_drop = ["S1"]
    # Clear output left behind by an earlier run.
    if os.path.exists(excluded_vcf):
        os.remove(excluded_vcf)
    vcfutils.exclude_samples(combined_vcf, excluded_vcf, samples_to_drop,
                             genome_fasta, config)
def test_2_vcf_exclusion(self, global_config):
    """Run sample exclusion on the combined VCF file.

    ``global_config`` is passed to ``load_config``; the loaded
    configuration gets an empty ``algorithm`` section. An existing
    output file is removed before ``vcfutils.exclude_samples`` is called
    for sample ``S1``.
    """
    from bcbio.variation import vcfutils

    settings = load_config(global_config)
    settings["algorithm"] = {}
    excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
    samples_to_drop = ["S1"]
    # Remove output from a previous run so the call starts without it.
    if os.path.exists(excluded_vcf):
        os.remove(excluded_vcf)
    vcfutils.exclude_samples(self.combo_file, excluded_vcf, samples_to_drop,
                             self.ref_file, settings)
def test_2_vcf_exclusion(self):
    """Exercise ``vcfutils.exclude_samples`` on the combined VCF file.

    Loads the post-process YAML prepared for a work directory, resets its
    ``algorithm`` section to an empty dictionary, deletes a stale output
    file if present, and requests exclusion of sample ``S1``.
    """
    with make_workdir() as workdir:
        yaml_path = get_post_process_yaml(self.automated_dir, workdir)
        settings = load_config(yaml_path)
        settings["algorithm"] = {}
    target = utils.append_stem(self.combo_file, "-exclude")
    drop = ["S1"]
    # Output from an earlier run is removed before regenerating it.
    if os.path.exists(target):
        os.remove(target)
    vcfutils.exclude_samples(self.combo_file, target, drop, self.ref_file, settings)
def test_2_vcf_exclusion(self):
    """Run sample exclusion on the combined VCF file.

    ``vcfutils`` is imported inside the test. The configuration is read
    from the post-process YAML prepared for a work directory and its
    ``algorithm`` section is emptied. A pre-existing output file is
    removed before sample ``S1`` is excluded.
    """
    from bcbio.variation import vcfutils

    with make_workdir() as workdir:
        yaml_path = get_post_process_yaml(self.automated_dir, workdir)
        config = load_config(yaml_path)
        config["algorithm"] = {}
    excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
    samples_to_drop = ["S1"]
    # Delete a leftover result from a previous run, if there is one.
    if os.path.exists(excluded_vcf):
        os.remove(excluded_vcf)
    vcfutils.exclude_samples(self.combo_file, excluded_vcf, samples_to_drop,
                             self.ref_file, config)
def test_2_vcf_exclusion(self):
    """Run sample exclusion on the combined VCF file.

    Returns immediately when ``prun`` is None. Otherwise loads the sample
    post-processing configuration, removes an existing output file, and
    calls ``vcfutils.exclude_samples`` for sample ``S1``.
    """
    # Back-compatibility with 0.7.6: nothing to do when prun is None.
    # Remove this guard after the 0.7.7 release.
    if prun is None:
        return
    genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
    yaml_path = os.path.join(self.data_dir, "automated", "post_process-sample.yaml")
    config = load_config(yaml_path)
    excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
    samples_to_drop = ["S1"]
    # Drop output from an earlier run before regenerating it.
    if os.path.exists(excluded_vcf):
        os.remove(excluded_vcf)
    vcfutils.exclude_samples(self.combo_file, excluded_vcf, samples_to_drop,
                             genome_fasta, config)
def test_2_vcf_exclusion(self):
    """Exercise ``vcfutils.exclude_samples`` on the combined VCF file.

    Returns immediately when ``prun`` is None. Otherwise reads the
    post-process YAML located via ``self.data_dir`` and a work directory,
    empties its ``algorithm`` section, removes an existing output file,
    and requests exclusion of sample ``S1``.
    """
    # Back-compatibility with 0.7.6: nothing to do when prun is None.
    # Remove this guard after the 0.7.7 release.
    if prun is None:
        return
    reference = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
    with make_workdir() as workdir:
        config = load_config(get_post_process_yaml(self.data_dir, workdir))
        config["algorithm"] = {}
    target = utils.append_stem(self.combo_file, "-exclude")
    drop = ["S1"]
    # A stale result from a previous run is deleted first.
    if os.path.exists(target):
        os.remove(target)
    vcfutils.exclude_samples(self.combo_file, target, drop, reference, config)
def main(config_file, env, cores):
    """Prepare, merge, annotate and export variant calls for a set of samples.

    Steps, in order:
      1. Read the configuration, the identifier remapping and the priority
         list of identifiers to exclude.
      2. Print the samples whose ``id`` is None and raise if there are any.
      3. Run ``run_illumina_prep`` on every remaining sample through joblib.
      4. Merge the per-sample outputs, annotate the merged file with
         ``effects.snpeff_effects`` and build a GEMINI database from it.
      5. Call ``vcfutils.exclude_samples`` on the annotated file and hand
         the result to ``prepare_plink_vcftools``.

    Args:
        config_file: Configuration file, passed to ``read_config``.
        env: Second argument passed to ``read_config``.
        cores: Number of cores to use; converted with ``int()``.

    Raises:
        NotImplementedError: If any input sample has an ``id`` of None.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))

    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3, unlike the print statement.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError

    check_fam(samples, config["runinfo"]["fam"])
    config["algorithm"] = {"num_cores": cores}
    samples = [
        s for s in samples
        if s["id"] is not None and s["id"] not in exclude
    ]
    print("Processing %s samples" % len(samples))
    out_files = list(
        joblib.Parallel(cores)(
            joblib.delayed(run_illumina_prep)(s, config) for s in samples))

    merge_file = merge_vcf_files(out_files, cores, config)
    effects_file = effects.snpeff_effects({
        "vrn_file": merge_file,
        "sam_ref": config["ref"]["GRCh37"],
        "reference": {"fasta": {"base": config["ref"]["GRCh37"]}},
        "genome_resources": {"aliases": {"snpeff": "GRCh37.74"}},
        "genome_build": "GRCh37",
        "config": config,
    })

    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db(
        [os.path.join(os.getcwd(), effects_file)],
        [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
        [{
            "config": config,
            "work_bam": "yes",
            "genome_build": "GRCh37",
            "genome_resources": {"aliases": {"human": True}},
        }],
        data)[0][1]["db"]
    print(gemini_db)

    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file,
                                              exclude, config["ref"]["GRCh37"],
                                              config)
    prepare_plink_vcftools(noexclude_file, config)
def main(config_file, env, cores):
    """Run the sample preparation workflow from configuration to export.

    Reads the run configuration and identifier tables, refuses to continue
    when any input sample has no identifier, prepares the remaining samples
    in parallel with joblib, merges and annotates the results, builds a
    GEMINI database, and finally passes the output of
    ``vcfutils.exclude_samples`` to ``prepare_plink_vcftools``.

    Args:
        config_file: Configuration file, passed to ``read_config``.
        env: Second argument passed to ``read_config``.
        cores: Number of cores to use; converted with ``int()``.

    Raises:
        NotImplementedError: If any input sample has an ``id`` of None.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    runinfo = config["runinfo"]
    idremap = read_remap_file(runinfo["idmapping"])
    exclude = read_priority_file(runinfo["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))

    # Samples without an identifier cannot be processed: report and stop.
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # print(...) with one argument is valid and identical in output on
        # both Python 2 and Python 3.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError

    check_fam(samples, runinfo["fam"])
    config["algorithm"] = {"num_cores": cores}
    samples = [s for s in samples
               if s["id"] is not None and s["id"] not in exclude]
    print("Processing %s samples" % len(samples))
    prep_jobs = (joblib.delayed(run_illumina_prep)(s, config) for s in samples)
    out_files = list(joblib.Parallel(cores)(prep_jobs))

    merge_file = merge_vcf_files(out_files, cores, config)
    ref_grch37 = config["ref"]["GRCh37"]
    effects_file = effects.snpeff_effects(
        {"vrn_file": merge_file,
         "sam_ref": ref_grch37,
         "reference": {"fasta": {"base": ref_grch37}},
         "genome_resources": {"aliases": {"snpeff": "GRCh37.74"}},
         "genome_build": "GRCh37",
         "config": config})

    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_vcfs = [os.path.join(os.getcwd(), effects_file)]
    gemini_batch = [utils.splitext_plus(config["outputs"]["merge"])[0],
                    "casava", True]
    gemini_items = [{"config": config,
                     "work_bam": "yes",
                     "genome_build": "GRCh37",
                     "genome_resources": {"aliases": {"human": True}}}]
    gemini_out = population.prep_gemini_db(gemini_vcfs, gemini_batch,
                                           gemini_items, data)
    gemini_db = gemini_out[0][1]["db"]
    print(gemini_db)

    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file,
                                              exclude, config["ref"]["GRCh37"],
                                              config)
    prepare_plink_vcftools(noexclude_file, config)