Example #1
0
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     The output path comes from utils.append_stem(self.combo_file, "-exclude");
     a file already present at that path is deleted before the call.
     """
     genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     yaml_path = os.path.join(self.data_dir, "automated", "post_process-sample.yaml")
     config = load_config(yaml_path)
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(self.combo_file, excluded_vcf, sample_names,
                              genome_fasta, config)
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     The configuration is loaded inside a make_workdir() context from the
     YAML returned by get_post_process_yaml, and its "algorithm" entry is
     reset to an empty dict.
     """
     genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     with make_workdir() as workdir:
         yaml_path = get_post_process_yaml(self.automated_dir, workdir)
         config = load_config(yaml_path)
         config["algorithm"] = {}
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(self.combo_file, excluded_vcf, sample_names,
                              genome_fasta, config)
Example #3
0
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     The configuration is loaded inside a make_workdir() context from the
     YAML returned by get_post_process_yaml(self.data_dir, workdir), and its
     "algorithm" entry is reset to an empty dict.
     """
     genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     with make_workdir() as workdir:
         yaml_path = get_post_process_yaml(self.data_dir, workdir)
         config = load_config(yaml_path)
         config["algorithm"] = {}
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(self.combo_file, excluded_vcf, sample_names,
                              genome_fasta, config)
Example #4
0
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on S1_S2-combined.vcf, excluding "S1".

     The output path is the input path with "-exclude" inserted before the
     extension; a file already present at that path is deleted first.
     """
     combined_vcf = os.path.join(self.data_dir, "variants", "S1_S2-combined.vcf")
     genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     yaml_path = os.path.join(self.data_dir, "automated", "post_process-sample.yaml")
     config = load_config(yaml_path)
     stem, ext = os.path.splitext(combined_vcf)
     excluded_vcf = "%s-exclude%s" % (stem, ext)
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(combined_vcf, excluded_vcf, sample_names,
                              genome_fasta, config)
Example #5
0
 def test_2_vcf_exclusion(self, global_config):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     The configuration is loaded from ``global_config`` and its "algorithm"
     entry is reset to an empty dict before the call.
     """
     from bcbio.variation import vcfutils
     config = load_config(global_config)
     config["algorithm"] = {}
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(
         self.combo_file, excluded_vcf, sample_names, self.ref_file, config)
Example #6
0
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     The configuration is loaded inside a make_workdir() context and its
     "algorithm" entry is reset to an empty dict; self.ref_file is passed
     as the reference argument.
     """
     with make_workdir() as workdir:
         yaml_path = get_post_process_yaml(self.automated_dir, workdir)
         config = load_config(yaml_path)
         config["algorithm"] = {}
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(
         self.combo_file, excluded_vcf, sample_names, self.ref_file, config)
Example #7
0
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     vcfutils is imported locally. The configuration is loaded inside a
     make_workdir() context and its "algorithm" entry is reset to an empty
     dict.
     """
     from bcbio.variation import vcfutils
     with make_workdir() as workdir:
         yaml_path = get_post_process_yaml(self.automated_dir, workdir)
         config = load_config(yaml_path)
         config["algorithm"] = {}
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(self.combo_file, excluded_vcf, sample_names,
                              self.ref_file, config)
Example #8
0
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     Returns immediately, doing nothing, when ``prun`` is None.
     """
     # Compatibility with 0.7.6: do nothing when prun is None.
     # This guard can be removed after the 0.7.7 release.
     if prun is None:
         return
     genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     yaml_path = os.path.join(self.data_dir, "automated", "post_process-sample.yaml")
     config = load_config(yaml_path)
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(self.combo_file, excluded_vcf, sample_names,
                              genome_fasta, config)
Example #9
0
 def test_2_vcf_exclusion(self):
     """Call vcfutils.exclude_samples on the combined VCF, excluding "S1".

     Returns immediately, doing nothing, when ``prun`` is None. Otherwise the
     configuration is loaded inside a make_workdir() context and its
     "algorithm" entry is reset to an empty dict.
     """
     # Compatibility with 0.7.6: do nothing when prun is None.
     # This guard can be removed after the 0.7.7 release.
     if prun is None:
         return
     genome_fasta = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     with make_workdir() as workdir:
         yaml_path = get_post_process_yaml(self.data_dir, workdir)
         config = load_config(yaml_path)
         config["algorithm"] = {}
     excluded_vcf = utils.append_stem(self.combo_file, "-exclude")
     sample_names = ["S1"]
     # Remove any existing file at the output path before the call.
     if os.path.exists(excluded_vcf):
         os.remove(excluded_vcf)
     vcfutils.exclude_samples(self.combo_file, excluded_vcf, sample_names,
                              genome_fasta, config)
def main(config_file, env, cores):
    """Drive per-sample preparation, VCF merging, annotation and export.

    Steps, in order: read the configuration, the identifier remapping and
    the priority (exclusion) file; collect input samples and check them
    against the fam file; run ``run_illumina_prep`` on each kept sample via
    joblib; merge the outputs with ``merge_vcf_files``; run
    ``effects.snpeff_effects``; build a database with
    ``population.prep_gemini_db`` and print its path; call
    ``vcfutils.exclude_samples`` with the exclusion list; and hand the result
    to ``prepare_plink_vcftools``.

    Args:
        config_file: Configuration path, passed to ``read_config``.
        env: Second argument passed to ``read_config``.
        cores: Number of parallel jobs; converted with ``int``.

    Raises:
        NotImplementedError: If any input sample has ``id`` set to None.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument parenthesised prints produce the same output on
        # Python 2 and Python 3.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError(
            "%s input sample(s) have no identifier" % len(problem))
    check_fam(samples, config["runinfo"]["fam"])

    config["algorithm"] = {"num_cores": cores}
    samples = [
        s for s in samples if s["id"] is not None and s["id"] not in exclude
    ]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(
        joblib.delayed(run_illumina_prep)(s, config) for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    effects_file = effects.snpeff_effects({
        "vrn_file": merge_file,
        "sam_ref": config["ref"]["GRCh37"],
        "reference": {
            "fasta": {
                "base": config["ref"]["GRCh37"]
            }
        },
        "genome_resources": {
            "aliases": {
                "snpeff": "GRCh37.74"
            }
        },
        "genome_build": "GRCh37",
        "config": config
    })
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db(
        [os.path.join(os.getcwd(), effects_file)],
        [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
        [{
            "config": config,
            "work_bam": "yes",
            "genome_build": "GRCh37",
            "genome_resources": {
                "aliases": {
                    "human": True
                }
            }
        }], data)[0][1]["db"]
    print(gemini_db)
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file,
                                              exclude, config["ref"]["GRCh37"],
                                              config)
    prepare_plink_vcftools(noexclude_file, config)
def main(config_file, env, cores):
    """Drive per-sample preparation, VCF merging, annotation and export.

    Steps, in order: read the configuration, the identifier remapping and
    the priority (exclusion) file; collect input samples and check them
    against the fam file; run ``run_illumina_prep`` on each kept sample via
    joblib; merge the outputs with ``merge_vcf_files``; run
    ``effects.snpeff_effects``; build a database with
    ``population.prep_gemini_db`` and print its path; call
    ``vcfutils.exclude_samples`` with the exclusion list; and hand the result
    to ``prepare_plink_vcftools``.

    Args:
        config_file: Configuration path, passed to ``read_config``.
        env: Second argument passed to ``read_config``.
        cores: Number of parallel jobs; converted with ``int``.

    Raises:
        NotImplementedError: If any input sample has ``id`` set to None.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument parenthesised prints produce the same output on
        # Python 2 and Python 3.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError(
            "%s input sample(s) have no identifier" % len(problem))
    check_fam(samples, config["runinfo"]["fam"])

    config["algorithm"] = {"num_cores": cores}
    samples = [s for s in samples if s["id"] is not None and s["id"] not in exclude]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(joblib.delayed(run_illumina_prep)(s, config)
                                            for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    effects_file = effects.snpeff_effects({"vrn_file": merge_file,
                                           "sam_ref": config["ref"]["GRCh37"],
                                           "reference": {"fasta" : {"base": config["ref"]["GRCh37"]}},
                                           "genome_resources": {"aliases" : {"snpeff": "GRCh37.74"}},
                                           "genome_build": "GRCh37",
                                           "config": config})
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db([os.path.join(os.getcwd(), effects_file)],
                                          [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
                                          [{"config": config, "work_bam": "yes", "genome_build": "GRCh37",
                                            "genome_resources": {"aliases": {"human": True}}}],
                                          data)[0][1]["db"]
    print(gemini_db)
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file, exclude,
                                              config["ref"]["GRCh37"], config)
    prepare_plink_vcftools(noexclude_file, config)