def test_1_parallel_vcf_combine(self):
    """Combine two per-sample VCF files in parallel, split by chromosome.

    Deletes any previous region directory and combined output, runs
    ``vcfutils.parallel_combine_variants`` with a single local core, then
    gunzips any ``.gz`` copies of the inputs and removes their ``.gz.tbi``
    files.
    """
    # NOTE(review): block extents were reconstructed from whitespace-collapsed
    # source; confirm that only the config loading belongs inside
    # make_workdir().
    from bcbio.variation import vcfutils

    vcf_files = [
        os.path.join(self.var_dir, "S1-variants.vcf"),
        os.path.join(self.var_dir, "S2-variants.vcf"),
    ]
    with make_workdir() as workdir:
        system_yaml = get_post_process_yaml(self.automated_dir, workdir)
        config = load_config(system_yaml)
        config["algorithm"] = {}

    # Start from a clean slate so results of an earlier run are not reused.
    regions_dir = os.path.join(self.var_dir, "S1_S2-combined-regions")
    if os.path.exists(regions_dir):
        shutil.rmtree(regions_dir)
    if os.path.exists(self.combo_file):
        os.remove(self.combo_file)

    parallel_reqs = {"type": "local", "cores": 1}
    with prun.start(parallel_reqs, [[config]], config) as run_parallel:
        vcfutils.parallel_combine_variants(
            vcf_files, self.combo_file, self.ref_file, config, run_parallel)

    for vcf in vcf_files:
        gz_file = vcf + ".gz"
        tbi_file = vcf + ".gz.tbi"
        if os.path.exists(gz_file):
            subprocess.check_call(["gunzip", gz_file])
        if os.path.exists(tbi_file):
            os.remove(tbi_file)
def test_programs(self, data_dir):
    """Identify programs and versions used in analysis.

    Loads the system configuration created for a fresh work directory and
    prints whatever ``programs._get_versions`` returns for it.
    """
    from bcbio.provenance import programs

    with make_workdir() as workdir:
        config = load_config(get_post_process_yaml(data_dir, workdir))
        # Single-argument call form prints identically under Python 2 and 3,
        # unlike the bare print statement.
        print(programs._get_versions(config))
def test_1_parallel_vcf_combine(self):
    """Run a parallel, per-chromosome combination of the S1 and S2 VCF files.

    Old outputs (region directory and combined file) are removed first.
    After combining through ``vcfutils.parallel_combine_variants`` the input
    files are gunzipped again when a ``.gz`` copy exists and any ``.gz.tbi``
    file next to them is deleted.
    """
    # NOTE(review): block extents were reconstructed from whitespace-collapsed
    # source; confirm that only the config loading belongs inside
    # make_workdir().
    from bcbio.variation import vcfutils

    inputs = [os.path.join(self.var_dir, "S1-variants.vcf"),
              os.path.join(self.var_dir, "S2-variants.vcf")]
    with make_workdir() as workdir:
        config = load_config(
            get_post_process_yaml(self.automated_dir, workdir))
        config["algorithm"] = {}

    stale_region_dir = os.path.join(self.var_dir, "S1_S2-combined-regions")
    if os.path.exists(stale_region_dir):
        shutil.rmtree(stale_region_dir)
    if os.path.exists(self.combo_file):
        os.remove(self.combo_file)

    with prun.start({"type": "local", "cores": 1},
                    [[config]], config) as run_parallel:
        vcfutils.parallel_combine_variants(
            inputs, self.combo_file, self.ref_file, config, run_parallel)

    for in_file in inputs:
        compressed = in_file + ".gz"
        if os.path.exists(compressed):
            subprocess.check_call(["gunzip", compressed])
        index_file = in_file + ".gz.tbi"
        if os.path.exists(index_file):
            os.remove(index_file)
def test_6_bamclean(install_test_files, data_dir):
    """Run bcbio_nextgen.py with the run_info-bamclean.yaml configuration.

    Uses the 100326_FC6107FAAXX directory beside ``data_dir`` as input;
    ``subprocess.check_call`` raises if the command exits non-zero.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX")
        run_config = os.path.join(data_dir, "run_info-bamclean.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, fc_dir, run_config])
def test_7b_cancer_precall(install_test_files, data_dir):
    """Test somatic prioritization and effects prediction with pre-called inputs.

    Runs bcbio_nextgen.py with run_info-cancer3.yaml; a non-zero exit status
    makes ``subprocess.check_call`` raise.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        run_config = os.path.join(data_dir, "run_info-cancer3.yaml")
        subprocess.check_call(["bcbio_nextgen.py", system_config, run_config])
def test_chipseq(install_test_files, data_dir):
    """Run a chip-seq alignment with Bowtie2.

    Invokes bcbio_nextgen.py on the test_chipseq input directory with
    run_info-chipseq.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "test_chipseq")
        run_config = os.path.join(data_dir, "run_info-chipseq.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_10_umi(install_test_files, data_dir):
    """Run bcbio_nextgen.py with the run_info-umi.yaml configuration.

    Only the system configuration and the run configuration are passed; no
    separate input directory is given on the command line.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        run_config = os.path.join(data_dir, "run_info-umi.yaml")
        subprocess.check_call(["bcbio_nextgen.py", system_config, run_config])
def test_9_joint(install_test_files, data_dir):
    """Perform joint calling/backfilling/squaring off following variant calling.

    Runs bcbio_nextgen.py with run_info-joint.yaml inside a fresh work
    directory.
    """
    with make_workdir() as workdir:
        command = ["bcbio_nextgen.py"]
        command.append(get_post_process_yaml(data_dir, workdir))
        command.append(os.path.join(data_dir, "run_info-joint.yaml"))
        subprocess.check_call(command)
def test_atacseq(install_test_files, data_dir):
    """Test ATAC-seq pipeline.

    Invokes bcbio_nextgen.py on the test_atacseq input directory with
    run_info-atacseq.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "test_atacseq")
        run_config = os.path.join(data_dir, "run_info-atacseq.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_srnaseq_bowtie(install_test_files, data_dir):
    """Run an sRNA-seq analysis.

    Invokes bcbio_nextgen.py on the test_srnaseq input directory with
    run_info-srnaseq_bowtie.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "test_srnaseq")
        run_config = os.path.join(data_dir, "run_info-srnaseq_bowtie.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_4_empty_fastq(install_test_files, data_dir):
    """Handle analysis of empty fastq inputs from failed runs.

    Invokes bcbio_nextgen.py on the 110221_empty_FC12345AAXX directory with
    run_info-empty.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "110221_empty_FC12345AAXX")
        run_config = os.path.join(data_dir, "run_info-empty.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, fc_dir, run_config])
def test_2_rnaseq(install_test_files, data_dir):
    """Run an RNA-seq analysis with TopHat and generate gene-level counts.

    Invokes bcbio_nextgen.py on the 110907_ERP000591 directory with
    run_info-rnaseq.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "110907_ERP000591")
        run_config = os.path.join(data_dir, "run_info-rnaseq.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, fc_dir, run_config])
def test_7_cancer(install_test_files, data_dir):
    """Test paired tumor-normal calling using multiple calling approaches.

    Callers named by the original test: MuTect, VarScan, FreeBayes.
    Runs bcbio_nextgen.py with run_info-cancer.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        run_config = os.path.join(data_dir, "run_info-cancer.yaml")
        subprocess.check_call(["bcbio_nextgen.py", system_config, run_config])
def test_2_star(install_test_files, data_dir):
    """Run an RNA-seq analysis with STAR and generate gene-level counts.

    Invokes bcbio_nextgen.py on the test_fusion input directory with
    run_info-star.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "test_fusion")
        run_config = os.path.join(data_dir, "run_info-star.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_7_cancer_nonormal(install_test_files, data_dir):
    """Test cancer calling without normal samples or with normal VCF panels.

    Requires MuTect and GATK. Runs bcbio_nextgen.py with
    run_info-cancer2.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        run_config = os.path.join(data_dir, "run_info-cancer2.yaml")
        subprocess.check_call(["bcbio_nextgen.py", system_config, run_config])
def test_2_fastrnaseq(install_test_files, data_dir):
    """Run a fast RNA-seq analysis.

    Invokes bcbio_nextgen.py on the 110907_ERP000591 directory with
    run_info-fastrnaseq.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "110907_ERP000591")
        run_config = os.path.join(data_dir, "run_info-fastrnaseq.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, fc_dir, run_config])
def test_2_fusion(install_test_files, data_dir):
    """Run an RNA-seq analysis and test fusion genes.

    Invokes bcbio_nextgen.py on the test_fusion input directory with
    run_info-fusion.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "test_fusion")
        run_config = os.path.join(data_dir, "run_info-fusion.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_2_fastrnaseq(install_test_files, data_dir):
    """Run a fast RNA-seq analysis.

    Invokes bcbio_nextgen.py on the test_fusion input directory with
    run_info-fastrnaseq.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "test_fusion")
        run_config = os.path.join(data_dir, "run_info-fastrnaseq.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_3_full_pipeline(install_test_files, data_dir):
    """Run full automated analysis pipeline with multiplexing.

    Invokes bcbio_nextgen.py on the 110106_FC70BUKAAXX directory with
    run_info.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "110106_FC70BUKAAXX")
        run_config = os.path.join(data_dir, "run_info.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, fc_dir, run_config])
def test_2_scrnaseq(install_test_files, data_dir):
    """Run a single-cell RNA-seq analysis.

    Invokes bcbio_nextgen.py on the Harvard-inDrop input directory with
    run_info-scrnaseq.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "Harvard-inDrop")
        run_config = os.path.join(data_dir, "run_info-scrnaseq.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_variantcall_2(install_test_files, data_dir):
    """Test variant calling with disambiguation.

    Requires minikraken database. Invokes bcbio_nextgen.py on the
    100326_FC6107FAAXX directory with run_info-variantcall_S3_2.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX")
        run_config = os.path.join(data_dir, "run_info-variantcall_S3_2.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, fc_dir, run_config])
def test_detect_fusions_with_ericscipt_with_disambiguate(
        install_test_files, data_dir, setup_logging):
    """Run gene fusion analysis on disambiguated reads with EricScript.

    Requires installation of EricScript and its reference data. Builds a
    sample configuration with ``disambiguate=True``, passes it to
    ``ericscript.run`` and then checks the run with
    ``assert_run_successfully``.
    """
    with make_workdir() as work_dir:
        config = create_sample_config(data_dir, work_dir, disambiguate=True)
        ericscript.run(config)
        assert_run_successfully(work_dir=work_dir, data_dir=data_dir)
def test_1_variantcall(install_test_files, data_dir):
    """Test variant calling with GATK pipeline.

    Requires GATK. Invokes bcbio_nextgen.py on the 100326_FC6107FAAXX
    directory with run_info-variantcall.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX")
        run_config = os.path.join(data_dir, "run_info-variantcall.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, fc_dir, run_config])
def test_chipseq(install_test_files, data_dir):
    """Run a chip-seq alignment with Bowtie2.

    The command line is bcbio_nextgen.py, the system configuration, the
    test_chipseq input directory and run_info-chipseq.yaml, in that order.
    """
    with make_workdir() as workdir:
        command = ["bcbio_nextgen.py"]
        command.append(get_post_process_yaml(data_dir, workdir))
        command.append(os.path.join(data_dir, os.pardir, "test_chipseq"))
        command.append(os.path.join(data_dir, "run_info-chipseq.yaml"))
        subprocess.check_call(command)
def test_fusion(install_test_files, data_dir):
    """Run an RNA-seq analysis and test fusion genes, with human-mouse
    disambiguation.

    Requires minikraken database. Invokes bcbio_nextgen.py on the
    test_fusion input directory with run_info-fusion_S3.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        input_dir = os.path.join(data_dir, os.pardir, "test_fusion")
        run_config = os.path.join(data_dir, "run_info-fusion_S3.yaml")
        subprocess.check_call(
            ["bcbio_nextgen.py", system_config, input_dir, run_config])
def test_8_template(install_test_files, data_dir):
    """Create a project template from input files and metadata configuration.

    Calls ``bcbio_nextgen.py -w template --only-metadata freebayes-variant``
    followed by the CSV, two fastq files and one BAM file from the
    100326_FC6107FAAXX directory.
    """
    fc_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX")
    input_names = [
        "100326.csv",
        "7_100326_FC6107FAAXX_1_fastq.txt",
        "7_100326_FC6107FAAXX_2_fastq.txt",
        "8_100326_FC6107FAAXX.bam",
    ]
    with make_workdir():
        command = ["bcbio_nextgen.py", "-w", "template", "--only-metadata",
                   "freebayes-variant"]
        command.extend(os.path.join(fc_dir, name) for name in input_names)
        subprocess.check_call(command)
def test_1_cwl_local(self, install_test_files, data_dir):
    """Create a common workflow language description and run on local installation.

    First generates the CWL description with ``bcbio_vm.py cwl`` and then
    runs it through ``bcbio_vm.py cwlrun cwltool ... --no-container``.
    Afterwards the last command line is printed so it can be re-run by hand.
    Either command exiting non-zero makes ``subprocess.check_call`` raise.
    """
    with make_workdir() as workdir:
        cl = ["bcbio_vm.py", "cwl", "../data/automated/run_info-cwl.yaml",
              "--systemconfig", get_post_process_yaml(data_dir, workdir)]
        subprocess.check_call(cl)
        cl = ["bcbio_vm.py", "cwlrun", "cwltool", "run_info-cwl-workflow",
              "--no-container"]
        subprocess.check_call(cl)
        # Single-argument print calls behave the same on Python 2 and 3;
        # print("") emits the blank line the bare print statement produced.
        print("")
        print("To run with a CWL tool, cd test_automated_output and:")
        print(" ".join(cl))
def test_2_vcf_exclusion(self):
    """Exclude samples from VCF files.

    Removes sample "S1" from ``self.combo_file`` via
    ``vcfutils.exclude_samples``, writing to a sibling file whose stem gets
    the "-exclude" suffix. A pre-existing output file is deleted first.
    """
    # NOTE(review): block extents were reconstructed from whitespace-collapsed
    # source; confirm that only the config loading belongs inside
    # make_workdir().
    with make_workdir() as workdir:
        system_yaml = get_post_process_yaml(self.automated_dir, workdir)
        config = load_config(system_yaml)
        config["algorithm"] = {}

    excluded_file = utils.append_stem(self.combo_file, "-exclude")
    samples_to_drop = ["S1"]
    if os.path.exists(excluded_file):
        os.remove(excluded_file)
    vcfutils.exclude_samples(
        self.combo_file, excluded_file, samples_to_drop, self.ref_file, config)
def test_2_vcf_exclusion(self):
    """Exclude samples from VCF files.

    Calls ``vcfutils.exclude_samples`` on the combined VCF with "S1" as the
    only sample to exclude. The output path is the combined file name with
    "-exclude" appended to its stem; any earlier copy is removed beforehand.
    """
    # NOTE(review): block extents were reconstructed from whitespace-collapsed
    # source; confirm that only the config loading belongs inside
    # make_workdir().
    with make_workdir() as workdir:
        config = load_config(
            get_post_process_yaml(self.automated_dir, workdir))
        config["algorithm"] = {}

    target = utils.append_stem(self.combo_file, "-exclude")
    if os.path.exists(target):
        os.remove(target)
    vcfutils.exclude_samples(
        self.combo_file, target, ["S1"], self.ref_file, config)
def test_11_hla(install_test_files, data_dir):
    """Test HLA typing with OptiType.

    Runs ``optitype.run`` on every file found in the flowcell's ``hla``
    directory, then reads the header and first data row of the resulting
    comma-separated call file and checks the reported alleles.
    """
    from bcbio.hla import optitype

    hla_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX", "hla")
    with make_workdir() as workdir:
        fastq_files = glob.glob(os.path.join(hla_dir, "*"))
        sample = {
            "dirs": {"work": workdir},
            "rgnames": {"sample": "test"},
            "config": {},
            "hla": {"fastq": fastq_files},
        }
        result = optitype.run(sample)
        with open(result["hla"]["call_file"]) as in_handle:
            columns = in_handle.readline().strip().split(",")
            first_row = in_handle.readline().strip().split(",")
            call = dict(zip(columns, first_row))
            assert call["alleles"] == "HLA-A*11:01;HLA-A*24:02", call
def test_3_vcf_split_combine(self):
    """Split a VCF file into SNPs and indels, then combine back together.

    The merged output is written next to the input as
    ``<stem>-merge<ext>.gz``. All three generated files are handed to
    ``self._remove_vcf`` at the end.
    """
    # NOTE(review): block extents were reconstructed from whitespace-collapsed
    # source; confirm that only the config loading belongs inside
    # make_workdir().
    with make_workdir() as workdir:
        system_yaml = get_post_process_yaml(self.automated_dir, workdir)
        config = load_config(system_yaml)
        config["algorithm"] = {}

    in_vcf = os.path.join(self.var_dir, "S1-variants.vcf")
    snp_file, indel_file = vcfutils.split_snps_indels(
        in_vcf, self.ref_file, config)
    stem, ext = os.path.splitext(in_vcf)
    merged = "%s-merge%s.gz" % (stem, ext)
    vcfutils.combine_variant_files(
        [snp_file, indel_file], merged, self.ref_file, config)
    for generated in (snp_file, indel_file, merged):
        self._remove_vcf(generated)
def test_docker(install_test_files, data_dir):
    """Run an analysis with code and tools inside a docker container.

    Requires https://github.com/bcbio/bcbio-nextgen-vm

    Calls ``bcbio_vm.py run`` with the quay.io/bcbio/bcbio-vc image, the
    100326_FC6107FAAXX flowcell directory and run_info-bam.yaml.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX")
        run_config = os.path.join(data_dir, "run_info-bam.yaml")
        subprocess.check_call([
            "bcbio_vm.py",
            "--datadir=%s" % data_dir,
            "run",
            "--image=quay.io/bcbio/bcbio-vc",
            "--systemconfig=%s" % system_config,
            "--fcdir=%s" % fc_dir,
            run_config,
        ])
def test_3_vcf_split_combine(self):
    """Split a VCF file into SNPs and indels, then combine back together.

    Uses ``vcfutils.split_snps_indels`` on S1-variants.vcf, recombines the
    two parts with ``vcfutils.combine_variant_files`` into a "-merge" file
    with a ``.gz`` suffix, and finally passes each produced file to
    ``self._remove_vcf``.
    """
    # NOTE(review): block extents were reconstructed from whitespace-collapsed
    # source; confirm that only the config loading belongs inside
    # make_workdir().
    with make_workdir() as workdir:
        config = load_config(
            get_post_process_yaml(self.automated_dir, workdir))
        config["algorithm"] = {}

    source_vcf = os.path.join(self.var_dir, "S1-variants.vcf")
    parts = vcfutils.split_snps_indels(source_vcf, self.ref_file, config)
    snp_file, indel_file = parts
    merge_file = "%s-merge%s.gz" % os.path.splitext(source_vcf)
    vcfutils.combine_variant_files(
        [snp_file, indel_file], merge_file, self.ref_file, config)

    self._remove_vcf(snp_file)
    self._remove_vcf(indel_file)
    self._remove_vcf(merge_file)
def test_11_hla(install_test_files, data_dir):
    """Test HLA typing with OptiType.

    Runs ``optitype.run`` on every file found in the flowcell's ``hla``
    directory, then reads the header and first data row of the resulting
    comma-separated call file and checks the reported alleles.
    """
    from bcbio.hla import optitype

    hla_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX", "hla")
    with make_workdir() as workdir:
        data = {"dirs": {"work": workdir},
                "rgnames": {"sample": "test"},
                "config": {},
                "hla": {"fastq": glob.glob(os.path.join(hla_dir, "*"))}}
        out = optitype.run(data)
        with open(out["hla"]["call_file"]) as in_handle:
            header = in_handle.readline().strip().split(",")
            values = in_handle.readline().strip().split(",")
            # Pair header names with the first row's values. Both lists must
            # go to zip(); passing the values as a second argument to dict()
            # raises TypeError.
            hla_a = dict(zip(header, values))
            assert hla_a["alleles"] == "HLA-A*11:01;HLA-A*24:02", hla_a
def test_docker(install_test_files, data_dir):
    """Run an analysis with code and tools inside a docker container.

    Requires https://github.com/chapmanb/bcbio-nextgen-vm

    Calls ``bcbio_vm.py run`` with the 100326_FC6107FAAXX flowcell directory
    and run_info-bam.yaml; no image is specified on the command line.
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX")
        run_config = os.path.join(data_dir, "run_info-bam.yaml")
        subprocess.check_call([
            "bcbio_vm.py",
            "--datadir=%s" % data_dir,
            "run",
            "--systemconfig=%s" % system_config,
            "--fcdir=%s" % fc_dir,
            run_config,
        ])
def test_2_cwl_docker(install_test_files, data_dir):
    """Create a common workflow language description and run on a Docker installation.

    First generates the CWL description with ``bcbio_vm.py cwl`` and then
    runs it through ``bcbio_vm.py cwlrun cwltool``. Afterwards the last
    command line is printed so it can be re-run by hand. Either command
    exiting non-zero makes ``subprocess.check_call`` raise.
    """
    with make_workdir() as workdir:
        cl = [
            "bcbio_vm.py", "cwl", "../data/automated/run_info-cwl.yaml",
            "--systemconfig", get_post_process_yaml(data_dir, workdir)
        ]
        subprocess.check_call(cl)
        cl = ["bcbio_vm.py", "cwlrun", "cwltool", "run_info-cwl-workflow"]
        subprocess.check_call(cl)
        # Single-argument print calls behave the same on Python 2 and 3;
        # print("") emits the blank line the bare print statement produced.
        print("")
        print("To run with a CWL tool, cd test_automated_output and:")
        print(" ".join(cl))
def test_docker_ipython(install_test_files, data_dir):
    """Run an analysis with code and tools inside a docker container,
    driven via IPython.

    Requires https://github.com/chapmanb/bcbio-nextgen-vm

    Calls ``bcbio_vm.py ipython`` with the 100326_FC6107FAAXX flowcell
    directory and run_info-bam.yaml, followed by the positional arguments
    "lsf" and "localrun".
    """
    with make_workdir() as workdir:
        system_config = get_post_process_yaml(data_dir, workdir)
        fc_dir = os.path.join(data_dir, os.pardir, "100326_FC6107FAAXX")
        run_config = os.path.join(data_dir, "run_info-bam.yaml")
        subprocess.check_call([
            "bcbio_vm.py",
            "--datadir=%s" % data_dir,
            "ipython",
            "--systemconfig=%s" % system_config,
            "--fcdir=%s" % fc_dir,
            run_config,
            "lsf",
            "localrun",
        ])
def test_variant2_pipeline_with_bam_input(install_test_files, data_dir):
    """Run bcbio_nextgen.py with the run_info-bam.yaml configuration.

    Only the system configuration and the run configuration are passed;
    ``subprocess.check_call`` raises if the command exits non-zero.
    """
    with make_workdir() as workdir:
        command = ["bcbio_nextgen.py"]
        command.append(get_post_process_yaml(data_dir, workdir))
        command.append(os.path.join(data_dir, "run_info-bam.yaml"))
        subprocess.check_call(command)