def test_paired_splitter():
    """
    Run the FASTQ splitter on a pair of read files.

    The two bsSeeker mouse FASTQ files are handed to ``fastq_splitter`` and
    the ``output`` entry of the first element of the returned results must
    name an existing, non-empty file.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    reads_1 = data_dir + "bsSeeker.Mouse.SRR892982_1.fastq"
    reads_2 = data_dir + "bsSeeker.Mouse.SRR892982_2.fastq"

    splitter = fastq_splitter()

    in_files = {"fastq1": reads_1, "fastq2": reads_2}
    in_meta = {
        "fastq1": Metadata("data_rnaseq", "fastq", [], None, {'assembly': 'test'}),
        "fastq2": Metadata("data_rnaseq", "fastq", [], None, {'assembly': 'test'}),
    }
    out_files = {"output": reads_1 + ".tar.gz"}

    results = splitter.run(in_files, in_meta, out_files)

    print("WGBS - PAIRED RESULTS:", results)

    archive = results[0]["output"]
    assert os.path.isfile(archive)
    assert os.path.getsize(archive) > 0
def test_rnaseq_pipeline():
    """
    Check that the RNA-seq pipeline produces every expected output file.

    The equivalent command line run with the test data is:

    .. code-block:: none

       runcompss                                                         \\
          --lang=python                                                  \\
          --library_path=${HOME}/bin                                     \\
          --pythonpath=/<pyenv_virtenv_dir>/lib/python2.7/site-packages/ \\
          --log_level=debug                                              \\
          process_rnaseq.py                                              \\
             --taxon_id 9606                                             \\
             --genome /<dataset_dir>/Human.GRCh38.fasta                  \\
             --assembly GRCh38                                           \\
             --file /<dataset_dir>/ERR030872_1.fastq                     \\
             --file2 /<dataset_dir>/ERR030872_2.fastq

    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    assembly = {'assembly': 'GCA_000001405.22'}

    files = {}
    files['cdna'] = data_dir + 'kallisto.Human.GRCh38.fasta'
    files['fastq1'] = data_dir + 'kallisto.Human.ERR030872_1.fastq'
    files['fastq2'] = data_dir + 'kallisto.Human.ERR030872_2.fastq'

    metadata = {
        "cdna": Metadata(
            "Assembly", "fasta", files['cdna'], None, dict(assembly)),
        "fastq1": Metadata(
            "data_rna_seq", "fastq", files['fastq1'], None, dict(assembly)),
        "fastq2": Metadata(
            "data_rna_seq", "fastq", files['fastq2'], None, dict(assembly)),
    }

    files_out = {
        "index": 'tests/data/kallisto.idx',
        "abundance_h5_file": 'tests/data/kallisto.abundance.h5',
        "abundance_tsv_file": 'tests/data/kallisto.abundance.tsv',
        "run_info_file": 'tests/data/kallisto.run_info.json'
    }

    pipeline = process_rnaseq()
    rs_files, rs_meta = pipeline.run(files, metadata, files_out)  # pylint: disable=unused-variable

    # The pipeline must report exactly the four requested outputs
    assert len(rs_files) == 4

    # Each reported file has to be the requested path and hold some data
    for key in rs_files:
        print("RNA SEQ RESULTS FILE:", key)
        produced = rs_files[key]
        assert produced == files_out[key]
        assert os.path.isfile(produced)
        assert os.path.getsize(produced) > 0
def test_bed_02_indexer():
    """
    Run the BED indexer tool on the sorted sample BED file.

    An HDF5 index file is opened in append mode (created if absent) and
    closed again before the tool runs; afterwards the ``sample.bb`` file
    must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    # Touch the HDF5 index so that it exists before the tool is run
    h5py.File(data_dir + "file_index.hdf5", "a").close()

    input_files = {
        "bed": data_dir + "sample.sorted.bed",
        "chrom_file": data_dir + "chrom_GRCh38.size",
        "hdf5_file": data_dir + "file_index.hdf5"
    }
    output_files = {"bb_file": data_dir + "sample.bb"}

    bed_meta = Metadata(
        "data_rnaseq", "bed", "test_bed_location", [], {'assembly': 'test'})
    hdf5_meta = Metadata("data_file", "hdf5", "test_location", [], {})
    metadata = {"bed": bed_meta, "hdf5_file": hdf5_meta}

    indexer = bedIndexerTool({"bed_type": "bed6+4"})
    indexer.run(input_files, metadata, output_files)

    print(data_dir)

    bigbed = data_dir + "sample.bb"
    assert os.path.isfile(bigbed)
    assert os.path.getsize(bigbed) > 0
def run(self, input_files, input_metadata, output_files):
    """
    Run the test_writer task and describe the files it produced.

    Parameters
    ----------
    input_files : dict
        Input file locations. The "matrix" and "features" entries are
        passed to ``self.test_writer``.
    input_metadata : dict
        Metadata matching the input files. The "matrix" and "features"
        entries are read for ``file_path``; "matrix" also supplies
        ``taxon_id``.
    output_files : dict
        Output file locations, keyed "output" and "output_tar". When the
        "output" value is falsy it is replaced, in place, by a default
        name below ``self.configuration['execution']``.

    Returns
    -------
    output_files : dict
        The (possibly updated) output locations, or an empty dict when the
        task reported failure.
    output_metadata : dict
        Metadata for "output" and "output_tar", or an empty dict when the
        task reported failure.
    """
    if not output_files["output"]:
        output_files["output"] = self.configuration['execution'] + '/dinamic_name.tsv'

    results = self.test_writer(
        input_files["matrix"],
        input_files["features"],
        output_files["output"],
        output_files["output_tar"]
    )
    results = compss_wait_on(results)

    if results is False:
        logger.fatal("Test Writer: run failed")
        return {}, {}

    matrix_meta = input_metadata["matrix"]
    features_meta = input_metadata["features"]

    # data_type and file_type are intentionally left unset, as before.
    output_metadata = {
        "output": Metadata(
            file_path=output_files["output"],
            sources=[matrix_meta.file_path, features_meta.file_path],
            taxon_id=matrix_meta.taxon_id,
            meta_data={"tool": "ChAs"}
        ),
        "output_tar": Metadata(
            # Each entry must point at its own file; this one previously
            # repeated the path of "output".
            file_path=output_files["output_tar"],
            sources=[matrix_meta.file_path, features_meta.file_path],
            taxon_id=matrix_meta.taxon_id,
            meta_data={"tool": "ChAs"}
        )
    }

    return (output_files, output_metadata)
def test_trim_galore_pipeline_02():
    """
    Check the Trim Galore pipeline on paired end data.

    The equivalent command line run with the test data is:

    .. code-block:: none

       runcompss                                                         \\
          --lang=python                                                  \\
          --library_path=${HOME}/bin                                     \\
          --pythonpath=/<pyenv_virtenv_dir>/lib/python2.7/site-packages/ \\
          --log_level=debug                                              \\
          process_trim_galore.py                                         \\
             --taxon_id 9606                                             \\
             --fastq1 /<dataset_dir>/bsSeeker.Mouse.SRR892982_1.fastq.gz \\
             --fastq2 /<dataset_dir>/bsSeeker.Mouse.SRR892982_2.fastq.gz

    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    files = {}
    files['fastq1'] = data_dir + 'bsSeeker.Mouse.SRR892982_1.fastq.gz'
    files['fastq2'] = data_dir + 'bsSeeker.Mouse.SRR892982_2.fastq.gz'

    metadata = {
        "fastq1": Metadata("data_wgbs", "fastq", files['fastq1'], None),
        "fastq2": Metadata("data_wgbs", "fastq", files['fastq2'], None)
    }

    files_out = {
        "fastq1_trimmed": 'tests/data/bsSeeker.Mouse.SRR892982_1.trimmed.fastq.gz',
        "fastq2_trimmed": 'tests/data/bsSeeker.Mouse.SRR892982_2.trimmed.fastq.gz',
        "fastq1_report": 'tests/data/bsSeeker.Mouse.SRR892982_1.trimmed.report.txt',
        "fastq2_report": 'tests/data/bsSeeker.Mouse.SRR892982_2.trimmed.report.txt'
    }

    pipeline = process_trim_galore()
    tg_files, tg_meta = pipeline.run(files, metadata, files_out)

    # The returned set of files is expected to hold two entries
    assert len(tg_files) == 2

    print(tg_meta)

    # Every returned file must be the requested path, have metadata and
    # contain some data
    for key in tg_files:
        print("TRIM GALORE RESULTS FILE:", key)
        produced = tg_files[key]
        assert produced == files_out[key]
        assert key in tg_meta
        assert os.path.isfile(produced)
        assert os.path.getsize(produced) > 0
def test_idear_pipeline():
    """
    Test case to ensure that the iDEAR pipeline code works.

    The pipeline is run on two sample and two background BAM files together
    with a BSgenome archive; every file it reports must exist and be
    non-empty.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")

    files = {
        'bsgenome': resource_path + "idear.Human.GCA_000001405.22.22.bsgenome.tar.gz",
        'bam_1': resource_path + 'idear.Human.SRR3714775.bam',
        'bam_2': resource_path + 'idear.Human.SRR3714776.bam',
        'bg_bam_1': resource_path + 'idear.Human.SRR3714777.bam',
        'bg_bam_2': resource_path + 'idear.Human.SRR3714778.bam',
    }

    output_files = {"bigwig": resource_path + "idear.Human.Nup98-GFP.bw"}

    metadata = {
        "bsgenome": Metadata(
            "data_damid_seq", "bsgenome", [], None, {'assembly': 'test'}, 9606),
        "bam_1": Metadata(
            "data_damid_seq", "bam", [], None, {'assembly': 'test'}, 9606),
        "bam_2": Metadata(
            "data_damid_seq", "bam", [], None, {'assembly': 'test'}, 9606),
        "bg_bam_1": Metadata(
            "data_damid_seq", "bam", [], None, {'assembly': 'test'}, 9606),
        "bg_bam_2": Metadata(
            "data_damid_seq", "bam", [], None, {'assembly': 'test'}, 9606),
    }

    # The species name had been mangled to "H**o sapiens"; the proper
    # binomial name is restored here.
    config_param = {
        "idear_title": "Full genome sequences for Homo sapiens (GRCh38)",
        "idear_description": "Full genome sequences for Homo sapiens (GRCh38)",
        "idear_common_name": "Human",
        "idear_organism": "Homo sapiens",
        "idear_provider": "ENA",
        "idear_release_date": "2013",
        "idear_sample_param": "Nup98",
        "idear_background_param": "GFP",
    }

    damidseq_handle = process_idear(config_param)
    damidseq_files, damidseq_meta = damidseq_handle.run(  # pylint: disable=unused-variable
        files, metadata, output_files)

    print(damidseq_files)

    # Add tests for all files created
    for f_out in damidseq_files:
        assert os.path.isfile(damidseq_files[f_out]) is True
        assert os.path.getsize(damidseq_files[f_out]) > 0
def test_bwa_aligner_idamidseq():
    """
    Function to test the BWA MEM aligner on the iDamID-seq FASTQ files.

    Each gzipped FASTQ file is first decompressed next to its archive, then
    aligned in turn; every alignment must leave a non-empty BAM file.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = resource_path + "idear.Human.GCA_000001405.22.fasta"

    fastq_files = [
        resource_path + "idear.Human.SRR3714775.fastq",
        resource_path + "idear.Human.SRR3714776.fastq",
        resource_path + "idear.Human.SRR3714777.fastq",
        resource_path + "idear.Human.SRR3714778.fastq"
    ]

    # Unzip the test data. The gzip stream yields bytes, so the target has
    # to be opened in binary mode ('w' fails on Python 3 and may translate
    # line endings on some platforms).
    for fastq_file in fastq_files:
        with gzip.open(fastq_file + '.gz', 'rb') as fgz_in:
            with open(fastq_file, 'wb') as f_out:
                f_out.write(fgz_in.read())

        assert os.path.isfile(fastq_file) is True
        assert os.path.getsize(fastq_file) > 0

    # Run the aligner for each fastq file
    for fastq_file in fastq_files:
        bam_file = fastq_file.replace(".fastq", ".bam")

        input_files = {
            "genome": genome_fa,
            "index": genome_fa + ".bwa.tar.gz",
            "loc": fastq_file
        }

        output_files = {
            "output": bam_file
        }

        metadata = {
            "genome": Metadata(
                "Assembly", "fasta", genome_fa, None,
                {"assembly": "test"}),
            # NOTE(review): only four positional arguments are given here,
            # so the dict lands in the slot the other calls use for
            # sources - confirm against the Metadata signature.
            "index": Metadata(
                "index_bwa", "", [genome_fa],
                {
                    "assembly": "test",
                    "tool": "bwa_indexer"
                }
            ),
            "loc": Metadata(
                "data_damid_seq", "fastq", fastq_file, None,
                {"assembly": "test"}
            )
        }

        bwa_t = bwaAlignerMEMTool()
        bwa_t.run(input_files, metadata, output_files)

        assert os.path.isfile(bam_file) is True
        assert os.path.getsize(bam_file) > 0
def test_bwa_aligner_mem_paired():
    """
    Function to test the BWA MEM aligner on paired end data.

    The resulting BAM file must exist and be non-empty; it is removed
    again afterwards, with a removal failure only being reported.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = resource_path + "bsSeeker.Mouse.GRCm38.fasta"
    fastq_file_1 = resource_path + "bsSeeker.Mouse.SRR892982_1.fastq"
    fastq_file_2 = resource_path + "bsSeeker.Mouse.SRR892982_2.fastq"

    input_files = {
        "genome": genome_fa,
        "index": genome_fa + ".bwa.tar.gz",
        "loc": fastq_file_1,
        "fastq2": fastq_file_2
    }

    output_files = {
        "output": fastq_file_1.replace(".fastq", "_mem.bam")
    }

    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None,
            {"assembly": "test"}),
        "index": Metadata(
            "index_bwa", "", [genome_fa],
            {
                "assembly": "test",
                "tool": "bwa_indexer"
            }
        ),
        "loc": Metadata(
            "data_wgbs", "fastq", fastq_file_1, None,
            {"assembly": "test"}
        ),
        "fastq2": Metadata(
            "data_wgbs", "fastq", fastq_file_2, None,
            {"assembly": "test"}
        )
    }

    bwa_t = bwaAlignerMEMTool()
    bwa_t.run(input_files, metadata, output_files)

    print(__file__)

    bam_file = resource_path + "bsSeeker.Mouse.SRR892982_1_mem.bam"
    assert os.path.isfile(bam_file) is True
    assert os.path.getsize(bam_file) > 0

    # Clean up. "except X as name" is valid on Python 2.6+ and Python 3,
    # unlike the comma form which is a syntax error on Python 3.
    try:
        os.remove(bam_file)
    except OSError as ose:
        print("Error: %s - %s." % (ose.filename, ose.strerror))
def test_bs_seeker_methylation_caller():
    """
    Run the BS Seeker methylation caller on the filtered mouse alignments.

    The wig, CGmap and ATCGmap outputs must all exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    home = os.path.expanduser('~')

    genome = data_dir + "bsSeeker.Mouse.GRCm38.fasta"
    bam = data_dir + "bsSeeker.Mouse.SRR892982_1.filtered.bam"
    bai = data_dir + "bsSeeker.Mouse.SRR892982_1.filtered.bai"

    input_files = {
        "genome": genome,
        "index": data_dir + "bsSeeker.Mouse.GRCm38.fasta.bt2.tar.gz",
        "bam": bam,
        "bai": bai,
    }

    output_files = {
        "wig_file": data_dir + "bsSeeker.Mouse.SRR892982_1.wig",
        "cgmap_file": data_dir + "bsSeeker.Mouse.SRR892982_1.cgmap",
        "atcgmap_file": data_dir + "bsSeeker.Mouse.SRR892982_1.atcgmap"
    }

    # The index metadata points at the genome FASTA, as in the inputs above
    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome, None, {'assembly' : 'test'}),
        "index": Metadata(
            "index_bowtie", "index", genome, None, {'assembly' : 'test'}),
        "bam": Metadata(
            "data_wgbs", "bam", bam, None, {'assembly' : 'test'}),
        "bai": Metadata(
            "data_wgbs", "bai", bai, None, {'assembly' : 'test'}),
    }

    config_param = {
        "aligner" : "bowtie2",
        "aligner_path" : home + "/lib/bowtie2-2.3.4-linux-x86_64",
        "bss_path" : home + "/lib/BSseeker2"
    }

    caller = bs_seeker_methylation_caller.bssMethylationCallerTool(config_param)
    caller.run(input_files, metadata, output_files)

    for key in ("wig_file", "cgmap_file", "atcgmap_file"):
        produced = output_files[key]
        assert os.path.isfile(produced)
        assert os.path.getsize(produced) > 0
def test_design():
    """
    Check that the CHiCAGO design files are generated.

    Runs ``makeDesignFilesTool`` on the test rmap and baitmap files and
    verifies that the nbpb, npb and poe files exist and are non-empty.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")

    rmap_file = path + "test_run_chicago/test.rmap"
    baitmap_file = path + "test_run_chicago/test.baitmap"

    # "makeDesignFiles_designDir" is deliberately not part of the settings
    config_file = {
        "makeDesignFiles_minFragLen": "150",
        "makeDesignFiles_maxFragLen": "40000",
        "makeDesignFiles_maxLBrownEst": "1500000",
        "makeDesignFiles_binsize": "20000",
        "makeDesignFiles_removeb2b": True,
        "makeDesignFiles_removeAdjacent": True,
        "makeDesignFiles_outfilePrefix": path + "test_run_chicago/test",
        "makeDesignFiles_rmap": rmap_file,
        "makeDesignFiles_baitmap": baitmap_file
    }

    input_files = {"RMAP": rmap_file, "BAITMAP": baitmap_file}

    metadata = {
        "RMAP": Metadata(
            "data_chicago_input", ".rmap",
            path + "test_run_chicago", None, {}, 9606),
        "BAITMAP": Metadata(
            "data_chicago_input", ".baitmap",
            path + "test_run_chicago", None, {}, 9606)
    }

    output_files = {
        "nbpb": path + "test_run_chicago/test.nbpb",
        "npb": path + "test_run_chicago/test.npb",
        "poe": path + "test_run_chicago/test.poe"
    }

    designer = makeDesignFilesTool(config_file)
    designer.run(input_files, metadata, output_files)

    prefix = path + "test_run_chicago/test"
    for suffix in (".nbpb", ".npb", ".poe"):
        design_file = prefix + suffix
        assert os.path.isfile(design_file)
        assert os.path.getsize(design_file) > 0
def test_idear():
    """
    Run the iDEAR tool on the sample and background BAM files.

    The tool is given a BSgenome archive, two sample and two background
    alignments; the bigwig output must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    input_files = {
        "bsgenome": data_dir + "idear.Human.GCA_000001405.22.22.bsgenome.tar.gz",
        "bam_1": data_dir + "idear.Human.SRR3714775.bam",
        "bam_2": data_dir + "idear.Human.SRR3714776.bam",
        "bg_bam_1": data_dir + "idear.Human.SRR3714777.bam",
        "bg_bam_2": data_dir + "idear.Human.SRR3714778.bam",
    }

    output_files = {"bigwig": data_dir + "idear.Human.Nup98-GFP.bw"}

    metadata = {
        "bsgenome": Metadata(
            "data_damid_seq", "bsgenome", [], None, {'assembly' : 'test'}, 9606),
    }
    # All four alignments share the same kind of metadata
    for bam_key in ("bam_1", "bam_2", "bg_bam_1", "bg_bam_2"):
        metadata[bam_key] = Metadata(
            "data_damid_seq", "bam", [], None, {'assembly' : 'test'}, 9606)

    config = {
        "idear_common_name": "Human",
        "idear_sample_param": "Nup98",
        "idear_background_param": "GFP"
    }

    tool = idearTool(config)
    tool.run(input_files, metadata, output_files)

    bigwig = data_dir + "idear.Human.Nup98-GFP.bw"
    assert os.path.isfile(bigwig)
    assert os.path.getsize(bigwig) > 0
def test_process_rmap():
    """
    Run the process_rmap pipeline on the chr21 test genome.

    The pipeline generates the .rmap input file for CHiCAGO; this test
    checks that both Rtree files it writes are non-empty.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    genome = path + "test_baitmap/chr21_hg19.fa"

    configuration = {"renzime": {"HindIII": 'A|AGCTT'}}

    input_files = {"genome_fa": genome}

    metadata = {
        "genome_fa": Metadata("txt", "fasta", genome, None, 9606, ""),
    }

    output_files = {
        "RMAP": path + "test_run_chicago/test.rmap",
        "Rtree_file_dat": path + "test_rmap/rtree_file.dat",
        "Rtree_file_idx": path + "test_rmap/rtree_file.idx",
        "chr_handler": path + "test_baitmap/chr_handler.txt"
    }

    pipeline = process_rmap(configuration)
    pipeline.run(input_files, metadata, output_files)

    # A size of zero is the only falsy value getsize can return
    for key in ("Rtree_file_dat", "Rtree_file_idx"):
        assert os.path.getsize(output_files[key]) > 0
def test_test_pipeline():
    """
    Run the ``process_H_randomizer`` pipeline on the test input file.

    Every file the pipeline reports must exist and be non-empty. To run
    this test from the command line:

    .. code-block:: none

       pytest tests/test_pipeline_test.py

    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    source = data_dir + "test_input.txt"
    input_files = {"input": source}

    metadata = {
        "input": Metadata("text", "txt", source, None, {"assembly": "test"})
    }

    files_out = {
        "output": data_dir + 'test.txt',
    }

    pipeline = process_H_randomizer()
    tt_files, _ = pipeline.run(input_files, metadata, files_out)

    # Each reported file has to be present and hold some data
    for key in tt_files:
        print("GENOME RESULTS FILE:", key)
        produced = tt_files[key]
        assert os.path.isfile(produced)
        assert os.path.getsize(produced) > 0
def test_bamqc():
    """
    Run the bamQC tool on the MACS2 human BAM file.

    The HTML report must exist and be non-empty. To run this test from the
    command line:

    .. code-block:: none

       pytest tests/test_bamqc.py

    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    bam_file = data_dir + "macs2.Human.bam"
    report = data_dir + "macs2.Human_bamqc.html"

    input_files = {"bam": bam_file}
    output_files = {"html": report}

    metadata = {
        "bam": Metadata(
            "data_chip_seq", "bam", bam_file, None, {"assembly": "test"})
    }

    tool = bamQC()
    tool.run(input_files, metadata, output_files)

    assert os.path.isfile(output_files["html"])
    assert os.path.getsize(output_files["html"]) > 0
def test_gem_indexer():
    """
    Run the GEM indexer on the decompressed human test genome.

    The gzipped FASTA is unpacked first; the resulting GEM index must then
    exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = data_dir + "tb.Human.GCA_000001405.22.fasta"

    # Unpack the genome next to its archive
    with gzip.open(genome_fa + '.gz', 'rb') as packed, open(genome_fa, 'wb') as unpacked:
        unpacked.write(packed.read())

    genome_gem_idx = data_dir + "tb.Human.GCA_000001405.22.fasta.gem.gz"

    input_files = {"genome": genome_fa}
    output_files = {"index": genome_gem_idx}

    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None, {'assembly': 'test'}),
    }

    print(input_files, output_files)

    indexer = gemIndexerTool()
    indexer.run(input_files, metadata, output_files)

    assert os.path.isfile(genome_gem_idx)
    assert os.path.getsize(genome_gem_idx) > 0
def test_bowtie_indexer_wgbs():
    """
    Run the Bowtie indexer on the bsSeeker mouse genome.

    The index archive must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = data_dir + "bsSeeker.Mouse.GRCm38.fasta"

    input_files = {"genome": genome_fa}
    output_files = {"index": genome_fa + ".bt2.tar.gz"}

    genome_meta = Metadata(
        "Assembly", "fasta", genome_fa, None, {'assembly' : 'test'})
    metadata = {"genome": genome_meta}

    indexer = bowtie_indexer.bowtieIndexerTool()
    indexer.run(input_files, metadata, output_files)

    index_file = output_files["index"]
    assert os.path.isfile(index_file)
    assert os.path.getsize(index_file) > 0
def test_bwa_indexer_idear():
    """
    Run the BWA indexer on the iDEAR human genome.

    The index archive must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = data_dir + "idear.Human.GCA_000001405.22.fasta"

    input_files = {"genome": genome_fa}
    output_files = {"index": genome_fa + ".bwa.tar.gz"}

    genome_meta = Metadata(
        "Assembly", "fasta", genome_fa, None, {'assembly': 'test'})
    metadata = {"genome": genome_meta}

    print(input_files, output_files)

    indexer = bwaIndexerTool()
    indexer.run(input_files, metadata, output_files)

    index_file = data_dir + "idear.Human.GCA_000001405.22.fasta.bwa.tar.gz"
    assert os.path.isfile(index_file)
    assert os.path.getsize(index_file) > 0
def test_rmap_tool():
    """
    Run ``makeRmapFile`` to build the .rmap input for CHiCAGO.

    Both Rtree files written by the tool must be non-empty.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    genome = path + "test_baitmap/chr21_hg19.fa"

    configuration = {
        "chic_RE_name": "HindIII",
        "chic_RE_sequence": "A|AGCTT"
    }

    input_files = {"genome_fa": genome}

    metadata = {
        "genome_fa": Metadata("txt", "fasta", genome, None, 9606, ""),
    }

    output_files = {
        "RMAP": path + "test_run_chicago/test.rmap",
        "Rtree_file_dat": path + "test_rmap/rtree_file.dat",
        "Rtree_file_idx": path + "test_rmap/rtree_file.idx",
        "chr_handler": path + "test_baitmap/chr_handler.txt"
    }

    tool = makeRmapFile(configuration)
    tool.run(input_files, metadata, output_files)

    for key in ("Rtree_file_dat", "Rtree_file_idx"):
        assert os.path.getsize(output_files[key]) > 0
def test_biobambam_chipseq():
    """
    Run the BioBamBam filter on the ChIP-seq alignment.

    The filtered BAM file must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    input_files = {
        "input": data_dir + "macs2.Human.DRR000150.22_aln.bam"
    }
    output_files = {
        "output": data_dir + "macs2.Human.DRR000150.22_aln_filtered.bam"
    }

    metadata = {
        "input": Metadata(
            "data_chipseq", "fastq", [], None, {'assembly': 'test'}),
    }

    bam_filter = biobambam_filter.biobambam()
    bam_filter.run(input_files, metadata, output_files)

    filtered = data_dir + "macs2.Human.DRR000150.22_aln_filtered.bam"
    assert os.path.isfile(filtered)
    assert os.path.getsize(filtered) > 0
def test_biobambam_idamidseq():
    """
    Run the BioBamBam filter on each iDamID-seq alignment.

    Each of the four BAM files is filtered in turn and its
    ``_filtered.bam`` counterpart must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    bam_files = [
        data_dir + "idear.Human.SRR3714775.bam",
        data_dir + "idear.Human.SRR3714776.bam",
        data_dir + "idear.Human.SRR3714777.bam",
        data_dir + "idear.Human.SRR3714778.bam"
    ]

    for bam_file in bam_files:
        filtered = bam_file.replace(".bam", "_filtered.bam")

        input_files = {"input": bam_file}
        output_files = {"output": filtered}

        metadata = {
            "input": Metadata(
                "data_damid_seq", "bam", [], None, {'assembly': 'test'}),
        }

        bam_filter = biobambam_filter.biobambam()
        bam_filter.run(input_files, metadata, output_files)

        assert os.path.isfile(filtered)
        assert os.path.getsize(filtered) > 0
def run(self, input_files, metadata, output_files):
    """
    Unpack the HiCUP archive and pass its BAM file to bam2chicago.

    Parameters
    ----------
    input_files : dict
        hicup_outdir_tar : str
            Archive that is extracted next to itself. It is expected to
            contain a folder named like the archive without its final
            extension.
        RMAP : str
        BAITMAP : str
    metadata : dict
        Metadata for "RMAP", "BAITMAP" and "hicup_outdir_tar"; their
        ``file_path`` values become the sources of the output and
        "hicup_outdir_tar" also supplies the ``taxon_id``.
    output_files : dict
        chinput : str
            Location of the file to generate; its directory is created
            when it does not exist yet.

    Returns
    -------
    output_files : dict
        The output locations as passed in.
    output_metadata : dict
        Metadata for the "chinput" entry.
    """
    out_dir = os.path.dirname(output_files["chinput"])
    # An empty directory part means "current directory": nothing to create.
    if out_dir and not os.path.isdir(out_dir):
        logger.info("creating output directory")
        os.makedirs(out_dir)

    tar_path = input_files["hicup_outdir_tar"]
    tar_dir, tar_name = os.path.split(tar_path)

    # Strip only the final extension. The earlier "".join(split(".")[:-1])
    # also deleted every other dot in the name, and dir + "/" pointed at
    # the filesystem root when the archive path had no directory part.
    folder_name = os.path.join(tar_dir, os.path.splitext(tar_name)[0])

    # NOTE(review): extractall trusts the member paths stored in the
    # archive - only use it on archives produced by this pipeline.
    with tarfile.open(tar_path) as tar:
        tar.extractall(path=tar_dir)

    # NOTE(review): the names of all BAM files found are concatenated, so
    # this only yields a usable path when the folder holds exactly one.
    bam_file = "".join([
        file_hdl for file_hdl in os.listdir(folder_name)
        if file_hdl.endswith(".bam")
    ])

    path_bam = os.path.join(folder_name, bam_file)

    # The result is not waited on (no compss_wait_on), as before.
    self.bam2chicago(
        path_bam,
        input_files["RMAP"],
        input_files["BAITMAP"],
        output_files["chinput"])

    output_metadata = {
        "chinput": Metadata(
            data_type="CHiC_data",
            file_type="tar",
            file_path=output_files["chinput"],
            sources=[
                metadata["RMAP"].file_path,
                metadata["BAITMAP"].file_path,
                metadata["hicup_outdir_tar"].file_path
            ],
            taxon_id=metadata["hicup_outdir_tar"].taxon_id,
            meta_data={"tool": "bam2chicago_tool"})
    }

    return output_files, output_metadata
def test_bs_seeker_filter_02():
    """
    Run the BS Seeker read filter on the second mouse FASTQ file.

    The filtered FASTQ file must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    home = os.path.expanduser('~')

    fastq = data_dir + "bsSeeker.Mouse.SRR892982_2.fastq"

    input_files = {"fastq": fastq}
    output_files = {
        "fastq_filtered": data_dir + "bsSeeker.Mouse.SRR892982_2.filtered.fastq"
    }

    metadata = {
        "fastq": Metadata(
            "data_wgbs", "fastq", fastq, None, {'assembly': 'test'})
    }

    config_param = {
        "aligner": "bowtie2",
        "aligner_path": home + "/lib/bowtie2-2.3.4-linux-x86_64",
        "bss_path": home + "/lib/BSseeker2"
    }

    read_filter = bs_seeker_filter.filterReadsTool(config_param)
    read_filter.run(input_files, metadata, output_files)

    filtered = output_files["fastq_filtered"]
    assert os.path.isfile(filtered)
    assert os.path.getsize(filtered) > 0
def test_testTool():
    """
    Run ``testTool`` on the test input file.

    The output file must exist and be non-empty. To run this test from the
    command line:

    .. code-block:: none

       pytest tests/test_tool.py

    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    source = data_dir + "test_input.txt"

    input_files = {"input": source}
    output_files = {"output": data_dir + "test_output.txt"}

    metadata = {
        "input": Metadata("text", "txt", source, None, {"assembly": "test"})
    }

    tool = testTool()
    tool.run(input_files, metadata, output_files)

    produced = output_files["output"]
    assert os.path.isfile(produced)
    assert os.path.getsize(produced) > 0
def test_bwa_aligner_aln():
    """
    Function to test the BWA ALN aligner on the ChIP-seq FASTQ file.

    The resulting BAM file must exist and be non-empty; it is removed
    again afterwards, with a removal failure only being reported.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = resource_path + "macs2.Human.GCA_000001405.22.fasta"
    fastq_file = resource_path + "macs2.Human.DRR000150.22.fastq"

    input_files = {
        "genome": genome_fa,
        "index": genome_fa + ".bwa.tar.gz",
        "loc": fastq_file
    }

    output_files = {
        "output": fastq_file.replace(".fastq", "_aln.bam")
    }

    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None,
            {"assembly": "test"}),
        "index": Metadata(
            "index_bwa", "", [genome_fa],
            {
                "assembly": "test",
                "tool": "bwa_indexer"
            }
        ),
        "loc": Metadata(
            "data_chip_seq", "fastq", fastq_file, None,
            {"assembly": "test"}
        )
    }

    bwa_t = bwaAlignerTool()
    bwa_t.run(input_files, metadata, output_files)

    print(__file__)

    bam_file = resource_path + "macs2.Human.DRR000150.22_aln.bam"
    assert os.path.isfile(bam_file) is True
    assert os.path.getsize(bam_file) > 0

    # Clean up. "except X as name" is valid on Python 2.6+ and Python 3,
    # unlike the comma form which is a syntax error on Python 3.
    try:
        os.remove(bam_file)
    except OSError as ose:
        print("Error: %s - %s." % (ose.filename, ose.strerror))
def test_bs_seeker_aligner():
    """
    Run the BS Seeker aligner on the filtered paired mouse reads.

    Both the BAM file and its index must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    home = os.path.expanduser('~')

    genome = data_dir + "bsSeeker.Mouse.GRCm38.fasta"
    reads_1 = data_dir + "bsSeeker.Mouse.SRR892982_1.filtered.fastq"
    reads_2 = data_dir + "bsSeeker.Mouse.SRR892982_2.filtered.fastq"

    input_files = {
        "genome": genome,
        "index": data_dir + "bsSeeker.Mouse.GRCm38.fasta.bt2.tar.gz",
        "fastq1": reads_1,
        "fastq2": reads_2,
    }

    output_files = {
        "bam": data_dir + "bsSeeker.Mouse.SRR892982_1.filtered.bam",
        "bai": data_dir + "bsSeeker.Mouse.SRR892982_1.filtered.bai"
    }

    # The index metadata points at the genome FASTA, as in the inputs above
    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome, None, {'assembly': 'test'}),
        "index": Metadata(
            "index_bowtie", "index", genome, None, {'assembly': 'test'}),
        "fastq1": Metadata(
            "data_wgbs", "fastq", reads_1, None, {'assembly': 'test'}),
        "fastq2": Metadata(
            "data_wgbs", "fastq", reads_2, None, {'assembly': 'test'})
    }

    config_param = {
        "aligner": "bowtie2",
        "aligner_path": home + "/lib/bowtie2-2.3.4-linux-x86_64",
        "bss_path": home + "/lib/BSseeker2"
    }

    aligner = bs_seeker_aligner.bssAlignerTool(config_param)
    aligner.run(input_files, metadata, output_files)

    for key in ("bam", "bai"):
        produced = output_files[key]
        assert os.path.isfile(produced)
        assert os.path.getsize(produced) > 0
def test_bam2chicago():
    """
    Run ``bam2chicagoTool`` on the HiCUP test archive.

    The chinput file must exist and be non-empty.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    hicup_tar = path + "test_hicup/output.tar"

    input_files = {
        "RMAP": path + "test_run_chicago/test.rmap",
        "BAITMAP": path + "test_run_chicago/test.baitmap",
        "hicup_outdir_tar": hicup_tar,
        "fastq1": path + "/test_truncater/SRR3535023_1_chr21_new.fastq",
        "fastq2": path + "/test_truncater/SRR3535023_2_chr21_new.fastq"
    }

    output_files = {
        "chinput": path + "test_bam2chicago_tool/output_chinput.chinput",
        "hicup_outdir_tar": path + "test_hicup/output.tar"
    }

    rmap_meta = Metadata(
        "TXT", ".rmap", path + "/h19_chr20and21_chr.rmap", None, {}, 9606)
    baitmap_meta = Metadata(
        "TXT", ".baitmap", path + "/h19_chr20and21.baitmap_4col_chr.txt",
        None, {}, 9606)
    hicup_meta = Metadata(
        "TAR", "CHiC_data", input_files["hicup_outdir_tar"],
        {
            "fastq1": "SRR3535023_1.fastq",
            "fastq2": "SRR3535023_2.fastq",
            "genome": "human_hg19"
        },
        9606)
    genome_meta = Metadata(
        "TXT", "RG", path + "/h19_chr20and21.baitmap_4col_chr.txt",
        None, {}, 9606)

    metadata = {
        "RMAP": rmap_meta,
        "BAITMAP": baitmap_meta,
        "hicup_outdir_tar": hicup_meta,
        "genome_fa": genome_meta,
    }

    configuration = {
        "aligner": "tadbit",
        "execution": path + "test_baitmap"
    }

    tool = bam2chicagoTool(configuration)
    tool.run(input_files, metadata, output_files)

    chinput = output_files["chinput"]
    assert os.path.isfile(chinput)
    assert os.path.getsize(chinput) > 0
def run(self, input_files, input_metadata, output_files):
    """
    Run the iNPS peak calling task on a BAM file.

    Parameters
    ----------
    input_files : dict
        The "bam" entry is the alignment file handed to the task.
    input_metadata : dict
        The "bam" entry supplies the data type, file path, taxon id and
        assembly recorded for the output.
    output_files : dict
        The "bed" entry is the location of the file to generate.

    Returns
    -------
    output_files : dict
        The output locations as passed in.
    output_metadata : dict
        Metadata for the "bed" entry.
    """
    # (configuration key, command line flag), in the order they are emitted
    optional_flags = (
        ("inps_sp_param", "--s_p"),
        ("inps_pe_max_param", "--pe_max"),
        ("inps_pe_min_param", "--pe_min"),
    )

    command_params = []
    for config_key, flag in optional_flags:
        if config_key in self.configuration:
            command_params.extend([flag, str(self.configuration[config_key])])

    results = self.inps_peak_calling(
        input_files["bam"], output_files["bed"], command_params)
    results = compss_wait_on(results)

    bam_meta = input_metadata["bam"]
    bed_meta = Metadata(
        data_type=input_metadata['bam'].data_type,
        file_type="BED",
        file_path=output_files["bed"],
        sources=[bam_meta.file_path],
        taxon_id=bam_meta.taxon_id,
        meta_data={
            "assembly": bam_meta.meta_data["assembly"],
            "tool": "inps"
        })

    output_metadata = {"bed": bed_meta}

    return (output_files, output_metadata)
def run(self, input_files, metadata, output_files):
    """
    Digest the genome and run the HiCUP alignment and filtering step.

    Parameters
    ----------
    input_files : dict
        Locations of "genome_fa", "bowtie_gen_idx", "fastq1" and "fastq2".
    metadata : dict
        Metadata for "genome_fa", "fastq1" and "fastq2"; their
        ``file_path`` values become the sources of the output and
        "genome_fa" also supplies the ``taxon_id``.
    output_files : dict
        The "hicup_outdir_tar" entry is the archive to generate; its
        directory is created when it does not exist yet.

    Returns
    -------
    output_files : dict
        The output locations as passed in.
    output_metadata : dict
        Metadata for the "hicup_outdir_tar" entry.
    """
    output_dir = os.path.dirname(output_files["hicup_outdir_tar"])
    # An empty directory part means "current directory": nothing to create.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Several restriction enzymes are passed on as one ":"-joined string
    re_enzyme = self.configuration["hicup_renzyme"]
    if isinstance(re_enzyme, list):
        re_enzyme = ":".join(re_enzyme)

    if "renzyme_name2" in self.configuration:
        renzyme_name2 = self.configuration["renzyme_name2"]
    else:
        renzyme_name2 = "enzyme2"

    genome_d = self.digest_genome(
        self.configuration["genome_name"],
        re_enzyme,
        input_files["genome_fa"],
        renzyme_name2)

    parameters_hicup = self.get_hicup_params(self.configuration)

    # The result is not waited on (no compss_wait_on), as before.
    # NOTE(review): genome_d is removed right after this call - confirm
    # the call has finished with the file by then.
    self.hicup_alig_filt(  # pylint: disable=too-many-locals,too-many-arguments
        parameters_hicup,
        genome_d,
        input_files["bowtie_gen_idx"],
        input_files["genome_fa"],
        input_files["fastq1"],
        input_files["fastq2"],
        output_files["hicup_outdir_tar"])

    os.remove(genome_d)

    output_metadata = {
        "hicup_outdir_tar": Metadata(
            data_type="data_CHiC",
            file_type="TAR",
            file_path=output_files["hicup_outdir_tar"],
            # Both read files are sources; fastq1 used to be listed twice
            # and fastq2 not at all.
            sources=[
                metadata["genome_fa"].file_path,
                metadata["fastq1"].file_path,
                metadata["fastq2"].file_path
            ],
            taxon_id=metadata["genome_fa"].taxon_id,
            meta_data={"tool": "hicup_tool"})
    }

    return output_files, output_metadata
def test_process_chicago():
    """
    Run the CHiCAGO pipeline on the bundled test data.

    The output archive must exist and be non-empty.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")

    input_files = {
        "chinput": path + "test_run_chicago/data_chicago/GM_rep1.chinput",
        "setting_file": path + "test_run_chicago/data_chicago/sGM12878.settingsFile",
        "rmap_chicago": path + "test_run_chicago/data_chicago/h19_chr20and21.rmap",
        "baitmap_chicago": path + "test_run_chicago/data_chicago/h19_chr20and21.baitmap",
        "nbpb_chicago": path + "test_run_chicago/data_chicago/h19_chr20and21.nbpb",
        "poe_chicago": path + "test_run_chicago/data_chicago/h19_chr20and21.poe",
    }

    output_files = {
        "output": path + "test_run_chicago/data_chicago/out_run_chicago.tar",
    }

    chinput_meta = Metadata("data_chicago", "chinput", [], None, None, 9606)
    metadata = {"chinput": chinput_meta}

    # Unused options are passed as the literal string "None"
    config = {
        "chicago_design_dir": path + "/test_run_chicago/data_chicago",
        "chicago_print_memory": "None",
        "chicago_out_prefix": "output_test",
        "chicago_cutoff": "5",
        "chicago_export_format": "washU_text",
        "chicago_export_order": "None",
        "chicago_rda": "None",
        "chicago_save_df_only": "None",
        "chicago_examples_prox_dist": "1e6",
        "chicago_examples_full_range": "None",
        "chicago_en_feat_files": "None",
        "chicago_en_min_dist": "0",
        "chicago_en_max_dist": "1e6",
        "chicago_en_full_cis_range": "None",
        "chicago_en_sample_no": "100",
        "chicago_en_trans": "None",
        "chicago_features_only": "None"
    }

    pipeline = process_run_chicago(config)
    pipeline.run(input_files, metadata, output_files)

    archive = output_files["output"]
    assert os.path.isfile(archive)
    assert os.path.getsize(archive) > 0
def test_bwa_aligner_mnaseseq():
    """
    Run the BWA aligner on the MNase-seq mouse FASTQ file.

    The resulting BAM file must exist and be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = data_dir + "inps.Mouse.GRCm38.fasta"
    fastq_file = data_dir + "inps.Mouse.DRR000386.fastq"

    input_files = {
        "genome": genome_fa,
        "index": genome_fa + ".bwa.tar.gz",
        "loc": fastq_file
    }

    output_files = {"output": fastq_file.replace(".fastq", ".bam")}

    genome_meta = Metadata(
        "Assembly", "fasta", genome_fa, None, {"assembly": "test"})
    index_meta = Metadata(
        "index_bwa", "", [genome_fa],
        {"assembly": "test", "tool": "bwa_indexer"})
    reads_meta = Metadata(
        "data_chip_seq", "fastq", fastq_file, None, {"assembly": "test"})

    metadata = {
        "genome": genome_meta,
        "index": index_meta,
        "loc": reads_meta
    }

    aligner = bwaAlignerTool()
    aligner.run(input_files, metadata, output_files)

    print(__file__)

    bam_file = data_dir + "inps.Mouse.DRR000386.bam"
    assert os.path.isfile(bam_file)
    assert os.path.getsize(bam_file) > 0