def test_converter():
    """Check that the CLI and ``converter.main()`` produce the same FASTA.

    The same FASTQ input is converted twice, once through the ``bioconvert``
    shell command and once through the in-process entry point, and the
    checksums of the two outputs are compared.
    """
    source = bioconvert_data("test_fastq2fasta_v1.fastq")
    with TempFile(suffix=".fasta") as cli_out, \
            TempFile(suffix=".fasta") as api_out:
        # First pass: run the command-line tool in a child shell.
        command = "bioconvert fastq2fasta {} {} --force".format(
            source, cli_out.name)
        process = subprocess.Popen(command, shell=True)
        assert process.wait() == 0

        # Second pass: same conversion through main() with a substituted argv.
        import sys
        sys.argv = [
            "bioconvert", "fastq2fasta", source, api_out.name, "--force"
        ]
        converter.main()

        assert md5(cli_out.name) == md5(api_out.name)
def test_verbose():
    """Run fastq2fasta through ``converter.main()`` with both verbosity flags.

    The short (``-v``) and long (``--verbosity``) spellings are exercised in
    turn, each with the level ``CRITICAL``.
    """
    import sys

    source = bioconvert_data("test_fastq2fasta_v1.fastq")
    with TempFile(suffix=".tt") as target:
        for verbosity_flag in ("-v", "--verbosity"):
            sys.argv = [
                "bioconvert", verbosity_flag, "CRITICAL", "fastq2fasta",
                source, target.name, "--force"
            ]
            converter.main()
def test_conv(method):
    """Convert FASTA to GenBank with *method* and compare entries pairwise.

    :param method: name of the conversion method passed to the converter.
    """
    fasta_path = bioconvert_data("test_measles.fa")
    with TempFile(suffix=".gbk") as gbk_file:
        to_genbank = FASTA2GENBANK(fasta_path, gbk_file.name)
        to_genbank(method=method)

        # Re-read both files and walk them in lockstep.
        fasta_reader = Fasta(fasta_path)
        genbank_reader = Genbank(gbk_file.name)
        pairs = zip(fasta_reader.read(), genbank_reader.read())
        for fasta_record, genbank_record in pairs:
            assert fasta_record["id"] == genbank_record["LOCUS"]["id"]
            assert fasta_record["comment"] in genbank_record["DEFINITION"]
            assert fasta_record["value"] == genbank_record["ORIGIN"].upper()
def test_in_gz(method):
    """Convert gzipped FASTQ samples to FASTA and compare with references.

    :param method: name of the conversion method passed to ``Fastq2Fasta``.

    Both the reference and the produced FASTA are passed through
    ``Fastq2Fasta.unwrap_fasta`` before checksumming, because some methods
    may emit multi-line FASTA.
    """
    sample_names = (
        "test_fastq2fasta_v1", "sample_v2", "sample_v3", "sample_v4")
    for sample_name in sample_names:
        gz_input = bioconvert_data("{}.fastq.gz".format(sample_name))
        reference_fasta = bioconvert_data("{}.fasta".format(sample_name))

        # Checksum of the unwrapped reference output.
        with TempFile(suffix=".fasta") as reference_flat:
            Fastq2Fasta.unwrap_fasta(
                reference_fasta, reference_flat.name, strip_comment=True)
            reference_md5 = md5(reference_flat.name)

        # One temporary file receives the converter output, the other its
        # unwrapped version used for the comparison.
        with TempFile(suffix=".fasta") as produced, \
                TempFile(suffix=".fasta") as produced_flat:
            to_fasta = Fastq2Fasta(gz_input, produced.name)
            to_fasta(method=method)
            Fastq2Fasta.unwrap_fasta(
                produced.name, produced_flat.name, strip_comment=True)
            assert md5(produced_flat.name) == reference_md5, \
                "{} failed for {}".format(method, sample_name)
def test_conv():
    """Convert BAM to BED with the default and ``bedtools`` methods.

    Each run uses a fresh ``BAM2BED`` instance writing to the same temporary
    file, and the output is checked against a fixed checksum.
    """
    bam_path = bioconvert_data("test_measles.sorted.bam")
    # The md5 cannot be tested on a gzip file, only on the original data;
    # this checksum was computed from the unzipped version of
    # biokit/data/converters/measles.bed
    expected_md5 = "84702e19ba3a27900f271990e0cc72a0"

    with TempFile(suffix=".bed") as bed_file:
        for call_kwargs in ({}, {"method": "bedtools"}):
            to_bed = BAM2BED(bam_path, bed_file.name)
            to_bed(**call_kwargs)
            assert md5(bed_file.name) == expected_md5
def test_bam2tsv():
    """Convert BAM to TSV with ``pysam`` then ``samtools`` and check the md5.

    A single ``BAM2TSV`` instance is reused for both methods; each run must
    produce the same checksum.
    """
    bam_path = bioconvert_data("test_measles.sorted.bam")
    expected_md5 = "4c5f3336be8a03c95a6c56be28581fb7"

    with TempFile(suffix=".tsv") as tsv_file:
        to_tsv = BAM2TSV(bam_path, tsv_file.name)
        for method_name in ("pysam", "samtools"):
            to_tsv(method=method_name)
            # Check that the output is correct with a checksum
            assert md5(tsv_file.name) == expected_md5
def test_benchmark():
    """Exercise ``Benchmark`` on a BAM2BED converter and on invalid input.

    A ``Benchmark`` built from a converter instance must run its methods and
    plot. Building one from a plain string must raise
    ``NotImplementedError``.
    """
    input_file = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as fout:
        conv = BAM2BED(input_file, fout.name)
        bench = Benchmark(conv)
        bench.run_methods()
        bench.plot()

        # A string is not a converter instance. Any exception other than
        # NotImplementedError is left to propagate so the test report shows
        # the real error instead of an anonymous ``assert False``.
        try:
            Benchmark("BAM2BED")
        except NotImplementedError:
            pass
        else:
            raise AssertionError(
                "Benchmark('BAM2BED') should raise NotImplementedError")
def test_conv(method):
    """Convert GenBank to FASTA with *method* and compare entries pairwise.

    :param method: name of the conversion method passed to the converter.
    """
    genbank_path = bioconvert_data("JB409847.gbk", where)
    with TempFile(suffix=".fasta") as fasta_file:
        to_fasta = GENBANK2FASTA(genbank_path, fasta_file.name)
        to_fasta(method=method)

        # Read the produced FASTA and the GenBank input side by side.
        fasta_reader = Fasta(fasta_file.name)
        genbank_reader = Genbank(genbank_path)
        pairs = zip(fasta_reader.read(), genbank_reader.read())
        for fasta_record, genbank_record in pairs:
            locus_id = genbank_record["LOCUS"]["id"]
            assert fasta_record["id"].startswith(locus_id)
            assert fasta_record["comment"] in genbank_record["DEFINITION"]
            assert fasta_record["value"].lower() == genbank_record["ORIGIN"]
def test_indirect_conversion():
    """Run fastq2clustal with indirect conversion explicitly allowed.

    Both spellings of the option (``--allow-indirect-conversion`` and ``-a``)
    are passed to ``converter.main()`` in turn, with DEBUG verbosity.
    """
    import sys

    fastq_path = bioconvert_data("ERR3295124.fastq")
    with TempFile(suffix=".clustal") as clustal_file:
        for indirect_flag in ("--allow-indirect-conversion", "-a"):
            sys.argv = [
                "bioconvert", "fastq2clustal", fastq_path, clustal_file.name,
                "--force", indirect_flag, "-v", "DEBUG"
            ]
            converter.main()
def test_indirect_conversion_without_argument():
    """Check that an indirect conversion is refused without explicit consent.

    ``converter.main()`` is called for fastq2clustal without the
    allow-indirect-conversion option and must exit with status code 2.
    """
    import sys

    infile = bioconvert_data("fastqutils_1.fastq")
    with TempFile(suffix=".clustal") as tempfile:
        sys.argv = [
            "bioconvert", "fastq2clustal", infile, tempfile.name, "--force"
        ]
        # For now we want the user to explicitly indicate that (s)he agrees
        # with an indirect conversion. Unexpected exceptions propagate so the
        # real failure is reported rather than a bare ``assert False``.
        try:
            converter.main()
        except SystemExit as exc:
            assert exc.code == 2
        else:
            raise AssertionError(
                "converter.main() should exit when the indirect conversion "
                "is not explicitly allowed")
def test_sra2fastq_gz_single(method):
    """Convert an SRA accession to gzipped FASTQ and compare with a reference.

    :param method: name of the conversion method passed to ``SRA2FASTQ``.

    The gzipped output is decompressed to a sibling file whose checksum is
    compared with the reference FASTQ. The decompressed file is removed
    afterwards so the test leaves nothing behind.
    """
    infile = "SRR6477205"
    outfile = bioconvert_data("SRR6477205.fastq")
    with TempFile(suffix=".fastq.gz") as tempfile:
        converter = SRA2FASTQ(infile, tempfile.name, True)
        converter(method=method)

        # splitext only strips ".gz", so the decompressed file keeps the
        # original path layout "<tmp>.fastq.fastq".
        outbasename = os.path.splitext(tempfile.name)[0]
        decompressed = outbasename + ".fastq"
        try:
            with gzip.open(tempfile.name, 'rb') as f_in, \
                    open(decompressed, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

            # Check that the output is correct with a checksum
            assert md5(decompressed) == md5(outfile)
        finally:
            # TempFile only manages tempfile.name; clean up the extra file.
            if os.path.exists(decompressed):
                os.remove(decompressed)
def test_conv(method):
    """Convert EMBL to GenBank with *method* and check the result.

    :param method: ``"biopython"`` (checksum verified) or ``"squizz"``
        (conversion is run but no checksum is verified yet).
    :raises NotImplementedError: for any other method name, after the
        conversion has been attempted.
    """
    embl_path = bioconvert_data("JB409847.embl")
    with TempFile(suffix=".gbk") as gbk_file:
        to_genbank = EMBL2GENBANK(embl_path, gbk_file.name)
        to_genbank(method=method)

        if method == "squizz":
            # TODO: no checksum yet; embl input is not understood by squizz
            # if generated by biopython
            return
        if method != "biopython":
            raise NotImplementedError

        # Check that the output is correct with a checksum
        assert md5(gbk_file.name) == "cdd34902975a68e58ad5f105b44ff495"
def install_tool(self, executable):
    """Install the given tool if the executable is not already present.

    The installation script is looked up with ``bioconvert_data`` under the
    name ``install_<executable>.sh`` (``where="../misc"``) and run with
    ``sh``.

    :param executable: name of the executable to install
    :return: nothing
    """
    if shutil.which(executable) is None:
        # Imported here because it is only needed when an installation
        # actually has to be performed.
        from bioconvert import bioconvert_data

        logger.info("Installing tool : %s", executable)
        script = bioconvert_data(
            'install_' + executable + '.sh', where="../misc")
        subprocess.call(['sh', script])
def test_bam2tsv():
    """Convert BAM to TSV with ``pysam`` then ``samtools`` and check the md5.

    The checksum of the ``pysam`` output is not verified on Python 3.6.
    """
    bam_path = bioconvert_data("test_measles.sorted.bam")
    expected_md5 = "4c5f3336be8a03c95a6c56be28581fb7"

    with TempFile(suffix=".tsv") as tsv_file:
        to_tsv = BAM2TSV(bam_path, tsv_file.name)

        to_tsv(method="pysam")
        # Impossible to track down why this check fails on python3.6: it
        # looks like pytest changes the behaviour of the BAM2TSV class when
        # saving the file. Only the header is saved and the content of the
        # BAM file is not.
        running_on_py36 = (
            sys.version_info[0] == 3 and sys.version_info[1] == 6)
        if not running_on_py36:
            assert md5(tsv_file.name) == expected_md5

        to_tsv(method="samtools")
        assert md5(tsv_file.name) == expected_md5
def install_tool(self, executable):
    """Run the installation script of a tool that is not yet available.

    Nothing is done when ``shutil.which`` already finds the executable.
    Otherwise the script ``install_<executable>.sh`` is located with
    ``bioconvert_data`` (``where="../misc"``) and executed with ``sh``.

    :param executable: executable to install
    :return: nothing
    """
    from bioconvert import bioconvert_data

    # Guard clause: the tool is already present.
    if shutil.which(executable) is not None:
        return

    logger.info("Installing tool : " + executable)
    script_name = 'install_' + executable + '.sh'
    script_path = bioconvert_data(script_name, where="../misc")
    subprocess.call(['sh', script_path])
def test_conv():
    """Convert BAM to FASTA with ``bamtools`` and ``samtools``.

    Each method uses a fresh ``BAM2Fasta`` instance writing to the same
    temporary file; the output checksum is compared with known values.
    """
    bam_path = bioconvert_data("test_measles.sorted.bam")
    # samtools 1.6 / htslib 1.6 gives different results on travis and
    # locally, hence two accepted checksums for that method.
    samtools_md5s = [
        "0924d2a11b43094680d1a7374aaaa45e",
        "cc9afcef458f3402fbdef1a091e05c39"
    ]

    with TempFile(suffix=".fa") as fasta_file:
        with_bamtools = BAM2Fasta(bam_path, fasta_file.name)
        with_bamtools(method="bamtools")
        # Check that the output is correct with a checksum
        assert md5(fasta_file.name) == "ea5511c3c8913626be152609887c8c4d"

        with_samtools = BAM2Fasta(bam_path, fasta_file.name)
        with_samtools(method="samtools")
        assert md5(fasta_file.name) in samtools_md5s
def test_conv():
    """Convert BAM to SAM with the default, ``pysam`` and ``sambamba`` methods.

    The default output is opened with pysam and must contain 60 records. The
    ``sambamba`` output is checked against a fixed checksum.
    """
    infile = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bam") as tempfile:
        convert = BAM2SAM(infile, tempfile.name)
        convert()

        # The output is a SAM file that pysam can read. The handle is closed
        # before the file is overwritten by the next conversions.
        import pysam
        with pysam.AlignmentFile(tempfile.name) as sam:
            assert sam.count() == 60

        convert(method="pysam")

        convert = BAM2SAM(infile, tempfile.name)
        convert(method="sambamba")
        # Check that the output is correct with a checksum
        assert md5(tempfile.name) == "ad83af4d159005a77914c5503bc43802"
def test_gz2dsrc():
    """Convert a ``.dsrc`` file to ``.fq.gz`` with DSRC2GZ and check the md5.

    NOTE(review): despite the function name, the conversion exercised here
    goes from dsrc to gz.
    """
    from bioconvert import bioconvert_data

    dsrc_path = bioconvert_data("test_SP1.fq.dsrc")
    with TempFile(suffix=".fq.gz") as gz_file:
        to_gz = DSRC2GZ(dsrc_path, gz_file.name)
        to_gz()

        # The created file is uncompressed and the checksum of the
        # uncompressed stream is compared; dsrc or gz files are not compared
        # directly because their content is not deterministic.
        assert os.path.isfile(gz_file.name)

        pipeline = "gunzip -c {} | md5sum -".format(gz_file.name)
        raw_output = subprocess.check_output(pipeline, shell=True)
        digest = raw_output.split()[0].decode()

        # NOTE(review): this value is the MD5 of empty input - confirm that
        # the converter really produces data and that a gunzip failure is not
        # being masked by the pipeline.
        assert digest == "d41d8cd98f00b204e9800998ecf8427e"
def test_nx2phy_biopython(method):
    """Run the NEXUS to PHYLIP conversion with *method*.

    :param method: conversion method name; it is also used as the base name
        of the ``.nexus`` and ``.phylip`` data files looked up here.

    Only successful execution is checked; the output is not compared.
    """
    nexus_path = bioconvert_data(method + ".nexus")
    # Looked up but not compared against the output.
    outfile = bioconvert_data(method + ".phylip")
    with TempFile(suffix=".phylip") as phylip_file:
        to_phylip = NEXUS2PHYLIP(nexus_path, phylip_file.name)
        to_phylip(method=method)
def test_converter():
    """Run the ``bioconvert`` command line on a BAM file with a BED output.

    The conversion is inferred by the tool from the file names. The child
    process is waited for, so the temporary file is not deleted while the
    command is still running, and its exit status must be 0.
    """
    infile = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as tempfile:
        cmd = "bioconvert %s %s --force" % (infile, tempfile.name)
        p = subprocess.Popen(cmd, shell=True)
        assert p.wait() == 0
def test_converter1():
    """Call ``converter.main()`` with only an input and an output file name.

    No converter name is given on the substituted command line.
    """
    import sys

    bam_path = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as bed_file:
        arguments = ["bioconvert", bam_path, bed_file.name, "--force"]
        sys.argv = arguments
        converter.main()
def test_load():
    """Read the GFF3 example file and compare entries with expected values.

    Entries are compared pairwise in order, for as long as both the expected
    values and the reader provide items.
    """
    gff_path = bioconvert_data("GFF3/gff3_example.gff")
    reader = Gff3(gff_path)
    parsed_entries = reader.read()
    for wanted, parsed in zip(expected_values, parsed_entries):
        assert wanted == parsed
def test_conv(method):
    """Run the BCF to VCF conversion with *method*.

    :param method: name of the conversion method passed to the converter.

    Only successful execution is checked; the output is not compared.
    """
    bcf_path = bioconvert_data("test_bcf2vcf_v1.bcf")
    with TempFile(suffix=".vcf") as vcf_file:
        to_vcf = BCF2VCF(bcf_path, vcf_file.name)
        to_vcf(method=method)
def test_xmfa2phy(method):
    """Run the XMFA to PHYLIP conversion with *method*.

    :param method: name of the conversion method passed to the converter.

    Only successful execution is checked; the output is not compared.
    """
    xmfa_path = bioconvert_data("test_phylip2xmfa.xmfa")
    # NOTE(review): the output file uses an ".xmfa" suffix although the
    # converter is XMFA2PHYLIP - confirm this is intended.
    with TempFile(suffix=".xmfa") as output_file:
        to_phylip = XMFA2PHYLIP(xmfa_path, output_file.name)
        to_phylip(method=method)
def test_convbase():
    """Check which ``ConvBase`` subclass definitions are accepted or rejected.

    Definitions expected to be rejected must raise while the class is being
    created. Each of them is followed by an ``else`` branch that fails the
    test if the class was created after all.
    """
    infile = bioconvert_data("test_measles.fa")

    with TempFile(suffix=".bed") as outfile:
        Bam2Bed(infile, outfile.name)

    # Wrong name
    try:
        class TEST(ConvBase):
            input_ext = ".fa"
            output_ext = ".fq"

            def __call__(self):
                pass
    except Exception:
        pass
    else:
        raise AssertionError("a converter named 'TEST' should be rejected")

    # add dot
    class in2out(ConvBase):
        input_ext = "in"
        output_ext = "out"

        def __call__(self):
            pass

    # wrong input extension (int)
    try:
        class int2out(ConvBase):
            input_ext = [1]
            output_ext = ".out"

            def __call__(self):
                pass
    except Exception:
        pass
    else:
        raise AssertionError("input_ext = [1] should be rejected")

    # add dot mix case
    class in2out(ConvBase):
        input_ext = ["in", ".in2"]
        output_ext = "out"

        def __call__(self):
            pass

    # integer extensions on both sides
    try:
        class in2out(ConvBase):
            input_ext = 1
            output_ext = 2

            def __call__(self):
                pass
    except Exception:
        pass
    else:
        raise AssertionError("integer extensions should be rejected")

    class in2out(ConvBase):
        input_ext = [".fa"]
        output_ext = [".fq"]

        def __call__(self):
            self.execute("ls")

    this = in2out("test.fa", "test.fq")
    assert this.name == "in2out"
    this()
def test_conv(method):
    """Instantiate the FASTQ to QUAL converter on a sample file.

    :param method: conversion method name (not used by this test).

    The converter is only constructed; it is not called.
    """
    fastq_path = bioconvert_data("test_fastq2fasta_v1.fastq")
    with TempFile(suffix=".fasta") as output_file:
        FASTQ2QUAL(fastq_path, output_file.name)
import os import bioconvert from bioconvert import cram2bam from bioconvert.cram2bam import CRAM2BAM from bioconvert import bioconvert_data from easydev import TempFile, md5 import pytest from mock import patch reference = bioconvert_data("test_measles.fa") @patch('bioconvert.cram2bam.input', return_value=reference) def test_conv(x): infile = bioconvert_data("test_measles.bam") outfile = bioconvert_data("test_measles.cram") with TempFile(suffix=".bam") as tempfile: convert = CRAM2BAM(infile, tempfile.name) convert(method="samtools", reference=reference) with TempFile(suffix=".bam") as tempfile: convert = CRAM2BAM(infile, tempfile.name) convert(method="samtools") @patch('bioconvert.cram2bam.input', return_value="not_found") def test_conv_error(x): infile = bioconvert_data("test_measles.cram") outfile = bioconvert_data("test_measles.bam") with TempFile(suffix=".bam") as tempfile:
def test_conv(method):
    """Run the GFF2 to GFF3 conversion with *method*.

    :param method: name of the conversion method passed to the converter.

    Only successful execution is checked; the output is not compared.
    """
    gff2_path = bioconvert_data("GFF2/gff2_example.gff")
    with TempFile(suffix=".tsv") as output_file:
        to_gff3 = GFF22GFF3(gff2_path, output_file.name)
        to_gff3(method=method)
Converter benchmarking =========================== Converters have a default method. Note, however, that several methods may be available. Moreover, you may have a method that you want to compare with the implemented one. To do so you will need to implement your method first. Then, simply use our benchmarking framework as follows. """ ################################################# # from bioconvert import Benchmark from bioconvert import bioconvert_data from bioconvert.bam2bed import BAM2BED ##################################################### # Get the converter you wish to benchmark input_file = bioconvert_data("test_measles.sorted.bam") conv = BAM2BED(input_file, "test.bed") ##################################################### # Get the Benchmark instance bench = Benchmark(conv) bench.plot() # You can now see the different methods implemented in this # converter and which one is the fastest.
def test_conv():
    """Run the VCF to BCF conversion with the ``bcftools`` method.

    Only successful execution is checked; the output is not compared.
    """
    vcf_path = bioconvert_data("test_vcf2bcf_v1.vcf")
    with TempFile(suffix=".bcf") as bcf_file:
        to_bcf = VCF2BCF(vcf_path, bcf_file.name)
        to_bcf(method="bcftools")