def test_converter():
    """Check that the ``bioconvert`` executable and ``converter.main`` agree.

    The same FASTQ file is converted to FASTA twice: once by running the
    ``bioconvert`` command in a child process and once in-process through
    ``converter.main()`` with a hand-built ``sys.argv``. Both outputs must
    have the same MD5 checksum.
    """
    import sys

    infile = bioconvert_data("test_fastq2fasta_v1.fastq")
    with TempFile(suffix=".fasta") as tempfile1, TempFile(
            suffix=".fasta") as tempfile2:
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        cmd = ["bioconvert", "fastq2fasta", infile, tempfile1.name, "--force"]
        assert subprocess.call(cmd) == 0

        # Restore sys.argv afterwards so that the fake command line does not
        # leak into whatever runs after this test.
        saved_argv = sys.argv
        sys.argv = [
            "bioconvert", "fastq2fasta", infile, tempfile2.name, "--force"
        ]
        try:
            converter.main()
        finally:
            sys.argv = saved_argv

        assert md5(tempfile1.name) == md5(tempfile2.name)
def test_verbose():
    """Run a conversion with both spellings of the verbosity option.

    ``converter.main()`` is called once with ``-v CRITICAL`` and once with
    ``--verbosity CRITICAL``; the test passes when neither call raises.
    """
    import sys

    infile = bioconvert_data("test_fastq2fasta_v1.fastq")
    with TempFile(suffix=".tt") as tempfile:
        # Restore sys.argv afterwards so that the fake command line does not
        # leak into whatever runs after this test.
        saved_argv = sys.argv
        try:
            for flag in ("-v", "--verbosity"):
                sys.argv = [
                    "bioconvert", flag, "CRITICAL", "fastq2fasta", infile,
                    tempfile.name, "--force"
                ]
                converter.main()
        finally:
            sys.argv = saved_argv
Beispiel #3
0
def test_conv(method):
    """Convert a FASTA file to GenBank and compare the records of both files.

    :param method: name of the conversion method handed to the converter.
    """
    infile = bioconvert_data("test_measles.fa")

    with TempFile(suffix=".gbk") as tempfile:
        converter = FASTA2GENBANK(infile, tempfile.name)
        converter(method=method)

        fasta_entries = list(Fasta(infile).read())
        gbk_entries = list(Genbank(tempfile.name).read())

        # zip() stops at the shortest input, so without these two checks an
        # empty or truncated output file would pass silently.
        assert fasta_entries, "no record read from {}".format(infile)
        assert len(fasta_entries) == len(gbk_entries)

        for fasta_entry, gbk_entry in zip(fasta_entries, gbk_entries):
            assert fasta_entry["id"] == gbk_entry["LOCUS"]["id"]
            assert fasta_entry["comment"] in gbk_entry["DEFINITION"]
            assert fasta_entry["value"] == gbk_entry["ORIGIN"].upper()
Beispiel #4
0
def test_in_gz(method):
    """Convert gzipped FASTQ samples to FASTA and compare with references.

    For every sample the reference FASTA and the converter output are both
    passed through ``Fastq2Fasta.unwrap_fasta`` (comments stripped) before
    their MD5 checksums are compared, because some methods may write
    multi-line FASTA.

    :param method: name of the conversion method handed to the converter.
    """
    samples = ("test_fastq2fasta_v1", "sample_v2", "sample_v3", "sample_v4")

    for sample_name in samples:
        gz_input = bioconvert_data("{}.fastq.gz".format(sample_name))
        reference_fasta = bioconvert_data("{}.fasta".format(sample_name))

        # Checksum of the unwrapped reference.
        with TempFile(suffix=".fasta") as reference_unwrapped:
            Fastq2Fasta.unwrap_fasta(
                reference_fasta,
                reference_unwrapped.name,
                strip_comment=True)
            expected_md5 = md5(reference_unwrapped.name)

        # First file: raw converter output. Second file: its unwrapped copy.
        with TempFile(suffix=".fasta") as converted:
            with TempFile(suffix=".fasta") as converted_unwrapped:
                run_conversion = Fastq2Fasta(gz_input, converted.name)
                run_conversion(method=method)
                Fastq2Fasta.unwrap_fasta(
                    converted.name,
                    converted_unwrapped.name,
                    strip_comment=True)
                assert md5(converted_unwrapped.name) == expected_md5, \
                    "{} failed for {}".format(method, sample_name)
Beispiel #5
0
def test_conv():
    """Convert a sorted BAM file to BED and verify the output checksum.

    The conversion runs once with the default method and once with the
    ``bedtools`` method; both must produce the same reference checksum.
    """
    # The checksum is taken on the plain (not gzipped) output. According to
    # the original note it was computed from the unzipped version of
    # biokit/data/converters/measles.bed.
    expected_md5 = "84702e19ba3a27900f271990e0cc72a0"

    bam_path = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as tempfile:
        default_run = BAM2BED(bam_path, tempfile.name)
        default_run()
        assert md5(tempfile.name) == expected_md5

        bedtools_run = BAM2BED(bam_path, tempfile.name)
        bedtools_run(method="bedtools")
        assert md5(tempfile.name) == expected_md5
Beispiel #6
0
def test_bam2tsv():
    """Convert a sorted BAM file to TSV with each method and check the MD5.

    The same converter instance is called with ``pysam`` and then with
    ``samtools``; after each call the output must match the reference
    checksum.
    """
    expected_md5 = "4c5f3336be8a03c95a6c56be28581fb7"

    bam_path = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".tsv") as tempfile:
        convert = BAM2TSV(bam_path, tempfile.name)
        for method_name in ("pysam", "samtools"):
            convert(method=method_name)
            assert md5(tempfile.name) == expected_md5
def test_benchmark():
    """Run and plot a benchmark, then check that a string is rejected.

    ``Benchmark`` is first built from a ``BAM2BED`` instance, run and
    plotted. Building it from the string ``"BAM2BED"`` must raise
    ``NotImplementedError``.
    """
    input_file = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as fout:
        conv = BAM2BED(input_file, fout.name)
        bench = Benchmark(conv)
        bench.run_methods()
        bench.plot()

        # Only NotImplementedError is accepted; any other exception
        # propagates and fails the test with its own traceback.
        try:
            Benchmark("BAM2BED")
        except NotImplementedError:
            pass
        else:
            raise AssertionError(
                "Benchmark('BAM2BED') did not raise NotImplementedError")
def test_conv(method):
    """Convert a GenBank file to FASTA and compare the records of both files.

    :param method: name of the conversion method handed to the converter.
    """
    infile = bioconvert_data("JB409847.gbk", where)

    with TempFile(suffix=".fasta") as tempfile:
        converter = GENBANK2FASTA(infile, tempfile.name)
        converter(method=method)

        # Load both files
        fasta_entries = list(Fasta(tempfile.name).read())
        gbk_entries = list(Genbank(infile).read())

        # zip() stops at the shortest input, so without these two checks an
        # empty or truncated output file would pass silently.
        assert gbk_entries, "no record read from {}".format(infile)
        assert len(fasta_entries) == len(gbk_entries)

        for entry_fa, entry_gb in zip(fasta_entries, gbk_entries):
            assert entry_fa["id"].startswith(entry_gb["LOCUS"]["id"])
            assert entry_fa["comment"] in entry_gb["DEFINITION"]
            assert entry_fa["value"].lower() == entry_gb["ORIGIN"]
def test_indirect_conversion():
    """Run ``fastq2clustal`` with both spellings of the indirect option.

    ``converter.main()`` is called once with ``--allow-indirect-conversion``
    and once with ``-a``; the test passes when neither call raises.
    """
    import sys

    infile = bioconvert_data("ERR3295124.fastq")
    with TempFile(suffix=".clustal") as tempfile:
        # Restore sys.argv afterwards so that the fake command line does not
        # leak into whatever runs after this test.
        saved_argv = sys.argv
        try:
            for flag in ("--allow-indirect-conversion", "-a"):
                sys.argv = [
                    "bioconvert", "fastq2clustal", infile, tempfile.name,
                    "--force", flag, "-v", "DEBUG"
                ]
                converter.main()
        finally:
            sys.argv = saved_argv
def test_indirect_conversion_without_argument():
    """Check that ``fastq2clustal`` without the indirect option exits with 2.

    ``converter.main()`` must raise ``SystemExit`` with code 2 when the
    command line does not explicitly allow an indirect conversion.
    """
    import sys

    infile = bioconvert_data("fastqutils_1.fastq")
    with TempFile(suffix=".clustal") as tempfile:
        saved_argv = sys.argv
        sys.argv = [
            "bioconvert", "fastq2clustal", infile, tempfile.name, "--force"
        ]
        # For now we want the user to explicitly indicate that (s)he agrees
        # with an indirect conversion.
        try:
            converter.main()
        except SystemExit as e:
            assert e.code == 2
        else:
            raise AssertionError(
                "converter.main() did not exit although the indirect "
                "conversion was not allowed")
        finally:
            # Do not leak the fake command line into other tests.
            sys.argv = saved_argv
Beispiel #11
0
def test_sra2fastq_gz_single(method):
    """Convert the accession SRR6477205 to a gzipped FASTQ and check it.

    The gzipped output is decompressed into a second temporary file whose
    MD5 checksum must equal that of the reference ``SRR6477205.fastq``.

    :param method: name of the conversion method handed to the converter.
    """
    infile = "SRR6477205"
    outfile = bioconvert_data("SRR6477205.fastq")

    # The decompressed copy gets its own TempFile so that it is removed with
    # the compressed one instead of being left behind on disk.
    with TempFile(suffix=".fastq.gz") as tempfile, \
            TempFile(suffix=".fastq") as unzipped:
        converter = SRA2FASTQ(infile, tempfile.name, True)
        converter(method=method)

        with gzip.open(tempfile.name, 'rb') as f_in, \
                open(unzipped.name, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

        # Check that the output is correct with a checksum
        assert md5(unzipped.name) == md5(outfile)
Beispiel #12
0
def test_conv(method):
    """Convert an EMBL file to GenBank and verify the result per method.

    Only the ``biopython`` output is compared with a reference checksum.
    ``squizz`` is accepted without a checksum, and any other method name
    raises ``NotImplementedError``.

    :param method: name of the conversion method handed to the converter.
    """
    embl_path = bioconvert_data("JB409847.embl")

    with TempFile(suffix=".gbk") as tempfile:
        run_conversion = EMBL2GENBANK(embl_path, tempfile.name)
        run_conversion(method=method)

        if method not in ("biopython", "squizz"):
            raise NotImplementedError

        if method == "biopython":
            # Check that the output is correct with a checksum
            assert md5(tempfile.name) == "cdd34902975a68e58ad5f105b44ff495"
        # TODO: no checksum for squizz yet. The original note says that EMBL
        # input generated by biopython is not understood by squizz.
Beispiel #13
0
    def install_tool(self, executable):
        """Install *executable* with its install script when it is missing.

        Nothing is done when ``shutil.which`` already finds the executable.
        Otherwise the script ``install_<executable>.sh`` is located through
        ``bioconvert_data(..., where="../misc")`` and run with ``sh``. A
        non-zero exit status of the script is logged as a warning.

        :param executable: name of the executable to install
        :return: nothing
        """
        if shutil.which(executable) is not None:
            return

        # Imported here because it is only needed when something has to be
        # installed.
        from bioconvert import bioconvert_data

        logger.info("Installing tool : " + executable)
        script = bioconvert_data('install_' + executable + '.sh',
                                 where="../misc")
        status = subprocess.call(['sh', script])
        if status != 0:
            # Report the failure instead of silently dropping the status.
            logger.warning("Installation of %s failed (exit status %s)",
                           executable, status)
Beispiel #14
0
def test_bam2tsv():
    """Convert a sorted BAM file to TSV with ``pysam`` and ``samtools``.

    The ``samtools`` output is always compared with the reference checksum.
    The ``pysam`` output is compared on every Python version except 3.6.
    """
    expected_md5 = "4c5f3336be8a03c95a6c56be28581fb7"

    bam_path = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".tsv") as tempfile:
        convert = BAM2TSV(bam_path, tempfile.name)

        convert(method="pysam")
        # Original note: this check fails on Python 3.6 for a reason that
        # could not be tracked down. Under pytest only the header seems to
        # be saved, not the content of the BAM file.
        running_python36 = sys.version_info[:2] == (3, 6)
        if not running_python36:
            assert md5(tempfile.name) == expected_md5

        convert(method="samtools")
        assert md5(tempfile.name) == expected_md5
Beispiel #15
0
    def install_tool(self, executable):
        """Install *executable* with its install script when it is missing.

        Nothing is done when ``shutil.which`` already finds the executable.
        Otherwise the script ``install_<executable>.sh`` is located through
        ``bioconvert_data(..., where="../misc")`` and run with ``sh``. A
        non-zero exit status of the script is logged as a warning.

        :param executable: executable to install
        :return: nothing

        """
        if shutil.which(executable) is not None:
            return

        # Imported here because it is only needed when something has to be
        # installed.
        from bioconvert import bioconvert_data

        logger.info("Installing tool : " + executable)
        script = bioconvert_data(
            'install_' + executable + '.sh', where="../misc")
        status = subprocess.call(['sh', script])
        if status != 0:
            # Report the failure instead of silently dropping the status.
            logger.warning("Installation of %s failed (exit status %s)",
                           executable, status)
def test_conv():
    """Convert a sorted BAM file to FASTA with ``bamtools`` and ``samtools``.

    The ``bamtools`` output must match a single checksum. The ``samtools``
    output may match either of two known checksums.
    """
    bamtools_md5 = "ea5511c3c8913626be152609887c8c4d"
    # samtools 1.6 / htslib 1.6 gives different results on travis and
    # locally, hence two accepted values.
    samtools_md5_candidates = [
        "0924d2a11b43094680d1a7374aaaa45e",
        "cc9afcef458f3402fbdef1a091e05c39",
    ]

    bam_path = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".fa") as tempfile:
        bamtools_run = BAM2Fasta(bam_path, tempfile.name)
        bamtools_run(method="bamtools")
        assert md5(tempfile.name) == bamtools_md5

        samtools_run = BAM2Fasta(bam_path, tempfile.name)
        samtools_run(method="samtools")
        assert md5(tempfile.name) in samtools_md5_candidates
Beispiel #17
0
def test_conv():
    """Convert a sorted BAM file to SAM and check the result.

    The default conversion must yield a file that ``pysam`` can open and
    that contains 60 reads. The ``pysam`` method is then run without a
    check, and the ``sambamba`` output is compared with a reference
    checksum.
    """
    import pysam

    infile = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bam") as tempfile:
        convert = BAM2SAM(infile, tempfile.name)
        convert()

        # The handle is closed before the file is overwritten by the next
        # conversions.
        with pysam.AlignmentFile(tempfile.name) as sam:
            assert sam.count() == 60
        convert(method="pysam")

        convert = BAM2SAM(infile, tempfile.name)
        convert(method="sambamba")
        assert md5(tempfile.name) == "ad83af4d159005a77914c5503bc43802"
Beispiel #18
0
def test_gz2dsrc():
    """
    Test that a DSRC-compressed FASTQ file is converted by ``DSRC2GZ`` to a
    gzip-compressed FASTQ file.

    The output is decompressed with ``gunzip`` and the MD5 checksum of the
    decompressed stream is compared with a reference value.
    """
    from bioconvert import bioconvert_data
    infile = bioconvert_data("test_SP1.fq.dsrc")

    with TempFile(suffix=".fq.gz") as tempfile:
        converter = DSRC2GZ(infile, tempfile.name)
        converter()

        # Uncompress the created file and compare the uncompressed content
        # with the expected md5. The dsrc or gz files are not compared
        # directly because, per the original note, they are not deterministic.
        assert os.path.isfile(tempfile.name)

        # The shell pipeline prints "<md5>  -"; only the first field is kept.
        cmd = "gunzip -c {} | md5sum -".format(tempfile.name)
        res = subprocess.check_output(cmd, shell=True)
        res = res.split()[0].decode()

        # Check that the output is correct with a checksum
        # NOTE(review): d41d8cd98f00b204e9800998ecf8427e is the MD5 of empty
        # input. A pipeline reports the exit status of its last command, so a
        # failing gunzip would also produce this value - confirm that an
        # empty output is really what is expected here.
        assert res == "d41d8cd98f00b204e9800998ecf8427e"
def test_nx2phy_biopython(method):
    """Convert ``<method>.nexus`` to PHYLIP with the given method.

    The test passes when the conversion does not raise.

    :param method: conversion method; also the base name of the data files.
    """
    nexus_path = bioconvert_data(method + ".nexus")
    # Looked up like in the original, but the output is not compared with it.
    expected_phylip_path = bioconvert_data(method + ".phylip")

    with TempFile(suffix=".phylip") as tempfile:
        run_conversion = NEXUS2PHYLIP(nexus_path, tempfile.name)
        run_conversion(method=method)
Beispiel #20
0
def test_converter():
    """Run the ``bioconvert`` executable without naming a converter.

    Only the input and output paths are given on the command line. The
    command must finish with exit status 0.
    """
    infile = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as tempfile:
        # An argument list (no shell) keeps paths containing spaces intact.
        cmd = ["bioconvert", infile, tempfile.name, "--force"]
        # Wait for the child before the temporary file goes away, and check
        # its exit status so that a failing conversion fails the test.
        assert subprocess.call(cmd) == 0
Beispiel #21
0
def test_converter1():
    """Call ``converter.main()`` without naming a converter.

    Only the input and output paths are given on the fake command line. The
    test passes when ``converter.main()`` does not raise.
    """
    import sys

    infile = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as tempfile:
        # Restore sys.argv afterwards so that the fake command line does not
        # leak into whatever runs after this test.
        saved_argv = sys.argv
        sys.argv = ["bioconvert", infile, tempfile.name, "--force"]
        try:
            converter.main()
        finally:
            sys.argv = saved_argv
def test_load():
    """Read the example GFF3 file and compare it with ``expected_values``.

    Records are compared pairwise, in order, for as long as both sequences
    have items.
    """
    gff_path = bioconvert_data("GFF3/gff3_example.gff")
    parsed_records = Gff3(gff_path).read()

    for expected_value, file_value in zip(expected_values, parsed_records):
        assert expected_value == file_value
Beispiel #23
0
def test_conv(method):
    """Convert a sample BCF file to VCF with the given method.

    The test passes when the conversion does not raise.

    :param method: name of the conversion method handed to the converter.
    """
    bcf_path = bioconvert_data("test_bcf2vcf_v1.bcf")
    with TempFile(suffix=".vcf") as tempfile:
        run_conversion = BCF2VCF(bcf_path, tempfile.name)
        run_conversion(method=method)
Beispiel #24
0
def test_xmfa2phy(method):
    """Convert a sample XMFA file with ``XMFA2PHYLIP`` and the given method.

    The test passes when the conversion does not raise.

    :param method: name of the conversion method handed to the converter.
    """
    xmfa_path = bioconvert_data("test_phylip2xmfa.xmfa")
    # NOTE(review): the output file carries an ".xmfa" suffix although the
    # converter is XMFA2PHYLIP - confirm that this is intended.
    with TempFile(suffix=".xmfa") as tempfile:
        run_conversion = XMFA2PHYLIP(xmfa_path, tempfile.name)
        run_conversion(method=method)
Beispiel #25
0
def test_convbase():
    """Exercise the checks applied when a ``ConvBase`` subclass is defined.

    Three class definitions are expected to be rejected with an exception
    (a badly named class, a list holding an int as input extension, and
    plain ints as extensions). Three others must be accepted, and the last
    accepted class is instantiated and called.

    Each negative check uses ``try``/``except``/``else`` so that the test
    really fails when a definition is unexpectedly accepted.
    """
    infile = bioconvert_data("test_measles.fa")

    with TempFile(suffix=".bed") as outfile:
        Bam2Bed(infile, outfile.name)

    # Wrong name
    try:

        class TEST(ConvBase):
            input_ext = ".fa"
            output_ext = ".fq"

            def __call__(self):
                pass

    except Exception:
        pass
    else:
        raise AssertionError("class TEST was accepted despite its name")

    # add dot
    class in2out(ConvBase):
        input_ext = "in"
        output_ext = "out"

        def __call__(self):
            pass

    # wrong input extension (int)
    try:

        class int2out(ConvBase):
            input_ext = [1]
            output_ext = ".out"

            def __call__(self):
                pass

    except Exception:
        pass
    else:
        raise AssertionError("input_ext = [1] was accepted")

    # add dot  mix case
    class in2out(ConvBase):
        input_ext = ["in", ".in2"]
        output_ext = "out"

        def __call__(self):
            pass

    # wrong input and output extensions (plain ints)
    try:

        class in2out(ConvBase):
            input_ext = 1
            output_ext = 2

            def __call__(self):
                pass

    except Exception:
        pass
    else:
        raise AssertionError("integer extensions were accepted")

    class in2out(ConvBase):
        input_ext = [".fa"]
        output_ext = [".fq"]

        def __call__(self):
            self.execute("ls")

    this = in2out("test.fa", "test.fq")
    assert this.name == "in2out"
    this()
def test_conv(method):
    """Instantiate ``FASTQ2QUAL`` on a sample FASTQ file.

    NOTE(review): ``method`` is not used and the converter is only built,
    never called - confirm whether a conversion was meant to run here.

    :param method: conversion method name (currently unused).
    """
    fastq_path = bioconvert_data("test_fastq2fasta_v1.fastq")

    with TempFile(suffix=".fasta") as fout:
        FASTQ2QUAL(fastq_path, fout.name)
Beispiel #27
0
import os
import bioconvert
from bioconvert import cram2bam
from bioconvert.cram2bam import CRAM2BAM
from bioconvert import bioconvert_data
from easydev import TempFile, md5
import pytest
from mock import patch

# FASTA file used as ``reference`` by the tests below: it is passed to the
# converter and is also the value returned by the patched ``input``.
reference = bioconvert_data("test_measles.fa")


@patch('bioconvert.cram2bam.input', return_value=reference)
def test_conv(x):
    """Convert a CRAM file to BAM with and without an explicit reference.

    ``bioconvert.cram2bam.input`` is patched to return ``reference``.
    NOTE(review): presumably the converter asks for the reference through
    ``input`` when none is given - confirm against ``cram2bam``.

    :param x: mock object injected by ``patch`` (unused).
    """
    # The converter is CRAM2BAM, so the input has to be the CRAM file.
    infile = bioconvert_data("test_measles.cram")

    with TempFile(suffix=".bam") as tempfile:
        convert = CRAM2BAM(infile, tempfile.name)
        convert(method="samtools", reference=reference)

    with TempFile(suffix=".bam") as tempfile:
        convert = CRAM2BAM(infile, tempfile.name)
        convert(method="samtools")


@patch('bioconvert.cram2bam.input', return_value="not_found")
def test_conv_error(x):
    infile = bioconvert_data("test_measles.cram")
    outfile = bioconvert_data("test_measles.bam")
    with TempFile(suffix=".bam") as tempfile:
def test_conv(method):
    """Convert a sample GFF2 file with ``GFF22GFF3`` and the given method.

    The test passes when the conversion does not raise.

    :param method: name of the conversion method handed to the converter.
    """
    gff2_path = bioconvert_data("GFF2/gff2_example.gff")
    # NOTE(review): the output file carries a ".tsv" suffix although the
    # converter is GFF22GFF3 - confirm that this is intended.
    with TempFile(suffix=".tsv") as tempfile:
        run_conversion = GFF22GFF3(gff2_path, tempfile.name)
        run_conversion(method=method)
Converter benchmarking
===========================

Converters have a default method.

Note, however, that several methods may be available.
Moreover, you may have a method that you want to compare
with the implemented one. To do so you will need to 
implement your method first. Then, simply use our benchmarking
framework as follows.

"""
#################################################
#
from bioconvert import Benchmark
from bioconvert import bioconvert_data
from bioconvert.bam2bed import BAM2BED

#####################################################
# Create the converter you wish to benchmark
input_file = bioconvert_data("test_measles.sorted.bam")
conv = BAM2BED(input_file, "test.bed")

#####################################################
# Wrap the converter in a Benchmark instance and plot the result
bench = Benchmark(conv)
bench.plot()

# You can now see the different methods implemented in this
# converter and which one is the fastest.
Beispiel #30
0
def test_conv():
    """Convert a sample VCF file to BCF with the ``bcftools`` method.

    The test passes when the conversion does not raise.
    """
    vcf_path = bioconvert_data("test_vcf2bcf_v1.vcf")
    with TempFile(suffix=".bcf") as tempfile:
        run_conversion = VCF2BCF(vcf_path, tempfile.name)
        run_conversion(method="bcftools")