import unittest
import re
import os
import subprocess
from unittest.mock import patch
from tests.unit.unittest_helpers import relative_file_path, file_contents
from idseq_dag.steps.generate_phylo_tree import PipelineStepGeneratePhyloTree
import idseq_dag.util.command as command
import idseq_dag.util.command_patterns as command_patterns

ASSEMBLY_SUMMARY_FILE = relative_file_path(
    __file__, "../../../examples/fixtures/assembly_summary.txt")
EXAMPLE_VCF_FILE = relative_file_path(
    __file__, "../../../examples/fixtures/example.vcf")
TMP_VCF_OUT_FILE = "/tmp/tmp_generatephylotree_testcase.vcf"


class GeneratePyhloTreeTestCase(unittest.TestCase):
    '''Tests for idseq_dag/steps/generate_phylo_tree.py module'''
    def test_get_taxid_genomes(self):
        '''get_taxid_genomes on the fixture assembly summary yields the two expected rows.

        The call passes 10298 and 2 -- presumably the taxid and the maximum
        number of genomes to return (TODO confirm against the step).
        '''
        expected_rows = [
            'GCA_000859985.2\t10298\t10298\tHuman alphaherpesvirus 1\tftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/859/985/GCA_000859985.2_ViralProj15217',
            'GCA_003052245.1\t10298\t10298\tHuman alphaherpesvirus 1\tftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/052/245/GCA_003052245.1_ASM305224v1',
        ]

        genome_rows = PipelineStepGeneratePhyloTree.get_taxid_genomes(
            ASSEMBLY_SUMMARY_FILE,
            10298,
            2,
        )

        self.assertEqual(genome_rows, expected_rows)

    @staticmethod
    def _ncbi_output_stub(cmd):
        if isinstance(cmd, command_patterns.CommandPattern):
Example #2
0
import unittest
from unittest.mock import patch, ANY
import os
import subprocess
import idseq_dag.util.command as command
import idseq_dag.util.command_patterns as command_patterns
from tests.unit.unittest_helpers import file_contents, relative_file_path, MATCH_RE

TMP_FILE_NAME = 'command_test_tmp_file.tmp'
TMP_FILE = f'/tmp/{TMP_FILE_NAME}'
TMP_FOLDER = '/tmp/command_test_tmp_folder'
TMP_PARENT_FOLDER = TMP_FOLDER
TMP_SRC_FOLDER = f'{TMP_PARENT_FOLDER}/command_test_tmp_src'
TMP_DEST_FOLDER = f'{TMP_PARENT_FOLDER}/command_test_tmp_dst'
TMP_SOURCE_FILE_PATH = os.path.join(TMP_SRC_FOLDER, TMP_FILE_NAME)
TESTFILE_ABC_TXT = relative_file_path(__file__, "dummy testfile abc.txt")
TESTFILE_BCD_TXT = relative_file_path(__file__, "dummy testfile bcd.txt")
TESTSCRIPT_HAPPY_PY = relative_file_path(__file__, "dummy testscript happy.py")


class CommandModuleMethodsExecuteCommands(unittest.TestCase):
    '''Tests for idseq_dag/util/command.py module methods to execute commands'''
    @patch('idseq_dag.util.command.log.write')
    def test_execute_legacy_format(self, _mock_log):
        '''WHEN command is a string, THEN execute and log a warning'''
        result = command.execute_with_output("echo 123")

        self.assertEqual(result, "123\n")
        _mock_log.assert_any_call(
            warning=True,
            message=MATCH_RE(".*legacy.*Use.*command_patterns"),
import os
import time
import unittest

from tests.unit.unittest_helpers import relative_file_path

from idseq_dag.steps.generate_lz4 import PipelineStepGenerateLZ4

INPUT_FILE = relative_file_path(__file__, 'doesnotexist')

class TestPipelineStepGenerateLZ4(unittest.TestCase):

    def setUp(self):
        '''Create a fresh PipelineStepGenerateLZ4 on self.step before each test.'''
        # Only the input file is meaningful here; every other setting is left empty.
        step_settings = {
            'name': 'test_generate_lz4',
            'input_files': [[INPUT_FILE]],
            'output_files': [],
            'output_dir_local': '',
            'ref_dir_local': '',
            'output_dir_s3': '',
            'additional_files': {},
            'additional_attributes': {},
        }
        self.step = PipelineStepGenerateLZ4(**step_settings)

    def test_get_command(self):
        '''get_command(INPUT_FILE) should describe an lz4 call writing INPUT_FILE + ".lz4".'''
        expected_args = ['-9', '-f', INPUT_FILE, INPUT_FILE + '.lz4']

        lz4_command = self.step.get_command(INPUT_FILE)

        self.assertEqual('lz4', lz4_command.cmd)
        self.assertSequenceEqual(expected_args, lz4_command.args)
    def test_call_hits_m8(self):
        '''Run call_hits_m8 on a small sample and compare with the stored expected outputs.

        The lineage and accession databases are built with shelve from the
        literal tables below. Generated files are removed through addCleanup,
        so they are deleted even when an assertion fails.
        '''
        # This tests the logic based on a small sample. For development and performance benchmarking you can use real m8 outputs
        #   as well as the real taxid-lineages.db and accession2taxid.db (from s3://idseq-public-references/taxonomy or s3://idseq-public-references/alignment_data).

        # Generated by taking a random sample from a gsnap output on benchmark sample 5
        #   The random output took a cluster of five rows as its smallest unit to make a more
        #   realistic file as similar rows are clustered.
        input_m8 = relative_file_path(__file__, 'm8-test/sample.m8')

        lineages = relative_file_path(__file__, 'm8-test/taxid-lineages.db')
        accession2taxid = relative_file_path(__file__, 'm8-test/accession2taxid.db')

        # Generated by running this test with the full versions of these dicts and printing which items were needed
        lineage_by_taxid = {
            "37124": ('37124', '11019', '11018'),
            "573": ('573', '570', '543'),
            "1280": ('1280', '1279', '90964'),
            "2065052": ('2065052', '687333', '687329'),
            "523796": ('1280', '1279', '90964'),
            "46170": ('1280', '1279', '90964'),
        }
        accessions_by_taxid = {
            "37124": (
                "MK468611", "MK468612", "MK468613", "MK468615", "MK468617",
                "MH124576", "MH124577", "MH124578", "MH124579", "MH124580",
                "MK286896", "MK370031", "MK370032", "MK370033", "MK468608",
                "MK468618", "MK468619", "MK468620", "MK468621", "MK468622",
                "MF740874", "MF773566", "MF774614", "MF774615", "MF774616",
            ),
            "573": (
                "CP015500", "CP015822", "CP015990", "CP016813", "CP016814",
                "CP018140", "CP018337", "CP018352", "CP018356", "CP018364",
            ),
            "1280": (
                "CP010295", "CP010296", "CP010297", "CP010298", "CP010299",
                "CP017682", "CP017804", "AF325855",
            ),
            "2065052": ("NC_038358",),
            "523796": ("AM990992",),
            "46170": ("AP014652",),
        }

        # The context managers close both shelves even if one of the writes raises.
        with shelve.open(lineages.replace('.db', ''), 'c') as lineages_db, \
                shelve.open(accession2taxid.replace('.db', ''), 'c') as accession2taxid_db:
            lineages_db.update(lineage_by_taxid)
            for taxid, accessions in accessions_by_taxid.items():
                for accession in accessions:
                    accession2taxid_db[accession] = taxid

        # Cleanups run last-registered-first, so the databases are removed after the outputs.
        # NOTE(review): this assumes the dbm backend stores each shelf at the '.db' path -- confirm.
        self.addCleanup(os.remove, accession2taxid)
        self.addCleanup(os.remove, lineages)

        output_m8 = relative_file_path(__file__, 'm8-test/test.m8')
        output_summary = relative_file_path(__file__, 'm8-test/test.hitsummary.tab')

        call_hits_m8(
            input_m8,
            lineages,
            accession2taxid,
            output_m8,
            output_summary,
            36,
        )

        # Registered only after the call has returned, so both output files are expected to exist.
        self.addCleanup(os.remove, output_summary)
        self.addCleanup(os.remove, output_m8)

        in_size = os.stat(input_m8).st_size
        out_size = os.stat(output_m8).st_size

        # File should shrink due to deduping
        self.assertLessEqual(out_size, in_size)

        # Generated by running this test then manually inspected
        sample_deduped_m8 = relative_file_path(__file__, 'm8-test/sample.deduped.m8')
        sample_summary = relative_file_path(__file__, 'm8-test/sample.hitsummary.tab')

        self.assertEqual(file_contents(output_m8), file_contents(sample_deduped_m8))
        self.assertEqual(file_contents(output_summary), file_contents(sample_summary))