import unittest import re import os import subprocess from unittest.mock import patch from tests.unit.unittest_helpers import relative_file_path, file_contents from idseq_dag.steps.generate_phylo_tree import PipelineStepGeneratePhyloTree import idseq_dag.util.command as command import idseq_dag.util.command_patterns as command_patterns ASSEMBLY_SUMMARY_FILE = relative_file_path( __file__, "../../../examples/fixtures/assembly_summary.txt") EXAMPLE_VCF_FILE = relative_file_path( __file__, "../../../examples/fixtures/example.vcf") TMP_VCF_OUT_FILE = "/tmp/tmp_generatephylotree_testcase.vcf" class GeneratePyhloTreeTestCase(unittest.TestCase): '''Tests for idseq_dag/steps/generate_pyhlo_tree.py module''' def test_get_taxid_genomes(self): results = PipelineStepGeneratePhyloTree.get_taxid_genomes( ASSEMBLY_SUMMARY_FILE, 10298, 2) self.assertEqual(results, [ 'GCA_000859985.2\t10298\t10298\tHuman alphaherpesvirus 1\tftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/859/985/GCA_000859985.2_ViralProj15217', 'GCA_003052245.1\t10298\t10298\tHuman alphaherpesvirus 1\tftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/052/245/GCA_003052245.1_ASM305224v1' ]) @staticmethod def _ncbi_output_stub(cmd): if isinstance(cmd, command_patterns.CommandPattern):
import unittest from unittest.mock import patch, ANY import os import subprocess import idseq_dag.util.command as command import idseq_dag.util.command_patterns as command_patterns from tests.unit.unittest_helpers import file_contents, relative_file_path, MATCH_RE TMP_FILE_NAME = 'command_test_tmp_file.tmp' TMP_FILE = f'/tmp/{TMP_FILE_NAME}' TMP_FOLDER = '/tmp/command_test_tmp_folder' TMP_PARENT_FOLDER = TMP_FOLDER TMP_SRC_FOLDER = f'{TMP_PARENT_FOLDER}/command_test_tmp_src' TMP_DEST_FOLDER = f'{TMP_PARENT_FOLDER}/command_test_tmp_dst' TMP_SOURCE_FILE_PATH = os.path.join(TMP_SRC_FOLDER, TMP_FILE_NAME) TESTFILE_ABC_TXT = relative_file_path(__file__, "dummy testfile abc.txt") TESTFILE_BCD_TXT = relative_file_path(__file__, "dummy testfile bcd.txt") TESTSCRIPT_HAPPY_PY = relative_file_path(__file__, "dummy testscript happy.py") class CommandModuleMethodsExecuteCommands(unittest.TestCase): '''Tests for idseq_dag/util/command.py module methods to execute commands''' @patch('idseq_dag.util.command.log.write') def test_execute_legacy_format(self, _mock_log): '''WHEN command is a string, THEN execute and log a warning''' result = command.execute_with_output("echo 123") self.assertEqual(result, "123\n") _mock_log.assert_any_call( warning=True, message=MATCH_RE(".*legacy.*Use.*command_patterns"),
import os
import time
import unittest

from tests.unit.unittest_helpers import relative_file_path
from idseq_dag.steps.generate_lz4 import PipelineStepGenerateLZ4

INPUT_FILE = relative_file_path(__file__, 'doesnotexist')


class TestPipelineStepGenerateLZ4(unittest.TestCase):
    """Unit tests for the command built by PipelineStepGenerateLZ4."""

    def setUp(self):
        """Create a step whose only input is INPUT_FILE; every other setting is empty."""
        step_settings = {
            'name': 'test_generate_lz4',
            'input_files': [[INPUT_FILE]],
            'output_files': [],
            'output_dir_local': '',
            'ref_dir_local': '',
            'output_dir_s3': '',
            'additional_files': {},
            'additional_attributes': {},
        }
        self.step = PipelineStepGenerateLZ4(**step_settings)

    def test_get_command(self):
        """get_command() yields an `lz4` command writing to `<input>.lz4`."""
        expected_args = ['-9', '-f', INPUT_FILE, INPUT_FILE + '.lz4']

        lz4_command = self.step.get_command(INPUT_FILE)

        self.assertEqual('lz4', lz4_command.cmd)
        self.assertSequenceEqual(expected_args, lz4_command.args)
def test_call_hits_m8(self):
    """Run call_hits_m8 on a small sampled m8 file and compare against checked-in results.

    The test builds two temporary shelve databases (accession -> taxid and
    taxid -> lineage), runs call_hits_m8 over 'm8-test/sample.m8', and checks
    that the produced m8 and hit-summary files match the checked-in
    'sample.deduped.m8' and 'sample.hitsummary.tab'. All generated files are
    removed afterwards, whether or not the assertions pass.
    """
    # This tests the logic based on a small sample. For development and performance
    # benchmarking you can use real m8 outputs as well as the real taxid-lineages.db
    # and accession2taxid.db (from s3://idseq-public-references/taxonomy or
    # s3://idseq-public-references/alignment_data).

    # Generated by taking a random sample from a gsnap output on benchmark sample 5.
    # The random output took a cluster of five rows as its smallest unit to make a
    # more realistic file, as similar rows are clustered.
    input_m8 = relative_file_path(__file__, 'm8-test/sample.m8')
    lineages = relative_file_path(__file__, 'm8-test/taxid-lineages.db')
    accession2taxid = relative_file_path(__file__, 'm8-test/accession2taxid.db')
    output_m8 = relative_file_path(__file__, 'm8-test/test.m8')
    output_summary = relative_file_path(__file__, 'm8-test/test.hitsummary.tab')

    # Generated by running this test then manually inspected
    sample_deduped_m8 = relative_file_path(__file__, 'm8-test/sample.deduped.m8')
    sample_summary = relative_file_path(__file__, 'm8-test/sample.hitsummary.tab')

    # Generated by running this test with the full versions of these dicts and
    # printing which items were needed.
    accessions_by_taxid = {
        "37124": [
            "MK468611", "MK468612", "MK468613", "MK468615", "MK468617",
            "MH124576", "MH124577", "MH124578", "MH124579", "MH124580",
            "MK286896", "MK370031", "MK370032", "MK370033", "MK468608",
            "MK468618", "MK468619", "MK468620", "MK468621", "MK468622",
            "MF740874", "MF773566", "MF774614", "MF774615", "MF774616",
        ],
        "573": [
            "CP015500", "CP015822", "CP015990", "CP016813", "CP016814",
            "CP018140", "CP018337", "CP018352", "CP018356", "CP018364",
        ],
        "1280": [
            "CP010295", "CP010296", "CP010297", "CP010298", "CP010299",
            "CP017682", "CP017804", "AF325855",
        ],
        "2065052": ["NC_038358"],
        "523796": ["AM990992"],
        "46170": ["AP014652"],
    }
    lineage_by_taxid = {
        "37124": ('37124', '11019', '11018'),
        "573": ('573', '570', '543'),
        "1280": ('1280', '1279', '90964'),
        "2065052": ('2065052', '687333', '687329'),
        # These two taxids intentionally carry the 1280 lineage, exactly as in
        # the original fixture data.
        "523796": ('1280', '1279', '90964'),
        "46170": ('1280', '1279', '90964'),
    }

    try:
        # shelve is given the path without the '.db' suffix, as before; the
        # context managers guarantee both databases are closed (and flushed)
        # even if a write fails.
        with shelve.open(lineages.replace('.db', ''), 'c') as lineages_db, \
                shelve.open(accession2taxid.replace('.db', ''), 'c') as accession2taxid_db:
            lineages_db.update(lineage_by_taxid)
            for taxid, accessions in accessions_by_taxid.items():
                for accession in accessions:
                    accession2taxid_db[accession] = taxid

        call_hits_m8(
            input_m8,
            lineages,
            accession2taxid,
            output_m8,
            output_summary,
            36,  # sixth positional argument; its meaning is defined by call_hits_m8
        )

        in_size = os.stat(input_m8).st_size
        out_size = os.stat(output_m8).st_size

        # File should shrink due to deduping
        self.assertLessEqual(out_size, in_size)

        self.assertEqual(file_contents(output_m8), file_contents(sample_deduped_m8))
        self.assertEqual(file_contents(output_summary), file_contents(sample_summary))
    finally:
        # Always clean up generated artifacts so a failing run does not leave
        # stale outputs or databases next to the checked-in fixtures. Missing
        # files are skipped so cleanup never masks the real failure.
        for generated_path in (output_m8, output_summary, lineages, accession2taxid):
            if os.path.exists(generated_path):
                os.remove(generated_path)