def test_main_1(self, run_pipeline_mock):
    """Check that main runs with a valid pipeline and default options.

    The patched object received as run_pipeline_mock must have been
    called once main has processed the argument list.
    """
    argv = ["pdm_utils.run", "import"]
    run.main(argv)
    with self.subTest():
        self.assertTrue(run_pipeline_mock.called)
def test_main_1(self, alchemy_mock):
    """Verify update data and final data are retrieved."""
    # The update flag and final flag are tested together: they have been
    # merged so that PhagesDB is only queried once for all data in the
    # genome, since it is time-intensive.
    alchemy_mock.return_value = self.alchemist

    # If final=True, any genome in database will be checked on PhagesDB
    # regardless of AnnotationAuthor
    unparsed_args = get_unparsed_args(update=True, final=True)
    run.main(unparsed_args)

    update_file_count = count_files(updates_folder)
    final_file_count = count_files(phagesdb_genome_folder)
    update_row_count = count_rows(update_table)
    import_row_count = count_rows(phagesdb_import_table)

    with self.subTest():
        self.assertEqual(update_file_count, 1)
    with self.subTest():
        # It's not clear how stable the storage of any particular final
        # flat file is on PhagesDB. There is possibility that for any
        # genome the associated flat file will be removed. So this is
        # one area of testing that could be improved. For now, simply
        # verify that 1 or more files have been retrieved.
        # assertGreater reports the actual count when the check fails.
        self.assertGreater(final_file_count, 0)
    with self.subTest():
        # There should be several rows of updates.
        self.assertGreater(update_row_count, 0)
    with self.subTest():
        # Every retrieved final flat file needs one import table row.
        self.assertEqual(final_file_count, import_row_count)
def test_main_1(self, alchemy_mock, gpd_mock, rud_mock):
    """Verify compare runs successfully with:
    MySQL, PhagesDB, and GenBank records saved,
    a duplicate MySQL accession (for D29 and Trixie),
    an invalid accession (for L5),
    a duplicate phage.Name (for Constance and Et2Brutus),
    a PhagesDB name unmatched to MySQL (for 'unmatched')."""
    alchemy_mock.return_value = self.alchemist
    gpd_mock.return_value = self.pdb_json_results
    rud_mock.return_value = FASTA_FILE

    # Make modifications to cause errors. The statements are executed in
    # the order listed.
    stmts = [
        create_update("phage", "Accession", L5_ACC + "1", "L5"),
        create_update("phage", "Accession", TRIXIE_ACC, "D29"),
        create_update("phage", "Name", "Dupe", "Constance"),
        create_update("phage", "Name", "Dupe", "Et2Brutus"),
        create_update("phage", "PhageID", "Dupe", "Constance"),
        create_update("phage", "PhageID", "Dupe_Draft", "Et2Brutus"),
    ]
    for stmt in stmts:
        test_db_utils.execute(stmt)

    run.main(self.unparsed_args)
    count = count_files(test_folder)

    with self.subTest():
        # assertGreater reports the actual count when the check fails.
        self.assertGreater(count, 0)
    with self.subTest():
        gpd_mock.assert_called()
    with self.subTest():
        rud_mock.assert_called()
def test_main_2(self, getpass_mock):
    """Verify the frozen database is created with all genomes removed.

    A 'phage.Status != draft' filter is supplied and the frozen database
    is expected to contain zero phage rows.
    """
    getpass_mock.side_effect = [USER, PWD]
    filter_args = ["-f", "phage.Status != draft"]
    self.unparsed_args.extend(filter_args)
    run.main(self.unparsed_args)

    frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
    frozen_count = frozen_rows[0]["count"]
    self.assertEqual(frozen_count, 0)
def test_main_1(self, alchemy_mock):
    """Verify that the run retrieves no GenBank record.

    The results folder must exist afterwards but contain no files.
    """
    alchemy_mock.return_value = self.alchemist
    run.main(self.unparsed_args)

    # Count before asserting, as the original ordering does.
    record_count = count_files(results_path)

    with self.subTest():
        self.assertTrue(results_path.exists())
    with self.subTest():
        self.assertEqual(record_count, 0)
def test_main_1(self, getpass_mock):
    """Verify that a database can be installed from a file."""
    getpass_mock.side_effect = [USER, PWD]
    run.main(get_unparsed_args(option="file"))

    # Query for version data. A single returned row verifies that the
    # database exists and that it contains a pdm_utils schema with data.
    version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
    row_count = len(version_rows)
    self.assertEqual(row_count, 1)
def test_main_11(self, getpass_mock):
    """Verify the frozen database is created when the filter value is
    wrapped in quotes."""
    getpass_mock.side_effect = [USER, PWD]

    # Mark Trixie as 'final' so the filter below keeps one genome.
    test_db_utils.execute(
        create_update("phage", "Status", "final", "Trixie"))

    quoted_filter = "phage.Status != 'draft'"
    self.unparsed_args.extend(["-f", quoted_filter])
    run.main(self.unparsed_args)

    frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
    self.assertEqual(frozen_rows[0]["count"], 1)
def test_main_3(self, getpass_mock):
    """Verify frozen database is created from database with
    only one 'final' genome."""
    getpass_mock.side_effect = [USER, PWD]

    # Mark Trixie as 'final' so exactly one genome passes the filter.
    stmt = create_update("phage", "Status", "final", "Trixie")
    test_db_utils.execute(stmt)

    # The filter is deliberately written without spaces around '!='.
    # The leftover debugging print of the argument list was removed so
    # the test no longer writes to stdout.
    self.unparsed_args.extend(["-f", "phage.Status!=draft"])
    run.main(self.unparsed_args)

    count2 = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
    self.assertEqual(count2[0]["count"], 1)
def test_main_6(self, getpass_mock):
    """Verify the frozen database keeps one genome when two filters that
    refer to two different tables are combined."""
    getpass_mock.side_effect = [USER, PWD]

    # Mark Trixie as 'final' before freezing.
    test_db_utils.execute(
        create_update("phage", "Status", "final", "Trixie"))

    self.unparsed_args.extend(
        ["-f", "phage.Status != draft AND gene.Notes = repressor"])
    run.main(self.unparsed_args)

    frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
    frozen_count = frozen_rows[0]["count"]
    self.assertEqual(frozen_count, 1)
def test_main_2(self, alchemy_mock):
    """Verify that exactly one GenBank record is retrieved.

    Trixie is given an accession before the pipeline runs.
    """
    alchemy_mock.return_value = self.alchemist

    set_accession = create_update("phage", "Accession", TRIXIE_ACC, "Trixie")
    test_db_utils.execute(set_accession)

    run.main(self.unparsed_args)
    record_count = count_files(results_path)

    with self.subTest():
        self.assertTrue(results_path.exists())
    with self.subTest():
        self.assertEqual(record_count, 1)
def test_main_5(self, getpass_mock):
    """Verify the frozen database holds no genomes when two filters are
    combined."""
    getpass_mock.side_effect = [USER, PWD]

    # Mark Trixie as 'final' before freezing.
    test_db_utils.execute(
        create_update("phage", "Status", "final", "Trixie"))

    self.unparsed_args.extend(
        ["-f", "phage.Status != draft AND phage.HostGenus = Gordonia"])
    run.main(self.unparsed_args)

    frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
    frozen_count = frozen_rows[0]["count"]
    self.assertEqual(frozen_count, 0)
def test_main_5(self, alchemy_mock, mg_mock):
    """Verify that force_download retrieves draft data even when it is
    already in the database."""
    alchemy_mock.return_value = self.alchemist

    # The matched-genome list contains 2 genomes, only one of which has
    # status = draft.
    genome_pairs = create_matched_genomes()
    mg_mock.return_value = (genome_pairs, {"EagleEye"})

    run.main(get_unparsed_args(draft=True, force_download=True))

    retrieved = count_files(pecaan_genomes_folder)
    self.assertEqual(retrieved, 2)
def test_main_3(self, alchemy_mock):
    """Verify that one filter can exclude the only GenBank record.

    Trixie is given an accession, then a filter excluding that accession
    is applied, so no files are expected in the results folder.
    """
    alchemy_mock.return_value = self.alchemist

    set_accession = create_update("phage", "Accession", TRIXIE_ACC, "Trixie")
    test_db_utils.execute(set_accession)

    exclusion = f"phage.Accession!={TRIXIE_ACC}"
    self.unparsed_args.extend(["-f", exclusion])
    run.main(self.unparsed_args)

    record_count = count_files(results_path)

    with self.subTest():
        self.assertTrue(results_path.exists())
    with self.subTest():
        self.assertEqual(record_count, 0)
def test_main_13(self, getpass_mock):
    """Verify data is changed when there is NO data in the database
    and reset = True.

    Both the phage count and the Version value in the frozen database
    are expected to be 0.
    """
    getpass_mock.side_effect = [USER, PWD]

    extra_args = ["-f", "phage.Status != draft", "-r"]
    self.unparsed_args.extend(extra_args)
    run.main(self.unparsed_args)

    phage_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
    version_rows = test_db_utils.get_data(
        test_db_utils.version_table_query, db=DB2)

    with self.subTest():
        self.assertEqual(phage_rows[0]["count"], 0)
    with self.subTest():
        self.assertEqual(version_rows[0]["Version"], 0)
def test_main_3(self, alchemy_mock, mg_mock):
    """Verify that draft data is retrieved.

    One genome file is expected, with a matching number of rows in the
    import table.
    """
    genome_pairs = create_matched_genomes()
    alchemy_mock.return_value = self.alchemist
    mg_mock.return_value = (genome_pairs, {"EagleEye"})

    run.main(get_unparsed_args(draft=True))

    file_count = count_files(pecaan_genomes_folder)
    row_count = count_rows(pecaan_import_table)

    with self.subTest():
        self.assertEqual(file_count, 1)
    with self.subTest():
        self.assertEqual(file_count, row_count)
def test_main_1(self, getpass_mock):
    """Verify that freezing without filters keeps the genome count.

    The phage count in the frozen database must equal the count in the
    source database, and the frozen Version value must be 1.
    """
    getpass_mock.side_effect = [USER, PWD]
    run.main(self.unparsed_args)

    source_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB)
    frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
    version_rows = test_db_utils.get_data(
        test_db_utils.version_table_query, db=DB2)

    with self.subTest():
        self.assertEqual(source_rows[0]["count"], frozen_rows[0]["count"])
    with self.subTest():
        self.assertEqual(version_rows[0]["Version"], 1)
def test_main_1(self, getpass_mock):
    """Verify step-by-step database conversion round trip is
    successful and generates identical empty schema files."""
    # Downgrade one schema version at a time and generate empty schema
    # file. If current = 4, down = [3, 2, 1, 0]
    for step in reversed(range(self.current)):
        # Credentials are re-armed before every conversion run.
        getpass_mock.side_effect = [USER, PWD]
        unparsed_args = get_unparsed_convert_args(str(step))
        schema_filepath = Path(results_path,
                               f"down_{unparsed_args[-1]}.sql")
        run.main(unparsed_args)
        test_db_utils.create_schema_file(schema_filepath)

    # Now upgrade one schema version at a time and generate empty schema
    # file. If current = 4, up = [1, 2, 3, 4]
    for step in range(1, self.current + 1):
        getpass_mock.side_effect = [USER, PWD]
        unparsed_args = get_unparsed_convert_args(str(step))
        schema_filepath = Path(results_path,
                               f"up_{unparsed_args[-1]}.sql")
        run.main(unparsed_args)
        test_db_utils.create_schema_file(schema_filepath)

    # Compare the empty schema file representing the same schema version,
    # generated from a downgrade (e.g. 7 to 6) and an upgrade
    # (e.g. 5 to 6). If current = 4, compare = [1, 2, 3]
    for step in range(1, self.current):
        up_filepath = Path(results_path, f"up_{step}.sql")
        down_filepath = Path(results_path, f"down_{step}.sql")
        diff_filepath = Path(results_path, f"diff_{step}.txt")
        diff_files(up_filepath, down_filepath, diff_filepath)

        # If there is no difference, the diff file has 0 lines.
        with open(diff_filepath, "r") as handle:
            line_count = len(handle.readlines())

        # Label the subtest so a failure names the schema version.
        with self.subTest(step=step):
            self.assertEqual(line_count, 0)
def test_main_3(self, alchemy_mock):
    """Verify that the version flag updates version data only."""
    alchemy_mock.return_value = self.alchemist
    run.main(get_unparsed_args(version=True))

    version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
    phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
    alice = phage_id_dict(phage_rows)["Alice"]

    with self.subTest():
        self.assertEqual(version_rows[0]["Version"], 1)
    # Alice's HostGenus is checked to confirm that only version data
    # was changed.
    with self.subTest():
        self.assertEqual(alice["HostGenus"], "unknown")
def test_main_4(self, alchemy_mock):
    """Verify that one GenBank record is retrieved when one filter is
    applied.

    Trixie is given an accession and 'final' status so that it passes
    the 'phage.Status!=draft' filter.
    """
    alchemy_mock.return_value = self.alchemist

    # Apply the two Trixie updates in order: Accession, then Status.
    for field, value in (("Accession", TRIXIE_ACC), ("Status", "final")):
        test_db_utils.execute(create_update("phage", field, value, "Trixie"))

    self.unparsed_args.extend(["-f", "phage.Status!=draft"])
    run.main(self.unparsed_args)

    record_count = count_files(results_path)

    with self.subTest():
        self.assertTrue(results_path.exists())
    with self.subTest():
        self.assertEqual(record_count, 1)
def test_main_5(self, getpass_mock):
    """Verify that installing from a file overwrites an existing
    database."""
    getpass_mock.side_effect = [USER, PWD]

    # First install a database with data. Then delete version table.
    test_db_utils.create_filled_test_db()
    test_db_utils.execute("DROP TABLE version")

    run.main(get_unparsed_args(option="file"))

    # Now query for version data. Finding one row verifies that the
    # install replaced the first database.
    version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
    row_count = len(version_rows)
    self.assertEqual(row_count, 1)
def test_main_2(self, alchemy_mock, gpd_mock):
    """Verify duplicate PhagesDB names are identified."""
    # Clear accessions so that GenBank is not queried. No need for that.
    stmt = create_update("phage", "Accession", "")
    test_db_utils.execute(stmt)

    alchemy_mock.return_value = self.alchemist
    gpd_mock.return_value = ({}, {"L5"}, {"D29"})

    run.main(self.unparsed_args)
    count = count_files(test_folder)

    with self.subTest():
        # assertGreater reports the actual count when the check fails.
        self.assertGreater(count, 0)
    with self.subTest():
        gpd_mock.assert_called()
def test_main_4(self, alchemy_mock):
    """Verify final data with very recent date are retrieved
    with force_download."""
    alchemy_mock.return_value = self.alchemist

    # Set DateLastModified far in the future.
    stmt = create_update("phage", "DateLastModified", "2200-01-01")
    test_db_utils.execute(stmt)

    unparsed_args = get_unparsed_args(final=True, force_download=True)
    run.main(unparsed_args)
    count = count_files(phagesdb_genome_folder)

    with self.subTest():
        # It's not clear how stable the storage of any particular final
        # flat file is on PhagesDB. There is possibility that for any
        # genome the associated flat file will be removed. So this is
        # one area of testing that could be improved. For now, simply
        # verify that 1 or more files have been retrieved.
        # assertGreater reports the actual count when the check fails.
        self.assertGreater(count, 0)
def test_main_4(self, alchemy_mock):
    """Verify that a single run updates both the version data and the
    phage table data."""
    alchemy_mock.return_value = self.alchemist

    expected_genus = "Mycobacterium"
    ticket = get_alice_ticket("HostGenus", expected_genus)
    create_update_table([ticket], update_table)

    run.main(get_unparsed_args(file=update_table, version=True))

    version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
    phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
    alice = phage_id_dict(phage_rows)["Alice"]

    with self.subTest():
        self.assertEqual(alice["HostGenus"], expected_genus)
    with self.subTest():
        self.assertEqual(version_rows[0]["Version"], 1)
def test_main_1(self, alchemy_mock):
    """Verify that update runs when the ticket table is empty."""
    alchemy_mock.return_value = self.alchemist
    create_update_table([], update_table)

    run.main(get_unparsed_args(file=update_table))

    version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
    phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
    phages = phage_id_dict(phage_rows)
    alice = phages["Alice"]
    trixie = phages["Trixie"]

    # Nothing should be different.
    with self.subTest():
        self.assertEqual(alice["HostGenus"], "unknown")
    with self.subTest():
        self.assertEqual(trixie["HostGenus"], "unknown")
    with self.subTest():
        self.assertEqual(version_rows[0]["Version"], 0)
def test_main_2(self, alchemy_mock):
    """Verify that genbank data is retrieved.

    One genome file is expected with a matching import table row, and
    the results table must have as many rows as the phage table.
    """
    alchemy_mock.return_value = self.alchemist

    # Apply the two Trixie updates in order: RetrieveRecord, Accession.
    trixie_updates = (("RetrieveRecord", "1"), ("Accession", TRIXIE_ACC))
    for field, value in trixie_updates:
        test_db_utils.execute(
            create_update("phage", field, value, phage_id="Trixie"))

    run.main(get_unparsed_args(genbank=True, genbank_results=True))

    phage_total = test_db_utils.get_data(
        "SELECT COUNT(*) FROM phage")[0]["COUNT(*)"]
    genome_files = count_files(genbank_genomes_folder)
    import_rows = count_rows(genbank_import_table)
    results_rows = count_rows(genbank_results_table)

    with self.subTest():
        self.assertEqual(genome_files, 1)
    with self.subTest():
        self.assertEqual(genome_files, import_rows)
    with self.subTest():
        self.assertEqual(phage_total, results_rows)
def test_main_3(self, getpass_mock):
    """Verify that converting into a new database name creates that
    database.

    The original database must still report the current schema version,
    while the new database reports the downgraded one.
    """
    getpass_mock.side_effect = [USER, PWD]

    # Downgrade one step.
    target = self.current - 1
    run.main(get_unparsed_convert_args(str(target), new_db=DB2))

    # Get data from both databases.
    original_version = test_db_utils.get_data(QUERY)[0]["SchemaVersion"]
    new_version = test_db_utils.get_data(QUERY, db=DB2)[0]["SchemaVersion"]

    with self.subTest():
        self.assertEqual(original_version, self.current)
    with self.subTest():
        self.assertEqual(new_version, target)
def test_main_3(self, getpass_mock):
    """Verify that the database sql file and version file are downloaded
    from the server."""
    getpass_mock.side_effect = [USER, PWD]

    # If the entire Actinobacteriophage database is being downloaded for
    # testing, be sure to only download the SQL file and do NOT install
    # it, else it will overwrite the existing Actinobacteriophage
    # database. Since the pdm_anon user is calling this pipeline, and
    # since this user should not have MySQL privileges to do anything
    # other than select data from Actinobacteriophage, this shouldn't be
    # a problem.
    download_args = get_unparsed_args(db=DB2, option="server",
                                      download=True, version=True,
                                      output_folder=output_path)
    run.main(download_args)

    # NOTE(review): the files are looked up under results_path while the
    # pipeline is given output_path -- confirm how the two are related.
    sql_file = Path(results_path, f"{DB2}.sql")
    version_file = Path(results_path, f"{DB2}.version")

    with self.subTest():
        self.assertTrue(sql_file.exists())
    with self.subTest():
        self.assertTrue(version_file.exists())
def test_main_2(self, getpass_mock):
    """Verify non-step-by-step database conversion round trip is
    successful and generates identical empty schema files."""
    # Two conversion runs follow, so two sets of credentials are queued.
    getpass_mock.side_effect = [USER, PWD, USER, PWD]

    # Get current schema file.
    schema_filepath1 = Path(results_path, "before.sql")
    test_db_utils.create_schema_file(schema_filepath1)

    # Downgrade to schema version 0.
    step = 0
    unparsed_args = get_unparsed_convert_args(str(step))
    run.main(unparsed_args)

    # Now upgrade to the original schema version.
    unparsed_args = get_unparsed_convert_args(str(self.current))
    run.main(unparsed_args)

    # Get data and schema file after round trip.
    schema_filepath2 = Path(results_path, "after.sql")
    test_db_utils.create_schema_file(schema_filepath2)
    new_version_data = test_db_utils.get_data(QUERY)
    new_schema_version = new_version_data[0]["SchemaVersion"]

    # Compare the two empty schema files representing the same schema
    # version, generated from a downgrade to 0 and upgrade.
    diff_filepath = Path(results_path, "diff.txt")
    diff_files(schema_filepath1, schema_filepath2, diff_filepath)

    # Get the number of lines in the diff file. If there is no
    # difference, there should be 0 lines.
    with open(diff_filepath, "r") as handle:
        result = len(handle.readlines())

    # Test the results.
    with self.subTest():
        self.assertEqual(result, 0)
    with self.subTest():
        # The schema version was previously retrieved but never checked;
        # the round trip should end at the version it was upgraded to.
        self.assertEqual(new_schema_version, self.current)
def test_main_2(self, alchemy_mock):
    """Verify that update runs with five tickets in the ticket table.

    All five tickets are built with get_alice_ticket; Trixie and the
    version table are expected to stay unchanged.
    """
    alchemy_mock.return_value = self.alchemist

    host_genus = "Mycobacterium"
    cluster = "A"
    subcluster = "A2"
    status = "final"
    accession = "ABC123"

    # Tickets are created in this order.
    ticket_values = (
        ("HostGenus", host_genus),
        ("Cluster", cluster),
        ("Subcluster", subcluster),
        ("Status", status),
        ("Accession", accession),
    )
    tkts = [get_alice_ticket(field, value) for field, value in ticket_values]
    create_update_table(tkts, update_table)

    run.main(get_unparsed_args(file=update_table))

    version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
    phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
    phages = phage_id_dict(phage_rows)
    alice = phages["Alice"]
    trixie = phages["Trixie"]

    # Alice's fields are checked in this order.
    expected_alice = (
        ("HostGenus", host_genus),
        ("Cluster", cluster),
        ("Subcluster", subcluster),
        ("Accession", accession),
        ("Status", status),
    )
    for field, expected in expected_alice:
        with self.subTest():
            self.assertEqual(alice[field], expected)

    # Just confirm that only Alice data was changed.
    with self.subTest():
        self.assertEqual(trixie["HostGenus"], "unknown")
    with self.subTest():
        self.assertEqual(version_rows[0]["Version"], 0)
"""Controls pipelines from command line.""" import sys from pdm_utils import run run.main(sys.argv)