def setUp(self):
    """Provision an empty test database and bind an engine to it."""
    test_db_utils.create_empty_test_db()
    handler = AlchemyHandler(database=DB, username=USER, password=PWD)
    handler.build_engine()
    self.alchemist = handler
    self.engine = handler.engine
def setUp(self):
    """Ensure the test database exists, then connect and expose its engine."""
    if not test_db_utils.check_if_exists():
        test_db_utils.create_empty_test_db()
    handler = AlchemyHandler(username=USER, password=PWD, database=DB)
    handler.connect()
    self.engine = handler.engine
def setUp(self):
    """Build one engine per test database (DB and DB2)."""
    for index, database in ((1, DB), (2, DB2)):
        handler = AlchemyHandler(database=database, username=USER,
                                 password=PWD)
        handler.build_engine()
        setattr(self, f"alchemist{index}", handler)
        setattr(self, f"engine{index}", handler.engine)
def setUp(self):
    """Connect an AlchemyHandler and record the resubmit output dir path."""
    handler = AlchemyHandler()
    handler.username, handler.password, handler.database = USER, PWD, DB
    handler.connect(ask_database=True, login_attempts=0)
    self.alchemist = handler
    self.resubmit_test_dir = self.test_dir.joinpath("resubmit_test_dir")
def establish_database_connection(database_name: str):
    """Connect an AlchemyHandler to the named MySQL database.

    :param database_name: name of the database to connect to.
    :type database_name: str
    :returns: a connected AlchemyHandler.
    :raises TypeError: if database_name is not a string.
    """
    if not isinstance(database_name, str):
        # Carry the explanation on the exception itself rather than
        # printing to stdout and raising a bare exception class.
        raise TypeError("establish_database_connection requires string input")
    alchemist = AlchemyHandler(database=database_name)
    alchemist.connect()
    return alchemist
def setUp(self):
    """Pin known Version values in both databases and build an engine."""
    test_db_utils.execute("UPDATE version SET Version = 1")
    test_db_utils.execute("UPDATE version SET Version = 0", db=DB2)
    handler = AlchemyHandler(database=DB, username=USER, password=PWD)
    handler.build_engine()
    self.alchemist = handler
    self.engine = handler.engine
def setUp(self):
    """Guarantee a schema-bearing test database, then build an engine."""
    if not test_db_utils.check_if_exists():
        test_db_utils.create_empty_test_db()
    elif not test_db_utils.execute(TABLES_QUERY.format(DB)):
        # Database exists but has no tables yet: install the schema.
        test_db_utils.install_db(test_db_utils.SCHEMA_FILEPATH)
    handler = AlchemyHandler(username=USER, password=PWD)
    handler.build_engine()
    self.alchemist = handler
    self.engine = handler.engine
def main(unparsed_args):
    """Runs the complete update pipeline.

    :param unparsed_args: command line args (program and subcommand first).
    :type unparsed_args: list[str]
    """
    args = parse_args(unparsed_args[2:])

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]
    alchemist = AlchemyHandler(database=args.database,
                               username=mysql_creds["user"],
                               password=mysql_creds["password"])
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the update pipeline")

    if args.version is True:
        mysqldb.change_version(engine)
        print("Database version updated.")

    if args.ticket_table is not None:
        update_table_path = basic.set_path(args.ticket_table, kind="file",
                                           expect=True)

        # Iterate through the tickets and process them sequentially.
        # FIX: the loop variable was named `dict`, shadowing the builtin.
        list_of_update_tickets = []
        with update_table_path.open(mode='r') as f:
            file_reader = csv.DictReader(f)
            for ticket in file_reader:
                list_of_update_tickets.append(ticket)

        # Variables to be used for end summary
        processed = 0
        succeeded = 0
        failed = 0

        for ticket in list_of_update_tickets:
            status = update_field(alchemist, ticket)
            if status == 1:
                processed += 1
                succeeded += 1
            else:
                processed += 1
                failed += 1

        print("\nDone iterating through tickets.")
        if succeeded > 0:
            print(f"{succeeded} / {processed} tickets successfully handled.")
        if failed > 0:
            print(f"{failed} / {processed} tickets failed to be handled.")

    # FIX: dispose unconditionally so version-only runs also release the
    # connection pool (previously only reached on the ticket-table path).
    engine.dispose()
def setUp(self):
    """Connect to the test database, build the graph, and prep a Filter."""
    handler = AlchemyHandler()
    for attr, value in (("username", USER), ("password", PWD),
                        ("database", DB)):
        setattr(handler, attr, value)
    handler.connect(ask_database=True, login_attempts=0)
    handler.build_graph()
    self.alchemist = handler
    self.db_filter = Filter(alchemist=handler)
    self.export_test_dir = self.test_dir.joinpath("export_test_dir")
def setUp(self):
    """Fill the test database, build an engine, and set the scratch dir."""
    # Create test database that contains data for several phages.
    test_db_utils.create_filled_test_db()
    handler = AlchemyHandler(database=DB, username=USER, password=PWD)
    handler.build_engine()
    self.alchemist = handler
    self.engine = handler.engine
    self.temp_dir = "/tmp/pdm_utils_tests_phamerate"
class TestMySQLdbBasic3(unittest.TestCase):
    """Exercise mysqldb_basic retrieval helpers with an engine bound to DB."""

    @classmethod
    def setUpClass(cls):
        # One populated and one empty database for cross-database queries.
        test_db_utils.create_filled_test_db()
        test_db_utils.create_empty_test_db(db=DB2)

    @classmethod
    def tearDownClass(cls):
        test_db_utils.remove_db()
        test_db_utils.remove_db(db=DB2)

    def setUp(self):
        test_db_utils.execute("UPDATE version SET Version = 1")
        test_db_utils.execute("UPDATE version SET Version = 0", db=DB2)
        handler = AlchemyHandler(database=DB, username=USER, password=PWD)
        handler.build_engine()
        self.alchemist = handler
        self.engine = handler.engine

    def tearDown(self):
        self.engine.dispose()

    def test_get_mysql_dbs_2(self):
        """The database set includes DB when the engine is bound to DB."""
        self.assertIn(DB, mysqldb_basic.get_mysql_dbs(self.engine))

    def test_get_tables_2(self):
        """Tables are retrievable for the database the engine is bound to."""
        self.assertIn(TABLE, mysqldb_basic.get_tables(self.engine, DB))

    def test_get_tables_3(self):
        """Tables are retrievable for a database the engine is not bound to."""
        self.assertIn(TABLE, mysqldb_basic.get_tables(self.engine, DB2))

    def test_get_columns_2(self):
        """Columns are retrievable for a table in the bound database."""
        self.assertIn(COLUMN,
                      mysqldb_basic.get_columns(self.engine, DB, TABLE))

    def test_get_columns_3(self):
        """Columns are retrievable for a table in a different database."""
        self.assertIn(COLUMN,
                      mysqldb_basic.get_columns(self.engine, DB2, TABLE))
def setUp(self):
    """Connect to the test database and lay out the fileio scratch paths."""
    handler = AlchemyHandler()
    handler.username = USER
    handler.password = PWD
    handler.database = DB
    handler.connect(ask_database=True, login_attempts=0)
    handler.build_graph()
    self.alchemist = handler

    scratch = self.test_dir.joinpath("fileio_test_dir")
    scratch.mkdir()
    self.fileio_test_dir = scratch
    self.data_dict_file = scratch.joinpath("table.csv")
    self.fasta_file = scratch.joinpath("translations.fasta")
def setUp(self):
    """Prepare a review Filter keyed on gene.PhamID over a live connection."""
    self.review_test_dir = self.test_dir.joinpath("review_test_dir")
    handler = AlchemyHandler()
    handler.username, handler.password, handler.database = USER, PWD, DB
    handler.connect(ask_database=True, login_attempts=0)
    self.alchemist = handler
    review_filter = Filter(alchemist=handler)
    review_filter.add(review.BASE_CONDITIONALS)
    review_filter.key = "gene.PhamID"
    self.db_filter = review_filter
def install_db(database, db_filepath=None, schema_version=None): """Install database. If database already exists, it is first removed.""" # No need to specify database yet, since it needs to first check if the # database exists. alchemist1 = AlchemyHandler(database="") alchemist1.connect(pipeline=True) engine1 = alchemist1.engine result = mysqldb_basic.drop_create_db(engine1, database) if result != 0: print("Unable to create new, empty database.") else: alchemist2 = AlchemyHandler(database=database, username=engine1.url.username, password=engine1.url.password) alchemist2.connect(pipeline=True) engine2 = alchemist2.engine if engine2 is None: print(f"No connection to the {database} database due " "to invalid credentials or database.") else: if db_filepath is not None: mysqldb_basic.install_db(engine2, db_filepath) else: mysqldb.execute_transaction(engine2, db_schema_0.STATEMENTS) convert_args = [ "pdm_utils.run", "convert", database, "-s", str(schema_version) ] convert_db.main(convert_args, engine2) # Close up all connections in the connection pool. engine2.dispose() # Close up all connections in the connection pool. engine1.dispose()
def setUp(self):
    """Create the output folder, normalize key fields, and set the version."""
    test_folder.mkdir()
    self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
    self.alchemist.build_engine()
    # Standardize values in certain fields to define the data
    statements = [
        create_update("phage", "Status", "draft"),
        create_update("phage", "HostGenus", "Mycobacterium"),
        create_update("phage", "Accession", ""),
        create_update("gene", "Notes", "repressor"),
        "UPDATE version SET Version = 1",
    ]
    for statement in statements:
        test_db_utils.execute(statement)
    self.unparsed_args = get_unparsed_args()
def setUp(self):
    """Connect a handler with fixed credentials and bind a Filter."""
    handler = AlchemyHandler()
    handler.username = "******"
    handler.password = "******"
    handler.database = "test_db"
    handler.connect()
    handler.build_graph()
    self.alchemist = handler
    self.db_filter = Filter(alchemist=handler)
    self.phageid = handler.get_column("phage.PhageID")
def setUp(self):
    """Connect to the database and write the FunctionReport input file."""
    handler = AlchemyHandler()
    handler.username, handler.password, handler.database = USER, PWD, DB
    handler.connect(ask_database=True, login_attempts=0)
    self.alchemist = handler

    self.revise_test_dir = self.test_dir.joinpath("revise_test_dir")
    self.fr_input_file_path = self.test_dir.joinpath("FunctionReport.csv")
    self.csv_input_file_path = self.revise_test_dir.joinpath("gene.csv")

    fileio.export_data_dict(TEST_FR_DATA, self.fr_input_file_path,
                            REVIEW_HEADER, include_headers=True)
    self.assertTrue(self.fr_input_file_path.is_file())
def setUp(self):
    """Build an engine and reset phage fields to known baseline values."""
    self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
    self.alchemist.build_engine()
    test_folder.mkdir()
    # Standardize values in certain fields to define the data
    baseline = (
        ("Status", "unknown"),
        ("HostGenus", "unknown"),
        ("Accession", ""),
        ("DateLastModified", "1900-01-01"),
        ("RetrieveRecord", "0"),
    )
    for field, value in baseline:
        test_db_utils.execute(create_update("phage", field, value))
def setUp(self):
    """Create the folder, standardize phage data, and stage PDB fixtures."""
    test_folder.mkdir()
    # Standardize values in certain fields to define the data
    statements = [
        create_update("phage", "Status", "draft"),
        create_update("phage", "Accession", ""),
        create_update("phage", "AnnotationAuthor", "0"),
        create_update("phage", "Accession", TRIXIE_ACC, "Trixie"),
        create_update("phage", "Accession", ALICE_ACC, "Alice"),
        create_update("phage", "Accession", L5_ACC, "L5"),
        create_update("phage", "Accession", TRIXIE_ACC, "D29"),
    ]
    phages = ("Trixie", "Alice", "L5", "D29")
    for phage in phages:
        statements.append(create_update("phage", "Status", "final", phage))
    for phage in phages:
        statements.append(
                create_update("phage", "AnnotationAuthor", "1", phage))
    for statement in statements:
        test_db_utils.execute(statement)

    self.unparsed_args = get_unparsed_args()
    self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
    self.alchemist.build_engine()

    data1, data2, data3 = get_pdb_dict(), get_pdb_dict(), get_pdb_dict()
    data1["phage_name"] = "Trixie"
    data2["phage_name"] = "L5"
    data3["phage_name"] = "unmatched"
    self.pdb_data1, self.pdb_data2, self.pdb_data3 = data1, data2, data3

    json_results = [data1, data2, data3]
    self.pdb_json_data = get_pdb_json_data()
    self.pdb_json_data["results"] = json_results
    self.pdb_json_results = json_results
def main(unparsed_args_list):
    """Run the entirety of the review pipeline from command-line args.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    parsed = parse_review(unparsed_args_list)

    handler = AlchemyHandler(database=parsed.database)
    handler.connect(ask_database=True, pipeline=True)

    input_values = export_db.parse_value_input(parsed.input)

    execute_review(handler, parsed.folder_path, parsed.folder_name,
                   review=parsed.review, values=input_values,
                   filters=parsed.filters, groups=parsed.groups,
                   sort=parsed.sort, g_reports=parsed.gene_reports,
                   s_report=parsed.summary_report,
                   verbose=parsed.verbose)
def setUp(self):
    """Build an engine, create the folder, and normalize database fields."""
    self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
    self.alchemist.build_engine()
    test_folder.mkdir()
    # Standardize values in certain fields to define the data
    baseline = (
        ("Status", "unknown"),
        ("HostGenus", "unknown"),
        ("Accession", ""),
        ("Cluster", "Z"),
        ("Subcluster", "Z1"),
    )
    for field, value in baseline:
        test_db_utils.execute(create_update("phage", field, value))
    test_db_utils.execute("UPDATE version SET Version = 0")
def main(unparsed_args_list):
    """Run the entirety of the resubmit pipeline from command-line args.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    parsed = parse_resubmit(unparsed_args_list)

    handler = AlchemyHandler(database=parsed.database)
    handler.connect(ask_database=True, pipeline=True)

    revisions_data_dicts = basic.retrieve_data_dict(parsed.revisions_file)

    execute_resubmit(handler, revisions_data_dicts,
                     parsed.folder_path, parsed.folder_name,
                     filters=parsed.filters, groups=parsed.groups,
                     verbose=parsed.verbose)
def connect(self, alchemist=None):
    """Connect Filter object to a database with an AlchemyHandler.

    If a handler is supplied it is linked directly; otherwise an existing
    connection is reused, or a new handler is built interactively.

    :param alchemist: An AlchemyHandler object.
    :type alchemist: AlchemyHandler
    """
    # FIX: compare to None with `is not` rather than `!=`.
    if alchemist is not None:
        self.link(alchemist)
        return

    # Already connected and no new handler supplied: nothing to do.
    if self._connected:
        return

    alchemist = AlchemyHandler()
    alchemist.connect(ask_database=True)

    self._engine = alchemist.engine
    self._graph = alchemist.graph
    self._session = alchemist.session
    self._mapper = alchemist.mapper

    self._connected = True
def main(unparsed_args_list): """Run main get_gb_records pipeline.""" # Parse command line arguments args = parse_args(unparsed_args_list) # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']] filters = args.filters ncbi_cred_dict = ncbi.get_ncbi_creds(args.ncbi_credentials_file) output_folder = basic.set_path(args.output_folder, kind="dir", expect=True) working_dir = pathlib.Path(RESULTS_FOLDER) working_path = basic.make_new_dir(output_folder, working_dir, attempt=50) if working_path is None: print(f"Invalid working directory '{working_dir}'") sys.exit(1) # Verify database connection and schema compatibility. print("Connecting to the MySQL database...") alchemist = AlchemyHandler(database=args.database) alchemist.connect(pipeline=True) engine = alchemist.engine mysqldb.check_schema_compatibility(engine, "the get_gb_records pipeline") # Get SQLAlchemy metadata Table object # table_obj.primary_key.columns is a # SQLAlchemy ColumnCollection iterable object # Set primary key = 'phage.PhageID' alchemist.build_metadata() table = querying.get_table(alchemist.metadata, TARGET_TABLE) for column in table.primary_key.columns: primary_key = column # Create filter object and then add command line filter strings db_filter = Filter(alchemist=alchemist, key=primary_key) db_filter.values = [] # Attempt to add filters and exit if needed. add_filters(db_filter, filters) # Performs the query db_filter.update() # db_filter.values now contains list of PhageIDs that pass the filters. # Get the accessions associated with these PhageIDs. 
keep_set = set(db_filter.values) # Create data sets print("Retrieving accessions from the database...") query = construct_accession_query(keep_set) list_of_dicts = mysqldb_basic.query_dict_list(engine, query) id_acc_dict = get_id_acc_dict(list_of_dicts) acc_id_dict = get_acc_id_dict(id_acc_dict) engine.dispose() if len(acc_id_dict.keys()) > 0: get_data(working_path, acc_id_dict, ncbi_cred_dict) else: print("There are no records to retrieve.")
def setUpClass(self):
    """Create the filled test database and cache GenBank feature tables.

    NOTE(review): performs live NCBI (Entrez) network I/O — assumes
    network access and valid accessions in the test database; confirm
    before running offline.
    """
    base_dir = Path(TMPDIR_BASE)
    self.test_dir = base_dir.joinpath(TMPDIR_PREFIX)
    test_db_utils.create_filled_test_db()
    # Start from an empty scratch directory.
    if self.test_dir.is_dir():
        shutil.rmtree(self.test_dir)
    self.test_dir.mkdir()
    self.alchemist = AlchemyHandler()
    self.alchemist.username = USER
    self.alchemist.password = PWD
    self.alchemist.database = DB
    self.alchemist.connect(ask_database=True, login_attempts=0)
    self.acc_id_dict = get_acc_id_dict(self.alchemist)
    accession_list = list(self.acc_id_dict.keys())
    # Fetch feature tables for all accessions in one batched request.
    ncbi_handle = Entrez.efetch(db="nucleotide", rettype="ft",
                                id=",".join(accession_list), retmode="text")
    copy_gb_ft_files(ncbi_handle, self.acc_id_dict, self.test_dir)
def setUp(self):
    """Connect to the database and bind frequently used table columns."""
    handler = AlchemyHandler()
    handler.username = user
    handler.password = pwd
    handler.database = db
    handler.connect()
    self.alchemist = handler
    self.db_filter = Filter(alchemist=handler)

    tables = handler.metadata.tables
    self.phage = tables["phage"]
    self.gene = tables["gene"]
    self.trna = tables["trna"]

    self.PhageID = self.phage.c.PhageID
    self.Cluster = self.phage.c.Cluster
    self.Subcluster = self.phage.c.Subcluster
    self.Notes = self.gene.c.Notes
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the file export pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    # Returns after printing appropriate error message from parsing/connecting.
    args = parse_export(unparsed_args_list)

    alchemist = AlchemyHandler(database=args.database)
    alchemist.connect(ask_database=True, pipeline=True)
    alchemist.build_graph()

    # Exporting as a SQL file is not constricted by schema version.
    if args.pipeline != "sql":
        mysqldb.check_schema_compatibility(alchemist.engine, "export")

    values = []
    if args.pipeline in FILTERABLE_PIPELINES:
        values = parse_value_input(args.input)

    # FIX: idiomatic `not in` (was `not args.pipeline in PIPELINES`) and
    # corrected the typo "discrepency" in the abort message.
    if args.pipeline not in PIPELINES:
        print("ABORTED EXPORT: Unknown pipeline option discrepancy.\n"
              "Pipeline parsed from command line args is not supported")
        sys.exit(1)

    # The interactive "I" pipeline is currently a no-op (dead else removed).
    if args.pipeline != "I":
        execute_export(alchemist, args.folder_path, args.folder_name,
                       args.pipeline, table=args.table, values=values,
                       filters=args.filters, groups=args.groups,
                       sort=args.sort,
                       include_columns=args.include_columns,
                       exclude_columns=args.exclude_columns,
                       sequence_columns=args.sequence_columns,
                       raw_bytes=args.raw_bytes,
                       concatenate=args.concatenate,
                       verbose=args.verbose)
class TestPhamerationFunctions(unittest.TestCase):
    """Integration tests for the phamerate pipeline helper functions.

    Each test runs against a freshly filled test database; clustering
    tests shell out to blastclust/MMseqs2 binaries and use a /tmp
    scratch directory.
    """

    def setUp(self):
        # Create test database that contains data for several phages.
        test_db_utils.create_filled_test_db()
        self.alchemist = AlchemyHandler(database=DB, username=USER,
                                        password=PWD)
        self.alchemist.build_engine()
        self.engine = self.alchemist.engine
        # Scratch directory for clustering input/output files.
        self.temp_dir = "/tmp/pdm_utils_tests_phamerate"

    def tearDown(self):
        self.engine.dispose()
        test_db_utils.remove_db()
        # blastclust can leave an error.log in the working directory.
        run_dir = Path.cwd()
        err_file = run_dir.joinpath("error.log")
        if err_file.exists():
            print("Found leftover blastclust file... removing")
            err_file.unlink()

    def test_1_get_pham_geneids(self):
        """Verify we get back a dictionary"""
        old_phams = get_pham_geneids(self.engine)
        # old_phams should be a dict
        self.assertEqual(type(old_phams), type(dict()))

    def test_2_get_pham_colors(self):
        """Verify we get back a dictionary"""
        old_colors = get_pham_colors(self.engine)
        # old_colors should be a dict
        self.assertEqual(type(old_colors), type(dict()))

    def test_3_get_pham_geneids_and_colors(self):
        """Verify both dictionaries have the same keys"""
        old_phams = get_pham_geneids(self.engine)
        old_colors = get_pham_colors(self.engine)
        # Can't have same keys without the same number of keys...
        with self.subTest():
            self.assertEqual(len(old_phams), len(old_colors))
        # Intersection should be equal to either set of keys - check
        # against old_phams
        with self.subTest():
            self.assertEqual(
                set(old_phams.keys()).intersection(set(old_colors.keys())),
                set(old_phams.keys()))

    def test_4_get_unphamerated_genes(self):
        """Verify we get back a set of length 0"""
        unphamerated = get_new_geneids(self.engine)
        # unphamerated should be a set
        with self.subTest():
            self.assertEqual(type(unphamerated), type(set()))
        # pdm_test_db has 0 unphamerated genes
        with self.subTest():
            self.assertEqual(len(unphamerated), 0)

    def test_5_map_geneids_to_translations(self):
        """Verify we get back a dictionary"""
        gs_to_ts = map_geneids_to_translations(self.engine)
        command = "SELECT distinct(GeneID) FROM gene"
        results = mysqldb_basic.query_dict_list(self.engine, command)
        # gs_to_ts should be a dictionary
        with self.subTest():
            self.assertEqual(type(gs_to_ts), type(dict()))
        # gs_to_ts should have the right number of geneids
        with self.subTest():
            self.assertEqual(len(gs_to_ts), len(results))

    def test_6_map_translations_to_geneids(self):
        """Verify we get back a dictionary"""
        ts_to_gs = map_translations_to_geneids(self.engine)
        command = "SELECT distinct(CONVERT(Translation USING utf8)) FROM gene"
        results = mysqldb_basic.query_dict_list(self.engine, command)
        # ts_to_gs should be a dictionary
        with self.subTest():
            self.assertEqual(type(ts_to_gs), type(dict()))
        # ts_to_gs should have the right number of translations
        with self.subTest():
            self.assertEqual(len(ts_to_gs), len(results))

    def test_7_refresh_tempdir_1(self):
        """Verify if no temp_dir, refresh can make one"""
        if not os.path.exists(self.temp_dir):
            refresh_tempdir(self.temp_dir)
        self.assertTrue(os.path.exists(self.temp_dir))

    def test_8_refresh_tempdir_2(self):
        """Verify if temp_dir with something, refresh makes new empty one"""
        filename = f"{self.temp_dir}/test.txt"
        if not os.path.exists(self.temp_dir):
            refresh_tempdir(self.temp_dir)
        f = open(filename, "w")
        f.write("test\n")
        f.close()
        # Our test file should now exist
        with self.subTest():
            self.assertTrue(os.path.exists(filename))
        # Refresh temp_dir
        refresh_tempdir(self.temp_dir)
        # temp_dir should now exist, but test file should not
        with self.subTest():
            self.assertTrue(os.path.exists(self.temp_dir))
        with self.subTest():
            self.assertFalse(os.path.exists(filename))

    def test_9_write_fasta(self):
        """Verify file gets written properly"""
        filename = f"{self.temp_dir}/input.fasta"
        # refresh_tempdir
        refresh_tempdir(self.temp_dir)
        # Get translations to geneid mappings
        ts_to_gs = map_translations_to_geneids(self.engine)
        # Write fasta
        write_fasta(ts_to_gs, self.temp_dir)
        # Read fasta, make sure number of lines is 2x number of unique
        # translations
        with open(filename, "r") as fh:
            lines = fh.readlines()
        with self.subTest():
            self.assertEqual(len(lines), 2 * len(ts_to_gs))
        # all odd-index lines should map to a key in ts_to_gs
        for i in range(len(lines)):
            if i % 2 == 1:
                with self.subTest():
                    self.assertTrue(
                        lines[i].lstrip(">").rstrip() in ts_to_gs.keys())

    # TODO: comment out this method if you don't have blast_2.2.14 binaries
    def test_10_create_blastdb(self):
        """Verify blastclust database gets made"""
        refresh_tempdir(self.temp_dir)
        db_file = f"{self.temp_dir}/sequenceDB"
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("blast", self.temp_dir)
        # Check that database files were made
        for ext in ["phr", "pin", "psd", "psi", "psq"]:
            with self.subTest():
                self.assertTrue(os.path.exists(f"{db_file}.{ext}"))

    def test_11_create_mmseqsdb(self):
        """Verify mmseqs database gets made"""
        refresh_tempdir(self.temp_dir)
        db_file = f"{self.temp_dir}/sequenceDB"
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("mmseqs", self.temp_dir)
        # Check that database file was made
        self.assertTrue(os.path.exists(db_file))

    def test_12_create_clusterdb(self):
        """Verify no database file gets made"""
        refresh_tempdir(self.temp_dir)
        db_file = f"{self.temp_dir}/sequenceDB"
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        # Unknown program name: helper should do nothing.
        create_clusterdb("unknown", self.temp_dir)
        # Check that database file was not made
        self.assertFalse(os.path.exists(db_file))

    # TODO: comment out this method if you don't have blast_2.2.14 binaries
    def test_13_phamerate_blast(self):
        """Verify we can phamerate with blastclust"""
        refresh_tempdir(self.temp_dir)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("blast", self.temp_dir)
        phamerate(get_program_params("blast"), "blast", self.temp_dir)
        # Make sure clustering output file exists
        self.assertTrue(os.path.exists(f"{self.temp_dir}/output.txt"))

    def test_14_phamerate_mmseqs(self):
        """Verify we can phamerate with mmseqs2"""
        refresh_tempdir(self.temp_dir)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("mmseqs", self.temp_dir)
        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)
        # Make sure clustering output file exists
        self.assertTrue(os.path.exists(f"{self.temp_dir}/clusterDB.index"))

    def test_15_phamerate_unknown(self):
        """Verify we cannot phamerate with unknown"""
        refresh_tempdir(self.temp_dir)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("unknown", self.temp_dir)
        phamerate(get_program_params("unknown"), "unknown", self.temp_dir)
        # Make sure clustering output file does not exist
        self.assertFalse(os.path.exists(f"{self.temp_dir}/clusterDB"))

    # TODO: comment out this method if you don't have blast_2.2.14 binaries
    def test_16_parse_blast_output(self):
        """Verify we can open and parse blastclust output"""
        refresh_tempdir(self.temp_dir)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("blast", self.temp_dir)
        phamerate(get_program_params("blast"), "blast", self.temp_dir)
        phams = parse_output("blast", self.temp_dir)
        # The number of phams should be greater than 0 and less than or
        # equal to the number of distinct translations
        with self.subTest():
            self.assertEqual(type(phams), type(dict()))
        with self.subTest():
            self.assertGreater(len(phams), 0)
        with self.subTest():
            self.assertLessEqual(len(phams), len(ts_to_gs))

    def test_17_parse_mmseqs_output(self):
        """Verify we can open and parse MMseqs2 output"""
        refresh_tempdir(self.temp_dir)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("mmseqs", self.temp_dir)
        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)
        phams = parse_output("mmseqs", self.temp_dir)
        # The number of phams should be greater than 0 and less than or
        # equal to the number of distinct translations
        with self.subTest():
            self.assertEqual(type(phams), type(dict()))
        with self.subTest():
            self.assertGreater(len(phams), 0)
        with self.subTest():
            self.assertLessEqual(len(phams), len(ts_to_gs))

    def test_18_parse_unknown_output(self):
        """Verify we cannot open and parse unknown output"""
        refresh_tempdir(self.temp_dir)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("unknown", self.temp_dir)
        phamerate(get_program_params("unknown"), "unknown", self.temp_dir)
        phams = parse_output("unknown", self.temp_dir)
        # The number of phams should be greater than 0 and less than or
        # equal to the number of distinct translations
        with self.subTest():
            self.assertEqual(type(phams), type(dict()))
        with self.subTest():
            self.assertEqual(len(phams), 0)

    def test_19_reintroduce_duplicates(self):
        """Verify that we can put de-duplicated GeneIDs back together"""
        refresh_tempdir(self.temp_dir)
        gs_to_ts = map_geneids_to_translations(self.engine)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("mmseqs", self.temp_dir)
        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)
        new_phams = parse_output("mmseqs", self.temp_dir)
        re_duped_phams = reintroduce_duplicates(new_phams=new_phams,
                                                trans_groups=ts_to_gs,
                                                genes_and_trans=gs_to_ts)
        geneid_total = 0
        for key in re_duped_phams.keys():
            geneid_total += len(re_duped_phams[key])
        # All geneids should be represented in the re_duped_phams
        self.assertEqual(geneid_total, len(gs_to_ts.keys()))

    def test_20_preserve_phams(self):
        """Verify that pham preservation seems to be working"""
        refresh_tempdir(self.temp_dir)
        old_phams = get_pham_geneids(self.engine)
        old_colors = get_pham_colors(self.engine)
        unphamerated = get_new_geneids(self.engine)
        gs_to_ts = map_geneids_to_translations(self.engine)
        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)
        create_clusterdb("mmseqs", self.temp_dir)
        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)
        new_phams = parse_output("mmseqs", self.temp_dir)
        new_phams = reintroduce_duplicates(new_phams=new_phams,
                                           trans_groups=ts_to_gs,
                                           genes_and_trans=gs_to_ts)
        final_phams, new_colors = preserve_phams(old_phams=old_phams,
                                                 new_phams=new_phams,
                                                 old_colors=old_colors,
                                                 new_genes=unphamerated)
        # Final phams should be a dict with same number of keys as new_phams
        # since we aren't re-dimensioning, just renaming some keys
        with self.subTest():
            self.assertEqual(type(final_phams), type(dict()))
        with self.subTest():
            self.assertEqual(len(final_phams), len(new_phams))
        # New colors should be a dict with the same number of keys as
        # final_phams
        with self.subTest():
            self.assertEqual(type(new_colors), type(dict()))
        with self.subTest():
            self.assertEqual(len(new_colors), len(final_phams))
        # Can't compare the keys or phams since there's no guarantee that
        # any of the phams were preserved but we can make sure all genes are
        # accounted for
        genes_1_count = len(unphamerated)
        for key in old_phams.keys():
            genes_1_count += len(old_phams[key])
        genes_2_count = 0
        for key in new_phams.keys():
            genes_2_count += len(new_phams[key])
        with self.subTest():
            self.assertEqual(genes_1_count, genes_2_count)
class TestUpdate(unittest.TestCase):
    """End-to-end tests of the update pipeline (run.main) against the
    filled test database, with AlchemyHandler patched so the pipeline
    reuses the test connection."""

    @classmethod
    def setUpClass(self):
        test_db_utils.create_filled_test_db()

    @classmethod
    def tearDownClass(self):
        test_db_utils.remove_db()

    def setUp(self):
        self.alchemist = AlchemyHandler(database=DB, username=USER,
                                        password=PWD)
        self.alchemist.build_engine()
        test_folder.mkdir()
        # Standardize values in certain fields to define the data
        stmt1 = create_update("phage", "Status", "unknown")
        test_db_utils.execute(stmt1)
        stmt2 = create_update("phage", "HostGenus", "unknown")
        test_db_utils.execute(stmt2)
        stmt3 = create_update("phage", "Accession", "")
        test_db_utils.execute(stmt3)
        stmt4 = create_update("phage", "Cluster", "Z")
        test_db_utils.execute(stmt4)
        stmt5 = create_update("phage", "Subcluster", "Z1")
        test_db_utils.execute(stmt5)
        stmt6 = "UPDATE version SET Version = 0"
        test_db_utils.execute(stmt6)

    def tearDown(self):
        shutil.rmtree(test_folder)

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_1(self, alchemy_mock):
        """Verify update runs with empty ticket table."""
        alchemy_mock.return_value = self.alchemist
        create_update_table([], update_table)
        unparsed_args = get_unparsed_args(file=update_table)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(
            test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        trixie = data_dict["Trixie"]
        # Nothing should be different.
        with self.subTest():
            self.assertEqual(alice["HostGenus"], "unknown")
        with self.subTest():
            self.assertEqual(trixie["HostGenus"], "unknown")
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 0)

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_2(self, alchemy_mock):
        """Verify update runs with five tickets in ticket table."""
        alchemy_mock.return_value = self.alchemist
        host_genus = "Mycobacterium"
        cluster = "A"
        subcluster = "A2"
        status = "final"
        accession = "ABC123"
        # One ticket per field, all targeting the Alice phage.
        tkt1 = get_alice_ticket("HostGenus", host_genus)
        tkt2 = get_alice_ticket("Cluster", cluster)
        tkt3 = get_alice_ticket("Subcluster", subcluster)
        tkt4 = get_alice_ticket("Status", status)
        tkt5 = get_alice_ticket("Accession", accession)
        tkts = [tkt1, tkt2, tkt3, tkt4, tkt5]
        create_update_table(tkts, update_table)
        unparsed_args = get_unparsed_args(file=update_table)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(
            test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        trixie = data_dict["Trixie"]
        with self.subTest():
            self.assertEqual(alice["HostGenus"], host_genus)
        with self.subTest():
            self.assertEqual(alice["Cluster"], cluster)
        with self.subTest():
            self.assertEqual(alice["Subcluster"], subcluster)
        with self.subTest():
            self.assertEqual(alice["Accession"], accession)
        with self.subTest():
            self.assertEqual(alice["Status"], status)
        # Just confirm that only Alice data was changed.
        with self.subTest():
            self.assertEqual(trixie["HostGenus"], "unknown")
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 0)

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_3(self, alchemy_mock):
        """Verify version data is updated."""
        alchemy_mock.return_value = self.alchemist
        unparsed_args = get_unparsed_args(version=True)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(
            test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 1)
        # Just confirm that only version data was changed.
        with self.subTest():
            self.assertEqual(alice["HostGenus"], "unknown")

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_4(self, alchemy_mock):
        """Verify version data and phage table data are updated."""
        alchemy_mock.return_value = self.alchemist
        host_genus = "Mycobacterium"
        tkt = get_alice_ticket("HostGenus", host_genus)
        create_update_table([tkt], update_table)
        unparsed_args = get_unparsed_args(file=update_table, version=True)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(
            test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        with self.subTest():
            self.assertEqual(alice["HostGenus"], host_genus)
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 1)
class TestMysqldbBasic6(unittest.TestCase):
    """Tests for mysqldb_basic get_distinct() and retrieve_data()
    against a populated database (DB) and an empty database (DB2)."""

    @classmethod
    def setUpClass(cls):
        """Create both databases and load three phages and four genes
        into the primary database."""
        test_db_utils.create_empty_test_db(db=DB2)
        test_db_utils.create_empty_test_db()

        # Per-phage overrides applied on top of the Trixie template:
        # (PhageID, HostGenus, Accession, Cluster, Subcluster, Sequence, Length)
        phage_rows = [
            ("L5", "Mycobacterium", "ABC123", "A", "A1", "atcg", 6),
            ("Trixie", "Mycobacterium", "XYZ456", "B", "NULL", "AATT", 4),
            ("D29", "Gordonia", "", "NULL", "NULL", "GGCC", 5),
            ]
        for phage_id, host, accession, cluster, subcluster, seq, length in phage_rows:
            row = test_data_utils.get_trixie_phage_data()
            row["PhageID"] = phage_id
            row["HostGenus"] = host
            row["Accession"] = accession
            row["Cluster"] = cluster
            row["Subcluster"] = subcluster
            row["Sequence"] = seq
            row["Length"] = length
            row["DateLastModified"] = constants.EMPTY_DATE
            test_db_utils.insert_data(PHAGE, row)

        # Per-gene overrides applied on top of the Trixie gene template:
        # (PhageID, GeneID)
        gene_rows = [
            ("Trixie", "Trixie_1"),
            ("Trixie", "Trixie_2"),
            ("Trixie", "Trixie_3"),
            ("D29", "D29_1"),
            ]
        for phage_id, gene_id in gene_rows:
            row = test_data_utils.get_trixie_gene_data()
            row["PhageID"] = phage_id
            row["GeneID"] = gene_id
            test_db_utils.insert_data(GENE, row)

    @classmethod
    def tearDownClass(cls):
        """Remove both test databases."""
        test_db_utils.remove_db()
        test_db_utils.remove_db(db=DB2)

    def setUp(self):
        """Build one engine per database: engine1 -> populated DB,
        engine2 -> empty DB2."""
        self.alchemist1 = AlchemyHandler(database=DB, username=USER,
                                         password=PWD)
        self.alchemist1.build_engine()
        self.engine1 = self.alchemist1.engine
        self.alchemist2 = AlchemyHandler(database=DB2, username=USER,
                                         password=PWD)
        self.alchemist2.build_engine()
        self.engine2 = self.alchemist2.engine

    def tearDown(self):
        """Release both engine connection pools."""
        for engine in (self.engine1, self.engine2):
            engine.dispose()

    def test_get_distinct_1(self):
        """Retrieve a set of all distinct values when data is not present."""
        observed = mysqldb_basic.get_distinct(self.engine2, "phage", "PhageID")
        self.assertEqual(observed, set())

    def test_get_distinct_2(self):
        """Retrieve a set of all distinct values when data is present."""
        # (column, extra kwargs, expected distinct values)
        cases = [
            ("PhageID", {}, {"L5", "Trixie", "D29"}),
            ("HostGenus", {"null": "test"}, {"Mycobacterium", "Gordonia"}),
            ("Accession", {}, {"ABC123", "XYZ456", ""}),
            ("Cluster", {"null": "Singleton"}, {"A", "B", "Singleton"}),
            ("Subcluster", {"null": "none"}, {"A1", "none"}),
            ]
        for column, kwargs, expected in cases:
            with self.subTest():
                observed = mysqldb_basic.get_distinct(
                                self.engine1, "phage", column, **kwargs)
                self.assertEqual(observed, expected)

    def test_retrieve_data_1(self):
        """Verify that a dictionary of data is retrieved for a valid
        PhageID."""
        rows = mysqldb_basic.retrieve_data(
                    self.engine1, column="PhageID", id_list=["L5"],
                    query=PHAGE_QUERY)
        with self.subTest():
            self.assertEqual(len(rows[0].keys()), 14)
        with self.subTest():
            self.assertEqual(rows[0]["PhageID"], "L5")

    def test_retrieve_data_2(self):
        """Verify that an empty list is retrieved
        for an invalid PhageID."""
        rows = mysqldb_basic.retrieve_data(
                    self.engine1, column="PhageID", id_list=["EagleEye"],
                    query=PHAGE_QUERY)
        self.assertEqual(len(rows), 0)

    def test_retrieve_data_3(self):
        """Verify that dictionaries of data are retrieved for a list of two
        valid PhageIDs."""
        rows = mysqldb_basic.retrieve_data(
                    self.engine1, column="PhageID", id_list=["L5", "Trixie"],
                    query=PHAGE_QUERY)
        self.assertEqual(len(rows), 2)

    def test_retrieve_data_4(self):
        """Verify that dictionaries of data are retrieved for a list of three
        valid PhageIDs and one invalid PhageID."""
        rows = mysqldb_basic.retrieve_data(
                    self.engine1, column="PhageID",
                    id_list=["L5", "Trixie", "EagleEye", "D29"],
                    query=PHAGE_QUERY)
        # The invalid ID is simply absent from the result set.
        self.assertEqual(len(rows), 3)

    def test_retrieve_data_5(self):
        """Verify that dictionaries of data are retrieved for multiple valid
        PhageIDs when no list is provided."""
        rows = mysqldb_basic.retrieve_data(self.engine1, query=PHAGE_QUERY)
        self.assertEqual(len(rows), 3)

    def test_retrieve_data_6(self):
        """Verify that a list of CDS data is retrieved for a valid
        PhageID."""
        rows = mysqldb_basic.retrieve_data(
                    self.engine1, column="PhageID", id_list=["Trixie"],
                    query=GENE_QUERY)
        with self.subTest():
            self.assertEqual(len(rows), 3)
        with self.subTest():
            self.assertEqual(len(rows[0].keys()), 13)
        with self.subTest():
            self.assertEqual(rows[0]["PhageID"], "Trixie")

    def test_retrieve_data_7(self):
        """Verify that an empty list of CDS data is retrieved
        for an invalid PhageID."""
        # L5 exists in the phage table but owns no gene rows.
        rows = mysqldb_basic.retrieve_data(
                    self.engine1, column="PhageID", id_list=["L5"],
                    query=GENE_QUERY)
        self.assertEqual(len(rows), 0)

    def test_retrieve_data_8(self):
        """Verify that a list of all CDS data is retrieved when no
        PhageID is provided."""
        rows = mysqldb_basic.retrieve_data(self.engine1, query=GENE_QUERY)
        self.assertEqual(len(rows), 4)