def test_get_table_1(self, translate_table_mock):
    """Verify get_table() delegates to translate_table() with its own arguments."""
    expected_args = (self.metadata, "phage")
    translate_table_mock.return_value = "phage"

    querying.get_table(*expected_args)

    translate_table_mock.assert_called_with(*expected_args)
def parse_feature_data(alchemist, values=None, limit=8000):
    """Return Cds objects containing data parsed from a MySQL database.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param values: List of GeneIDs upon which the query can be conditioned.
    :type values: list[str]
    :param limit: Maximum number of values per conditioned subquery batch.
    :type limit: int
    :returns: Cds objects built from the retrieved gene table rows.
    :rtype: list
    """
    # Fix: the original used a mutable default argument (values=[]), which is
    # shared across calls; default to None and create a fresh list instead.
    if values is None:
        values = []

    gene_table = querying.get_table(alchemist.metadata, "gene")
    # First primary-key column of the gene table conditions the subqueries.
    primary_key = list(gene_table.primary_key.columns)[0]
    cds_data_columns = list(gene_table.c)

    cds_data_query = querying.build_select(alchemist.graph, cds_data_columns)
    cds_data = querying.execute(alchemist.engine, cds_data_query,
                                in_column=primary_key, values=values,
                                limit=limit)

    # Convert each raw row dictionary into a Cds feature object.
    return [mysqldb.parse_gene_table_data(data_dict)
            for data_dict in cds_data]
def key(self, key):
    """Set the filter key to a SQLAlchemy Column.

    Accepts either a Column object directly, or a string naming a MySQL
    column or table.  A table name resolves to the first column of that
    table's primary key.  String input requires a built graph (i.e. an
    active MySQL connection).

    :param key: Column object, or MySQL column/table name string.
    :raises ValueError: If a string key is given without a connection,
        or names neither a valid column nor a valid table.
    :raises TypeError: If the key is neither a Column nor a string.
    """
    if isinstance(key, Column):
        self._key = key
    elif isinstance(key, str):
        if self.graph is None:
            raise ValueError("String key input requires MySQL connection.")
        metadata = self.graph.graph["metadata"]
        # Try the string as a column name first, then fall back to
        # treating it as a table name.
        # Fix: the original used bare `except:` clauses, which also swallow
        # SystemExit/KeyboardInterrupt; narrowed to Exception.
        try:
            self._key = q.get_column(metadata, key)
        except Exception:
            try:
                table_obj = q.get_table(metadata, key)
            except Exception:
                raise ValueError("Inputted string key is neither a valid "
                                 "MySQL column nor table.")
            self._key = list(table_obj.primary_key.columns)[0]
    else:
        # Fix: the original message was missing a space between sentences
        # ("invalid.Filter key").
        raise TypeError("Filter key value is invalid. "
                        "Filter key must be one of the following: \n"
                        "SQLAlchemy Column\n"
                        "MySQL column string\n"
                        "MySQL table string\n")
def main(unparsed_args_list): """Run main get_gb_records pipeline.""" # Parse command line arguments args = parse_args(unparsed_args_list) # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']] filters = args.filters ncbi_cred_dict = ncbi.get_ncbi_creds(args.ncbi_credentials_file) output_folder = basic.set_path(args.output_folder, kind="dir", expect=True) working_dir = pathlib.Path(RESULTS_FOLDER) working_path = basic.make_new_dir(output_folder, working_dir, attempt=50) if working_path is None: print(f"Invalid working directory '{working_dir}'") sys.exit(1) # Verify database connection and schema compatibility. print("Connecting to the MySQL database...") alchemist = AlchemyHandler(database=args.database) alchemist.connect(pipeline=True) engine = alchemist.engine mysqldb.check_schema_compatibility(engine, "the get_gb_records pipeline") # Get SQLAlchemy metadata Table object # table_obj.primary_key.columns is a # SQLAlchemy ColumnCollection iterable object # Set primary key = 'phage.PhageID' alchemist.build_metadata() table = querying.get_table(alchemist.metadata, TARGET_TABLE) for column in table.primary_key.columns: primary_key = column # Create filter object and then add command line filter strings db_filter = Filter(alchemist=alchemist, key=primary_key) db_filter.values = [] # Attempt to add filters and exit if needed. add_filters(db_filter, filters) # Performs the query db_filter.update() # db_filter.values now contains list of PhageIDs that pass the filters. # Get the accessions associated with these PhageIDs. 
keep_set = set(db_filter.values) # Create data sets print("Retrieving accessions from the database...") query = construct_accession_query(keep_set) list_of_dicts = mysqldb_basic.query_dict_list(engine, query) id_acc_dict = get_id_acc_dict(list_of_dicts) acc_id_dict = get_acc_id_dict(id_acc_dict) engine.dispose() if len(acc_id_dict.keys()) > 0: get_data(working_path, acc_id_dict, ncbi_cred_dict) else: print("There are no records to retrieve.")
def test_execute_2(self):
    """Verify execute() retrieves expected data."""
    clause = querying.build_where_clause(self.graph, "phage.Cluster=A")
    table = querying.get_table(self.metadata, "phage")
    statement = querying.build_select(self.graph, table, where=clause)

    # Every retrieved row must satisfy the where clause.
    for row in querying.execute(self.engine, statement):
        self.assertEqual(row["Cluster"], "A")
def test_execute_1(self):
    """Verify execute() correctly executes SQLAlchemy select objects."""
    clause = querying.build_where_clause(self.graph, "phage.Cluster=A")
    table = querying.get_table(self.metadata, "phage")
    statement = querying.build_select(self.graph, table, where=clause)

    rows = querying.execute(self.engine, statement)

    # The first row should expose the selected phage table columns.
    row_keys = rows[0].keys()
    for expected_column in ("PhageID", "Cluster", "Subcluster"):
        self.assertTrue(expected_column in row_keys)
def test_execute_value_subqueries(self):
    """Verify execute_value_subqueries() retrieves expected data."""
    clause = querying.build_where_clause(self.graph, "phage.Cluster=A")
    table = querying.get_table(self.metadata, "phage")
    id_column = querying.get_column(self.metadata, "phage.PhageID")
    statement = querying.build_select(self.graph, table, where=clause)

    # limit=2 forces the value list to be split across subquery batches.
    rows = querying.execute_value_subqueries(
        self.engine, statement, id_column,
        ["Trixie", "D29", "Alice", "Myrna"],
        limit=2)

    for row in rows:
        self.assertEqual(row["Cluster"], "A")
def test_get_table_3(self):
    """Verify get_table() raises ValueError from invalid table name."""
    self.assertRaises(ValueError, querying.get_table,
                      self.metadata, "not_a_table")
def test_get_table_2(self):
    """Verify get_table() operates case insensitive."""
    # Mixed-case name should still resolve to the phage Table.
    retrieved = querying.get_table(self.metadata, "pHAgE")
    self.assertEqual(retrieved, self.phage)
def test_get_table_1(self):
    """Verify get_table() retrieves correct Table."""
    retrieved = querying.get_table(self.metadata, "phage")
    self.assertEqual(retrieved, self.phage)
def test_get_table_2(self):
    """Verify get_table() returns the correct Table object."""
    self.assertEqual(querying.get_table(self.metadata, "phage"),
                     self.phage)
def main(unparsed_args_list):
    """Run main freeze database pipeline.

    Copies the reference database into a new "frozen" database that
    retains only the genomes passing the command line filters, deleting
    all other genomes from the copy.

    :param unparsed_args_list: Raw command line arguments (sys.argv style).
    :type unparsed_args_list: list[str]
    """
    args = parse_args(unparsed_args_list)
    ref_database = args.database
    reset = args.reset
    new_database = args.new_database_name
    prefix = args.prefix

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filters = args.filters

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist1 = AlchemyHandler(database=ref_database,
                                username=mysql_creds["user"],
                                password=mysql_creds["password"])
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine
    mysqldb.check_schema_compatibility(engine1, "the freeze pipeline")

    # Get SQLAlchemy metadata Table object.
    # table_obj.primary_key.columns is a SQLAlchemy ColumnCollection
    # iterable object, so iterate to capture the (single) primary key
    # column, e.g. 'phage.PhageID'.
    alchemist1.build_metadata()
    table = querying.get_table(alchemist1.metadata, TARGET_TABLE)
    for column in table.primary_key.columns:
        primary_key = column

    # Create filter object and then add command line filter strings.
    db_filter = Filter(alchemist=alchemist1, key=primary_key)
    db_filter.values = []

    # Attempt to add filters and exit if needed.
    add_filters(db_filter, filters)

    # Performs the query; db_filter.values now contains the list of
    # PhageIDs that pass the filters.
    db_filter.update()

    # Get the number of genomes that will be retained and build the
    # MySQL DELETE statement (removes everything NOT in keep_set).
    keep_set = set(db_filter.values)
    delete_stmt = construct_delete_stmt(TARGET_TABLE, primary_key, keep_set)
    count_query = construct_count_query(TARGET_TABLE, primary_key, keep_set)
    phage_count = mysqldb_basic.scalar(alchemist1.engine, count_query)

    # Determine the name of the new database; default is
    # "<prefix>_<retained genome count>".
    if new_database is None:
        if prefix is None:
            prefix = get_prefix()
        new_database = f"{prefix}_{phage_count}"

    # Create the new database, but prevent overwriting of current database.
    if engine1.url.database != new_database:
        result = mysqldb_basic.drop_create_db(engine1, new_database)
    else:
        print(
            "Error: names of the reference and frozen databases are the same.")
        print("No database will be created.")
        result = 1

    # Copy database (result == 0 signals success at each step).
    if result == 0:
        print(f"Reference database: {ref_database}")
        print(f"New database: {new_database}")
        result = mysqldb_basic.copy_db(engine1, new_database)
        if result == 0:
            print(f"Deleting genomes...")
            # Second connection targets the freshly copied database.
            alchemist2 = AlchemyHandler(database=new_database,
                                        username=engine1.url.username,
                                        password=engine1.url.password)
            alchemist2.connect(pipeline=True)
            engine2 = alchemist2.engine
            engine2.execute(delete_stmt)
            if reset:
                engine2.execute(RESET_VERSION)

            # Close up all connections in the connection pool.
            engine2.dispose()
        else:
            print("Unable to copy the database.")
        # Close up all connections in the connection pool.
        engine1.dispose()
    else:
        print(f"Error creating new database: {new_database}.")

    print("Freeze database script completed.")
def test_get_table_2(self):
    """Verify get_table() returns the expected Table object."""
    self.assertEqual(querying.get_table(self.metadata, "phage"),
                     self.phage)
def test_get_table_1(self, TranslateTable):
    """Verify get_table() delegates to translate_table() with its arguments."""
    expected_args = (self.metadata, "phage")
    TranslateTable.return_value = "phage"

    querying.get_table(*expected_args)

    TranslateTable.assert_called_with(*expected_args)