def setUp(self, is_instance_mock):
    """Wire a Filter to spec'd SQLAlchemy mocks and sanity-check the link."""
    is_instance_mock.return_value = True

    mock_alchemist = Mock(spec=AlchemyHandler)
    mock_key = Mock(spec=Column)
    mock_proxy = Mock()

    # Expose each mocked SQLAlchemy handle as a read-only property of the
    # mocked AlchemyHandler, mirroring the real object's interface.
    mock_handles = {"engine": Mock(spec=Engine),
                    "graph": Mock(spec=Graph),
                    "session": Mock(spec=Session),
                    "mapper": Mock(spec=DeclarativeMeta),
                    "connected": True}
    for attr_name, attr_value in mock_handles.items():
        setattr(type(mock_alchemist), attr_name,
                PropertyMock(return_value=attr_value))

    # Any executed statement yields a proxy whose fetchall() is empty.
    mock_handles["engine"].execute.return_value = mock_proxy
    mock_proxy.fetchall.return_value = []

    self.mock_isinstance = is_instance_mock
    self.mock_alchemist = mock_alchemist
    self.mock_engine = mock_handles["engine"]
    self.mock_graph = mock_handles["graph"]
    self.mock_session = mock_handles["session"]
    self.mock_mapper = mock_handles["mapper"]
    self.mock_key = mock_key

    self.db_filter = Filter(alchemist=mock_alchemist, key=mock_key)

    # Sanity check: the Filter should have adopted the mocked handles.
    self.assertEqual(self.db_filter.engine, self.mock_engine)
    self.assertEqual(self.db_filter.graph, self.mock_graph)
def get_cds_seqrecords(alchemist, values, data_cache=None, nucleotide=False,
                       verbose=False):
    """Convert CDS feature SQL data into Biopython SeqRecord objects.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :param values: Values to select CDS features with.
    :param data_cache: Optional mapping of genome_id to parsed genome data,
        reused and extended across calls to avoid duplicate lookups.
    :param nucleotide: Toggle for nucleotide output.
    :param verbose: A boolean value to toggle progress print statements.
    :returns: List of SeqRecord objects, one per CDS feature.
    """
    # NOTE(review): nucleotide is accepted but not referenced in this body —
    # confirm whether downstream conversion is expected to honor it.
    if data_cache is None:
        data_cache = {}

    feature_list = parse_feature_data(alchemist, values=values)

    domain_filter = Filter(alchemist)
    domain_filter.key = 'gene.GeneID'

    if verbose:
        print("...Converting SQL data...")

    records = []
    for feature in feature_list:
        # Pull the parent genome from the cache, querying only on a miss.
        genome = data_cache.get(feature.genome_id)
        if genome is None:
            genome = get_single_genome(alchemist, feature.genome_id,
                                       data_cache=data_cache)

        feature.genome_length = genome.length
        feature.set_seqfeature()

        # Retrieve conserved-domain rows for this single gene.
        domain_filter.values = [feature.id]
        domains = domain_filter.select(CDD_DATA_COLUMNS)

        records.append(flat_files.cds_to_seqrecord(feature, genome,
                                                   gene_domains=domains))

    return records
def main(unparsed_args_list):
    """Run main get_gb_records pipeline.

    :param unparsed_args_list: Command line arguments, excluding the
        program name.
    :type unparsed_args_list: list[str]
    """
    # Parse command line arguments
    args = parse_args(unparsed_args_list)

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filters = args.filters
    ncbi_cred_dict = ncbi.get_ncbi_creds(args.ncbi_credentials_file)
    output_folder = basic.set_path(args.output_folder, kind="dir", expect=True)
    working_dir = pathlib.Path(RESULTS_FOLDER)
    working_path = basic.make_new_dir(output_folder, working_dir, attempt=50)

    if working_path is None:
        print(f"Invalid working directory '{working_dir}'")
        sys.exit(1)

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist = AlchemyHandler(database=args.database)
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the get_gb_records pipeline")

    # Get SQLAlchemy metadata Table object
    # table_obj.primary_key.columns is a
    # SQLAlchemy ColumnCollection iterable object
    # Set primary key = 'phage.PhageID'
    alchemist.build_metadata()
    table = querying.get_table(alchemist.metadata, TARGET_TABLE)
    # NOTE(review): keeps only the LAST primary-key column — fine for a
    # single-column key; confirm TARGET_TABLE never has a composite key.
    for column in table.primary_key.columns:
        primary_key = column

    # Create filter object and then add command line filter strings
    db_filter = Filter(alchemist=alchemist, key=primary_key)
    db_filter.values = []

    # Attempt to add filters and exit if needed.
    add_filters(db_filter, filters)

    # Performs the query
    db_filter.update()

    # db_filter.values now contains list of PhageIDs that pass the filters.
    # Get the accessions associated with these PhageIDs.
    keep_set = set(db_filter.values)

    # Create data sets
    print("Retrieving accessions from the database...")
    query = construct_accession_query(keep_set)
    list_of_dicts = mysqldb_basic.query_dict_list(engine, query)
    id_acc_dict = get_id_acc_dict(list_of_dicts)
    acc_id_dict = get_acc_id_dict(id_acc_dict)
    engine.dispose()

    # Idiomatic truthiness check (was: len(acc_id_dict.keys()) > 0).
    if acc_id_dict:
        get_data(working_path, acc_id_dict, ncbi_cred_dict)
    else:
        print("There are no records to retrieve.")
def setUp(self):
    """Open a live database handle and build the review Filter fixture."""
    handler = AlchemyHandler()
    handler.username = USER
    handler.password = PWD
    handler.database = DB
    handler.connect(ask_database=True, login_attempts=0)

    self.review_test_dir = self.test_dir.joinpath("review_test_dir")
    self.alchemist = handler

    review_filter = Filter(alchemist=handler)
    review_filter.add(review.BASE_CONDITIONALS)
    review_filter.key = "gene.PhamID"
    self.db_filter = review_filter
def setUp(self):
    """Connect an AlchemyHandler, build its graph, and wrap it in a Filter."""
    handler = AlchemyHandler()
    for attribute, value in (("username", "******"),
                             ("password", "******"),
                             ("database", "test_db")):
        setattr(handler, attribute, value)
    handler.connect()
    handler.build_graph()

    self.alchemist = handler
    self.db_filter = Filter(alchemist=self.alchemist)
    self.phageid = self.alchemist.get_column("phage.PhageID")
def build_filter(alchemist, key, filters, values=None, verbose=False):
    """Applies MySQL WHERE clause filters using a Filter.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param key: MySQL column to use as the Filter key.
    :type key: str
    :param filters: Conditional filter string(s) to load into the Filter.
    :type filters: str
    :param values: Optional values to preload on the Filter key column.
    :type values: list[str]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: filter-loaded Filter object.
    :rtype: Filter
    """
    # NOTE(review): verbose is accepted but unused here — confirm intent.
    db_filter = Filter(alchemist=alchemist)
    db_filter.key = key
    db_filter.values = values

    if filters != "":
        try:
            db_filter.add(filters)
        # Narrowed from a bare except:, which also swallowed SystemExit
        # and KeyboardInterrupt.
        except Exception:
            print("Please check your syntax for the conditional string: "
                  f"{filters}")
            exit(1)

    db_filter.parenthesize()

    return db_filter
def setUp(self, IsInstance):
    """Stub isinstance and attach a Filter to a bare Mock alchemist."""
    IsInstance.return_value = True

    mock_alchemist = Mock()
    mock_engine = Mock()
    mock_graph = Mock()
    mock_key = Mock()

    mock_alchemist.engine.return_value = mock_engine
    mock_alchemist.graph.return_value = mock_graph
    mock_alchemist.connected.return_value = True

    self.mock_isinstance = IsInstance
    self.mock_alchemist = mock_alchemist
    self.mock_engine = mock_engine
    self.mock_graph = mock_graph
    self.mock_key = mock_key
    self.db_filter = Filter(alchemist=mock_alchemist, key=mock_key)
def setUp(self):
    """Connect to the test database and cache common tables and columns."""
    handler = AlchemyHandler()
    handler.username = user
    handler.password = pwd
    handler.database = db
    handler.connect()

    self.alchemist = handler
    self.db_filter = Filter(alchemist=self.alchemist)

    tables = self.alchemist.metadata.tables
    self.phage = tables["phage"]
    self.gene = tables["gene"]
    self.trna = tables["trna"]

    self.PhageID = self.phage.c.PhageID
    self.Cluster = self.phage.c.Cluster
    self.Subcluster = self.phage.c.Subcluster
    self.Notes = self.gene.c.Notes
def setUp(self):
    """Open a graph-built database handle and prepare the export fixture."""
    handler = AlchemyHandler()
    handler.username = USER
    handler.password = PWD
    handler.database = DB
    handler.connect(ask_database=True, login_attempts=0)
    handler.build_graph()

    self.alchemist = handler
    self.db_filter = Filter(alchemist=handler)
    self.export_test_dir = self.test_dir.joinpath("export_test_dir")
def get_acc_id_dict(alchemist):
    """Test helper function to retrieve accessions of database entries.

    Groups every PhageID in the database by its Accession value.
    """
    accession_filter = Filter(alchemist=alchemist)
    accession_filter.key = "phage.PhageID"
    accession_filter.values = accession_filter.build_values()
    return accession_filter.group("phage.Accession")
class TestFilter(unittest.TestCase):
    @patch("pdm_utils.classes.filter.isinstance")
    def setUp(self, is_instance_mock):
        """Build a Filter wired to fully mocked SQLAlchemy objects."""
        is_instance_mock.return_value = True
        alchemist = Mock(spec=AlchemyHandler)
        engine = Mock(spec=Engine)
        graph = Mock(spec=Graph)
        session = Mock(spec=Session)
        mapper = Mock(spec=DeclarativeMeta)
        key = Mock(spec=Column)
        proxy = Mock()

        type(alchemist).engine = PropertyMock(return_value=engine)
        type(alchemist).graph = PropertyMock(return_value=graph)
        type(alchemist).session = PropertyMock(return_value=session)
        type(alchemist).mapper = PropertyMock(return_value=mapper)
        type(alchemist).connected = PropertyMock(return_value=True)

        engine.execute.return_value = proxy
        proxy.fetchall.return_value = []

        self.mock_isinstance = is_instance_mock
        self.mock_alchemist = alchemist
        self.mock_engine = engine
        self.mock_graph = graph
        self.mock_session = session
        self.mock_mapper = mapper
        self.mock_key = key

        self.db_filter = Filter(alchemist=alchemist, key=key)

        # Sanity check: the Filter should have adopted the mocked handles.
        self.assertEqual(self.db_filter.engine, self.mock_engine)
        self.assertEqual(self.db_filter.graph, self.mock_graph)

    def test_updated_1(self):
        """Verify that the updated property portrays Filter._updated.
        """
        self.db_filter._updated = True
        self.assertTrue(self.db_filter.updated)

    def test_updated_2(self):
        """Verify that the updated property is immutable.
        """
        with self.assertRaises(AttributeError):
            self.db_filter.updated = True

    def test_values_valid_1(self):
        """Verify that the values_valid property portrays Filter._values_valid.
        """
        self.db_filter._values_valid = True
        self.assertTrue(self.db_filter.values_valid)

    def test_values_valid_2(self):
        """Verify that the values_valid property is immutable.
        """
        with self.assertRaises(AttributeError):
            self.db_filter.values_valid = True

    def test_connected_1(self):
        """Verify that the connected property portrays Filter._connected.
        """
        self.db_filter._connected = True
        self.assertTrue(self.db_filter.connected)

    def test_connected_2(self):
        """Verify that the connected property is immutable.
        """
        with self.assertRaises(AttributeError):
            self.db_filter.connected = True

    def test_engine_1(self):
        """Verify that the engine property portrays Filter._engine.
        """
        self.db_filter._engine = self.mock_engine
        self.assertEqual(self.db_filter.engine, self.mock_engine)

    def test_graph_1(self):
        """Verify that the graph property portrays Filter._graph.
        """
        self.db_filter._graph = self.mock_graph
        self.assertEqual(self.db_filter.graph, self.mock_graph)

    def test_session_1(self):
        """Verify that the session property portrays Filter._session.
        """
        self.db_filter._session = self.mock_session
        self.assertEqual(self.db_filter.session, self.mock_session)

    def test_mapper_1(self):
        """Verify that the mapper property portrays Filter._mapper.
        """
        self.db_filter._mapper = self.mock_mapper
        self.assertEqual(self.db_filter.mapper, self.mock_mapper)

    def test_values_1(self):
        """Verify that the values property portrays Filter._values.
        """
        self.db_filter._values = ["Test1", "Test2"]
        self.assertEqual(self.db_filter.values, ["Test1", "Test2"])

    def test_values_2(self):
        """Verify that the values property can set Filter._values.
        """
        self.db_filter.values = ["Test1", "Test2"]
        self.assertEqual(self.db_filter.values, ["Test1", "Test2"])

    def test_values_3(self):
        """Verify that the values property modifies the values_valid property.
        """
        self.db_filter.values = ["Test1", "Test2"]
        self.assertFalse(self.db_filter.values_valid)

    def test_values_4(self):
        """Verify that the values property raises TypeError on invalid input.
        """
        # Renamed from a duplicate test_values_2, which silently shadowed
        # the earlier definition and kept it from ever running.
        with self.assertRaises(TypeError):
            self.db_filter.values = "Hello"

    def test_key_1(self):
        """Verify that the key property modifies the Filter._key
        """
        self.db_filter.key = self.mock_key
        self.assertEqual(self.db_filter.key, self.mock_key)

    def test_key_2(self):
        """Verify that the key property raises TypeError on invalid input.
        """
        with self.assertRaises(TypeError):
            self.db_filter.key = Mock()

    @patch("pdm_utils.classes.filter.AlchemyHandler")
    def test_connect_1(self, alchemyhandler_mock):
        """Verify that connect() returns when Filter is already connected.
        """
        alchemyhandler_mock.return_value = self.mock_alchemist
        self.db_filter._connected = True
        self.db_filter.connect()
        alchemyhandler_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.AlchemyHandler")
    def test_connect_2(self, alchemyhandler_mock):
        """Verify that the Filter creates an AlchemyHandler.
        """
        alchemyhandler_mock.return_value = self.mock_alchemist
        self.db_filter._connected = False
        self.db_filter.connect()
        alchemyhandler_mock.assert_called()

    @patch("pdm_utils.classes.filter.AlchemyHandler")
    def test_connect_3(self, alchemyhandler_mock):
        """Verify that the Filter uses an AlchemyHandler to connect.
        """
        alchemyhandler_mock.return_value = self.mock_alchemist
        self.db_filter._connected = False
        self.db_filter.connect()
        self.mock_alchemist.connect.assert_called()

    @patch("pdm_utils.classes.filter.isinstance")
    def test_link_1(self, isinstance_mock):
        """Verify link() calls isinstance().
        """
        self.db_filter.link(self.mock_alchemist)
        isinstance_mock.assert_called()

    def test_link_2(self):
        """Verify link() raises TypeError upon bad alchemist input.
        """
        with self.assertRaises(TypeError):
            self.db_filter.link("Bad input")

    def test_link_3(self):
        """Verify function structure of link().
        """
        type(self.mock_alchemist).connected = PropertyMock(return_value=False)
        type(self.mock_alchemist).graph = PropertyMock(return_value=None)
        self.db_filter.link(self.mock_alchemist)
        self.mock_alchemist.connect.assert_called()

    @patch("pdm_utils.classes.filter.Filter.connect")
    def test_check_1(self, connect_mock):
        """Verify that the Filter will connect if not connected.
        """
        self.db_filter._connected = False
        self.db_filter.check()
        connect_mock.assert_called()

    @patch("pdm_utils.classes.filter.isinstance")
    def test_check_2(self, is_instance_mock):
        """Verify that the Filter calls isinstance() with correct parameters.
        """
        is_instance_mock.return_value = True
        self.db_filter.check()
        is_instance_mock.assert_any_call(self.mock_engine, Engine)
        is_instance_mock.assert_any_call(self.mock_graph, Graph)
        is_instance_mock.assert_any_call(self.mock_key, Column)

    def test_check_3(self):
        """Verify that the Filter raises AttributeError with an invalid engine.
        """
        self.db_filter._engine = "Not a valid engine"
        with self.assertRaises(AttributeError):
            self.db_filter.check()

    def test_check_4(self):
        """Verify that the Filter raises AttributeError with an invalid graph.
        """
        self.db_filter._graph = "Not a valid graph"
        with self.assertRaises(AttributeError):
            self.db_filter.check()

    def test_check_5(self):
        """Verify that the Filter raises AttributeError with an invalid key.
        """
        self.db_filter._key = "Not a valid key"
        with self.assertRaises(AttributeError):
            self.db_filter.check()

    @patch("pdm_utils.classes.filter.q.build_distinct")
    @patch("pdm_utils.classes.filter.isinstance")
    def test_build_values_1(self, is_instance_mock, build_distinct_mock):
        """Verify that build_distinct() is called with correct parameters.
        """
        is_instance_mock.return_value = True
        self.db_filter.build_values(where="Not a list")
        build_distinct_mock.assert_called_with(self.mock_graph, self.mock_key,
                                               where="Not a list",
                                               add_in=self.mock_key)

    @patch("pdm_utils.classes.filter.Filter.check")
    def test_transpose_1(self, check_mock):
        """Verify that transpose() returns without values.
        """
        self.db_filter.transpose("gene.Notes")
        check_mock.assert_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    def test_mass_transpose_1(self, check_mock):
        """Verify that mass_transpose() returns without values.
        """
        self.db_filter.mass_transpose("Column")
        check_mock.assert_called()

    @patch("pdm_utils.classes.filter.q.build_distinct")
    @patch("pdm_utils.classes.filter.Filter.check")
    def test_retrieve_1(self, check_mock, build_distinct_mock):
        """Verify that retrieve() returns without values.
        """
        self.db_filter.retrieve("Column")
        check_mock.assert_called()
        build_distinct_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_1(self, build_values_mock, check_mock):
        """Verify that refresh() returns without values.
        """
        self.db_filter.refresh()
        check_mock.assert_called()
        build_values_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_2(self, build_values_mock, check_mock):
        """Verify that refresh() calls build_values() and conserves values.
        """
        build_values_mock.return_value = ["Phage"]
        self.db_filter._values_valid = False
        self.db_filter.refresh()
        check_mock.assert_called()
        build_values_mock.assert_called()
        self.assertTrue(self.db_filter.values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_1(self, build_values_mock, Refresh, check_mock):
        """Verify update() returns without values.
        """
        self.db_filter.update()
        check_mock.assert_called()
        Refresh.assert_not_called()
        build_values_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_2(self, build_values_mock, Refresh, check_mock):
        """Verify update() refreshes values before updating.
        """
        self.db_filter._values_valid = False
        self.db_filter.update()
        check_mock.assert_called()
        Refresh.assert_called()
        build_values_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_3(self, build_values_mock, Refresh, check_mock):
        """Verify function structure of update().
        """
        self.db_filter._values_valid = False
        self.db_filter._updated = False
        self.db_filter.update()
        check_mock.assert_called()
        Refresh.assert_called()
        build_values_mock.assert_called()
        self.assertTrue(self.db_filter._values_valid)
        # Was a duplicated _values_valid assertion; _updated is the flag
        # set to False above that this test means to verify.
        self.assertTrue(self.db_filter._updated)

    @patch("pdm_utils.classes.filter.q.first_column")
    @patch("pdm_utils.classes.filter.q.build_select")
    @patch("pdm_utils.classes.filter.Filter.get_column")
    def test_sort_1(self, get_column_mock, build_select_mock,
                    first_column_mock):
        """Verify function structure of sort().
        """
        self.db_filter._values_valid = False
        first_column_mock.return_value = ["Phage"]
        self.db_filter.sort("column")
        get_column_mock.assert_called()
        build_select_mock.assert_called()
        first_column_mock.assert_called()
        self.assertTrue(self.db_filter._values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    def test_sort_2(self):
        """Verify that sort() raises TypeError at bad ORDER BY input.
        """
        with self.assertRaises(TypeError):
            self.db_filter.sort(None)

    def test_reset_1(self):
        """Verify that reset() clears filters.
        """
        self.db_filter._filters = [{"Expression": "Some Whereclause"}]
        self.db_filter.reset()
        self.assertEqual(self.db_filter.filters, [])

    def test_reset_2(self):
        """Verify that reset() clears values.
        """
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]
        self.db_filter.reset()
        self.assertEqual(self.db_filter.values, [])

    def test_reset_3(self):
        """Verify that reset() sets values_valid Filter property.
        """
        self.db_filter._values_valid = False
        self.db_filter.reset()
        self.assertTrue(self.db_filter.values_valid)

    def test_reset_4(self):
        """Verify that reset() sets updated Filter property.
        """
        self.db_filter._updated = False
        self.db_filter.reset()
        self.assertTrue(self.db_filter.updated)

    def test_reset_5(self):
        """Verify that reset sets on_index property.
        """
        # NOTE(review): sets _on_index but asserts or_index — confirm the
        # intended Filter attribute name; these may be a typo pair.
        self.db_filter._on_index = 5
        self.db_filter.reset()
        self.assertEqual(self.db_filter.or_index, -1)

    def test_hits_1(self):
        """Verify that hits() accurately portrays no values.
        """
        self.assertEqual(self.db_filter.hits(), 0)

    def test_hits_2(self):
        """Verify that hits() accurately portrays a number of values.
        """
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]
        self.assertEqual(self.db_filter.hits(), 3)

    def test_copy_1(self):
        """Verify that copy() reflects a Filter's updated property.
        """
        self.db_filter._updated = False
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.updated, self.db_filter.updated)

    def test_copy_2(self):
        """Verify that copy() reflects a Filter's values_valid property.
        """
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.values_valid, self.db_filter.values_valid)

    def test_copy_3(self):
        """Verify that copy() reflects a Filter's filters property.
        """
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.filters, self.db_filter.filters)

    def test_copy_4(self):
        """Verify that copy() reflects a Filter's engine property.
        """
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.engine, self.db_filter.engine)

    def test_copy_5(self):
        """Verify that copy() reflects a Filter's graph property.
        """
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.graph, self.db_filter.graph)

    def test_copy_6(self):
        """Verify that copy() reflects a Filter's key property.
        """
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.key, self.db_filter.key)

    def test_copy_7(self):
        """Verify that copy() reflects a Filter's values property.
        """
        self.db_filter._values = ["Values"]
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.values, self.db_filter.values)

    def test_copy_8(self):
        """Verify that copy() reflects a Filter's connected property.
        """
        self.db_filter._connected = True
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter._connected, self.db_filter._connected)

    def test_copy_filters_1(self):
        """Verify that copy_filters() replicates filters.
        """
        self.db_filter._filters = [{
            "Filter1": "Filter1",
            "Filter2": "Filter2",
            "Filter3": "Filter3"
        }]
        filters_copy = self.db_filter.copy_filters()
        self.assertEqual(filters_copy, self.db_filter._filters)

    def test_copy_filters_2(self):
        """Verify that copy_filters() creates new address for copied filters.
        """
        self.db_filter._filters = [{
            "Filter1": "Filter1",
            "Filter2": "Filter2",
            "Filter3": "Filter3"
        }]
        filters_copy = self.db_filter.copy_filters()
        self.db_filter._filters[0].update({"Filter2": "filter2"})
        self.assertNotEqual(filters_copy, self.db_filter._filters)
class TestFilter(unittest.TestCase):
    @patch("pdm_utils.classes.filter.isinstance")
    def setUp(self, IsInstance):
        """Attach a Filter to a bare Mock alchemist with isinstance stubbed."""
        IsInstance.return_value = True
        alchemist = Mock()
        engine = Mock()
        graph = Mock()
        key = Mock()
        alchemist.engine.return_value = engine
        alchemist.graph.return_value = graph
        alchemist.connected.return_value = True
        self.mock_isinstance = IsInstance
        self.mock_alchemist = alchemist
        self.mock_engine = engine
        self.mock_graph = graph
        self.mock_key = key
        self.db_filter = Filter(alchemist=alchemist, key=key)

    def test_constructor_1(self):
        """Verify Filter() can be constructed with no arguments."""
        db_filter = Filter()

    def test_constructor_2(self):
        """Verify the constructor type-checks the alchemist and key."""
        self.mock_isinstance.assert_any_call(self.mock_alchemist,
                                             AlchemyHandler)
        self.mock_isinstance.assert_any_call(self.mock_key, Column)

    def test_values_1(self):
        """Verify setting values invalidates values_valid."""
        self.db_filter.values = ["Test1", "Test2"]
        self.assertFalse(self.db_filter.values_valid)

    def test_values_2(self):
        """Verify the values setter rejects non-list input."""
        with self.assertRaises(TypeError):
            self.db_filter.values = "Hello"

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.q.build_distinct")
    def test_transpose_2(self, BuildDistinct, Check):
        """Verify transpose() short-circuits when there are no values."""
        empty_values = self.db_filter.transpose("gene.Notes")
        self.assertEqual(empty_values, [])
        BuildDistinct.assert_not_called()
        Check.assert_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.transpose")
    def test_retrieve_1(self, Transpose, Check):
        """Verify retrieve() short-circuits when there are no values."""
        empty_data = self.db_filter.retrieve("Error")
        self.assertEqual(empty_data, {})
        Transpose.assert_not_called()
        Check.assert_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_1(self, BuildValues, Check):
        """Verify refresh() returns without values."""
        self.db_filter.refresh()
        Check.assert_called()
        BuildValues.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_2(self, BuildValues, Check):
        """Verify refresh() rebuilds values when they are invalid."""
        BuildValues.return_value = ["Phage"]
        self.db_filter._values_valid = False
        self.db_filter.refresh()
        Check.assert_called()
        BuildValues.assert_called()
        self.assertTrue(self.db_filter.values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_1(self, BuildValues, Refresh, Check):
        """Verify update() returns without values."""
        self.db_filter.update()
        Check.assert_called()
        Refresh.assert_not_called()
        BuildValues.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_2(self, BuildValues, Refresh, Check):
        """Verify update() refreshes invalid values first."""
        self.db_filter._values_valid = False
        self.db_filter.update()
        Check.assert_called()
        Refresh.assert_called()
        BuildValues.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_3(self, BuildValues, Refresh, Check):
        """Verify the full update() flow when not yet updated."""
        self.db_filter._values_valid = False
        self.db_filter._updated = False
        self.db_filter.update()
        Check.assert_called()
        Refresh.assert_called()
        BuildValues.assert_called()
        self.assertTrue(self.db_filter._values_valid)
        # Was a duplicated _values_valid assertion; _updated is the flag
        # set to False above that this test means to verify.
        self.assertTrue(self.db_filter._updated)

    @patch("pdm_utils.classes.filter.isinstance")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_sort_1(self, BuildValues, IsInstance):
        """Verify the sort() flow with a valid column."""
        IsInstance.return_value = True
        BuildValues.return_value = ["Phage"]
        self.db_filter._values_valid = False
        self.db_filter.sort("column")
        IsInstance.assert_called()
        BuildValues.assert_called()
        self.assertTrue(self.db_filter._values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    @patch("pdm_utils.classes.filter.isinstance")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_sort_2(self, BuildValues, IsInstance):
        """Verify sort() raises TypeError on invalid input."""
        # Renamed from a duplicate test_sort_1, which silently shadowed
        # the earlier definition and kept it from ever running.
        IsInstance.return_value = False
        with self.assertRaises(TypeError):
            self.db_filter.sort()

    def test_reset_1(self):
        """Verify reset() clears filters."""
        self.db_filter._filters = {"Expression": "Some Whereclause"}
        self.db_filter.reset()
        self.assertEqual(self.db_filter.filters, {})

    def test_reset_2(self):
        """Verify reset() clears values."""
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]
        self.db_filter.reset()
        self.assertEqual(self.db_filter.values, [])

    def test_reset_3(self):
        """Verify reset() revalidates values."""
        self.db_filter._values_valid = False
        self.db_filter.reset()
        self.assertTrue(self.db_filter.values_valid)

    def test_reset_4(self):
        """Verify reset() restores the updated flag."""
        self.db_filter._updated = False
        self.db_filter.reset()
        self.assertTrue(self.db_filter.updated)

    def test_hits_1(self):
        """Verify hits() reports zero with no values."""
        self.assertEqual(self.db_filter.hits(), 0)

    def test_hits_2(self):
        """Verify hits() counts the stored values."""
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]
        self.assertEqual(self.db_filter.hits(), 3)

    def test_copy_1(self):
        """Verify copy() reflects the updated property."""
        self.db_filter._updated = False
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.updated, self.db_filter.updated)

    def test_copy_2(self):
        """Verify copy() reflects the values_valid property."""
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.values_valid, self.db_filter.values_valid)

    def test_copy_3(self):
        """Verify copy() reflects the filters property."""
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.filters, self.db_filter.filters)

    def test_copy_4(self):
        """Verify copy() reflects the engine property."""
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.engine, self.db_filter.engine)

    def test_copy_5(self):
        """Verify copy() reflects the graph property."""
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.graph, self.db_filter.graph)

    def test_copy_6(self):
        """Verify copy() reflects the key property."""
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.key, self.db_filter.key)

    def test_copy_7(self):
        """Verify copy() reflects the values property."""
        self.db_filter._values = ["Values"]
        copy_filter = self.db_filter.copy()
        self.assertEqual(copy_filter.values, self.db_filter.values)

    def test_copy_filters_1(self):
        """Verify copy_filters() replicates filters."""
        self.db_filter._filters = {
            "Filter1": ["Filter1"],
            "Filter2": ["Filter2"],
            "Filter3": ["Filter3"]
        }
        filters_copy = self.db_filter.copy_filters()
        self.assertEqual(filters_copy, self.db_filter._filters)

    def test_copy_filters_2(self):
        """Verify copy_filters() gives the copy independent storage."""
        self.db_filter._filters = {
            "Filter1": ["Filter1"],
            "Filter2": ["Filter2"],
            "Filter3": ["Filter3"]
        }
        filters_copy = self.db_filter.copy_filters()
        self.db_filter._filters.update({"Filter2": []})
        self.assertNotEqual(filters_copy, self.db_filter._filters)
class TestFilter(unittest.TestCase):
    """Integration tests for Filter against a populated MySQL test DB."""

    @classmethod
    def setUpClass(self):
        # One shared test database for the whole class.
        test_db_utils.create_filled_test_db()

    @classmethod
    def tearDownClass(self):
        test_db_utils.remove_db()

    def setUp(self):
        # Fresh connected handler and Filter per test, plus shortcuts to
        # commonly used Table and Column objects.
        alchemist = AlchemyHandler()
        alchemist.username = user
        alchemist.password = pwd
        alchemist.database = db
        alchemist.connect()
        self.alchemist = alchemist
        self.db_filter = Filter(alchemist=self.alchemist)
        self.phage = self.alchemist.metadata.tables["phage"]
        self.gene = self.alchemist.metadata.tables["gene"]
        self.trna = self.alchemist.metadata.tables["trna"]
        self.PhageID = self.phage.c.PhageID
        self.Cluster = self.phage.c.Cluster
        self.Subcluster = self.phage.c.Subcluster
        self.Notes = self.gene.c.Notes

    def tearDown(self):
        self.alchemist.clear()

    def test_and__1(self):
        """Verify that and_() creates a dictionary key as expected.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        or_block = self.db_filter.filters[0]
        self.assertTrue("phage.PhageID=Myrna" in or_block.keys())

    def test_and__2(self):
        """Verify that and_() stores BinaryExpression data as expected.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        or_block = self.db_filter.filters[0]
        self.assertTrue(isinstance(or_block, dict))
        where_clauses = or_block["phage.PhageID=Myrna"]
        self.assertTrue(isinstance(where_clauses, BinaryExpression))

    def test_and_3(self):
        """Verify that and_() recognizes previous and_() data.
        """
        # Same filter spelled with/without spaces should collapse to one key.
        self.db_filter.and_("phage.PhageID = Myrna")
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")
        or_block = self.db_filter.filters[0]
        self.assertEqual(len(or_block), 2)

    def test_remove_1(self):
        """Verify that remove() removes dictionary entry after depleted.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.remove("phage.PhageID=Myrna")
        self.assertEqual(self.db_filter.filters, [{}])

    def test_remove_2(self):
        """Verify that remove() conserves dictionary entry if not depleted.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")
        self.db_filter.remove("phage.PhageID=Myrna")
        or_block = self.db_filter.filters[0]
        where_clauses = or_block["phage.PhageID=D29"]
        self.assertEqual(where_clauses.right.value, "D29")

    def test_add_1(self):
        """Verify that add() creates a dictionary key as expected.
        """
        self.db_filter.add("phage.PhageID=Myrna")
        or_block = self.db_filter.filters[0]
        self.assertTrue("phage.PhageID=Myrna" in or_block.keys())

    def test_add_2(self):
        """Verify that add() creates multiple keys as expected.
        """
        self.db_filter.add("phage.PhageID=Myrna AND phage.PhageID = Trixie")
        or_block = self.db_filter.filters[0]
        self.assertTrue(len(or_block) == 2)
        self.assertTrue("phage.PhageID=Myrna" in or_block.keys())
        self.assertTrue("phage.PhageID=Trixie" in or_block.keys())

    def test_add_3(self):
        """Verify that add() creates multiple or blocks as expected.
        """
        self.db_filter.add("phage.PhageID=Myrna OR phage.PhageID = Trixie")
        self.assertTrue(len(self.db_filter.filters) == 2)
        first_or_block = self.db_filter.filters[0]
        second_or_block = self.db_filter.filters[1]
        self.assertTrue("phage.PhageID=Myrna" in first_or_block.keys())
        self.assertFalse("phage.PhageID=Trixie" in first_or_block.keys())
        self.assertFalse("phage.PhageID=Myrna" in second_or_block.keys())
        self.assertTrue("phage.PhageID=Trixie" in second_or_block.keys())

    def test_get_column_1(self):
        """Verify that get_column() converts string column input.
        """
        self.db_filter.key = self.Cluster
        column = self.db_filter.get_column("phage.PhageID")
        self.assertEqual(column, self.PhageID)

    def test_get_column_2(self):
        """Verify that get_column() conserves Column input.
        """
        self.db_filter.key = self.Cluster
        column = self.db_filter.get_column(self.PhageID)
        self.assertEqual(column, self.PhageID)

    def test_get_column_3(self):
        """Verify that get_column() raises TypeError.

        get_column() should raise TypeError when column input is
        neither a string or a Column.
        """
        self.db_filter.key = self.Cluster
        with self.assertRaises(TypeError):
            self.db_filter.get_column(None)

    def test_build_where_clauses_1(self):
        """Verify that build_where_clauses() forms list of expected length.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")
        queries = self.db_filter.build_where_clauses()
        self.assertEqual(len(queries[0]), 2)

    def test_build_where_clauses_2(self):
        """Verify that build_where_clauses() forms list of BinaryExpressions.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")
        queries = self.db_filter.build_where_clauses()
        for query in queries:
            self.assertTrue(isinstance(query, BooleanClauseList))

    def test_build_values_1(self):
        """Verify that build_values() does not exclude values as expected.
        """
        self.db_filter.key = self.PhageID
        values = self.db_filter.build_values()
        self.assertTrue("Myrna" in values)
        self.assertTrue("D29" in values)
        self.assertTrue("Alice" in values)
        self.assertTrue("Trixie" in values)

    def test_build_values_2(self):
        """Verify that build_values() utilizes WHERE clauses as expected.
        """
        self.db_filter.key = self.PhageID
        where_clause = (self.Cluster == "A")
        values = self.db_filter.build_values(where=where_clause)
        self.assertTrue("D29" in values)
        self.assertTrue("Trixie" in values)
        self.assertFalse("Myrna" in values)
        self.assertFalse("Alice" in values)

    def test_build_values_3(self):
        """Verify that build_values() creates DISTINCT values as expected.
        """
        self.db_filter.key = self.Cluster
        where_clause = (self.Subcluster == "A2")
        values = self.db_filter.build_values(where=where_clause)
        self.assertEqual(len(values), 1)
        self.assertEqual(values, ["A"])

    def test_build_values_4(self):
        """Verify that build_values() recognizes bytes-type column data.
        """
        self.db_filter.key = self.Notes
        values = self.db_filter.build_values()
        self.assertTrue(isinstance(values[0], str))

    def test_query_1(self):
        """Verify that query() creates instances as expected.
        """
        self.db_filter.key = "phage.PhageID"
        self.db_filter.values = ["Trixie", "D29"]
        self.db_filter.refresh()
        instances = self.db_filter.query("phage")
        instance_ids = []
        for instance in instances:
            instance_ids.append(instance.PhageID)
        self.assertTrue("Trixie" in instance_ids)
        self.assertTrue("D29" in instance_ids)
        self.assertFalse("Myrna" in instance_ids)

    def test_query_2(self):
        """Verify that query() creates instances as expected.
        """
        self.db_filter.key = "phage.PhageID"
        self.db_filter.values = ["Trixie", "D29"]
        self.db_filter.refresh()
        instances = self.db_filter.query("gene")
        instance_ids = set()
        for instance in instances:
            instance_ids.add(instance.phage.PhageID)
        instance_ids = list(instance_ids)
        self.assertTrue("Trixie" in instance_ids)
        self.assertTrue("D29" in instance_ids)
        self.assertFalse("Myrna" in instance_ids)

    def test_transpose_1(self):
        """Verify that transpose() utilizes Filter values as expected.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID
        self.db_filter.refresh()
        clusters = self.db_filter.transpose("phage.Cluster")
        self.assertEqual(clusters, ["C"])

    def test_transpose_2(self):
        """Verify that transpose() can optionally create dict return value.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID
        self.db_filter.refresh()
        clusters_dict = self.db_filter.transpose(self.Cluster,
                                                 return_dict=True)
        self.assertEqual(clusters_dict["Cluster"], ["C"])

    def test_transpose_3(self):
        """Verify that transpose() can alter Filter properties as expected.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID
        self.db_filter.refresh()
        self.db_filter.transpose("phage.Cluster", set_values=True)
        self.assertEqual(self.db_filter.key, self.Cluster)
        self.assertEqual(self.db_filter.values, ["C"])

    def test_transpose_4(self):
        """Verify that transpose() filter parameter functions as expected.
        """
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.key = self.PhageID
        self.db_filter.add("gene.GeneID = Myrna_CDS_28")
        values = self.db_filter.transpose("gene.GeneID", filter=True)
        self.assertEqual(len(values), 1)
        self.assertEqual(values[0], "Myrna_CDS_28")

    def test_mass_transpose_1(self):
        """Verify that mass_tranpose() returns DISTINCT values as expected.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID
        self.db_filter.refresh()
        myrna_data = self.db_filter.mass_transpose(["phage.HostGenus",
                                                    "phage.Cluster",
                                                    "gene.Notes"])
        self.assertTrue(len(myrna_data) == 3)
        self.assertTrue(isinstance(myrna_data, dict))
        self.assertEqual(myrna_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(myrna_data["Cluster"], ["C"])

    def test_mass_transpose_2(self):
        """Verify that mass_tranpose() utilizes all values as expected.
        """
        self.db_filter.values = ["Myrna", "Trixie"]
        self.db_filter.key = self.PhageID
        self.db_filter.refresh()
        data = self.db_filter.mass_transpose(["phage.HostGenus",
                                              "phage.Cluster",
                                              "gene.Notes"])
        self.assertTrue(len(data) == 3)
        self.assertTrue(isinstance(data, dict))
        self.assertEqual(data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(data["Cluster"], ["C", "A"])

    def test_retrieve_1(self):
        """Verify that retrieve() separates data as expected.
        """
        self.db_filter.values = ["Myrna", "Trixie"]
        self.db_filter.key = self.PhageID
        self.db_filter.refresh()
        data = self.db_filter.retrieve(["phage.HostGenus", "phage.Cluster"])
        myrna_data = data["Myrna"]
        self.assertEqual(myrna_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(myrna_data["Cluster"], ["C"])
        trixie_data = data["Trixie"]
        self.assertEqual(trixie_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(trixie_data["Cluster"], ["A"])

    def test_retrieve_2(self):
        """Verify that retrieve() separates data as expected.
        """
        self.db_filter.values = ["A", "C"]
        self.db_filter.key = self.Cluster
        self.db_filter.refresh()
        data = self.db_filter.retrieve(["phage.Cluster", "phage.PhageID"])
        a_data = data["A"]
        self.assertEqual(a_data["Cluster"], ["A"])
        self.assertTrue("Trixie" in a_data["PhageID"])
        self.assertFalse("Myrna" in a_data["PhageID"])
        c_data = data["C"]
        self.assertEqual(c_data["Cluster"], ["C"])
        self.assertFalse("Trixie" in c_data["PhageID"])
        self.assertTrue("Myrna" in c_data["PhageID"])

    def test_refresh_1(self):
        """Verify that refresh() eliminates invalid data.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29", "Sheetz"]
        self.db_filter.refresh()
        self.assertTrue("Myrna" in self.db_filter.values)
        self.assertTrue("D29" in self.db_filter.values)
        self.assertFalse("Sheetz" in self.db_filter.values)

    def test_update_1(self):
        """Verify that update() filters out values.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.update()
        self.assertTrue("Myrna" in self.db_filter.values)
        self.assertFalse("D29" in self.db_filter.values)

    def test_sort_1(self):
        """Verify that sort() orders values as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.sort(self.PhageID)
        self.assertTrue("Myrna" in self.db_filter.values)
        self.assertTrue("D29" in self.db_filter.values)
        self.assertEqual(self.db_filter.values[0], "D29")

    def test_sort_2(self):
        """Verify that sort() orders values with multiple sort columns.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29", "Alice"]
        self.db_filter.sort([self.Cluster, self.PhageID])
        self.assertTrue("Myrna" in self.db_filter.values)
        self.assertTrue("D29" in self.db_filter.values)
        self.assertTrue("Alice" in self.db_filter.values)
        self.assertEqual(self.db_filter.values[0], "D29")
        self.assertEqual(self.db_filter.values[1], "Alice")

    def test_group_1(self):
        """Verify that group() creates separate groups as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group(self.PhageID)
        self.assertTrue("Myrna" in group_results.keys())
        self.assertTrue("Myrna" in group_results["Myrna"])
        self.assertTrue("D29" in group_results.keys())
        self.assertTrue("D29" in group_results["D29"])

    def test_group_2(self):
        """Verify that group() recognizes similarities in values as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group("phage.HostGenus")
        self.assertTrue("Mycobacterium" in group_results.keys())
        self.assertTrue("Myrna" in group_results["Mycobacterium"])
        self.assertTrue("D29" in group_results["Mycobacterium"])

    def test_group_3(self):
        """Verify that group() recognizes differences in values as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29", "Trixie"]
        group_results = self.db_filter.group("phage.Cluster")
        self.assertTrue("A" in group_results.keys())
        self.assertTrue("C" in group_results.keys())
        self.assertTrue("Myrna" in group_results["C"])
        self.assertTrue("D29" in group_results["A"])
        self.assertTrue("Trixie" in group_results["A"])
def test_constructor_1(self): db_filter = Filter(alchemist=self.alchemist)
def execute_review(alchemist, folder_path, folder_name, review=True,
                   values=None, filters="", groups=None, sort=None,
                   g_reports=False, s_report=False, verbose=False):
    """Executes the entirety of the pham review pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param review: A boolean to toggle filtering of phams by pham
        discrepancies.
    :type review: bool
    :param values: List of values to filter database results.
    :type values: list[str]
    :param filters: A MySQL-formatted conditional string.
    :type filters: str
    :param groups: A list of supported MySQL column names to group by.
    :type groups: list[str]
    :param sort: A list of supported MySQL column names to sort by.
    :type sort: list[str]
    :param g_reports: A boolean to toggle export of additional pham
        information.
    :type g_reports: bool
    :param s_report: A boolean to toggle export of a summary report.
    :type s_report: bool
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # Normalize mutable defaults (was values=[], groups=[], sort=[] —
    # shared mutable default arguments).
    if values is None:
        values = []
    if groups is None:
        groups = []
    if sort is None:
        sort = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"

    if values:
        db_filter.values = values
        if verbose:
            print(f"Identified {len(values)} phams to review...")

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # Narrowed from a bare except: report the malformed
            # conditional string and abort.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")
            sys.exit(1)
        # Was in a `finally:` clause, which pointlessly ran update()
        # even on the sys.exit path; only run it on success.
        db_filter.update()

    # Reset the filter's internal state so only the pipeline's base
    # conditionals apply to the query below (NOTE: reaches into private
    # attributes of Filter — a public reset would be preferable).
    db_filter._filters = []
    db_filter._updated = False
    db_filter._or_index = -1

    db_filter.add(BASE_CONDITIONALS)
    db_filter.update()

    if not db_filter.values:
        print("Current settings produced no database hits.")
        sys.exit(1)

    if review:
        review_phams(db_filter, verbose=verbose)

    if sort:
        db_filter.sort(sort)

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups, verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    original_phams = db_filter.values
    total_g_data = {}
    for mapped_path in conditionals_map.keys():
        conditionals = conditionals_map[mapped_path]

        # Re-scope the filter's values to this group before querying.
        db_filter.values = original_phams
        db_filter.values = db_filter.build_values(where=conditionals)

        pf_data = get_pf_data(alchemist, db_filter, verbose=verbose)
        # f-prefix removed: the name contains no placeholders.
        write_report(pf_data, mapped_path, PF_HEADER,
                     csv_name="FunctionReport",
                     verbose=verbose)

        if g_reports:
            execute_g_report_export(alchemist, db_filter, mapped_path,
                                    total_g_data=total_g_data,
                                    verbose=verbose)

        if s_report:
            execute_s_report_export(alchemist, db_filter, conditionals,
                                    mapped_path, verbose=verbose)
class TestFilter(unittest.TestCase):
    """Integration tests for Filter against a small populated test DB."""

    @classmethod
    def setUpClass(self):
        # One shared test database for the whole class.
        setup_test_db()

    def setUp(self):
        # Fresh connected handler, graph, and Filter per test.
        alchemist = AlchemyHandler()
        alchemist.username = "******"
        alchemist.password = "******"
        alchemist.database = "test_db"
        alchemist.connect()
        alchemist.build_graph()
        self.alchemist = alchemist
        self.db_filter = Filter(alchemist=self.alchemist)
        phageid = self.alchemist.get_column("phage.PhageID")
        self.phageid = phageid

    def test_constructor_1(self):
        """Verify Filter construction from a connected AlchemyHandler."""
        db_filter = Filter(alchemist=self.alchemist)

    def test_constructor_2(self):
        """Verify the constructed Filter exposes graph and engine."""
        self.assertTrue(isinstance(self.db_filter.graph, Graph))
        self.assertTrue(isinstance(self.db_filter.engine, Engine))

    def test_add_1(self):
        """Verify add() creates a dictionary key for the filter."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.assertTrue("phage.PhageID=" in self.db_filter.filters.keys())

    def test_add_2(self):
        """Verify add() stores a list of BinaryExpression clauses."""
        self.db_filter.add("phage.PhageID=Myrna")
        where_clauses = self.db_filter.filters["phage.PhageID="]
        self.assertTrue(isinstance(where_clauses, list))
        self.assertTrue(isinstance(where_clauses[0], BinaryExpression))

    def test_add_3(self):
        """Verify add() appends clauses under an existing key."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")
        where_clauses = self.db_filter.filters["phage.PhageID="]
        self.assertEqual(len(where_clauses), 2)

    def test_remove_1(self):
        """Verify remove() drops the key once its clauses are depleted."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.remove("phage.PhageID=Myrna")
        self.assertEqual(self.db_filter.filters, {})

    def test_remove_2(self):
        """Verify remove() keeps the key when clauses remain."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")
        self.db_filter.remove("phage.PhageID=Myrna")
        where_clauses = self.db_filter.filters["phage.PhageID="]
        self.assertTrue(len(where_clauses) == 1)
        self.assertEqual(where_clauses[0].right.value, "D29")

    def test_build_where_clauses_1(self):
        """Verify build_where_clauses() returns one clause per filter."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")
        queries = self.db_filter.build_where_clauses()
        self.assertEqual(len(queries), 2)

    def test_build_where_clauses_2(self):
        """Verify build_where_clauses() returns BinaryExpressions."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")
        queries = self.db_filter.build_where_clauses()
        for query in queries:
            self.assertTrue(isinstance(query, BinaryExpression))

    def test_build_values_1(self):
        """Verify build_values() retains the loaded values."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        values = self.db_filter.build_values()
        self.assertTrue("Myrna" in values)
        self.assertTrue("D29" in values)

    def test_transpose_1(self):
        """Verify transpose() maps values to another column."""
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.phageid
        self.db_filter.refresh()
        host_genera = self.db_filter.transpose("phage.HostGenus")
        self.assertEqual(host_genera, ["Mycobacterium"])

    def test_retrieve_1(self):
        """Verify retrieve() returns a dict keyed by column name."""
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.phageid
        self.db_filter.refresh()
        myrna_data = self.db_filter.retrieve(
                                ["phage.HostGenus", "phage.Cluster",
                                 "gene.Notes"])
        self.assertTrue(len(myrna_data) == 3)
        self.assertTrue(isinstance(myrna_data, dict))
        self.assertEqual(myrna_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(myrna_data["Cluster"], ["C"])

    def test_refresh_2(self):
        """Verify refresh() eliminates values absent from the database."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29", "Sheetz"]
        self.db_filter.refresh()
        self.assertTrue("Myrna" in self.db_filter.values)
        self.assertTrue("D29" in self.db_filter.values)
        self.assertFalse("Sheetz" in self.db_filter.values)

    def test_update_1(self):
        """Verify update() applies the added filters to the values."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.update()
        self.assertTrue("Myrna" in self.db_filter.values)
        self.assertFalse("D29" in self.db_filter.values)

    def test_sort_1(self):
        """Verify sort() orders the values by the given column."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.sort(self.phageid)
        self.assertTrue("Myrna" in self.db_filter.values)
        self.assertTrue("D29" in self.db_filter.values)
        self.assertEqual(self.db_filter.values[0], "D29")

    def test_group_1(self):
        """Verify group() creates one group per distinct value."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group(self.phageid)
        self.assertTrue("Myrna" in group_results.keys())
        self.assertTrue("Myrna" in group_results["Myrna"])
        self.assertTrue("D29" in group_results.keys())
        # NOTE(review): duplicated assertion — the second check was
        # probably meant to be `"D29" in group_results["D29"]`; confirm.
        self.assertTrue("D29" in group_results.keys())

    def test_group_2(self):
        """Verify group() collects values sharing a column value."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group("phage.HostGenus")
        self.assertTrue("Mycobacterium" in group_results.keys())
        self.assertTrue("Myrna" in group_results["Mycobacterium"])
        self.assertTrue("D29" in group_results["Mycobacterium"])

    @classmethod
    def tearDownClass(self):
        teardown_test_db()
def main(unparsed_args_list):
    """Run main freeze database pipeline.

    Copies a reference database to a new "frozen" database from which
    genomes that fail the command line filters are deleted.

    :param unparsed_args_list: Command line arguments for this pipeline.
    :type unparsed_args_list: list[str]
    """
    args = parse_args(unparsed_args_list)
    ref_database = args.database
    reset = args.reset
    new_database = args.new_database_name
    prefix = args.prefix

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filters = args.filters

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist1 = AlchemyHandler(database=ref_database,
                                username=mysql_creds["user"],
                                password=mysql_creds["password"])
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine
    mysqldb.check_schema_compatibility(engine1, "the freeze pipeline")

    # Get SQLAlchemy metadata Table object
    # table_obj.primary_key.columns is a
    # SQLAlchemy ColumnCollection iterable object
    # Set primary key = 'phage.PhageID'
    alchemist1.build_metadata()
    table = querying.get_table(alchemist1.metadata, TARGET_TABLE)
    for column in table.primary_key.columns:
        primary_key = column

    # Create filter object and then add command line filter strings
    db_filter = Filter(alchemist=alchemist1, key=primary_key)
    db_filter.values = []

    # Attempt to add filters and exit if needed.
    add_filters(db_filter, filters)

    # Performs the query
    db_filter.update()

    # db_filter.values now contains list of PhageIDs that pass the filters.
    # Get the number of genomes that will be retained and build the
    # MYSQL DELETE statement.
    keep_set = set(db_filter.values)
    delete_stmt = construct_delete_stmt(TARGET_TABLE, primary_key, keep_set)
    count_query = construct_count_query(TARGET_TABLE, primary_key, keep_set)
    phage_count = mysqldb_basic.scalar(alchemist1.engine, count_query)

    # Determine the name of the new database.
    if new_database is None:
        if prefix is None:
            prefix = get_prefix()
        new_database = f"{prefix}_{phage_count}"

    # Create the new database, but prevent overwriting of current database.
    if engine1.url.database != new_database:
        result = mysqldb_basic.drop_create_db(engine1, new_database)
    else:
        print(
            "Error: names of the reference and frozen databases are the same.")
        print("No database will be created.")
        result = 1

    # Copy database.
    if result == 0:
        print(f"Reference database: {ref_database}")
        print(f"New database: {new_database}")
        result = mysqldb_basic.copy_db(engine1, new_database)
        if result == 0:
            # f-prefix removed: the message contains no placeholders.
            print("Deleting genomes...")
            alchemist2 = AlchemyHandler(database=new_database,
                                        username=engine1.url.username,
                                        password=engine1.url.password)
            alchemist2.connect(pipeline=True)
            engine2 = alchemist2.engine
            engine2.execute(delete_stmt)
            if reset:
                engine2.execute(RESET_VERSION)

            # Close up all connections in the connection pool.
            engine2.dispose()
        else:
            print("Unable to copy the database.")
        # Close up all connections in the connection pool.
        engine1.dispose()
    else:
        print(f"Error creating new database: {new_database}.")

    print("Freeze database script completed.")
def test_constructor_1(self): db_filter = Filter()
class TestPhamReview(unittest.TestCase):
    """Integration tests for the pham review pipeline."""

    @classmethod
    def setUpClass(self):
        # Shared test database and a clean scratch directory per class.
        test_db_utils.create_filled_test_db()
        self.test_dir = Path(TEST_DIR)
        if self.test_dir.is_dir():
            shutil.rmtree(TEST_DIR)
        self.test_dir.mkdir()

    @classmethod
    def tearDownClass(self):
        test_db_utils.remove_db()
        shutil.rmtree(TEST_DIR)

    def setUp(self):
        # Fresh connected handler and a base-conditional Filter per test.
        self.review_test_dir = self.test_dir.joinpath("review_test_dir")
        self.alchemist = AlchemyHandler()
        self.alchemist.username = USER
        self.alchemist.password = PWD
        self.alchemist.database = DB
        self.alchemist.connect(ask_database=True, login_attempts=0)
        self.db_filter = Filter(alchemist=self.alchemist)
        self.db_filter.add(review.BASE_CONDITIONALS)
        self.db_filter.key = "gene.PhamID"

    def tearDown(self):
        if self.review_test_dir.is_dir():
            shutil.rmtree(str(self.review_test_dir))

    def test_execute_review_1(self):
        """Verify execute_review() creates new directory as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name)
        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_2(self):
        """Verify execute_review() filter parameter functions as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name,
                              filters=("phage.Cluster='A' "
                                       "AND phage.Subcluster='A2'"))
        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_3(self):
        """Verify execute_review() group parameter functions as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name,
                              groups=["phage.Cluster"])
        self.assertTrue(self.review_test_dir.is_dir())
        clusterA_dir = self.review_test_dir.joinpath("A")
        self.assertTrue(clusterA_dir.is_dir())

    def test_execute_review_4(self):
        """Verify execute_review() sort parameter functions as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name,
                              sort=["gene.Name"])
        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_5(self):
        """Verify execute_review() review parameter functions as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name,
                              review=False)
        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_6(self):
        """Verify execute_review() g_reports parameter functions as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name,
                              g_reports=True)
        self.assertTrue(self.review_test_dir.is_dir())
        gene_report_dir = self.review_test_dir.joinpath("GeneReports")
        self.assertTrue(gene_report_dir.is_dir())

    def test_execute_review_7(self):
        """Verify execute_review() s_report parameter functions as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name,
                              s_report=True)
        self.assertTrue(self.review_test_dir.is_dir())
        summary_report_file = self.review_test_dir.joinpath(
                                                    "SummaryReport.txt")
        self.assertTrue(summary_report_file.is_file())

    def test_review_phams_1(self):
        """Verify review_phams() correctly identifies discrepancies.
        """
        self.db_filter.values = self.db_filter.build_values(
                            where=self.db_filter.build_where_clauses())
        review.review_phams(self.db_filter)
        self.assertFalse(39854 in self.db_filter.values)
        self.assertTrue(40481 in self.db_filter.values)

    def test_get_pf_data_1(self):
        """Verify get_pf_data() retrieves and returns data as expected.
        """
        self.db_filter.values = [40481]
        pf_data = review.get_pf_data(self.alchemist, self.db_filter)
        self.assertTrue(isinstance(pf_data, list))
        for header in review.PF_HEADER:
            with self.subTest(header=header):
                self.assertTrue(header in pf_data[0].keys())
                self.assertFalse(isinstance(pf_data[0][header], list))

    def test_get_g_data_1(self):
        """Verify get_g_data() retrieves and returns data as expected.
        """
        self.db_filter.values = [40481]
        g_data = review.get_g_data(self.alchemist, self.db_filter, 40481)
        self.assertTrue(isinstance(g_data, list))
        for header in review.PG_HEADER:
            with self.subTest(header=header):
                self.assertTrue(header in g_data[0].keys())
                self.assertFalse(isinstance(g_data[0][header], list))
def execute_resubmit(alchemist, revisions_data_dicts, folder_path,
                     folder_name, filters="", groups=None, verbose=False):
    """Executes the entirety of the genbank resubmit pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param revisions_data_dicts: Data dictionaries containing pham/notes data.
    :type revisions_data_dicts: list[dict]
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param filters: A MySQL-formatted conditional string.
    :type filters: str
    :param groups: A list of supported MySQL column names to group by.
    :type groups: list[str]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # Normalize the mutable default (was groups=[], a shared mutable
    # default argument).
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"
    db_filter.add(BASE_CONDITIONALS)

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # Narrowed from a bare except. NOTE(review): unlike the
            # review pipeline this reports the malformed conditional
            # string but keeps going — confirm continuing is intentional.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")

    resubmit_columns = db_filter.get_columns(RESUBMIT_COLUMNS)

    # Collect the phams targeted by the revision data.
    phams = []
    for data_dict in revisions_data_dicts:
        phams.append(data_dict["Pham"])

    db_filter.values = phams

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups, verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    for mapped_path in conditionals_map.keys():
        if verbose:
            print("Retreiving phage data for pham revisions...")
        export_dicts = []
        for data_dict in revisions_data_dicts:
            if verbose:
                print(f"...Retrieving data for pham {data_dict['Pham']}...")

            conditionals = conditionals_map[mapped_path]

            # Select only genes whose notes differ from the final call;
            # "Hypothetical Protein" is stored as an empty note.
            final_call = data_dict["Final Call"]
            if final_call == "Hypothetical Protein":
                final_call = ""
            conditionals.append(
                    querying.build_where_clause(alchemist.graph,
                                                f"gene.Notes!={final_call}"))

            query = querying.build_select(alchemist.graph, resubmit_columns,
                                          where=conditionals)

            results = querying.execute(alchemist.engine, query,
                                       in_column=db_filter.key,
                                       values=[data_dict["Pham"]])

            for result in results:
                format_resubmit_data(result, data_dict["Final Call"])
                export_dicts.append(result)

        if not export_dicts:
            if verbose:
                # Bug fix: the f-prefix was missing, so {mapped_path.name}
                # printed literally instead of interpolating.
                print(f"'{mapped_path.name}' data selected for resubmision "
                      "matches selected call; no resubmision exported...")
            mapped_path.rmdir()
            continue

        export_dicts = sorted(export_dicts,
                              key=lambda export_dict: export_dict["Phage"])

        if verbose:
            print(f"Writing {CSV_NAME} in {mapped_path.name}...")
        file_path = mapped_path.joinpath(CSV_NAME)
        basic.export_data_dict(export_dicts, file_path, RESUBMIT_HEADER,
                               include_headers=True)
def execute_export(alchemist, output_path, output_name, values=None,
                   verbose=False, csv_export=False, ffile_export=None,
                   db_export=False, table="phage", filters=None, groups=None):
    """Executes the entirety of the file export pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param output_path: Path to a valid dir for new dir creation.
    :type output_path: Path
    :param output_name: A name for the export folder.
    :type output_name: str
    :param values: List of values to filter database results.
    :type values: list[str]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :param csv_export: A boolean value to toggle csv export.
    :type csv_export: bool
    :param ffile_export: A SeqIO supported file format to toggle
        flat-file export.
    :type ffile_export: str
    :param db_export: A boolean value to toggle database export.
    :type db_export: bool
    :param table: Name of the MySQL table to export from.
    :type table: str
    :param filters: A list of lists with filter values, grouped by ORs.
    :type filters: list[list[str]]
    :param groups: A list of supported MySQL column names to group by.
    :type groups: list[str]
    """
    # Normalize mutable defaults (was values=[], filters=[], groups=[] —
    # shared mutable default arguments).
    if values is None:
        values = []
    if filters is None:
        filters = []
    if groups is None:
        groups = []

    if verbose:
        print("Retrieving database version...")
    db_version = mysqldb.get_version_table_data(alchemist.engine)

    if verbose:
        print("Creating export folder...")
    export_path = output_path.joinpath(output_name)
    export_path = basic.make_new_dir(output_path, export_path, attempt=50)

    if db_export:
        if verbose:
            print("Writing SQL database file...")
        write_database(alchemist, db_version["Version"], export_path)
    elif csv_export or ffile_export is not None:
        # Use the table's primary key as the filter key (iteration keeps
        # the last column of a composite key, matching prior behavior).
        table_obj = alchemist.get_table(table)
        for column in table_obj.primary_key.columns:
            primary_key = column

        db_filter = Filter(alchemist=alchemist, key=primary_key)
        db_filter.values = values

        # `filters` is a list of OR-blocks, each holding AND'd strings.
        for or_filters in filters:
            # Loop variable renamed from `filter` to avoid shadowing the
            # builtin.
            for filter_string in or_filters:
                db_filter.add(filter_string)
        db_filter.update()

        # Filters were requested but nothing survived them.
        if filters and not db_filter.values:
            return

        values_map = {}
        if groups:
            build_groups_map(db_filter, export_path, groups=groups,
                             values_map=values_map, verbose=verbose)
        else:
            values_map.update({export_path: db_filter.values})

        # Renamed loop variables; the original rebound `export_path` and
        # `values` inside the loop.
        for grouped_path in values_map.keys():
            grouped_values = values_map[grouped_path]
            if csv_export:
                execute_csv_export(alchemist, grouped_path, table=table,
                                   values=grouped_values, verbose=verbose)
            elif ffile_export is not None:
                execute_ffx_export(alchemist, grouped_path, ffile_export,
                                   db_version, table=table,
                                   values=grouped_values, verbose=verbose)