def test_get_column_3(self, TranslateColumn):
    """Verify translate_column() receives the metadata and column string.

    TranslateColumn is the mock handed to this test; it is primed to
    return "PhageID" before get_column() is exercised.
    """
    column_string = "phage.PhageID"
    TranslateColumn.return_value = "PhageID"

    querying.get_column(self.metadata, column_string)

    TranslateColumn.assert_called_with(self.metadata, column_string)
def test_get_column_1(self, ParseColumn):
    """Verify parse_column() receives the unparsed column string.

    ParseColumn is the mock handed to this test; it is primed to return
    the table/column pair before get_column() is exercised.
    """
    column_string = "phage.PhageID"
    ParseColumn.return_value = ["phage", "PhageID"]

    querying.get_column(self.metadata, column_string)

    ParseColumn.assert_called_with(column_string)
def test_get_column_2(self, GetTable):
    """Verify get_table() receives the metadata and the table name.

    GetTable is the mock handed to this test; it is primed to return
    self.phage before get_column() is exercised.
    """
    GetTable.return_value = self.phage

    querying.get_column(self.metadata, "phage.PhageID")

    expected_args = (self.metadata, "phage")
    GetTable.assert_called_with(*expected_args)
def transpose(self, column, return_dict=False):
    """Query the distinct entries of a column for the current values.

    :param column: Column object or column string to look up.
    :type column: Column
    :type column: str
    :param return_dict: Toggle wrapping the result as {column name: values}.
    :type return_dict: bool
    :returns: Distinct column entries; an empty list when no values are set.
    :rtype: list
    :rtype: dict
    :raises TypeError: If column is neither a string nor a Column.
    """
    self.check()

    # Nothing to look up; note this is a list even when return_dict is set.
    if not self.values:
        return []

    if isinstance(column, str):
        column = q.get_column(self.graph.graph["metadata"], column)
    elif not isinstance(column, Column):
        raise TypeError
    name = column.name

    # Restrict the distinct query to rows whose key is in the stored values.
    in_values = self.key.in_(self.values)
    distinct_query = q.build_distinct(self.graph, [column],
                                      where=[in_values])
    rows = self.engine.execute(distinct_query).fetchall()

    entries = [row[0] for row in rows]
    return {name: entries} if return_dict else entries
def get_cds_seqrecords(alchemist, values=None, nucleotide=False,
                       verbose=False):
    """Convert parsed CDS feature data into a list of sequence records.

    Each CDS returned by parse_feature_data() is paired with its parent
    genome. Parent genomes are fetched from the phage table once per
    distinct genome id and cached for the remaining CDS features.

    :param alchemist: Handler providing metadata, graph and engine.
    :param values: Values forwarded to parse_feature_data(); defaults to
        an empty list.
    :type values: list
    :param nucleotide: Accepted for interface compatibility; not read here.
    :type nucleotide: bool
    :param verbose: Toggle progress messages.
    :type verbose: bool
    :returns: One record per CDS, as built by cds_to_seqrecord().
    :rtype: list
    """
    # A None sentinel avoids sharing one default list across calls.
    if values is None:
        values = []

    cds_list = parse_feature_data(alchemist, values=values)

    seqrecords = []
    genomes_dict = {}
    for cds in cds_list:
        if cds.genome_id not in genomes_dict:
            if verbose:
                print(f"...Retrieving parent genome for {cds.id}...")

            phage_id_obj = querying.get_column(alchemist.metadata,
                                               "phage.PhageID")
            phage_obj = phage_id_obj.table

            parent_genome_query = querying.build_select(
                                    alchemist.graph, phage_obj,
                                    where=phage_id_obj == cds.genome_id)
            parent_genome_data = mysqldb_basic.first(alchemist.engine,
                                                     parent_genome_query)
            genomes_dict[cds.genome_id] = mysqldb.parse_phage_table_data(
                                                        parent_genome_data)

        if verbose:
            print(f"Converting {cds.id}...")

        parent_genome = genomes_dict[cds.genome_id]

        # The genome length is assigned before the seqfeature is built.
        cds.genome_length = parent_genome.length
        cds.set_seqfeature()

        seqrecords.append(cds_to_seqrecord(cds, parent_genome))

    return seqrecords
def key(self, key):
    """Set the filter key from a Column, a column string or a table string.

    A string is first resolved as a column; if that fails it is resolved
    as a table and that table's first primary key column is used.

    :param key: Column object, column string, or table string.
    :type key: Column
    :type key: str
    :raises ValueError: If a string is given without a graph, or the
        string resolves to neither a column nor a table.
    :raises TypeError: If key is neither a Column nor a string.
    """
    if isinstance(key, str):
        if self.graph is None:
            raise ValueError("String key input requires MySQL connection.")

        metadata = self.graph.graph["metadata"]

        try:
            self._key = q.get_column(metadata, key)
        except Exception:
            # Not a column: fall back to treating the string as a table.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            try:
                table_obj = q.get_table(metadata, key)
            except Exception as err:
                raise ValueError("Inputted string key is neither a valid "
                                 "MySQL column or table.") from err

            self._key = list(table_obj.primary_key.columns)[0]
    elif isinstance(key, Column):
        self._key = key
    else:
        raise TypeError("Filter key value is invalid. "
                        "Filter key must be one of the following: \n"
                        "SQLAlchemy Column\n"
                        "MySQL column string\n"
                        "MySQL table string\n")
def get_gr_data_columns(alchemist):
    """Resolve the pham gene data column names to Column objects.

    Every name listed in GR_DATA_COLUMNS is looked up with
    querying.get_column() against the handler's metadata, in order.

    :returns: List of labelled columns for gene data retrieval.
    :rtype: list[Column]
    """
    metadata_lookup = querying.get_column
    return [metadata_lookup(alchemist.metadata, gr_name)
            for gr_name in GR_DATA_COLUMNS]
def get_pf_data_columns(alchemist):
    """Resolve the pham function data column names to Column objects.

    Every name listed in PF_DATA_COLUMNS is looked up with
    querying.get_column() against the handler's metadata, in order.

    :returns: List of labelled columns for function data retrieval.
    :rtype: list[Column]
    """
    metadata_lookup = querying.get_column
    return [metadata_lookup(alchemist.metadata, pf_name)
            for pf_name in PF_DATA_COLUMNS]
def test_first_column_1(self):
    """Verify first_column() returns a list whose first entry is a str.
    """
    cluster_filter = querying.build_where_clause(self.graph,
                                                 "phage.Cluster=A")
    id_column = querying.get_column(self.metadata, "phage.PhageID")
    query = querying.build_select(self.graph, id_column,
                                  where=cluster_filter)

    first_values = querying.first_column(self.engine, query)

    self.assertTrue(isinstance(first_values, list))
    self.assertTrue(isinstance(first_values[0], str))
def test_first_column_2(self):
    """Verify first_column() returns Trixie and D29 but not Myrna
    for the phage.Cluster=A filter.
    """
    cluster_filter = querying.build_where_clause(self.graph,
                                                 "phage.Cluster=A")
    id_column = querying.get_column(self.metadata, "phage.PhageID")
    query = querying.build_select(self.graph, id_column,
                                  where=cluster_filter)

    phage_ids = querying.first_column(self.engine, query)

    for expected_id in ("Trixie", "D29"):
        self.assertTrue(expected_id in phage_ids)
    self.assertFalse("Myrna" in phage_ids)
def get_column(self, raw_column):
    """Return a Column for a column string or an existing Column.

    Strings are resolved against the graph's stored metadata; objects
    whose exact type is listed in COLUMN_TYPES are returned unchanged.

    :param raw_column: SQLAlchemy Column object or object name.
    :type raw_column: Column
    :type raw_column: str
    :raises TypeError: If raw_column is neither of the accepted kinds.
    """
    self.check()

    if isinstance(raw_column, str):
        return q.get_column(self.graph.graph["metadata"], raw_column)

    # Exact type match (not isinstance) against the accepted column types.
    if type(raw_column) in COLUMN_TYPES:
        return raw_column

    raise TypeError(
        "Column must be either a string or a Column object")
def test_execute_value_subqueries(self):
    """Verify every row from execute_value_subqueries() has Cluster "A".
    """
    cluster_filter = querying.build_where_clause(self.graph,
                                                 "phage.Cluster=A")
    phage_table = querying.get_table(self.metadata, "phage")
    id_column = querying.get_column(self.metadata, "phage.PhageID")
    query = querying.build_select(self.graph, phage_table,
                                  where=cluster_filter)
    candidate_ids = ["Trixie", "D29", "Alice", "Myrna"]

    rows = querying.execute_value_subqueries(
                        self.engine, query, id_column, candidate_ids,
                        limit=2)

    for row in rows:
        self.assertEqual(row["Cluster"], "A")
def group(self, column):
    """Map each distinct entry of a column to the values that match it.

    The distinct entries come from transpose(); for each one,
    build_values() is called with an equality condition on the column.

    :param column: Column object or column string to group by.
    :type column: Column
    :type column: str
    :returns: Dictionary of {column entry: values from build_values()}.
    :rtype: dict
    :raises TypeError: If column is neither a string nor a Column.
    """
    self.check()

    if isinstance(column, str):
        column = q.get_column(self.graph.graph["metadata"], column)
    elif not isinstance(column, Column):
        raise TypeError

    return {
        entry: self.build_values(where=[column == entry])
        for entry in self.transpose(column)
    }
def get_sort_columns(alchemist, sort_inputs):
    """Function that converts input for sorting to SQLAlchemy Columns.

    Prints an error and exits with status 1 on the first input that
    querying.get_column() rejects with a ValueError.

    :param alchemist: A connected and fully build AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param sort_inputs: A list of supported MySQL column names.
    :type sort_inputs: list[str]
    :returns: A list of SQLAlchemy Column objects.
    :rtype: list[Column]
    """
    sort_columns = []
    for sort_input in sort_inputs:
        try:
            sort_column = querying.get_column(alchemist.metadata, sort_input)
        except ValueError:
            print("Error occured while selecting sort columns.")
            print(f"Column inputted, '{sort_input}', is invalid.")
            sys.exit(1)
        else:
            # Append only on success. A `finally` clause would also run
            # while SystemExit propagates and touch an unbound (or stale)
            # sort_column, masking the clean exit.
            sort_columns.append(sort_column)

    return sort_columns
def test_get_column_3(self):
    """Verify get_column() raises ValueError for "gene.not_a_column".
    """
    invalid_column = "gene.not_a_column"

    self.assertRaises(ValueError,
                      querying.get_column, self.metadata, invalid_column)
def test_get_column_2(self):
    """Verify get_column() resolves the upper-case string "GENE.GENEID"
    to the GeneID Column.
    """
    retrieved_column = querying.get_column(self.metadata, "GENE.GENEID")

    self.assertEqual(retrieved_column, self.GeneID)
def test_get_column_2(self, GetTable):
    """Verify the GetTable mock is called with the metadata and "phage".

    GetTable is presumably a patch of get_table() applied by a decorator
    outside this block; it is primed to return self.phage.
    """
    GetTable.return_value = self.phage

    querying.get_column(self.metadata, "phage.PhageID")

    expected_args = (self.metadata, "phage")
    GetTable.assert_called_with(*expected_args)
def test_get_column_3(self, TranslateColumn):
    """Verify the TranslateColumn mock is called with the metadata and
    the column string.

    TranslateColumn is presumably a patch of translate_column() applied
    by a decorator outside this block; it is primed to return "PhageID".
    """
    column_string = "phage.PhageID"
    TranslateColumn.return_value = "PhageID"

    querying.get_column(self.metadata, column_string)

    TranslateColumn.assert_called_with(self.metadata, column_string)
def test_get_column_4(self):
    """Verify get_column() resolves "phage.PhageID" to the PhageID Column.
    """
    retrieved_column = querying.get_column(self.metadata, "phage.PhageID")

    self.assertEqual(retrieved_column, self.PhageID)
def filter_csv_columns(alchemist, table, include_columns=None,
                       exclude_columns=None, sequence_columns=False):
    """Function that filters and constructs a list of Columns to select.

    Starts from every column of the table, adds the requested include
    columns, then drops the requested exclude columns and (unless
    sequence_columns is set) the table's SEQUENCE_COLUMNS entries.
    Prints an error and exits with status 1 if a column name is invalid
    or the table's primary key is requested for exclusion.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param table: MySQL table name.
    :type table: str
    :param include_columns: A list of supported MySQL column names.
    :type include_columns: list[str]
    :param exclude_columns: A list of supported MySQL column names.
    :type exclude_columns: list[str]
    :param sequence_columns: A boolean to toggle inclusion of sequence data.
    :type sequence_columns: bool
    :returns: A list of SQLAlchemy Column objects.
    :rtype: list[Column]
    """
    # None sentinels avoid sharing one default list across calls.
    include_columns = [] if include_columns is None else include_columns
    exclude_columns = [] if exclude_columns is None else exclude_columns

    table_obj = alchemist.metadata.tables[table]
    primary_key = list(table_obj.primary_key.columns)[0]

    # Copy, so the table's own column collection is never mutated.
    include_column_objs = list(table_obj.columns)
    for column in include_columns:
        try:
            column_obj = querying.get_column(alchemist.metadata, column)
        except ValueError:
            print("Error occured while selecting csv columns.")
            print(f"Column inputted, '{column}', is invalid.")
            sys.exit(1)
        else:
            # Runs only on success; `finally` would also run during
            # sys.exit() and hit an unbound or stale column_obj.
            if column_obj not in include_column_objs:
                include_column_objs.append(column_obj)

    exclude_column_objs = []
    if not sequence_columns:
        table_columns = dict(table_obj.c)
        for sequence_column in SEQUENCE_COLUMNS[table]:
            exclude_column_objs.append(table_columns[sequence_column])

    for column in exclude_columns:
        try:
            column_obj = querying.get_column(alchemist.metadata, column)
        except ValueError:
            print("Error occured while selecting csv columns.")
            print(f"Column inputted, '{column}', is invalid.")
            sys.exit(1)
        else:
            if column_obj.compare(primary_key):
                print(f"Primary key to {table} cannot be excluded")
                sys.exit(1)

            if column_obj not in exclude_column_objs:
                exclude_column_objs.append(column_obj)

    return [column_obj for column_obj in include_column_objs
            if column_obj not in exclude_column_objs]
def test_get_column_4(self):
    """Verify get_column() resolves "phage.PhageID" to self.PhageID.
    """
    expected_column = self.PhageID

    self.assertEqual(querying.get_column(self.metadata, "phage.PhageID"),
                     expected_column)