def _to_dict(self):
    r"""Returns the categories and their values in a dictionary

    The row stored in the required-columns table is merged with the row
    stored in the dynamic table, the identifier columns are dropped and
    every translated ``*_id`` column is replaced by its string value.

    Returns
    -------
    dict of {str: str}
        A dictionary of the form {category: value}
    """
    conn_handler = SQLConnectionHandler()

    required_sql = ("SELECT * FROM qiita.{0} WHERE {1}=%s AND "
                    "sample_id=%s".format(self._table, self._id_column))
    values = dict(conn_handler.execute_fetchone(
        required_sql, (self._md_template.id, self._id)))

    dynamic_sql = ("SELECT * from qiita.{0} WHERE "
                   "sample_id=%s".format(self._dynamic_table))
    values.update(dict(conn_handler.execute_fetchone(
        dynamic_sql, (self._id, ))))

    # The identifiers are not metadata; study_id is only present for some
    # tables, hence the tolerant pop
    del values['sample_id']
    del values[self._id_column]
    values.pop('study_id', None)

    # Swap each *_id column for the column holding the string it stands for
    for id_col, str_col in viewitems(self._md_template.translate_cols_dict):
        translations = self._md_template.str_cols_handlers[id_col]
        values[str_col] = translations[values[id_col]]
        del values[id_col]

    return values
def __getitem__(self, key):
    r"""Returns the value of the metadata category `key`

    Parameters
    ----------
    key : str
        The metadata category (matched case-insensitively)

    Returns
    -------
    obj
        The value of the metadata category `key`

    Raises
    ------
    KeyError
        If the metadata category `key` does not exist

    See Also
    --------
    get
    """
    conn_handler = SQLConnectionHandler()
    key = key.lower()

    if key not in self._get_categories(conn_handler):
        # The key is not available for the sample, so raise a KeyError
        raise KeyError("Metadata category %s does not exists for sample %s"
                       " in template %d" %
                       (key, self._id, self._md_template.id))

    # Categories that appear as values of translate_cols_dict are stored in
    # the DB under "<category>_id" and need to be translated back to their
    # string form once fetched
    needs_translation = (
        key in self._md_template.translate_cols_dict.values())
    if needs_translation:
        key = "%s_id" % key

    if key not in get_table_cols(self._table, conn_handler):
        # Not a required column, so it lives in the dynamic table
        dynamic_sql = ("SELECT {0} FROM qiita.{1} WHERE "
                       "sample_id=%s".format(key, self._dynamic_table))
        return conn_handler.execute_fetchone(dynamic_sql, (self._id, ))[0]

    required_sql = ("SELECT {0} FROM qiita.{1} WHERE {2}=%s AND "
                    "sample_id=%s".format(key, self._table,
                                          self._id_column))
    result = conn_handler.execute_fetchone(
        required_sql, (self._md_template.id, self._id))[0]

    if needs_translation:
        # The handlers are indexed by the *_id column name
        return self._md_template.str_cols_handlers[key][result]
    return result
def __getitem__(self, key):
    r"""Returns the value of the metadata category `key`

    Parameters
    ----------
    key : str
        The metadata category (matched case-insensitively)

    Returns
    -------
    obj
        The value of the metadata category `key`

    Raises
    ------
    KeyError
        If the metadata category `key` does not exist

    See Also
    --------
    get
    """
    conn_handler = SQLConnectionHandler()
    key = key.lower()

    if key in self._get_categories(conn_handler):
        query = """SELECT {0} FROM qiita.{1} WHERE sample_id=%s""".format(
            key, self._dynamic_table)
        row = conn_handler.execute_fetchone(query, (self._id, ))
        return row[0]

    # The key is not available for the sample, so raise a KeyError
    raise KeyError("Metadata category %s does not exists for sample %s"
                   " in template %d" %
                   (key, self._id, self._md_template.id))
def class_modifier(cls):
    """Wraps the test case `cls` so each test builds and drops the test DB

    Raises
    ------
    RuntimeError
        If the configuration or the connected database does not look like
        a test environment
    """
    # The database itself tells us whether it is a test database
    conn_handler = SQLConnectionHandler()
    test_db = conn_handler.execute_fetchone("SELECT test FROM settings")[0]

    # All three indicators must agree: the configuration file is a test
    # one, the database is flagged as test and it is named qiita_test
    safe_to_test = (qiita_config.test_environment and test_db and
                    qiita_config.database == 'qiita_test')
    if not safe_to_test:
        raise RuntimeError("Working in a production environment. Not "
                           "executing the tests to keep the production "
                           "database safe.")

    # Decorate the setup and teardown functions of the wrapped class
    class DecoratedClass(cls):
        @build_test_database
        def setUp(self):
            super(DecoratedClass, self).setUp()
            self.conn_handler = SQLConnectionHandler()

        @drop_test_database
        def tearDown(self):
            super(DecoratedClass, self).tearDown()
            del self.conn_handler

    return DecoratedClass
def class_modifier(cls):
    """Wraps the test case `cls` so the test DB is reset after each test

    Raises
    ------
    RuntimeError
        If the configuration or the connected database does not look like
        a test environment
    """
    # The database itself tells us whether it is a test database
    conn_handler = SQLConnectionHandler()
    test_db = conn_handler.execute_fetchone("SELECT test FROM settings")[0]

    # All three indicators must agree: the configuration file is a test
    # one, the database is flagged as test and it is named qiita_test
    safe_to_test = (qiita_config.test_environment and test_db and
                    qiita_config.database == 'qiita_test')
    if not safe_to_test:
        raise RuntimeError("Working in a production environment. Not "
                           "executing the tests to keep the production "
                           "database safe.")

    # Only the teardown is decorated; the setup just opens a connection
    class DecoratedClass(cls):
        def setUp(self):
            super(DecoratedClass, self).setUp()
            self.conn_handler = SQLConnectionHandler()

        @reset_test_database
        def tearDown(self):
            super(DecoratedClass, self).tearDown()
            del self.conn_handler

    return DecoratedClass
def raw_data(self):
    """Returns the raw data id stored for this prep template

    Returns
    -------
    obj or None
        The ``raw_data_id`` value of the prep template row, or None when
        the query returns no row
    """
    query = ("SELECT raw_data_id FROM qiita.prep_template "
             "WHERE prep_template_id=%s")
    row = SQLConnectionHandler().execute_fetchone(query, (self.id,))
    return row[0] if row else None
def _check_id(self, id_):
    r"""Checks that the MetadataTemplate id_ exists on the database

    Parameters
    ----------
    id_ : obj
        The identifier looked up in the ``_id_column`` of ``_table``

    Returns
    -------
    obj
        The result of the ``SELECT EXISTS`` query
    """
    self._check_subclass()

    conn_handler = SQLConnectionHandler()
    query = ("SELECT EXISTS(SELECT * FROM qiita.{0} WHERE "
             "{1}=%s)".format(self._table, self._id_column))
    row = conn_handler.execute_fetchone(query, (id_, ))
    return row[0]
def delete(cls, id_):
    r"""Removes a prep template and all of its rows from the database

    Parameters
    ----------
    id_ : obj
        The object identifier

    Raises
    ------
    QiitaDBExecutionError
        If the prep template already has a preprocessed data
    QiitaDBUnknownIDError
        If no prep template with id = id_ exists
    """
    table_name = cls._table_name(id_)
    conn_handler = SQLConnectionHandler()

    if not cls.exists(id_):
        raise QiitaDBUnknownIDError(id_, cls.__name__)

    # Templates that already produced preprocessed data cannot be removed
    has_preprocessed = conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.prep_template_preprocessed_data"
        " WHERE prep_template_id=%s)", (id_,))[0]
    if has_preprocessed:
        raise QiitaDBExecutionError("Cannot remove prep template %d "
                                    "because a preprocessed data has been"
                                    " already generated using it." % id_)

    def _delete_rows(table):
        # Remove the rows of `table` that belong to this template
        conn_handler.execute(
            "DELETE FROM qiita.{0} where {1} = %s".format(table,
                                                          cls._id_column),
            (id_,))

    # Delete the prep template filepaths
    conn_handler.execute(
        "DELETE FROM qiita.prep_template_filepath WHERE "
        "prep_template_id = %s", (id_, ))

    # Drop the prep_X table
    conn_handler.execute("DROP TABLE qiita.{0}".format(table_name))

    # Remove the rows from common_prep_info, then from prep_columns
    _delete_rows(cls._table)
    _delete_rows(cls._column_table)

    # Finally, remove the row from prep_template itself
    conn_handler.execute(
        "DELETE FROM qiita.prep_template where "
        "{0} = %s".format(cls._id_column), (id_,))
def preprocessing_status(self):
    r"""Tells if the data has been preprocessed or not

    Returns
    -------
    str
        One of {'not_preprocessed', 'preprocessing', 'success', 'failed'}
    """
    conn_handler = SQLConnectionHandler()
    query = ("SELECT preprocessing_status FROM qiita.prep_template "
             "WHERE {0}=%s".format(self._id_column))
    row = conn_handler.execute_fetchone(query, (self.id,))
    return row[0]
def __setitem__(self, column, value):
    r"""Sets the metadata value for the category `column`

    Parameters
    ----------
    column : str
        The column to update
    value : str
        The value to set. This is expected to be a str on the assumption
        that psycopg2 will cast as necessary when updating.

    Raises
    ------
    ValueError
        If the value type does not match the one in the DB
    QiitaDBExecutionError
        If the update fails for any other reason
    """
    conn_handler = SQLConnectionHandler()
    queue_name = "set_item_%s" % self._id
    conn_handler.create_queue(queue_name)

    self.add_setitem_queries(column, value, conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except QiitaDBExecutionError:
        # Find out whether the failure is due to a column type mismatch or
        # to something else. Any Python type other than int/float is
        # treated as varchar.
        type_lookup = {int: 'integer', float: 'float8', str: 'varchar'}
        value_type = type_lookup.get(type(value), 'varchar')

        sql = """SELECT udt_name
                 FROM information_schema.columns
                 WHERE column_name = %s
                    AND table_schema = 'qiita'
                    AND (table_name = %s OR table_name = %s)"""
        row = conn_handler.execute_fetchone(
            sql, (column, self._table, self._dynamic_table))
        # execute_fetchone returns a whole row (or nothing when the column
        # is not found), so the type name has to be unpacked before it can
        # be compared against a string
        column_type = row[0] if row else None

        if column_type is not None and column_type != value_type:
            raise ValueError(
                'The new value being added to column: "{0}" is "{1}" '
                '(type: "{2}"). However, this column in the DB is of '
                'type "{3}". Please change the value in your updated '
                'template or reprocess your template.'.format(
                    column, value, value_type, column_type))

        # Not a type problem: let the original error propagate untouched
        raise
def raw_data(self, raw_data):
    """Associates `raw_data` with this prep template

    Parameters
    ----------
    raw_data : obj
        The object whose ``id`` is stored as the template's raw_data_id

    Raises
    ------
    QiitaDBError
        If the prep template already has a raw data associated
    """
    conn_handler = SQLConnectionHandler()

    # A non-NULL raw_data_id means the template is already linked
    check_sql = ("SELECT ( SELECT raw_data_id FROM qiita.prep_template "
                 "WHERE prep_template_id=%s) IS NOT NULL")
    already_linked = conn_handler.execute_fetchone(
        check_sql, (self.id,))[0]
    if already_linked:
        raise QiitaDBError(
            "Prep template %d already has a raw data associated" % self.id)

    update_sql = ("UPDATE qiita.prep_template SET raw_data_id = %s "
                  "WHERE prep_template_id = %s")
    conn_handler.execute(update_sql, (raw_data.id, self.id))
def study_id(self):
    """Gets the study id with which this prep template is associated

    Returns
    -------
    int
        The ID of the study with which this prep template is associated

    Raises
    ------
    QiitaDBError
        If no study is associated with this prep template
    """
    conn = SQLConnectionHandler()
    query = ("SELECT study_id FROM qiita.study_prep_template "
             "WHERE prep_template_id=%s")
    row = conn.execute_fetchone(query, (self.id,))

    if not row:
        raise QiitaDBError("No studies found associated with prep "
                           "template ID %d" % self._id)
    return row[0]
def _to_dict(self):
    r"""Returns the categories and their values in a dictionary

    Returns
    -------
    dict of {str: str}
        A dictionary of the form {category: value}
    """
    conn_handler = SQLConnectionHandler()
    query = ("SELECT * from qiita.{0} WHERE "
             "sample_id=%s".format(self._dynamic_table))
    values = dict(conn_handler.execute_fetchone(query, (self._id, )))

    # The sample_id identifies the row; it is not part of the metadata
    del values['sample_id']
    return values
def data_type(self, ret_id=False):
    """Returns the data_type or the data_type id

    Parameters
    ----------
    ret_id : bool, optional
        If true, return the id instead of the string, default false.

    Returns
    -------
    str or int
        string value of data_type or data_type_id if ret_id is True
    """
    # The suffix turns the selected column into data_type_id when needed
    if ret_id:
        suffix = "_id"
    else:
        suffix = ""

    conn_handler = SQLConnectionHandler()
    query = (
        "SELECT d.data_type{0} FROM qiita.data_type d JOIN "
        "qiita.prep_template p ON p.data_type_id = d.data_type_id WHERE "
        "p.prep_template_id=%s".format(suffix))
    row = conn_handler.execute_fetchone(query, (self.id,))
    return row[0]
def exists(cls, sample_id, md_template):
    r"""Checks if the sample is already stored for the given template

    Parameters
    ----------
    sample_id : str
        The sample id
    md_template : MetadataTemplate
        The metadata template to which the sample belongs to

    Returns
    -------
    bool
        True if already exists. False otherwise.
    """
    cls._check_subclass()

    conn_handler = SQLConnectionHandler()
    query = ("SELECT EXISTS(SELECT * FROM qiita.{0} WHERE sample_id=%s AND "
             "{1}=%s)".format(cls._table, cls._id_column))
    row = conn_handler.execute_fetchone(query, (sample_id, md_template.id))
    return row[0]
def create(cls, md_template, raw_data, study, data_type,
           investigation_type=None):
    r"""Creates the metadata template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples Ids
    raw_data : RawData
        The raw_data to which the prep template belongs to.
    study : Study
        The study to which the prep template belongs to.
    data_type : str or int
        The data_type of the prep template
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` to access to the PrepTemplate stored in the DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    QiitaDBExecutionError
        If the template contains samples that are not present in the
        sample template of the study
    """
    # Only recognized investigation types are accepted, when one is given
    if investigation_type is not None:
        cls.validate_investigation_type(investigation_type)

    conn_handler = SQLConnectionHandler()
    queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
    conn_handler.create_queue(queue_name)

    # The data type can be supplied as its id or as its string; either
    # way both representations are needed below
    if not isinstance(data_type, (int, long)):
        data_type_str = data_type
        data_type_id = convert_to_id(data_type, "data_type", conn_handler)
    else:
        data_type_id = data_type
        data_type_str = convert_from_id(data_type, "data_type",
                                        conn_handler)

    md_template = cls._clean_validate_template(md_template, study.id,
                                               data_type_str, conn_handler)

    # The prep_id is needed by several of the queued statements and the
    # queue system cannot feed it to them, so this insert runs outside
    # the queue
    insert_sql = (
        "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
        "investigation_type) VALUES (%s, %s, %s) RETURNING "
        "prep_template_id")
    prep_id = conn_handler.execute_fetchone(
        insert_sql, (data_type_id, raw_data.id, investigation_type))[0]

    cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                            conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except Exception:
        # The row inserted outside the queue has to be removed by hand
        conn_handler.execute(
            "DELETE FROM qiita.prep_template where "
            "{0} = %s".format(cls._id_column), (prep_id,))

        # Look for samples in the prep template that the sample template
        # of the study does not know about
        study_samples_sql = (
            "SELECT sample_id from qiita.required_sample_info WHERE "
            "study_id = %s")
        prep_samples = set(md_template.index.values)
        unknown_samples = prep_samples.difference(
            row[0] for row in
            conn_handler.execute_fetchall(study_samples_sql, [study.id]))

        if unknown_samples:
            raise QiitaDBExecutionError(
                'Samples found in prep template but not sample template: '
                '%s' % ', '.join(unknown_samples))

        # Some other error we haven't seen before, so let it propagate
        raise

    new_template = cls(prep_id)
    new_template.generate_files()
    return new_template
def investigation_type(self):
    """Returns the investigation_type stored for this prep template"""
    query = ("SELECT investigation_type FROM qiita.prep_template "
             "WHERE {0} = %s".format(self._id_column))
    row = SQLConnectionHandler().execute_fetchone(query, [self._id])
    return row[0]
def postgres_test(**kwargs):
    """Open a connection and query postgres

    Returns
    -------
    obj
        The first column of the row returned by ``SELECT 42``
    """
    from qiita_db.sql_connection import SQLConnectionHandler

    row = SQLConnectionHandler().execute_fetchone("SELECT 42")
    return row[0]
def update_category(self, category, samples_and_values):
    """Update an existing column

    Parameters
    ----------
    category : str
        The category to update
    samples_and_values : dict
        A mapping of {sample_id: value}

    Raises
    ------
    QiitaDBUnknownIDError
        If a sample_id is included in values that is not in the template
    QiitaDBColumnError
        If the column does not exist in the table. This is implicit, and
        can be thrown by the contained Samples.
    ValueError
        If one of the new values cannot be inserted in the DB due to
        different types
    QiitaDBExecutionError
        If the update fails for any other reason
    """
    # Report the sample ids present in the update but absent from the
    # template (not the other way around)
    unknown = set(samples_and_values).difference(self.keys())
    if unknown:
        table_name = self._table_name(self._id)
        raise QiitaDBUnknownIDError(unknown, table_name)

    conn_handler = SQLConnectionHandler()
    queue_name = "update_category_%s_%s" % (self._id, category)
    conn_handler.create_queue(queue_name)

    for k, v in viewitems(samples_and_values):
        sample = self[k]
        sample.add_setitem_queries(category, v, conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except QiitaDBExecutionError:
        # Find out whether the failure is due to a column type mismatch or
        # to something else. Any Python type other than int/float is
        # treated as varchar.
        type_lookup = {int: 'integer', float: 'float8', str: 'varchar'}
        value_types = set(type_lookup.get(type(value), 'varchar')
                          for value in viewvalues(samples_and_values))

        sql = """SELECT udt_name
                 FROM information_schema.columns
                 WHERE column_name = %s
                    AND table_schema = 'qiita'
                    AND (table_name = %s OR table_name = %s)"""
        row = conn_handler.execute_fetchone(
            sql, (category, self._table, self._table_name(self._id)))
        # execute_fetchone returns a whole row (or nothing when the column
        # is not found), so the type name has to be unpacked before it can
        # be compared against a string
        column_type = row[0] if row else None

        if column_type is not None and any(column_type != vt
                                           for vt in value_types):
            value_str = ', '.join(
                [str(value) for value in viewvalues(samples_and_values)])
            value_types_str = ', '.join(value_types)

            raise ValueError(
                'The new values being added to column: "%s" are "%s" '
                '(types: "%s"). However, this column in the DB is of '
                'type "%s". Please change the values in your updated '
                'template or reprocess your template.'
                % (category, value_str, value_types_str, column_type))

        # Not a type problem: let the original error propagate untouched
        raise