Esempio n. 1
0
    def _to_dict(self):
        r"""Collects every metadata category of the sample in a dictionary

        The row stored in the required-columns table is merged with the row
        stored in the dynamic table. The identifier columns (`sample_id`, the
        template id column and, when present, `study_id`) are dropped, and
        each column listed in the template's `translate_cols_dict` is replaced
        by the column holding its string value.

        Returns
        -------
        dict of {str: str}
            A dictionary of the form {category: value}
        """
        sql_handler = SQLConnectionHandler()

        # Values stored in the table holding the required columns
        required_sql = ("SELECT * FROM qiita.{0} WHERE {1}=%s AND "
                        "sample_id=%s".format(self._table, self._id_column))
        metadata = dict(sql_handler.execute_fetchone(
            required_sql, (self._md_template.id, self._id)))

        # Values stored in the dynamic table; on a name clash these win
        dynamic_sql = ("SELECT * from qiita.{0} WHERE "
                       "sample_id=%s".format(self._dynamic_table))
        dynamic_metadata = dict(sql_handler.execute_fetchone(
            dynamic_sql, (self._id, )))
        metadata.update(dynamic_metadata)

        # The identifiers are not metadata categories
        del metadata['sample_id']
        del metadata[self._id_column]
        metadata.pop('study_id', None)

        # Swap each *_id column for the column holding its string value
        for id_col, str_col in viewitems(self._md_template.translate_cols_dict):
            to_str = self._md_template.str_cols_handlers[id_col]
            metadata[str_col] = to_str[metadata[id_col]]
            del metadata[id_col]

        return metadata
Esempio n. 2
0
    def __getitem__(self, key):
        r"""Returns the value of the metadata category `key`

        The lookup is case insensitive: `key` is lowercased before use.

        Parameters
        ----------
        key : str
            The metadata category

        Returns
        -------
        obj
            The value of the metadata category `key`

        Raises
        ------
        KeyError
            If the metadata category `key` does not exist

        See Also
        --------
        get
        """
        conn_handler = SQLConnectionHandler()
        key = key.lower()
        if key not in self._get_categories(conn_handler):
            # The key is not available for the sample, so raise a KeyError
            raise KeyError("Metadata category %s does not exists for sample %s"
                           " in template %d" %
                           (key, self._id, self._md_template.id))

        # The key may name the string version of one of the *_id columns. In
        # that case the *_id column is the one to query, and the value read
        # from it has to be translated back to its string
        needs_translation = (
            key in self._md_template.translate_cols_dict.values())
        if needs_translation:
            key = "%s_id" % key

        if key not in get_table_cols(self._table, conn_handler):
            # Not a column of the required table: read it from the dynamic
            # table. No translation is applied on this path
            return conn_handler.execute_fetchone(
                "SELECT {0} FROM qiita.{1} WHERE "
                "sample_id=%s".format(key, self._dynamic_table),
                (self._id, ))[0]

        result = conn_handler.execute_fetchone(
            "SELECT {0} FROM qiita.{1} WHERE {2}=%s AND "
            "sample_id=%s".format(key, self._table, self._id_column),
            (self._md_template.id, self._id))[0]

        if needs_translation:
            # The handlers are looked up with the *_id column name
            return self._md_template.str_cols_handlers[key][result]
        return result
Esempio n. 3
0
    def __getitem__(self, key):
        r"""Looks up the value stored for the metadata category `key`

        The category name is lowercased before it is checked and queried.

        Parameters
        ----------
        key : str
            The metadata category

        Returns
        -------
        obj
            The value of the metadata category `key`

        Raises
        ------
        KeyError
            If the metadata category `key` does not exist for the sample

        See Also
        --------
        get
        """
        handler = SQLConnectionHandler()
        key = key.lower()
        known_categories = self._get_categories(handler)

        if key in known_categories:
            # All the categories live in the dynamic table
            sql = """SELECT {0} FROM qiita.{1}
                 WHERE sample_id=%s""".format(key, self._dynamic_table)
            row = handler.execute_fetchone(sql, (self._id, ))
            return row[0]

        # The key is not available for the sample
        raise KeyError("Metadata category %s does not exists for sample %s"
                       " in template %d" %
                       (key, self._id, self._md_template.id))
Esempio n. 4
0
    def class_modifier(cls):
        """Returns a subclass of `cls` with DB-aware setUp and tearDown

        Raises
        ------
        RuntimeError
            If the configuration is not flagged as a test environment, the
            `test` value stored in the `settings` table is falsy, or the
            configured database is not 'qiita_test'
        """
        # Ask the database we are connected to whether it is a test database
        handler = SQLConnectionHandler()
        is_test_db = handler.execute_fetchone(
            "SELECT test FROM settings")[0]

        # All three conditions must hold, otherwise we may be pointing to a
        # production environment
        safe_to_run = (qiita_config.test_environment and is_test_db and
                       qiita_config.database == 'qiita_test')
        if not safe_to_run:
            raise RuntimeError("Working in a production environment. Not "
                               "executing the tests to keep the production "
                               "database safe.")

        class DecoratedClass(cls):
            # setUp is wrapped by build_test_database and gives each test its
            # own connection handler
            @build_test_database
            def setUp(self):
                super(DecoratedClass, self).setUp()
                self.conn_handler = SQLConnectionHandler()

            # tearDown is wrapped by drop_test_database and releases the
            # connection handler created in setUp
            @drop_test_database
            def tearDown(self):
                super(DecoratedClass, self).tearDown()
                del self.conn_handler

        return DecoratedClass
Esempio n. 5
0
    def class_modifier(cls):
        """Returns a subclass of `cls` with DB-aware setUp and tearDown

        Raises
        ------
        RuntimeError
            If the configuration is not flagged as a test environment, the
            `test` value stored in the `settings` table is falsy, or the
            configured database is not 'qiita_test'
        """
        # Ask the database we are connected to whether it is a test database
        handler = SQLConnectionHandler()
        is_test_db = handler.execute_fetchone(
            "SELECT test FROM settings")[0]

        # All three conditions must hold, otherwise we may be pointing to a
        # production environment
        safe_to_run = (qiita_config.test_environment and is_test_db and
                       qiita_config.database == 'qiita_test')
        if not safe_to_run:
            raise RuntimeError("Working in a production environment. Not "
                               "executing the tests to keep the production "
                               "database safe.")

        class DecoratedClass(cls):
            # Each test gets its own connection handler
            def setUp(self):
                super(DecoratedClass, self).setUp()
                self.conn_handler = SQLConnectionHandler()

            # tearDown is wrapped by reset_test_database and releases the
            # connection handler created in setUp
            @reset_test_database
            def tearDown(self):
                super(DecoratedClass, self).tearDown()
                del self.conn_handler

        return DecoratedClass
Esempio n. 6
0
 def raw_data(self):
     """Returns the raw data id stored for this prep template

     Returns
     -------
     obj or None
         The `raw_data_id` value of the prep template row, or None if the
         query returns nothing
     """
     sql = ("SELECT raw_data_id FROM qiita.prep_template "
            "WHERE prep_template_id=%s")
     row = SQLConnectionHandler().execute_fetchone(sql, (self.id,))
     return row[0] if row else None
Esempio n. 7
0
    def _check_id(self, id_):
        r"""Tells whether a MetadataTemplate with id `id_` is in the database

        Parameters
        ----------
        id_ : obj
            The identifier to look for in the template id column

        Returns
        -------
        obj
            The result of the SQL EXISTS query
        """
        self._check_subclass()

        handler = SQLConnectionHandler()
        sql = ("SELECT EXISTS(SELECT * FROM qiita.{0} WHERE "
               "{1}=%s)".format(self._table, self._id_column))
        row = handler.execute_fetchone(sql, (id_, ))
        return row[0]
Esempio n. 8
0
    def delete(cls, id_):
        r"""Removes a prep template and its table from the database

        The statements are issued one after the other, in this order: prep
        template filepaths, the prep template's own table, the rows in
        `cls._table`, the rows in `cls._column_table` and finally the row in
        qiita.prep_template.

        Parameters
        ----------
        id_ : obj
            The object identifier

        Raises
        ------
        QiitaDBExecutionError
            If the prep template already has a preprocessed data
        QiitaDBUnknownIDError
            If no prep template with id = id_ exists
        """
        prep_table = cls._table_name(id_)
        handler = SQLConnectionHandler()

        # Guard: the prep template has to exist
        if not cls.exists(id_):
            raise QiitaDBUnknownIDError(id_, cls.__name__)

        # Guard: no preprocessed data may have been generated from it
        has_preprocessed_sql = (
            "SELECT EXISTS(SELECT * FROM qiita.prep_template_preprocessed_data"
            " WHERE prep_template_id=%s)")
        if handler.execute_fetchone(has_preprocessed_sql, (id_,))[0]:
            raise QiitaDBExecutionError("Cannot remove prep template %d "
                                        "because a preprocessed data has been"
                                        " already generated using it." % id_)

        # Prep template filepaths
        filepaths_sql = ("DELETE FROM qiita.prep_template_filepath WHERE "
                         "prep_template_id = %s")
        handler.execute(filepaths_sql, (id_, ))

        # The prep_X table itself
        drop_sql = "DROP TABLE qiita.{0}".format(prep_table)
        handler.execute(drop_sql)

        # Rows in common_prep_info and then in prep_columns
        rows_sql = "DELETE FROM qiita.{0} where {1} = %s"
        handler.execute(rows_sql.format(cls._table, cls._id_column), (id_,))
        handler.execute(rows_sql.format(cls._column_table, cls._id_column),
                        (id_,))

        # Finally, the row in prep_template
        template_sql = ("DELETE FROM qiita.prep_template where "
                        "{0} = %s".format(cls._id_column))
        handler.execute(template_sql, (id_,))
Esempio n. 9
0
    def preprocessing_status(self):
        r"""Reads the preprocessing status stored for this prep template

        Returns
        -------
        str
            One of {'not_preprocessed', 'preprocessing', 'success', 'failed'}
        """
        handler = SQLConnectionHandler()
        sql = ("SELECT preprocessing_status FROM qiita.prep_template "
               "WHERE {0}=%s".format(self._id_column))
        row = handler.execute_fetchone(sql, (self.id,))
        return row[0]
Esempio n. 10
0
    def __setitem__(self, column, value):
        r"""Sets the metadata value for the category `column`

        The update queries are added to a dedicated queue and executed. If
        the execution fails, the DB type of `column` is compared with the
        type of `value` to report type mismatches with a clearer error.

        Parameters
        ----------
        column : str
            The column to update
        value : str
            The value to set. This is expected to be a str on the assumption
            that psycopg2 will cast as necessary when updating.

        Raises
        ------
        ValueError
            If the value type does not match the one in the DB
        QiitaDBExecutionError
            If the update fails and no type mismatch is detected
        """
        conn_handler = SQLConnectionHandler()
        queue_name = "set_item_%s" % self._id
        conn_handler.create_queue(queue_name)

        self.add_setitem_queries(column, value, conn_handler, queue_name)

        try:
            conn_handler.execute_queue(queue_name)
        except QiitaDBExecutionError:
            # catching error so we can check if the error is due to different
            # column type or something else
            # NOTE(review): PostgreSQL usually reports integer columns with
            # udt_name 'int4'/'int8' rather than 'integer' - confirm that
            # this mapping matches what the DB returns
            type_lookup = defaultdict(lambda: 'varchar')
            type_lookup[int] = 'integer'
            type_lookup[float] = 'float8'
            type_lookup[str] = 'varchar'
            value_type = type_lookup[type(value)]

            sql = """SELECT udt_name
                     FROM information_schema.columns
                     WHERE column_name = %s
                        AND table_schema = 'qiita'
                        AND (table_name = %s OR table_name = %s)"""
            row = conn_handler.execute_fetchone(
                sql, (column, self._table, self._dynamic_table))
            # execute_fetchone yields a row, not the bare value: comparing
            # the row itself against a str would always be unequal
            column_type = row[0] if row else None

            # Only blame the types when the column was actually found
            if column_type is not None and column_type != value_type:
                raise ValueError(
                    'The new value being added to column: "{0}" is "{1}" '
                    '(type: "{2}"). However, this column in the DB is of '
                    'type "{3}". Please change the value in your updated '
                    'template or reprocess your template.'.format(
                        column, value, value_type, column_type))

            # Not a type problem: propagate the original error untouched
            raise
Esempio n. 11
0
 def raw_data(self, raw_data):
     """Associates `raw_data` with this prep template

     Parameters
     ----------
     raw_data : obj
         Object whose `id` attribute is stored as the `raw_data_id`

     Raises
     ------
     QiitaDBError
         If the prep template already has a raw data associated
     """
     handler = SQLConnectionHandler()

     # Refuse to overwrite an existing association
     check_sql = """SELECT (
                 SELECT raw_data_id
                 FROM qiita.prep_template
                 WHERE prep_template_id=%s)
             IS NOT NULL"""
     already_linked = handler.execute_fetchone(check_sql, (self.id,))[0]
     if already_linked:
         raise QiitaDBError(
             "Prep template %d already has a raw data associated"
             % self.id)

     update_sql = """UPDATE qiita.prep_template
              SET raw_data_id = %s
              WHERE prep_template_id = %s"""
     handler.execute(update_sql, (raw_data.id, self.id))
Esempio n. 12
0
    def study_id(self):
        """Looks up the study to which this prep template is attached

        Returns
        -------
        int
            The ID of the study with which this prep template is associated

        Raises
        ------
        QiitaDBError
            If no study is associated with the prep template
        """
        handler = SQLConnectionHandler()
        query = ("SELECT study_id FROM qiita.study_prep_template "
                 "WHERE prep_template_id=%s")
        row = handler.execute_fetchone(query, (self.id,))

        if not row:
            raise QiitaDBError("No studies found associated with prep "
                               "template ID %d" % self._id)
        return row[0]
Esempio n. 13
0
    def _to_dict(self):
        r"""Collects every metadata category of the sample in a dictionary

        The values are read from the sample's row in the dynamic table.

        Returns
        -------
        dict of {str: str}
            A dictionary of the form {category: value}
        """
        handler = SQLConnectionHandler()
        sql = ("SELECT * from qiita.{0} WHERE "
               "sample_id=%s".format(self._dynamic_table))
        row = handler.execute_fetchone(sql, (self._id, ))
        metadata = dict(row)

        # The sample_id identifies the row; it is not a metadata category
        del metadata['sample_id']

        return metadata
Esempio n. 14
0
    def data_type(self, ret_id=False):
        """Looks up the data type of the prep template

        Parameters
        ----------
        ret_id : bool, optional
            If true, return the id instead of the string, default false.

        Returns
        -------
        str or int
            string value of data_type or data_type_id if ret_id is True
        """
        # Select either the `data_type` or the `data_type_id` column
        if ret_id:
            column_suffix = "_id"
        else:
            column_suffix = ""

        handler = SQLConnectionHandler()
        sql = ("SELECT d.data_type{0} FROM qiita.data_type d JOIN "
               "qiita.prep_template p ON p.data_type_id = d.data_type_id WHERE "
               "p.prep_template_id=%s".format(column_suffix))
        row = handler.execute_fetchone(sql, (self.id,))
        return row[0]
Esempio n. 15
0
    def exists(cls, sample_id, md_template):
        r"""Tells whether the sample is already stored for the given template

        Parameters
        ----------
        sample_id : str
            The sample id
        md_template : MetadataTemplate
            The metadata template to which the sample belongs to

        Returns
        -------
        bool
            True if already exists. False otherwise.
        """
        cls._check_subclass()

        handler = SQLConnectionHandler()
        sql = ("SELECT EXISTS(SELECT * FROM qiita.{0} WHERE sample_id=%s AND "
               "{1}=%s)".format(cls._table, cls._id_column))
        row = handler.execute_fetchone(sql, (sample_id, md_template.id))
        return row[0]
Esempio n. 16
0
    def create(cls, md_template, raw_data, study, data_type,
               investigation_type=None):
        r"""Creates the metadata template in the database

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids
        raw_data : RawData
            The raw_data to which the prep template belongs to.
        study : Study
            The study to which the prep template belongs to.
        data_type : str or int
            The data_type of the prep template
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        A new instance of `cls` to access to the PrepTemplate stored in the DB

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        QiitaDBExecutionError
            If executing the creation queue fails and md_template contains
            samples that are not listed for the study in
            qiita.required_sample_info

        Notes
        -----
        The row in qiita.prep_template is inserted outside of the queue, so
        it is explicitly deleted when executing the queue fails. Any failure
        that is not explained by unknown samples is re-raised unchanged.
        """
        # If the investigation_type is supplied, make sure it is one of
        # the recognized investigation types
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        # Get a connection handler
        conn_handler = SQLConnectionHandler()
        # The queue is named after the raw data id
        queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
        conn_handler.create_queue(queue_name)

        # Check if the data_type is the id or the string
        # Both forms are needed below: the id for the INSERT and the string
        # for the template validation
        if isinstance(data_type, (int, long)):
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type",
                                            conn_handler)
        else:
            data_type_id = convert_to_id(data_type, "data_type", conn_handler)
            data_type_str = data_type

        md_template = cls._clean_validate_template(md_template, study.id,
                                                   data_type_str, conn_handler)

        # Insert the metadata template
        # We need the prep_id for multiple calls below, which currently is not
        # supported by the queue system. Thus, executing this outside the queue
        prep_id = conn_handler.execute_fetchone(
            "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
            "investigation_type) VALUES (%s, %s, %s) RETURNING "
            "prep_template_id", (data_type_id, raw_data.id,
                                 investigation_type))[0]

        # The remaining creation steps are added to the queue
        cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                                conn_handler, queue_name)

        try:
            conn_handler.execute_queue(queue_name)
        except Exception:
            # Clean up row from qiita.prep_template
            # (it was inserted outside the queue, see above)
            conn_handler.execute(
                "DELETE FROM qiita.prep_template where "
                "{0} = %s".format(cls._id_column), (prep_id,))

            # Check if sample IDs present here but not in sample template
            sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
                   "study_id = %s")
            # Get list of study sample IDs, prep template study IDs,
            # and their intersection
            prep_samples = set(md_template.index.values)
            # Samples of the prep template that the study does not list
            unknown_samples = prep_samples.difference(
                s[0] for s in conn_handler.execute_fetchall(sql, [study.id]))
            if unknown_samples:
                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample template: '
                    '%s' % ', '.join(unknown_samples))

            # some other error we haven't seen before so raise it
            raise

        # Instantiate the new prep template and generate its files
        pt = cls(prep_id)
        pt.generate_files()

        return pt
Esempio n. 17
0
 def investigation_type(self):
     """Returns the investigation type stored for this prep template"""
     query = ("SELECT investigation_type FROM qiita.prep_template "
              "WHERE {0} = %s".format(self._id_column))
     handler = SQLConnectionHandler()
     row = handler.execute_fetchone(query, [self._id])
     return row[0]
Esempio n. 18
0
 def postgres_test(**kwargs):
     """Run a trivial query against postgres and return its single value

     The keyword arguments are accepted but not used.
     """
     from qiita_db.sql_connection import SQLConnectionHandler

     handler = SQLConnectionHandler()
     row = handler.execute_fetchone("SELECT 42")
     return row[0]
Esempio n. 19
0
 def postgres_test(**kwargs):
     """Check the postgres connection by fetching the constant 42

     The keyword arguments are accepted but not used.
     """
     from qiita_db.sql_connection import SQLConnectionHandler

     query = "SELECT 42"
     return SQLConnectionHandler().execute_fetchone(query)[0]
Esempio n. 20
0
    def update_category(self, category, samples_and_values):
        """Update an existing column

        The update queries of every sample are added to a single queue and
        executed. If the execution fails, the DB type of `category` is
        compared with the types of the new values to report type mismatches
        with a clearer error.

        Parameters
        ----------
        category : str
            The category to update
        samples_and_values : dict
            A mapping of {sample_id: value}

        Raises
        ------
        QiitaDBUnknownIDError
            If a sample_id is included in values that is not in the template
        QiitaDBColumnError
            If the column does not exist in the table. This is implicit, and
            can be thrown by the contained Samples.
        ValueError
            If one of the new values cannot be inserted in the DB due to
            different types
        QiitaDBExecutionError
            If the update fails and no type mismatch is detected
        """
        # Report the supplied sample ids that the template does not contain
        unknown_samples = set(samples_and_values) - set(self.keys())
        if unknown_samples:
            table_name = self._table_name(self._id)
            raise QiitaDBUnknownIDError(unknown_samples, table_name)

        conn_handler = SQLConnectionHandler()
        queue_name = "update_category_%s_%s" % (self._id, category)
        conn_handler.create_queue(queue_name)

        for k, v in viewitems(samples_and_values):
            sample = self[k]
            sample.add_setitem_queries(category, v, conn_handler, queue_name)

        try:
            conn_handler.execute_queue(queue_name)
        except QiitaDBExecutionError:
            # catching error so we can check if the error is due to different
            # column type or something else
            # NOTE(review): PostgreSQL usually reports integer columns with
            # udt_name 'int4'/'int8' rather than 'integer' - confirm that
            # this mapping matches what the DB returns
            type_lookup = defaultdict(lambda: 'varchar')
            type_lookup[int] = 'integer'
            type_lookup[float] = 'float8'
            type_lookup[str] = 'varchar'
            value_types = set(type_lookup[type(value)]
                              for value in viewvalues(samples_and_values))

            sql = """SELECT udt_name
                     FROM information_schema.columns
                     WHERE column_name = %s
                        AND table_schema = 'qiita'
                        AND (table_name = %s OR table_name = %s)"""
            row = conn_handler.execute_fetchone(
                sql, (category, self._table, self._table_name(self._id)))
            # execute_fetchone yields a row, not the bare value: comparing
            # the row itself against a str would always be unequal
            column_type = row[0] if row else None

            # Only blame the types when the column was actually found
            if column_type is not None and any(
                    vt != column_type for vt in value_types):
                value_str = ', '.join(
                    [str(value) for value in viewvalues(samples_and_values)])
                value_types_str = ', '.join(value_types)

                raise ValueError(
                    'The new values being added to column: "%s" are "%s" '
                    '(types: "%s"). However, this column in the DB is of '
                    'type "%s". Please change the values in your updated '
                    'template or reprocess your template.'
                    % (category, value_str, value_types_str, column_type))

            # Not a type problem: propagate the original error untouched
            raise