def test_delete_study(self):
        """Check that deleting a study fails with EBI data, then succeeds."""
        # the samples have been submitted to EBI, so the first attempt fails
        failed_job = self._create_job('delete_study', {'study': 1})
        private_task(failed_job.id)
        self.assertEqual(failed_job.status, 'error')
        exp_msg = ("Cannot delete artifact 2: Artifact 2 has been "
                   "submitted to EBI")
        self.assertIn(exp_msg, failed_job.log.msg)
        # the analysis is the first thing to delete; make sure it still exists
        self.assertTrue(Analysis.exists(1))

        # remove everything from the EBI submissions and the processing job so
        # we can try again: test success (with tags)
        with TRN:
            cleanup_queries = ("""DELETE FROM qiita.ebi_run_accession""",
                               """DELETE FROM qiita.artifact_processing_job""")
            for query in cleanup_queries:
                TRN.add(query)
            TRN.execute()

            # adding tags
            Study(1).update_tags(self.user, ['my new tag!'])

            retry_job = self._create_job('delete_study', {'study': 1})
            private_task(retry_job.id)

            self.assertEqual(retry_job.status, 'success')
            # the study no longer exists, so instantiating it must fail
            self.assertRaises(QiitaDBUnknownIDError, Study, 1)
    def test_delete_study(self):
        """Study deletion errors out with EBI data and works once removed."""
        # as samples have been submitted to EBI, the first attempt will fail
        first_try = self._create_job('delete_study', {'study': 1})
        private_task(first_try.id)
        self.assertEqual(first_try.status, 'error')
        self.assertIn("Cannot delete artifact 2: it has been "
                      "submitted to EBI", first_try.log.msg)

        # delete everything from the EBI submissions and the processing job so
        # we can try again: test success (with tags)
        with TRN:
            TRN.add("""DELETE FROM qiita.ebi_run_accession""")
            TRN.add("""DELETE FROM qiita.artifact_processing_job""")
            TRN.execute()

            # adding tags
            study_id = 1
            Study(study_id).update_tags(self.user, ['my new tag!'])

            second_try = self._create_job('delete_study', {'study': study_id})
            private_task(second_try.id)

            self.assertEqual(second_try.status, 'success')
            # the study must be gone, so instantiating it has to fail
            with self.assertRaises(QiitaDBUnknownIDError):
                Study(study_id)
    def _common_creation_steps(cls, md_template, obj_id):
        r"""Executes the common creation steps

        Within a single `TRN` context this queues the inserts for the
        template/sample table and the columns table, the creation of the
        per-object table holding the custom columns and the inserts of the
        template values in that table, and then executes all of them.

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by sample ids
        obj_id : int
            The id of the object being created
        """
        with TRN:
            cls._check_subclass()

            # Get some useful information from the metadata template
            sample_ids = md_template.index.tolist()
            headers = sorted(md_template.keys().tolist())

            # Insert values on template_sample table
            values = [[obj_id, s_id] for s_id in sample_ids]
            sql = """INSERT INTO qiita.{0} ({1}, sample_id)
                     VALUES (%s, %s)""".format(cls._table, cls._id_column)
            TRN.add(sql, values, many=True)

            # Insert rows on *_columns table
            # `headers` holds column labels, so use the label-based `.loc`
            # indexer; the `.ix` indexer is deprecated and no longer available
            # in recent pandas releases
            datatypes = get_datatypes(md_template.loc[:, headers])
            # psycopg2 requires a list of tuples, in which each tuple is a set
            # of values to use in the string formatting of the query. We have
            # all the values in different lists (but in the same order) so use
            # zip to create the list of tuples that psycopg2 requires.
            values = [[obj_id, h, d] for h, d in zip(headers, datatypes)]
            sql = """INSERT INTO qiita.{0} ({1}, column_name, column_type)
                     VALUES (%s, %s, %s)""".format(cls._column_table,
                                                   cls._id_column)
            TRN.add(sql, values, many=True)

            # Create table with custom columns
            table_name = cls._table_name(obj_id)
            column_datatype = ["%s %s" % (col, dtype)
                               for col, dtype in zip(headers, datatypes)]
            sql = """CREATE TABLE qiita.{0} (
                        sample_id varchar NOT NULL, {1},
                        CONSTRAINT fk_{0} FOREIGN KEY (sample_id)
                            REFERENCES qiita.study_sample (sample_id)
                            ON UPDATE CASCADE
                     )""".format(table_name, ', '.join(column_datatype))
            TRN.add(sql)

            # Insert values on custom table
            # one list per column, preceded by the sample ids, transposed
            # into one list per row
            values = as_python_types(md_template, headers)
            values.insert(0, sample_ids)
            values = [list(v) for v in zip(*values)]
            sql = """INSERT INTO qiita.{0} (sample_id, {1})
                     VALUES (%s, {2})""".format(
                table_name, ", ".join(headers),
                ', '.join(["%s"] * len(headers)))
            TRN.add(sql, values, many=True)

            # Execute all the steps
            TRN.execute()
Exemple #4
0
    def test_full_query_and_insertion(self):
        """Archive feature values for several jobs and query them back."""
        # let's archive different values from different jobs
        with TRN:
            # command_id 3 - close reference picking
            # processing_job_status_id 3 - success
            sql = """SELECT processing_job_id
                     FROM qiita.processing_job
                     WHERE command_id = 3 AND processing_job_status_id = 3"""
            TRN.add(sql)
            job_ids = TRN.execute_fetchflatten()

            # this is so we can also test the parent merging scheme
            # command_id 1 - split libraries
            sql = """UPDATE qiita.command_parameter
                     SET check_biom_merge = True
                     WHERE command_id = 1
                        and parameter_name = 'barcode_type'"""
            TRN.add(sql)
            TRN.execute()

            endpoint = '/qiita_db/archive/observations/'
            exp_all_features = {}
            for job_id in job_ids:
                feature_a = 'AA - %s' % job_id
                feature_b = 'BB - %s' % job_id
                inserted = {feature_a: 'CA', feature_b: 'CB'}

                # testing that nothing is there
                response = self.post(
                    endpoint, headers=self.header,
                    data={'job_id': job_id,
                          'features': [feature_a, feature_b]})
                self.assertEqual(response.code, 200)
                self.assertEqual(loads(response.body), {})

                # inserting and testing insertion
                response = self.patch(
                    endpoint, headers=self.header,
                    data={'path': job_id, 'value': dumps(inserted)})
                self.assertEqual(response.code, 200)
                self.assertEqual(loads(response.body), inserted)

                exp_all_features.update(inserted)

            # testing retrieve all features
            self.assertEqual(Archive.retrieve_feature_values(),
                             exp_all_features)

            # this doesn't exist so should be empty
            self.assertEqual(
                Archive.retrieve_feature_values(archive_merging_scheme=''),
                {})

            scheme = ('Pick closed-reference OTUs | Split '
                      'libraries FASTQ (barcode_type: golay_12)')
            self.assertEqual(
                Archive.retrieve_feature_values(archive_merging_scheme=scheme),
                exp_all_features)
    def update_category(self, category, samples_and_values):
        """Update an existing column

        Parameters
        ----------
        category : str
            The category to update
        samples_and_values : dict
            A mapping of {sample_id: value}

        Raises
        ------
        QiitaDBUnknownIDError
            If a sample_id is included in values that is not in the template
        QiitaDBColumnError
            If the column does not exist in the table. This is implicit, and
            can be thrown by the contained Samples.
        ValueError
            If one of the new values cannot be inserted in the DB due to
            different types
        """
        with TRN:
            template_samples = set(self.keys())
            if not template_samples.issuperset(samples_and_values):
                # Report the requested sample ids that the template does not
                # contain (not the template samples missing from the request)
                missing = set(samples_and_values) - template_samples
                table_name = self._table_name(self._id)
                raise QiitaDBUnknownIDError(missing, table_name)

            for k, v in viewitems(samples_and_values):
                sample = self[k]
                sample.setitem(category, v)

            try:
                TRN.execute()
            except ValueError as e:
                # catching error so we can check if the error is due to
                # different column type or something else
                new_values = list(viewvalues(samples_and_values))
                value_types = set(type_lookup(type(value))
                                  for value in new_values)

                sql = """SELECT udt_name
                         FROM information_schema.columns
                         WHERE column_name = %s
                            AND table_schema = 'qiita'
                            AND (table_name = %s OR table_name = %s)"""
                TRN.add(sql, [category, self._table,
                              self._table_name(self._id)])
                column_type = TRN.execute_fetchlast()

                if any(column_type != vt for vt in value_types):
                    value_str = ", ".join(str(value) for value in new_values)
                    value_types_str = ", ".join(value_types)

                    raise ValueError(
                        'The new values being added to column: "%s" are "%s" '
                        '(types: "%s"). However, this column in the DB is of '
                        'type "%s". Please change the values in your updated '
                        "template or reprocess your template."
                        % (category, value_str, value_types_str, column_type)
                    )

                # the types match, so the failure has another cause
                raise e
Exemple #6
0
 def _set_allocation(memory):
     """Set the allocation of the 'build_analysis_files' entry.

     The stored allocation is '-q qiita -l mem=<memory>'.
     """
     with TRN:
         allocation = '-q qiita -l mem=%s' % memory
         sql = """UPDATE qiita.processing_job_resource_allocation
                  SET allocation = '{0}'
                  WHERE name = 'build_analysis_files'""".format(allocation)
         TRN.add(sql)
         TRN.execute()
    def _update_accession_numbers(self, column, values):
        """Store in `column` the accession numbers given in `values`

        Samples whose stored accession number equals the given one are
        skipped; the remaining ones are written with a single UPDATE.

        Parameters
        ----------
        column : str
            The column name where the accession numbers are stored
        values : dict of {str: str}
            The accession numbers keyed by sample id

        Raises
        ------
        QiitaDBError
            If a sample in `values` already has a different accession number
        QiitaDBWarning
            If `values` is not updating any accession number
        """
        with TRN:
            sql = """SELECT sample_id, {0}
                     FROM qiita.{1}
                     WHERE {2}=%s
                        AND {0} IS NOT NULL""".format(column, self._table,
                                                      self._id_column)
            TRN.add(sql, [self.id])
            stored = {s_id: acc for s_id, acc in TRN.execute_fetchindex()}

            # samples that already have an accession number in the DB
            already_set = set(stored) & set(values)
            conflicting = [s_id for s_id in already_set
                           if stored[s_id] != values[s_id]]
            if conflicting:
                raise QiitaDBError(
                    "The following samples already have an accession number: "
                    "%s" % ', '.join(conflicting))

            # Work on a copy so the dictionary passed in is left untouched,
            # and drop the samples that are already stored
            values = deepcopy(values)
            for s_id in already_set:
                del values[s_id]

            if not values:
                warnings.warn("No new accession numbers to update",
                              QiitaDBWarning)
            else:
                # one "(sample_id, accession)" placeholder pair per sample
                sql_vals = ', '.join(["(%s, %s)"] * len(values))
                sql = """UPDATE qiita.{0} AS t
                         SET {1}=c.{1}
                         FROM (VALUES {2}) AS c(sample_id, {1})
                         WHERE c.sample_id = t.sample_id
                            AND t.{3} = %s
                         """.format(self._table, column, sql_vals,
                                    self._id_column)
                # flatten the (sample_id, accession) pairs, then add the id
                sql_args = list(chain.from_iterable(values.items()))
                sql_args.append(self.id)
                TRN.add(sql, sql_args)
                TRN.execute()
Exemple #8
0
    def test_context_manager_no_commit(self):
        """Rows executed inside `with TRN` are only checked as present after
        the context manager exits."""
        with TRN:
            sql = """INSERT INTO qiita.test_table (str_column, int_column)
                 VALUES (%s, %s) RETURNING str_column, int_column"""
            rows = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
            TRN.add(sql, rows, many=True)

            TRN.execute()
            # still inside the context: nothing is expected yet
            self._assert_sql_equal([])

        expected = [("insert1", True, 1),
                    ("insert2", True, 2),
                    ("insert3", True, 3)]
        self._assert_sql_equal(expected)
        status = TRN._connection.get_transaction_status()
        self.assertEqual(status, TRANSACTION_STATUS_IDLE)
Exemple #9
0
    def test_context_manager_rollback(self):
        """Leaving `with TRN` through an exception leaves nothing behind."""
        try:
            with TRN:
                sql = """INSERT INTO qiita.test_table (str_column, int_column)
                     VALUES (%s, %s) RETURNING str_column, int_column"""
                rows = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
                TRN.add(sql, rows, many=True)

                TRN.execute()
                # abandon the context manager through an exception
                raise ValueError("Force exiting the context manager")
        except ValueError:
            pass

        self._assert_sql_equal([])
        status = TRN._connection.get_transaction_status()
        self.assertEqual(status, TRANSACTION_STATUS_IDLE)
Exemple #10
0
    def delete(cls, id_):
        r"""Deletes the table from the database

        Removes the template filepath links, drops the table returned by
        `_table_name` and deletes the template rows from `_table` and
        `_column_table`, all within one `TRN` context.

        Parameters
        ----------
        id_ : integer
            The object identifier

        Raises
        ------
        QiitaDBUnknownIDError
            If no sample template with id id_ exists
        QiitaDBError
            If there are prep templates associated with the study
        """
        with TRN:
            cls._check_subclass()

            if not cls.exists(id_):
                raise QiitaDBUnknownIDError(id_, cls.__name__)

            # Refuse to delete if there is any PrepTemplate
            sql = """SELECT EXISTS(SELECT * FROM qiita.study_prep_template
                                   WHERE study_id=%s)"""
            TRN.add(sql, [id_])
            if TRN.execute_fetchlast():
                raise QiitaDBError("Sample template can not be erased because "
                                   "there are prep templates associated.")

            sql_args = [id_]

            # Delete the sample template filepaths
            sql = """DELETE FROM qiita.sample_template_filepath
                     WHERE study_id = %s"""
            TRN.add(sql, sql_args)

            # Drop the table holding the template values
            TRN.add("DROP TABLE qiita.{0}".format(cls._table_name(id_)))

            # Delete the rows that reference the template, first from the
            # main table and then from the columns table
            for table in (cls._table, cls._column_table):
                sql = "DELETE FROM qiita.{0} WHERE {1} = %s".format(
                    table, cls._id_column)
                TRN.add(sql, sql_args)

            TRN.execute()
    def add_filepath(self, filepath, fp_id=None):
        r"""Populates the DB tables for storing the filepath and connects the
        `self` objects with this filepath

        When `fp_id` is None, `self._fp_id` is used instead. Any exception is
        logged through `LogEntry.create` and then raised again.
        """
        with TRN:
            if fp_id is None:
                fp_id = self._fp_id

            try:
                inserted_ids = insert_filepaths(
                    [(filepath, fp_id)], None, "templates", "filepath",
                    move_files=False)
                fpp_id = inserted_ids[0]
                sql = """INSERT INTO qiita.{0} ({1}, filepath_id)
                         VALUES (%s, %s)""".format(
                    self._filepath_table, self._id_column
                )
                TRN.add(sql, [self._id, fpp_id])
                TRN.execute()
            except Exception as e:
                log_info = {self.__class__.__name__: self.id}
                LogEntry.create("Runtime", str(e), info=log_info)
                raise e
Exemple #12
0
 def raw_data(self, raw_data):
     """Link `raw_data` to this prep template.

     Raises
     ------
     QiitaDBError
         If the prep template already has a raw data associated
     """
     with TRN:
         # True when the raw_data_id of this prep template is already set
         sql = """SELECT (
                     SELECT raw_data_id
                     FROM qiita.prep_template
                     WHERE prep_template_id=%s)
                 IS NOT NULL"""
         TRN.add(sql, [self.id])
         already_linked = TRN.execute_fetchlast()
         if already_linked:
             raise QiitaDBError(
                 "Prep template %d already has a raw data associated"
                 % self.id)

         sql = """UPDATE qiita.prep_template
                  SET raw_data_id = %s
                  WHERE prep_template_id = %s"""
         sql_args = [raw_data.id, self.id]
         TRN.add(sql, sql_args)
         TRN.execute()
Exemple #13
0
    def test_context_manager_checker(self):
        """TRN methods raise RuntimeError when used outside a `with TRN`."""
        # none of these calls is wrapped in the context manager
        self.assertRaises(RuntimeError, TRN.add, "SELECT 42")
        self.assertRaises(RuntimeError, TRN.execute)
        self.assertRaises(RuntimeError, TRN.commit)
        self.assertRaises(RuntimeError, TRN.rollback)

        with TRN:
            TRN.add("SELECT 42")

        # once the context has been exited the check applies again
        self.assertRaises(RuntimeError, TRN.execute)
    def __setitem__(self, column, value):
        r"""Sets the metadata value for the category `column`

        Delegates to `setitem` and executes the transaction. If that fails
        with a ValueError, the DB type of the column is looked up so a more
        descriptive error can be raised when the types differ.

        Parameters
        ----------
        column : str
            The column to update
        value : str
            The value to set. This is expected to be a str on the assumption
            that psycopg2 will cast as necessary when updating.

        Raises
        ------
        ValueError
            If the value type does not match the one in the DB
        """
        with TRN:
            self.setitem(column, value)

            try:
                TRN.execute()
            except ValueError as e:
                # Find out whether the failure is due to a type mismatch
                # between the new value and the DB column, or something else
                new_type = type_lookup(type(value))

                sql = """SELECT udt_name
                         FROM information_schema.columns
                         WHERE column_name = %s
                            AND table_schema = 'qiita'
                            AND (table_name = %s OR table_name = %s)"""
                sql_args = [column, self._table, self._dynamic_table]
                TRN.add(sql, sql_args)
                db_type = TRN.execute_fetchlast()

                if db_type == new_type:
                    # same type: the failure has another cause
                    raise e

                raise ValueError(
                    'The new value being added to column: "{0}" is "{1}" '
                    '(type: "{2}"). However, this column in the DB is of '
                    'type "{3}". Please change the value in your updated '
                    'template or reprocess your template.'.format(
                        column, value, new_type, db_type))
Exemple #15
0
 def test_execute_return(self):
     """`TRN.execute` returns one result per query with RETURNING."""
     with TRN:
         sql = """INSERT INTO qiita.test_table (str_column, int_column)
                  VALUES (%s, %s) RETURNING str_column, int_column"""
         TRN.add(sql, ["test_insert", 2])
         sql = """UPDATE qiita.test_table SET bool_column = %s
                  WHERE str_column = %s RETURNING int_column"""
         TRN.add(sql, [False, "test_insert"])

         # first entry: rows of the INSERT; second entry: rows of the UPDATE
         expected = [[["test_insert", 2]], [[2]]]
         self.assertEqual(TRN.execute(), expected)
Exemple #16
0
    def test_index(self):
        """`TRN.index` follows the number of queries added so far."""
        with TRN:
            self.assertEqual(TRN.index, 0)

            # a single query moves the index by one
            TRN.add("SELECT 42")
            self.assertEqual(TRN.index, 1)

            # a `many` query moves it by one per set of arguments
            sql = "INSERT INTO qiita.test_table (int_column) VALUES (%s)"
            rows = [[1], [2], [3]]
            TRN.add(sql, rows, many=True)
            self.assertEqual(TRN.index, 4)

            # executing does not change the index
            TRN.execute()
            self.assertEqual(TRN.index, 4)

            TRN.add(sql, rows, many=True)
            self.assertEqual(TRN.index, 7)

        # back to zero once the context manager has exited
        self.assertEqual(TRN.index, 0)
Exemple #17
0
    def test_execute_huge_transaction(self):
        """A failing last query makes a large transaction raise ValueError."""
        with TRN:
            # Add a lot of inserts to the transaction
            sql = "INSERT INTO qiita.test_table (int_column) VALUES (%s)"
            for value in range(1000):
                TRN.add(sql, [value])
            # Add some updates to the transaction
            sql = """UPDATE qiita.test_table SET bool_column = %s
                     WHERE int_column = %s"""
            for value in range(500):
                TRN.add(sql, [False, value])
            # Make the transaction fail with the last insert
            sql = """INSERT INTO qiita.table_to_make (the_trans_to_fail)
                     VALUES (1)"""
            TRN.add(sql)

            self.assertRaises(ValueError, TRN.execute)

            # make sure it rolled back correctly
            self._assert_sql_equal([])
Exemple #18
0
    def preprocessing_status(self, state):
        r"""Update the preprocessing status

        Parameters
        ----------
        state : str
            The current status of preprocessing: one of 'not_preprocessed',
            'preprocessing' or 'success', or a string starting with 'failed:'

        Raises
        ------
        ValueError
            If the state is not known.
        """
        known_states = ('not_preprocessed', 'preprocessing', 'success')
        # failures are accepted with any text after the 'failed:' prefix
        is_valid = state in known_states or state.startswith('failed:')
        if not is_valid:
            raise ValueError('Unknown state: %s' % state)

        with TRN:
            sql = """UPDATE qiita.prep_template SET preprocessing_status = %s
                     WHERE {0} = %s""".format(self._id_column)
            sql_args = [state, self.id]
            TRN.add(sql, sql_args)
            TRN.execute()
Exemple #19
0
    def investigation_type(self, investigation_type):
        r"""Update the investigation type

        A value of None skips the validation and is stored as is.

        Parameters
        ----------
        investigation_type : str
            The investigation type to set, should be part of the ENA ontology

        Raises
        ------
        QiitaDBColumnError
            If the investigation type is not a valid ENA ontology
        """
        with TRN:
            needs_validation = investigation_type is not None
            if needs_validation:
                self.validate_investigation_type(investigation_type)

            sql = """UPDATE qiita.prep_template SET investigation_type = %s
                     WHERE {0} = %s""".format(self._id_column)
            sql_args = [investigation_type, self.id]
            TRN.add(sql, sql_args)
            TRN.execute()
Exemple #20
0
def create_rarefaction_job(depth, biom_artifact_id, analysis, srare_cmd_id):
    """Create a new rarefaction job

    Inserts a row in the processing job table and links the new job with
    the input artifact.

    Parameters
    ----------
    depth : int
        The rarefaction depth
    biom_artifact_id : int
        The artifact id of the input rarefaction biom table
    analysis : dict
        Dictionary with the analysis information; its 'email' entry is used
    srare_cmd_id : int
        The command id of the single rarefaction command

    Returns
    -------
    job_id : str
        The job id, as returned by the INSERT statement
    params : str
        The job parameters
    """
    # Parameters stored in the row of the processing job table
    params = ('{"depth":%d,"subsample_multinomial":false,"biom_table":%s}' %
              (depth, biom_artifact_id))
    # magic number 3: status -> success
    success_status_id = 3
    with TRN:
        sql = """INSERT INTO qiita.processing_job
                    (email, command_id, command_parameters,
                     processing_job_status_id)
                 VALUES (%s, %s, %s, %s)
                 RETURNING processing_job_id"""
        job_args = [analysis['email'], srare_cmd_id, params,
                    success_status_id]
        TRN.add(sql, job_args)
        job_id = TRN.execute_fetchlast()

        # Step 1.2.b: Link the job with the input artifact
        sql = """INSERT INTO qiita.artifact_processing_job
                    (artifact_id, processing_job_id)
                 VALUES (%s, %s)"""
        link_args = [biom_artifact_id, job_id]
        TRN.add(sql, link_args)
        TRN.execute()
    return job_id, params
Exemple #21
0
def create_rarefaction_job(depth, biom_artifact_id, analysis, srare_cmd_id):
    """Create a new rarefaction job

    Registers the job in the processing job table and attaches the input
    artifact to it.

    Parameters
    ----------
    depth : int
        The rarefaction depth
    biom_artifact_id : int
        The artifact id of the input rarefaction biom table
    analysis : dict
        Dictionary with the analysis information; its 'email' entry is used
    srare_cmd_id : int
        The command id of the single rarefaction command

    Returns
    -------
    job_id : str
        The job id, as returned by the INSERT statement
    params : str
        The job parameters
    """
    # Build the parameters stored with the processing job
    params_template = (
        '{"depth":%d,"subsample_multinomial":false,"biom_table":%s}')
    params = params_template % (depth, biom_artifact_id)
    with TRN:
        # Add the row in the processing job table
        # magic number 3: status -> success
        sql = """INSERT INTO qiita.processing_job
                    (email, command_id, command_parameters,
                     processing_job_status_id)
                 VALUES (%s, %s, %s, %s)
                 RETURNING processing_job_id"""
        owner_email = analysis['email']
        TRN.add(sql, [owner_email, srare_cmd_id, params, 3])
        job_id = TRN.execute_fetchlast()

        # Step 1.2.b: Link the job with the input artifact
        sql = """INSERT INTO qiita.artifact_processing_job
                    (artifact_id, processing_job_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [biom_artifact_id, job_id])
        TRN.execute()

    result = (job_id, params)
    return result
Exemple #22
0
    def test_execute_commit_false_wipe_queries(self):
        """Executed queries are removed from the pending query list."""
        with TRN:
            sql = """INSERT INTO qiita.test_table (str_column, int_column)
                     VALUES (%s, %s) RETURNING str_column, int_column"""
            rows = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
            TRN.add(sql, rows, many=True)

            returned = TRN.execute()
            self.assertEqual(
                returned,
                [[["insert1", 1]], [["insert2", 2]], [["insert3", 3]]])

            self._assert_sql_equal([])

            sql = """UPDATE qiita.test_table SET bool_column = %s
                     WHERE str_column = %s"""
            update_args = [False, "insert2"]
            TRN.add(sql, update_args)
            # only the query added after the execute call is pending
            self.assertEqual(TRN._queries, [(sql, update_args)])

            TRN.execute()
            self._assert_sql_equal([])

        expected = [("insert1", True, 1),
                    ("insert3", True, 3),
                    ("insert2", False, 2)]
        self._assert_sql_equal(expected)
Exemple #23
0
    def test_execute(self):
        """Queries without RETURNING yield None for each executed query."""
        with TRN:
            sql = """INSERT INTO qiita.test_table (str_column, int_column)
                     VALUES (%s, %s)"""
            TRN.add(sql, ["test_insert", 2])
            sql = """UPDATE qiita.test_table
                     SET int_column = %s, bool_column = %s
                     WHERE str_column = %s"""
            TRN.add(sql, [20, False, "test_insert"])

            results = TRN.execute()
            self.assertEqual(results, [None, None])
            self._assert_sql_equal([])

        # checked again once the context manager has exited
        expected = [("test_insert", False, 20)]
        self._assert_sql_equal(expected)
Exemple #24
0
    def test_execute_commit_false_rollback(self):
        """An explicit rollback after execute leaves nothing to be found."""
        with TRN:
            sql = """INSERT INTO qiita.test_table (str_column, int_column)
                     VALUES (%s, %s) RETURNING str_column, int_column"""
            rows = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
            TRN.add(sql, rows, many=True)

            returned = TRN.execute()
            self.assertEqual(
                returned,
                [[["insert1", 1]], [["insert2", 2]], [["insert3", 3]]])

            # same check before and after the rollback
            self._assert_sql_equal([])

            TRN.rollback()

            self._assert_sql_equal([])
Exemple #25
0
    def test_execute_many(self):
        """A `many` query contributes one result per set of arguments."""
        with TRN:
            sql = """INSERT INTO qiita.test_table (str_column, int_column)
                     VALUES (%s, %s)"""
            rows = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
            TRN.add(sql, rows, many=True)
            sql = """UPDATE qiita.test_table
                     SET int_column = %s, bool_column = %s
                     WHERE str_column = %s"""
            TRN.add(sql, [20, False, "insert2"])

            # three inserts plus one update, none of them returning rows
            results = TRN.execute()
            self.assertEqual(results, [None, None, None, None])

            self._assert_sql_equal([])

        expected = [("insert1", True, 1),
                    ("insert3", True, 3),
                    ("insert2", False, 20)]
        self._assert_sql_equal(expected)
Exemple #26
0
 def test_execute_return_many(self):
     """Results of `many`, plain and SELECT queries come back in order."""
     with TRN:
         sql = """INSERT INTO qiita.test_table (str_column, int_column)
                  VALUES (%s, %s) RETURNING str_column, int_column"""
         rows = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
         TRN.add(sql, rows, many=True)
         sql = """UPDATE qiita.test_table SET bool_column = %s
                  WHERE str_column = %s"""
         TRN.add(sql, [False, "insert2"])
         sql = "SELECT * FROM qiita.test_table"
         TRN.add(sql)

         select_rows = [
             ["insert1", True, 1],  # First result select
             ["insert3", True, 3],  # Second result select
             ["insert2", False, 2],  # Third result select
         ]
         expected = [
             [["insert1", 1]],  # First query of the many query
             [["insert2", 2]],  # Second query of the many query
             [["insert3", 3]],  # Third query of the many query
             None,  # Update query
             select_rows,  # Select query
         ]
         self.assertEqual(TRN.execute(), expected)
Exemple #27
0
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from qiita_db.sql_connection import TRN


# Due to the size of these changes, the columns are altered in one
# transaction per table (one `with TRN` block per table) instead of in a
# single transaction covering every table.
with TRN:
    # select the distinct names of the tables matching 'sample_%' or 'prep_%'
    # whose name does not contain 'template'
    sql = """SELECT DISTINCT table_name FROM information_schema.columns
                WHERE (table_name LIKE 'sample_%'
                       OR table_name LIKE 'prep_%')
                    AND table_name NOT LIKE '%template%'"""
    TRN.add(sql)

    all_tables = TRN.execute_fetchflatten()

for table in all_tables:
    with TRN:
        # retrieve the column names of the current table
        sql = """SELECT column_name FROM information_schema.columns
                    WHERE table_name = %s
                    ORDER BY column_name"""
        TRN.add(sql, [table])

        # queue one ALTER statement per column, changing its type to VARCHAR;
        # table and column names are interpolated into the SQL text
        for column in TRN.execute_fetchflatten():
            sql = "ALTER TABLE qiita.%s ALTER COLUMN %s TYPE VARCHAR" % (
                table, column)
            TRN.add(sql)

        # run all the statements queued for this table
        TRN.execute()
Exemple #28
0
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from random import SystemRandom
from string import ascii_letters, digits

from qiita_db.sql_connection import TRN

# Characters used to build the random credentials: ASCII letters and digits
pool = ascii_letters + digits
# 50-character client id and 255-character client secret, each character
# drawn with SystemRandom
client_id = ''.join([SystemRandom().choice(pool) for _ in range(50)])
client_secret = ''.join([SystemRandom().choice(pool) for _ in range(255)])

with TRN:
    # Store the generated credentials
    sql = """INSERT INTO qiita.oauth_identifiers (client_id, client_secret)
             VALUES (%s, %s)"""
    TRN.add(sql, [client_id, client_secret])

    # Link the client id with the software whose software_id is 1
    sql = """INSERT INTO qiita.oauth_software (software_id, client_id)
             VALUES (%s, %s)"""
    TRN.add(sql, [1, client_id])
    TRN.execute()
Exemple #29
0
def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
    """Builds and registers the non-rarefied BIOM artifact of an analysis

    Parameters
    ----------
    analysis : dict
        The analysis information; the keys 'analysis_id' and 'timestamp'
        are read
    biom_data : dict
        The biom file information; the key 'data_type_id' is read
    rarefied_table : biom.Table
        The rarefied BIOM table, used to find out if the sample ids of the
        analysis were renamed

    Returns
    -------
    int
        The id of the new artifact
    """
    # The non-rarefied table is the initial table of the analysis but it is
    # not stored anywhere, so it is rebuilt here by merging the BIOM table of
    # every source artifact, restricted to the samples selected in the
    # analysis. Whether the samples were renamed is not recorded in the
    # database, so it is inferred from the ids of the rarefied table.
    with TRN:
        # analysis_sample does not know about data types, hence the join with
        # the artifact table to keep only the artifacts of this data type
        sql = """SELECT artifact_id, array_agg(sample_id)
                 FROM qiita.analysis_sample
                    JOIN qiita.artifact USING (artifact_id)
                 WHERE analysis_id = %s AND data_type_id = %s
                 GROUP BY artifact_id"""
        TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
        artifact_samples = TRN.execute_fetchindex()

        # Start from an empty table and merge the per-artifact tables into it
        merged = Table([], [], [])
        prefixed_ids = {}
        for a_id, sample_ids in artifact_samples:
            # If the artifact has more than one 'biom' filepath the last one
            # wins. The artifact is expected to always have one, so the None
            # fallback is not handled any further
            biom_fps = [fp for _, fp, fp_type in Artifact(a_id).filepaths
                        if fp_type == 'biom']
            biom_fp = biom_fps[-1] if biom_fps else None

            table = load_table(biom_fp)
            keep = set(sample_ids).intersection(table.ids())
            table.filter(keep, axis='sample', inplace=True)

            # Nothing to merge if the filtering emptied the table
            n_rows, n_cols = table.shape
            if n_rows == 0 or n_cols == 0:
                continue

            merged = merged.merge(table)
            for sid in table.ids():
                prefixed_ids[sid] = "%d.%s" % (a_id, sid)

        # If the rarefied table has ids that the merged table lacks, the
        # samples were renamed, so apply the "<artifact id>.<sample id>" names
        unmatched = set(rarefied_table.ids()).difference(merged.ids())
        if unmatched:
            # NOTE(review): the positional arguments are presumably axis,
            # strict and inplace - confirm against biom's Table.update_ids
            merged.update_ids(prefixed_ids, 'sample', True, True)

        sql = """INSERT INTO qiita.artifact
                    (generated_timestamp, data_type_id, visibility_id,
                     artifact_type_id, submitted_to_vamps)
            VALUES (%s, %s, %s, %s, %s)
            RETURNING artifact_id"""
        # Magic number 4 -> visibility sandbox
        # Magic number 7 -> biom artifact type
        artifact_values = [analysis['timestamp'], biom_data['data_type_id'],
                           4, 7, False]
        TRN.add(sql, artifact_values)
        artifact_id = TRN.execute_fetchlast()

        # Attach the new artifact to the analysis
        sql = """INSERT INTO qiita.analysis_artifact
                    (analysis_id, artifact_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [analysis['analysis_id'], artifact_id])

        # Write the merged table in a directory named after the artifact id,
        # under the first BIOM mountpoint
        dd_id, mp = get_mountpoint('BIOM')[0]
        dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
        if not exists(dir_fp):
            makedirs(dir_fp)
        new_table_fp = join(dir_fp, "biom_table.biom")
        with biom_open(new_table_fp, 'w') as biom_file:
            merged.to_hdf5(biom_file, "Generated by Qiita")

        sql = """INSERT INTO qiita.filepath
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING filepath_id"""
        # Magic number 7 -> filepath_type_id = 'biom'
        # Magic number 1 -> the checksum algorithm id
        filepath_values = [basename(new_table_fp), 7,
                           compute_checksum(new_table_fp), 1, dd_id]
        TRN.add(sql, filepath_values)
        fp_id = TRN.execute_fetchlast()

        # Link the artifact with its file
        sql = """INSERT INTO qiita.artifact_filepath
                    (artifact_id, filepath_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [artifact_id, fp_id])
        TRN.execute()

    return artifact_id
Exemple #30
0
def create_command(software, name, description, parameters, outputs=None,
                   analysis_only=False):
    r"""Replicates the Command.create code at the time the patch was written

    Parameters
    ----------
    software : object
        The software owning the command; its `id` and `type` attributes
        are read
    name : str
        The name of the command
    description : str
        The description of the command
    parameters : dict
        The command parameters, as {param_name: [parameter_type, default]}
    outputs : dict, optional
        The command outputs, as {output_name: artifact_type}
    analysis_only : bool, optional
        Value stored in the `is_analysis` column of the new command

    Returns
    -------
    Command
        The command that has just been added to the database

    Raises
    ------
    QiitaDBError
        If no parameters are given, if a parameter definition does not have
        exactly two values, if a parameter type is not supported or if the
        default of a choice/mchoice parameter is not one of its choices
    QiitaDBDuplicateError
        If the software already has a command with the given name
    """
    if not parameters:
        raise QiitaDBError(
            "Error creating command %s. At least one parameter should "
            "be provided." % name)

    # Types matched literally vs. types matched by prefix (these carry extra
    # information after a ':')
    plain_types = ['string', 'integer', 'float', 'reference',
                   'boolean', 'prep_template', 'analysis']
    prefixed_types = ['choice', 'mchoice', 'artifact']

    # Rows of [name, type, required, default] for the regular parameters and
    # of [name, 'artifact', artifact types] for the artifact ones
    regular_params = []
    artifact_params = []
    for pname, vals in parameters.items():
        if len(vals) != 2:
            raise QiitaDBError(
                "Malformed parameters dictionary, the format should be "
                "{param_name: [parameter_type, default]}. Found: "
                "%s for parameter name %s" % (vals, pname))
        ptype, dflt = vals

        # Validate the parameter type
        if ptype not in plain_types and not ptype.startswith(
                ('choice', 'mchoice', 'artifact')):
            raise QiitaDBError(
                "Unsupported parameters type '%s' for parameter %s. "
                "Supported types are: %s"
                % (ptype, pname, ', '.join(plain_types + prefixed_types)))

        # Validate the default value of the choice/mchoice parameters
        if ptype.startswith(('choice', 'mchoice')) and dflt is not None:
            choices = set(loads(ptype.split(':')[1]))
            if ptype.startswith('choice'):
                # a single value: wrap it in a list for the superset check
                selected = [dflt]
            else:
                # a list of values: it is stored JSON-encoded in the DB
                selected = dflt
                dflt = dumps(dflt)
            if not choices.issuperset(selected):
                raise QiitaDBError(
                    "The default value '%s' for the parameter %s is not "
                    "listed in the available choices: %s"
                    % (dflt, pname, ', '.join(choices)))

        if ptype.startswith('artifact'):
            artifact_params.append(
                [pname, 'artifact', loads(ptype.split(':')[1])])
        else:
            # A parameter is required exactly when it has no default value
            regular_params.append([pname, ptype, dflt is None, dflt])

    with TRN:
        # Command names are unique within a software
        sql_exists = """SELECT EXISTS(SELECT *
                               FROM qiita.software_command
                               WHERE software_id = %s AND name = %s)"""
        TRN.add(sql_exists, [software.id, name])
        if TRN.execute_fetchlast():
            raise QiitaDBDuplicateError(
                "command", "software: %d, name: %s"
                           % (software.id, name))

        # Add the command to the DB
        sql_command = """INSERT INTO qiita.software_command
                        (name, software_id, description, is_analysis)
                 VALUES (%s, %s, %s, %s)
                 RETURNING command_id"""
        TRN.add(sql_command,
                [name, software.id, description, analysis_only])
        c_id = TRN.execute_fetchlast()

        # Add the regular parameters to the DB; the same statement is reused
        # below, one artifact parameter at a time
        sql_parameter = """INSERT INTO qiita.command_parameter
                    (command_id, parameter_name, parameter_type, required,
                     default_value)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING command_parameter_id"""
        TRN.add(sql_parameter,
                [[c_id] + list(row) for row in regular_params], many=True)
        TRN.execute()

        # Add the artifact parameters, keeping track of every artifact type
        # id that gets linked to them
        sql_type = """INSERT INTO qiita.parameter_artifact_type
                        (command_parameter_id, artifact_type_id)
                      VALUES (%s, %s)"""
        linked_type_ids = []
        for pname, p_type, atypes in artifact_params:
            TRN.add(sql_parameter, [c_id, pname, p_type, True, None])
            pid = TRN.execute_fetchlast()
            type_ids = [convert_to_id(at, 'artifact_type') for at in atypes]
            TRN.add(sql_type, [[pid, atid] for atid in type_ids], many=True)
            linked_type_ids.extend(type_ids)

        # The 'artifact definition' software needs a couple of extra steps
        if software.type == 'artifact definition':
            # Link the software with the artifact types found above, if any
            if linked_type_ids:
                sql_software = """INSERT INTO qiita.software_artifact_type
                                (software_id, artifact_type_id)
                            VALUES (%s, %s)"""
                TRN.add(sql_software,
                        [[software.id, atid] for atid in linked_type_ids],
                        many=True)
            # The validate command gets the provenance and name parameters,
            # which are used internally
            if name == 'Validate':
                sql_validate = """INSERT INTO qiita.command_parameter
                            (command_id, parameter_name, parameter_type,
                             required, default_value)
                         VALUES (%s, 'name', 'string', 'False',
                                 'dflt_name'),
                                (%s, 'provenance', 'string', 'False', NULL)
                         """
                TRN.add(sql_validate, [c_id, c_id])

        # Add the outputs to the command
        if outputs:
            sql_output = """INSERT INTO qiita.command_output
                        (name, command_id, artifact_type_id)
                     VALUES (%s, %s, %s)"""
            output_rows = [[oname, c_id, convert_to_id(at, 'artifact_type')]
                           for oname, at in outputs.items()]
            TRN.add(sql_output, output_rows, many=True)
            TRN.execute()

    return Command(c_id)
Exemple #31
0
def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
    """Builds and registers the non-rarefied BIOM artifact of an analysis

    Parameters
    ----------
    analysis : dict
        The analysis information; the keys 'analysis_id' and 'timestamp'
        are read
    biom_data : dict
        The biom file information; the key 'data_type_id' is read
    rarefied_table : biom.Table
        The rarefied BIOM table, used to find out if the sample ids of the
        analysis were renamed

    Returns
    -------
    int
        The id of the new artifact
    """
    # The non-rarefied table is the initial table of the analysis but it is
    # not stored anywhere, so it is rebuilt here by merging the BIOM table of
    # every source artifact, restricted to the samples selected in the
    # analysis. Whether the samples were renamed is not recorded in the
    # database, so it is inferred from the ids of the rarefied table.
    with TRN:
        # analysis_sample does not know about data types, hence the join with
        # the artifact table to keep only the artifacts of this data type
        sql = """SELECT artifact_id, array_agg(sample_id)
                 FROM qiita.analysis_sample
                    JOIN qiita.artifact USING (artifact_id)
                 WHERE analysis_id = %s AND data_type_id = %s
                 GROUP BY artifact_id"""
        TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
        artifact_samples = TRN.execute_fetchindex()

        # Start from an empty table and merge the per-artifact tables into it
        merged = Table([], [], [])
        prefixed_ids = {}
        for a_id, sample_ids in artifact_samples:
            # If the artifact has more than one 'biom' filepath the last one
            # wins. The artifact is expected to always have one, so the None
            # fallback is not handled any further
            biom_fps = [fp for _, fp, fp_type in Artifact(a_id).filepaths
                        if fp_type == 'biom']
            biom_fp = biom_fps[-1] if biom_fps else None

            table = load_table(biom_fp)
            keep = set(sample_ids).intersection(table.ids())
            table.filter(keep, axis='sample', inplace=True)

            # Nothing to merge if the filtering emptied the table
            n_rows, n_cols = table.shape
            if n_rows == 0 or n_cols == 0:
                continue

            merged = merged.merge(table)
            for sid in table.ids():
                prefixed_ids[sid] = "%d.%s" % (a_id, sid)

        # If the rarefied table has ids that the merged table lacks, the
        # samples were renamed, so apply the "<artifact id>.<sample id>" names
        unmatched = set(rarefied_table.ids()).difference(merged.ids())
        if unmatched:
            # NOTE(review): the positional arguments are presumably axis,
            # strict and inplace - confirm against biom's Table.update_ids
            merged.update_ids(prefixed_ids, 'sample', True, True)

        sql = """INSERT INTO qiita.artifact
                    (generated_timestamp, data_type_id, visibility_id,
                     artifact_type_id, submitted_to_vamps)
            VALUES (%s, %s, %s, %s, %s)
            RETURNING artifact_id"""
        # Magic number 4 -> visibility sandbox
        # Magic number 7 -> biom artifact type
        artifact_values = [analysis['timestamp'], biom_data['data_type_id'],
                           4, 7, False]
        TRN.add(sql, artifact_values)
        artifact_id = TRN.execute_fetchlast()

        # Attach the new artifact to the analysis
        sql = """INSERT INTO qiita.analysis_artifact
                    (analysis_id, artifact_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [analysis['analysis_id'], artifact_id])

        # Write the merged table in a directory named after the artifact id,
        # under the first BIOM mountpoint
        dd_id, mp = get_mountpoint('BIOM')[0]
        dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
        if not exists(dir_fp):
            makedirs(dir_fp)
        new_table_fp = join(dir_fp, "biom_table.biom")
        with biom_open(new_table_fp, 'w') as biom_file:
            merged.to_hdf5(biom_file, "Generated by Qiita")

        sql = """INSERT INTO qiita.filepath
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING filepath_id"""
        # Magic number 7 -> filepath_type_id = 'biom'
        # Magic number 1 -> the checksum algorithm id
        filepath_values = [basename(new_table_fp), 7,
                           compute_checksum(new_table_fp), 1, dd_id]
        TRN.add(sql, filepath_values)
        fp_id = TRN.execute_fetchlast()

        # Link the artifact with its file
        sql = """INSERT INTO qiita.artifact_filepath
                    (artifact_id, filepath_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [artifact_id, fp_id])
        TRN.execute()

    return artifact_id
Exemple #32
0
    def test_retrive_workflows(self):
        golay_name = 'Split libraries | Defaults with Golay 12 barcodes'

        def second_workflow_names():
            # third element of every node of the second retrieved workflow
            return [node[2]
                    for node in _retrive_workflows(False)[1]['nodes']]

        # with workflow 2 deactivated, we should still see all 3 workflows
        # when retrieving with False
        DefaultWorkflow(2).active = False
        exp = deepcopy(WORKFLOWS)
        self.assertCountEqual(_retrive_workflows(False), exp)

        # the params_name is not used while name_order is unset ...
        self.assertNotIn(golay_name, second_workflow_names())
        # ... it shows up once name_order is set ...
        with TRN:
            # Hard-coded values; 19 -> barcode_type
            sql = """UPDATE qiita.command_parameter
                     SET name_order = 0
                     WHERE command_parameter_id = 19"""
            TRN.add(sql)
            TRN.execute()
        self.assertIn(golay_name, second_workflow_names())
        # ... and it is gone again after resetting name_order
        with TRN:
            sql = """UPDATE qiita.command_parameter
                     SET name_order = NULL
                     WHERE command_parameter_id = 19"""
            TRN.add(sql)
            TRN.execute()
        self.assertNotIn(golay_name, second_workflow_names())

        # retrieving with True should not return the middle workflow
        del exp[1]
        self.assertCountEqual(_retrive_workflows(True), exp)

        # Now a couple of more complex scenarios, so all the code is touched:
        # multiple paths that should connect and get separate.
        # -- a new path that should be kept separate all the way; this is to
        #    emulate what happens with different trimming (different default
        #    parameter) and deblur (same for each of the previous steps)
        sql = """
            INSERT INTO qiita.default_workflow_node (
                default_workflow_id, default_parameter_set_id)
            VALUES (1, 2), (1, 10);
            INSERT INTO qiita.default_workflow_edge (
                parent_id, child_id)
            VALUES (7, 8);
            INSERT INTO qiita.default_workflow_edge_connections (
                default_workflow_edge_id, parent_output_id, child_input_id)
            VALUES (4, 1, 3)"""
        perform_as_transaction(sql)
        # -- a new path that should be kept together and then separate; this
        #    is to simulate what happens with MTX/WGS processing, one single
        #    QC step (together) and 2 separate profilers
        sql = """
            INSERT INTO qiita.default_parameter_set (
                command_id, parameter_set_name, parameter_set)
            VALUES (3, '100%',
                    ('{"reference":1,"sortmerna_e_value":1,'
                     || '"sortmerna_max_pos":'
                     || '10000,"similarity":1.0,"sortmerna_coverage":1.00,'
                     || '"threads":1}')::json);
            INSERT INTO qiita.default_workflow_node (
                default_workflow_id, default_parameter_set_id)
            VALUES (2, 17);
            INSERT INTO qiita.default_workflow_edge (
                parent_id, child_id)
            VALUES (3, 9);
            INSERT INTO qiita.default_workflow_edge_connections (
                default_workflow_edge_id, parent_output_id, child_input_id)
            VALUES (5, 1, 3)"""
        perform_as_transaction(sql)

        # build the expected values: the base workflows plus the new paths
        exp = deepcopy(WORKFLOWS)
        obs = _retrive_workflows(False)

        first_new_nodes = [
            ['params_7', 1, 'Split libraries FASTQ',
             'Defaults with reverse complement mapping file barcodes',
             {'max_bad_run_length': '3',
              'min_per_read_length_fraction': '0.75',
              'sequence_max_n': '0',
              'rev_comp_barcode': 'False',
              'rev_comp_mapping_barcodes': 'True',
              'rev_comp': 'False',
              'phred_quality_threshold': '3',
              'barcode_type': 'golay_12',
              'max_barcode_errors': '1.5',
              'phred_offset': 'auto'}],
            ['output_params_7_demultiplexed | Demultiplexed', 1,
             'demultiplexed | Demultiplexed'],
            ['params_8', 3, 'Pick closed-reference OTUs', 'Defaults',
             {'reference': '1',
              'sortmerna_e_value': '1',
              'sortmerna_max_pos': '10000',
              'similarity': '0.97',
              'sortmerna_coverage': '0.97',
              'threads': '1'}],
            ['output_params_8_OTU table | BIOM', 3, 'OTU table | BIOM']]
        first_new_edges = [
            ['input_params_1_FASTQ | per_sample_FASTQ', 'params_7'],
            ['params_7', 'output_params_7_demultiplexed | Demultiplexed'],
            ['output_params_7_demultiplexed | Demultiplexed', 'params_8'],
            ['params_8', 'output_params_8_OTU table | BIOM']]
        second_new_nodes = [
            ['params_9', 3, 'Pick closed-reference OTUs', '100%',
             {'reference': '1',
              'sortmerna_e_value': '1',
              'sortmerna_max_pos': '10000',
              'similarity': '1.0',
              'sortmerna_coverage': '1.0',
              'threads': '1'}],
            ['output_params_9_OTU table | BIOM', 3, 'OTU table | BIOM']]
        second_new_edges = [
            ['output_params_3_demultiplexed | Demultiplexed', 'params_9'],
            ['params_9', 'output_params_9_OTU table | BIOM']]

        exp[0]['nodes'].extend(first_new_nodes)
        exp[0]['edges'].extend(first_new_edges)
        exp[1]['nodes'].extend(second_new_nodes)
        exp[1]['edges'].extend(second_new_edges)
        self.assertCountEqual(obs, exp)
Exemple #33
0
def create_command(software, name, description, parameters, outputs=None,
                   analysis_only=False):
    r"""Replicates the Command.create code at the time the patch was written

    Parameters
    ----------
    software : object
        The software owning the command; its `id` and `type` attributes
        are read
    name : str
        The name of the command
    description : str
        The description of the command
    parameters : dict
        The command parameters, as {param_name: [parameter_type, default]}
    outputs : dict, optional
        The command outputs, as {output_name: artifact_type}
    analysis_only : bool, optional
        Value stored in the `is_analysis` column of the new command

    Returns
    -------
    Command
        The command that has just been added to the database

    Raises
    ------
    QiitaDBError
        If no parameters are given, if a parameter definition does not have
        exactly two values, if a parameter type is not supported or if the
        default of a choice/mchoice parameter is not one of its choices
    QiitaDBDuplicateError
        If the software already has a command with the given name
    """
    if not parameters:
        raise QiitaDBError(
            "Error creating command %s. At least one parameter should "
            "be provided." % name)

    # Types matched literally vs. types matched by prefix (these carry extra
    # information after a ':')
    plain_types = ['string', 'integer', 'float', 'reference',
                   'boolean', 'prep_template', 'analysis']
    prefixed_types = ['choice', 'mchoice', 'artifact']

    # Rows of [name, type, required, default] for the regular parameters and
    # of [name, 'artifact', artifact types] for the artifact ones
    regular_params = []
    artifact_params = []
    for pname, vals in parameters.items():
        if len(vals) != 2:
            raise QiitaDBError(
                "Malformed parameters dictionary, the format should be "
                "{param_name: [parameter_type, default]}. Found: "
                "%s for parameter name %s" % (vals, pname))
        ptype, dflt = vals

        # Validate the parameter type
        if ptype not in plain_types and not ptype.startswith(
                ('choice', 'mchoice', 'artifact')):
            raise QiitaDBError(
                "Unsupported parameters type '%s' for parameter %s. "
                "Supported types are: %s"
                % (ptype, pname, ', '.join(plain_types + prefixed_types)))

        # Validate the default value of the choice/mchoice parameters
        if ptype.startswith(('choice', 'mchoice')) and dflt is not None:
            choices = set(loads(ptype.split(':')[1]))
            if ptype.startswith('choice'):
                # a single value: wrap it in a list for the superset check
                selected = [dflt]
            else:
                # a list of values: it is stored JSON-encoded in the DB
                selected = dflt
                dflt = dumps(dflt)
            if not choices.issuperset(selected):
                raise QiitaDBError(
                    "The default value '%s' for the parameter %s is not "
                    "listed in the available choices: %s"
                    % (dflt, pname, ', '.join(choices)))

        if ptype.startswith('artifact'):
            artifact_params.append(
                [pname, 'artifact', loads(ptype.split(':')[1])])
        else:
            # A parameter is required exactly when it has no default value
            regular_params.append([pname, ptype, dflt is None, dflt])

    with TRN:
        # Command names are unique within a software
        sql_exists = """SELECT EXISTS(SELECT *
                               FROM qiita.software_command
                               WHERE software_id = %s AND name = %s)"""
        TRN.add(sql_exists, [software.id, name])
        if TRN.execute_fetchlast():
            raise QiitaDBDuplicateError(
                "command", "software: %d, name: %s"
                           % (software.id, name))

        # Add the command to the DB
        sql_command = """INSERT INTO qiita.software_command
                        (name, software_id, description, is_analysis)
                 VALUES (%s, %s, %s, %s)
                 RETURNING command_id"""
        TRN.add(sql_command,
                [name, software.id, description, analysis_only])
        c_id = TRN.execute_fetchlast()

        # Add the regular parameters to the DB; the same statement is reused
        # below, one artifact parameter at a time
        sql_parameter = """INSERT INTO qiita.command_parameter
                    (command_id, parameter_name, parameter_type, required,
                     default_value)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING command_parameter_id"""
        TRN.add(sql_parameter,
                [[c_id] + list(row) for row in regular_params], many=True)
        TRN.execute()

        # Add the artifact parameters, keeping track of every artifact type
        # id that gets linked to them
        sql_type = """INSERT INTO qiita.parameter_artifact_type
                        (command_parameter_id, artifact_type_id)
                      VALUES (%s, %s)"""
        linked_type_ids = []
        for pname, p_type, atypes in artifact_params:
            TRN.add(sql_parameter, [c_id, pname, p_type, True, None])
            pid = TRN.execute_fetchlast()
            type_ids = [convert_to_id(at, 'artifact_type') for at in atypes]
            TRN.add(sql_type, [[pid, atid] for atid in type_ids], many=True)
            linked_type_ids.extend(type_ids)

        # The 'artifact definition' software needs a couple of extra steps
        if software.type == 'artifact definition':
            # Link the software with the artifact types found above, if any
            if linked_type_ids:
                sql_software = """INSERT INTO qiita.software_artifact_type
                                (software_id, artifact_type_id)
                            VALUES (%s, %s)"""
                TRN.add(sql_software,
                        [[software.id, atid] for atid in linked_type_ids],
                        many=True)
            # The validate command gets the provenance and name parameters,
            # which are used internally
            if name == 'Validate':
                sql_validate = """INSERT INTO qiita.command_parameter
                            (command_id, parameter_name, parameter_type,
                             required, default_value)
                         VALUES (%s, 'name', 'string', 'False',
                                 'dflt_name'),
                                (%s, 'provenance', 'string', 'False', NULL)
                         """
                TRN.add(sql_validate, [c_id, c_id])

        # Add the outputs to the command
        if outputs:
            sql_output = """INSERT INTO qiita.command_output
                        (name, command_id, artifact_type_id)
                     VALUES (%s, %s, %s)"""
            output_rows = [[oname, c_id, convert_to_id(at, 'artifact_type')]
                           for oname, at in outputs.items()]
            TRN.add(sql_output, output_rows, many=True)
            TRN.execute()

    return Command(c_id)
    def update(self, md_template):
        r"""Update values in the template

        Only the cells whose value differs from the one stored in the
        database are written, in a single UPDATE statement; the template
        files are regenerated afterwards.

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples ids

        Raises
        ------
        QiitaDBError
            If md_template has sample ids that are not in the db
            If md_template has column headers that are not in the db
            If self.can_be_updated is False for the modified columns
        QiitaDBWarning
            If there are no differences between the contents of the DB and the
            passed md_template
        """
        with TRN:
            # Clean and validate the metadata template given
            new_map = self._clean_validate_template(
                md_template, self.study_id, self.columns_restrictions,
                current_columns=self.categories())
            # Retrieving current metadata
            current_map = self.to_dataframe()

            # simple validations of sample ids and column names
            samples_diff = set(new_map.index).difference(current_map.index)
            if samples_diff:
                raise QiitaDBError(
                    'The new template differs from what is stored '
                    'in database by these samples names: %s'
                    % ', '.join(samples_diff))

            if not set(current_map.columns).issuperset(new_map.columns):
                columns_diff = set(new_map.columns).difference(
                    current_map.columns)
                raise QiitaDBError(
                    'Some of the columns in your template are not present in '
                    'the system. Use "extend" if you want to add more columns '
                    'to the template. Missing columns: %s'
                    % ', '.join(columns_diff))

            # In order to speed up some computation, let's compare only the
            # common columns and rows. current_map.columns and
            # current_map.index are supersets of new_map.columns and
            # new_map.index, respectively, so this will not fail
            current_map = current_map[new_map.columns].loc[new_map.index]

            # Get the values that we need to change
            # diff_map is a DataFrame that holds boolean values. If a cell is
            # True, means that the new_map is different from the current_map
            # while False means that the cell has the same value
            diff_map = current_map != new_map
            # ne_stacked holds a MultiIndexed Series in which the first
            # level of indexing is the sample_name and the second one is the
            # columns. Each value tells if that (sample, column) pair has
            # been modified or not (i.e. cell)
            ne_stacked = diff_map.stack()
            # by using ne_stacked to index itself, we get only the cells
            # that did change (see boolean indexing in pandas docs)
            changed = ne_stacked[ne_stacked]
            if changed.empty:
                warnings.warn(
                    "There are no differences between the data stored in the "
                    "DB and the new data provided",
                    QiitaDBWarning)
                return

            changed.index.names = ['sample_name', 'column']
            # the combination of np.where and boolean indexing produces
            # a numpy array with only the values that actually changed
            # between the current_map and new_map
            changed_to = new_map.values[np.where(diff_map)]

            # to_update is a MultiIndexed DataFrame, in which the index 0 is
            # the samples and the index 1 is the columns, we define these
            # variables here so we don't put magic numbers across the code
            sample_idx = 0
            col_idx = 1
            to_update = pd.DataFrame({'to': changed_to}, index=changed.index)

            # MultiIndex.labels was renamed to MultiIndex.codes in pandas
            # 0.24 and removed in pandas 1.0, so use whichever is available
            index_codes = getattr(to_update.index, 'codes', None)
            if index_codes is None:
                index_codes = to_update.index.labels

            # Get the columns that we need to change
            indices = list(set(index_codes[col_idx]))
            cols_to_update = to_update.index.levels[col_idx][indices]

            if not self.can_be_updated(columns=set(cols_to_update)):
                raise QiitaDBError(
                    'The new template is modifying fields that cannot be '
                    'modified. Try removing the restricted fields or '
                    'deleting the processed data. You are trying to modify: %s'
                    % ', '.join(cols_to_update))

            # Get the samples that we need to change
            indices = list(set(index_codes[sample_idx]))
            samples_to_update = to_update.index.levels[sample_idx][indices]

            # Column names are identifiers, so they are interpolated in the
            # statement; the values are passed as query parameters
            sql_eq_cols = ', '.join(
                ["{0} = c.{0}".format(col) for col in cols_to_update])
            # We add 1 because we need to add the sample name
            single_value = "(%s)" % ', '.join(
                ["%s"] * (len(cols_to_update) + 1))
            sql_values = ', '.join([single_value] * len(samples_to_update))
            sql_cols = ', '.join(cols_to_update)

            sql = """UPDATE qiita.{0} AS t SET
                        {1}
                     FROM (VALUES {2})
                        AS c(sample_id, {3})
                     WHERE c.sample_id = t.sample_id
                    """.format(self._table_name(self._id), sql_eq_cols,
                               sql_values, sql_cols)
            # One flat list of arguments: for each sample, its id followed by
            # the new value of every updated column, in the same column
            # order used to build the statement above
            sql_args = []
            for sample in samples_to_update:
                sample_vals = [cast_to_python(new_map[col][sample])
                               for col in cols_to_update]
                sample_vals.insert(0, sample)
                sql_args.extend(sample_vals)

            TRN.add(sql, sql_args)
            TRN.execute()

            self.generate_files()
Exemple #35
0
    def create(cls, md_template, study, data_type, investigation_type=None):
        r"""Stores a new prep template in the database and returns it

        Parameters
        ----------
        md_template : DataFrame
            The contents of the metadata template, indexed by sample id
        study : Study
            The study that will own the prep template
        data_type : str or int
            The data type of the prep template, given either by name or by id
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        A new instance of `cls` built from the id of the inserted prep template

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        QiitaDBExecutionError
            If the common creation steps fail and md_template holds samples
            that are not listed in qiita.study_sample for the study
        """
        with TRN:
            # Only validate the investigation type when one was provided
            if investigation_type is not None:
                cls.validate_investigation_type(investigation_type)

            # data_type can be the id or the name: resolve the missing half
            given_as_id = isinstance(data_type, (int, long))
            if given_as_id:
                dt_id = data_type
                dt_name = convert_from_id(data_type, "data_type")
            else:
                dt_id = convert_to_id(data_type, "data_type")
                dt_name = data_type

            # Target gene data types get the extra target gene columns; work
            # on a copy so the module-level definition is left untouched
            if dt_name in TARGET_GENE_DATA_TYPES:
                restrictions = deepcopy(PREP_TEMPLATE_COLUMNS)
                restrictions.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
            else:
                restrictions = PREP_TEMPLATE_COLUMNS

            clean_md = cls._clean_validate_template(md_template, study.id,
                                                    restrictions)

            # Register the prep template and retrieve the id assigned to it
            insert_sql = """INSERT INTO qiita.prep_template
                        (data_type_id, investigation_type)
                     VALUES (%s, %s)
                     RETURNING prep_template_id"""
            TRN.add(insert_sql, [dt_id, investigation_type])
            new_id = TRN.execute_fetchlast()

            try:
                cls._common_creation_steps(clean_md, new_id)
            except Exception:
                # A failure here may be due to samples that the study does
                # not know about; if so, report those instead
                study_samples_sql = """SELECT sample_id from qiita.study_sample
                         WHERE study_id = %s"""
                TRN.add(study_samples_sql, [study.id])
                in_prep = set(clean_md.index.values)
                missing = in_prep.difference(TRN.execute_fetchflatten())
                if not missing:
                    # some other error we haven't seen before so raise it
                    raise

                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample '
                    'template: %s' % ', '.join(missing))

            # Attach the new prep template to its study
            link_sql = """INSERT INTO qiita.study_prep_template
                        (study_id, prep_template_id)
                     VALUES (%s, %s)"""
            TRN.add(link_sql, [study.id, new_id])

            TRN.execute()

            new_template = cls(new_id)
            new_template.generate_files()

            return new_template
    def _common_extend_steps(self, md_template):
        r"""Adds the new columns and/or samples of md_template to the template

        Columns present in `md_template` but not in the stored template are
        added first, and their values are stored for the samples that already
        exist. Then the samples that are not stored yet are inserted with all
        the columns of `md_template`. Values that are already stored are never
        modified. If there are neither new columns nor new samples, nothing is
        done.

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by sample ids

        Raises
        ------
        QiitaDBError
            If `can_be_extended` reports that the new samples and/or columns
            cannot be added

        Warns
        -----
        QiitaDBWarning
            If new columns are added to the template
            If new samples are added to the template
        """
        with TRN:
            # Check if we are adding new samples
            sample_ids = md_template.index.tolist()
            curr_samples = set(self.keys())
            existing_samples = curr_samples.intersection(sample_ids)
            new_samples = set(sample_ids).difference(existing_samples)

            # Check if we are adding new columns
            headers = md_template.keys().tolist()
            new_cols = set(headers).difference(self.categories())

            # Nothing to extend
            if not new_cols and not new_samples:
                return

            is_extendable, error_msg = self.can_be_extended(new_samples,
                                                            new_cols)

            if not is_extendable:
                raise QiitaDBError(error_msg)

            table_name = self._table_name(self._id)
            if new_cols:
                warnings.warn(
                    "The following columns have been added to the existing"
                    " template: %s" % ", ".join(new_cols),
                    QiitaDBWarning)
                # If we are adding new columns, add them first (simplifies
                # code). Sorting the new columns to enforce an order
                new_cols = sorted(new_cols)
                # Label-based selection of the new columns; .loc is used
                # because the .ix indexer is deprecated/removed in pandas
                datatypes = get_datatypes(md_template.loc[:, new_cols])
                sql_cols = """INSERT INTO qiita.{0}
                                    ({1}, column_name, column_type)
                              VALUES (%s, %s, %s)""".format(self._column_table,
                                                            self._id_column)
                sql_alter = """ALTER TABLE qiita.{0} ADD COLUMN {1} {2}"""
                # NOTE(review): the column name is interpolated into the DDL
                # as-is; presumably the headers of md_template are validated
                # before reaching this method -- confirm
                for category, dtype in zip(new_cols, datatypes):
                    TRN.add(sql_cols, [self._id, category, dtype])
                    TRN.add(sql_alter.format(table_name, category, dtype))

                if existing_samples:
                    # Fix the order of the existing samples once, so the rows
                    # extracted from md_template and the sample ids used in
                    # the WHERE clause are guaranteed to be aligned (a set
                    # has no defined order and is not a valid .loc indexer in
                    # recent pandas versions)
                    existing_ids = sorted(existing_samples)
                    # The values for the new columns are the only ones that get
                    # added to the database. None of the existing values will
                    # be modified (see update for that functionality)
                    min_md_template = \
                        md_template[new_cols].loc[existing_ids]
                    values = as_python_types(min_md_template, new_cols)
                    values.append(existing_ids)
                    # psycopg2 requires a list of iterable, in which each
                    # iterable is a set of values to use in the string
                    # formatting of the query. We have all the values in
                    # different lists (but in the same order) so use zip to
                    # create the list of iterable that psycopg2 requires.
                    values = [list(v) for v in zip(*values)]
                    set_str = ["{0} = %s".format(col) for col in new_cols]
                    sql = """UPDATE qiita.{0}
                             SET {1}
                             WHERE sample_id=%s""".format(table_name,
                                                          ",".join(set_str))
                    TRN.add(sql, values, many=True)

            if new_samples:
                warnings.warn(
                    "The following samples have been added to the existing"
                    " template: %s" % ", ".join(new_samples),
                    QiitaDBWarning)
                new_samples = sorted(new_samples)
                # At this point we only want the information
                # from the new samples
                md_template = md_template.loc[new_samples]

                # Insert values on required columns
                values = [[self._id, s_id] for s_id in new_samples]
                sql = """INSERT INTO qiita.{0} ({1}, sample_id)
                         VALUES (%s, %s)""".format(self._table,
                                                   self._id_column)
                TRN.add(sql, values, many=True)

                # Insert values on custom table: one row per new sample, with
                # the sample id first followed by every column of md_template
                values = as_python_types(md_template, headers)
                values.insert(0, new_samples)
                values = [list(v) for v in zip(*values)]
                sql = """INSERT INTO qiita.{0} (sample_id, {1})
                         VALUES (%s, {2})""".format(
                    table_name, ", ".join(headers),
                    ', '.join(["%s"] * len(headers)))
                TRN.add(sql, values, many=True)

            # Execute all the steps
            TRN.execute()
Exemple #37
0
    def delete(cls, id_):
        r"""Removes the prep template `id_` and its dynamic table from the DB

        Parameters
        ----------
        id_ : obj
            The identifier of the prep template to remove

        Raises
        ------
        QiitaDBUnknownIDError
            If no prep template with id = id_ exists
        QiitaDBExecutionError
            If preprocessed data has already been generated from the prep
            template
            If the prep template has raw data attached
        """
        with TRN:
            dyn_table = cls._table_name(id_)

            if not cls.exists(id_):
                raise QiitaDBUnknownIDError(id_, cls.__name__)

            # The id is the only value needed by every parameterized query
            params = [id_]

            # First guard: preprocessed data derived from this prep template
            preprocessed_sql = """SELECT EXISTS(
                        SELECT * FROM qiita.prep_template_preprocessed_data
                        WHERE prep_template_id=%s)"""
            TRN.add(preprocessed_sql, params)
            if TRN.execute_fetchlast():
                raise QiitaDBExecutionError(
                    "Cannot remove prep template %d because a preprocessed "
                    "data has been already generated using it." % id_)

            # Second guard: raw data linked to this prep template
            raw_data_sql = """SELECT (
                        SELECT raw_data_id
                        FROM qiita.prep_template
                        WHERE prep_template_id=%s)
                    IS NOT NULL"""
            TRN.add(raw_data_sql, params)
            if TRN.execute_fetchlast():
                raise QiitaDBExecutionError(
                    "Cannot remove prep template %d because it has raw data "
                    "associated with it" % id_)

            # Both guards passed: queue the removal, starting with the
            # filepaths of the prep template
            filepath_sql = """DELETE FROM qiita.prep_template_filepath
                     WHERE prep_template_id = %s"""
            TRN.add(filepath_sql, params)

            # The dynamic prep_X table goes away entirely
            TRN.add("DROP TABLE qiita.{0}".format(dyn_table))

            # Remaining row removals, in order: prep_template_samples,
            # prep_columns, study_prep_template and finally prep_template
            row_removals = (
                "DELETE FROM qiita.{0} WHERE {1} = %s".format(
                    cls._table, cls._id_column),
                "DELETE FROM qiita.{0} where {1} = %s".format(
                    cls._column_table, cls._id_column),
                """DELETE FROM qiita.study_prep_template
                     WHERE {0} = %s""".format(cls._id_column),
                "DELETE FROM qiita.prep_template WHERE {0} = %s".format(
                    cls._id_column),
            )
            for statement in row_removals:
                TRN.add(statement, params)

            TRN.execute()