Example #1
    def class_modifier(cls):
        # First, we check that we are not in a production environment
        conn_handler = SQLConnectionHandler()
        # It is possible that we are connecting to a production database
        test_db = conn_handler.execute_fetchone("SELECT test FROM settings")[0]
        # Or the loaded configuration file belongs to a production environment
        # or the test database is not qiita_test
        if not qiita_config.test_environment or not test_db \
                or qiita_config.database != 'qiita_test':
            raise RuntimeError("Working in a production environment. Not "
                               "executing the tests to keep the production "
                               "database safe.")

        # Now, we decorate the setup and teardown functions
        class DecoratedClass(cls):
            def setUp(self):
                super(DecoratedClass, self).setUp()
                self.conn_handler = SQLConnectionHandler()

            @reset_test_database
            def tearDown(self):
                super(DecoratedClass, self).tearDown()
                del self.conn_handler

        return DecoratedClass
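A brief usage note: class_modifier above is the inner function of a class-decorator factory, so it is applied by decorating a unittest TestCase. The sketch below is illustrative only; the factory name qiita_test_checker, its import path, and the test class and method names are assumptions (they are not shown in this example), and the test body simply reuses the conn_handler that the decorated setUp attaches.

    # Hypothetical usage sketch -- assumes the factory wrapping class_modifier
    # is exposed as qiita_test_checker(); substitute the real name if it differs.
    from unittest import TestCase, main

    from qiita_core.util import qiita_test_checker  # assumed import path


    @qiita_test_checker()
    class ExampleDBTest(TestCase):
        def test_settings_flag(self):
            # self.conn_handler is attached by the decorated setUp above
            obs = self.conn_handler.execute_fetchone(
                "SELECT test FROM settings")[0]
            self.assertTrue(obs)

    if __name__ == '__main__':
        main()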
Example #2
    def __getitem__(self, key):
        r"""Returns the value of the metadata category `key`

        Parameters
        ----------
        key : str
            The metadata category

        Returns
        -------
        obj
            The value of the metadata category `key`

        Raises
        ------
        KeyError
            If the metadata category `key` does not exist

        See Also
        --------
        get
        """
        conn_handler = SQLConnectionHandler()
        key = key.lower()
        if key not in self._get_categories(conn_handler):
            # The key is not available for the sample, so raise a KeyError
            raise KeyError("Metadata category %s does not exists for sample %s"
                           " in template %d" %
                           (key, self._id, self._md_template.id))

        sql = """SELECT {0} FROM qiita.{1}
                 WHERE sample_id=%s""".format(key, self._dynamic_table)

        return conn_handler.execute_fetchone(sql, (self._id, ))[0]
Example #3
    def class_modifier(cls):
        # First, we check that we are not in a production environment
        conn_handler = SQLConnectionHandler()
        # It is possible that we are connecting to a production database
        test_db = conn_handler.execute_fetchone("SELECT test FROM settings")[0]
        # Or the loaded configuration file belongs to a production environment
        # or the test database is not qiita_test
        if not qiita_config.test_environment or not test_db \
                or qiita_config.database != 'qiita_test':
            raise RuntimeError("Working in a production environment. Not "
                               "executing the tests to keep the production "
                               "database safe.")

        # Now, we decorate the setup and teardown functions
        class DecoratedClass(cls):
            @build_test_database
            def setUp(self):
                super(DecoratedClass, self).setUp()
                self.conn_handler = SQLConnectionHandler()

            @drop_test_database
            def tearDown(self):
                super(DecoratedClass, self).tearDown()
                del self.conn_handler

        return DecoratedClass
Example #4
 def preprocessed_data(self):
     conn_handler = SQLConnectionHandler()
     prep_datas = conn_handler.execute_fetchall(
         "SELECT preprocessed_data_id FROM "
         "qiita.prep_template_preprocessed_data WHERE prep_template_id=%s",
         (self.id,))
     return [x[0] for x in prep_datas]
Example #5
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
        # Test that the run prefixes in the prep_template and the file names
        # actually match and raise an error if not
        conn_handler = SQLConnectionHandler()
        sql = ("""
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1,
                5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 19);
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (20, '1_error.sff', 17, 852952723,
                1, 5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 20);
            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
            UPDATE qiita.prep_1 SET run_prefix='new' WHERE
                sample_id = '1.SKB8.640193';
        """)
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        with self.assertRaises(ValueError):
            _get_preprocess_fasta_cmd(raw_data, prep_template, params)
Example #6
    def status(self):
        """The status of the prep template

        Returns
        -------
        str
            The status of the prep template

        Notes
        -----
        The status of a prep template is inferred by the status of the
        processed data generated from this prep template. If no processed
        data has been generated with this prep template, then the status
        is 'sandbox'.
        """
        conn_handler = SQLConnectionHandler()
        sql = """SELECT processed_data_status
                FROM qiita.processed_data_status pds
                  JOIN qiita.processed_data pd
                    USING (processed_data_status_id)
                  JOIN qiita.preprocessed_processed_data ppd_pd
                    USING (processed_data_id)
                  JOIN qiita.prep_template_preprocessed_data pt_ppd
                    USING (preprocessed_data_id)
                WHERE pt_ppd.prep_template_id=%s"""
        pd_statuses = conn_handler.execute_fetchall(sql, (self._id,))

        return infer_status(pd_statuses)
Example #7
    def get_filepaths(self):
        r"""Retrieves the list of (filepath_id, filepath)"""
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = SQLConnectionHandler()

        try:
            filepath_ids = conn_handler.execute_fetchall(
                "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
                "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
                "{1}=%s) ORDER BY filepath_id DESC".format(
                    self._filepath_table, self._id_column),
                (self.id, ))
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            raise e

        _, fb = get_mountpoint('templates')[0]
        base_fp = partial(join, fb)

        return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
Example #8
    def test_huge_queue(self):
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        # add tons of inserts to queue
        for x in range(120):
            self.conn_handler.add_to_queue(
                "toy_queue",
                "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
                "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        # add failing insert as final item in queue
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_BADTABLE (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(obs, [])
Example #9
    def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
        # Need to alter the run_prefix of one sample so we can test the
        # multiple values
        conn_handler = SQLConnectionHandler()
        sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
               "sample_id = '1.SKM9.640192'")
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw_data, prep_template, params)

        obs_cmds = obs_cmd.split('; ')
        # assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
        # working, we only need to test for the commands being run and
        # that n is valid
        self.assertEqual(len(obs_cmds), 8)
        self.assertTrue(obs_cmds[0].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[1].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[2].startswith('split_libraries.py'))
        self.assertIn('-n 1', obs_cmds[2])
        self.assertTrue(obs_cmds[3].startswith('split_libraries.py'))
        self.assertIn('-n 800000', obs_cmds[3])
        self.assertTrue(obs_cmds[4].startswith('cat'))
        self.assertIn('split_library_log.txt', obs_cmds[4])
        self.assertTrue(obs_cmds[5].startswith('cat'))
        self.assertIn('seqs.fna', obs_cmds[5])
        self.assertTrue(obs_cmds[6].startswith('cat'))
        self.assertIn('seqs_filtered.qual', obs_cmds[6])
Example #10
    def _to_dict(self):
        r"""Returns the categories and their values in a dictionary

        Returns
        -------
        dict of {str: str}
            A dictionary of the form {category: value}
        """
        conn_handler = SQLConnectionHandler()
        d = dict(conn_handler.execute_fetchone(
            "SELECT * FROM qiita.{0} WHERE {1}=%s AND "
            "sample_id=%s".format(self._table, self._id_column),
            (self._md_template.id, self._id)))
        dynamic_d = dict(conn_handler.execute_fetchone(
            "SELECT * from qiita.{0} WHERE "
            "sample_id=%s".format(self._dynamic_table),
            (self._id, )))
        d.update(dynamic_d)
        del d['sample_id']
        del d[self._id_column]
        d.pop('study_id', None)

        # Modify all the *_id columns to include the string instead of the id
        for k, v in viewitems(self._md_template.translate_cols_dict):
            d[v] = self._md_template.str_cols_handlers[k][d[k]]
            del d[k]
        return d
Example #11
    def to_dataframe(self):
        """Returns the metadata template as a dataframe

        Returns
        -------
        pandas DataFrame
            The metadata in the template, indexed on sample id
        """
        conn_handler = SQLConnectionHandler()
        cols = get_table_cols(self._table, conn_handler)
        if 'study_id' in cols:
            cols.remove('study_id')
        dyncols = get_table_cols(self._table_name(self._id), conn_handler)
        # remove sample_id from dyncols so not repeated
        dyncols.remove('sample_id')
        # Get all metadata for the template
        sql = """SELECT {0}, {1} FROM qiita.{2} req
            INNER JOIN qiita.{3} dyn on req.sample_id = dyn.sample_id
            WHERE req.{4} = %s""".format(
            ", ".join("req.%s" % c for c in cols),
            ", ".join("dyn.%s" % d for d in dyncols),
            self._table, self._table_name(self._id), self._id_column)
        meta = conn_handler.execute_fetchall(sql, [self._id])
        cols = cols + dyncols

        # Create the dataframe and clean it up a bit
        df = pd.DataFrame((list(x) for x in meta), columns=cols)
        df.set_index('sample_id', inplace=True, drop=True)
        # Turn id cols to value cols
        for col, value in viewitems(self.str_cols_handlers):
            df[col].replace(value, inplace=True)
        df.rename(columns=self.translate_cols_dict, inplace=True)

        return df
Example #12
    def create(cls, md_template, study):
        r"""Creates the sample template in the database

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids
        study : Study
            The study to which the sample template belongs to.
        """
        cls._check_subclass()

        # Check that we don't have a MetadataTemplate for study
        if cls.exists(study.id):
            raise QiitaDBDuplicateError(cls.__name__, 'id: %d' % study.id)

        conn_handler = SQLConnectionHandler()
        queue_name = "CREATE_SAMPLE_TEMPLATE_%d" % study.id
        conn_handler.create_queue(queue_name)

        # Clean and validate the metadata template given
        md_template = cls._clean_validate_template(md_template, study.id,
                                                   study.id, conn_handler)

        cls._add_common_creation_steps_to_queue(md_template, study.id,
                                                conn_handler, queue_name)

        conn_handler.execute_queue(queue_name)

        st = cls(study.id)
        st.generate_files()

        return st
Example #13
    def test_get_qiime_minimal_mapping_single_reverse_primer(self):
        conn_handler = SQLConnectionHandler()
        sql = """INSERT INTO qiita.prep_columns
                        (prep_template_id, column_name, column_type)
                    VALUES (1, 'reverselinkerprimer', 'varchar');
                 ALTER TABLE qiita.prep_1
                    ADD COLUMN reverselinkerprimer varchar;
                 DELETE FROM qiita.prep_columns
                 WHERE prep_template_id = 1 AND column_name = 'run_prefix';
                 ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix;
                 UPDATE qiita.prep_1 SET reverselinkerprimer = %s
                 """
        conn_handler.execute(sql, ('GTGCCAGCM',))
        prep_template = PrepTemplate(1)
        prep_template.generate_files()
        out_dir = mkdtemp()

        obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
        exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        self.assertTrue(exists(exp_fps[0]))
        # Check the contents of the file
        with open(exp_fps[0], "U") as f:
            self.assertEqual(f.read(), EXP_PREP_RLP)
Example #14
    def update(self, md_template):
        r"""Update values in the template

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids

        Raises
        ------
        QiitaDBError
            If md_template and db do not have the same sample ids
            If md_template and db do not have the same column headers
            If self.can_be_updated is not True
        """
        conn_handler = SQLConnectionHandler()

        # Clean and validate the metadata template given
        new_map = self._clean_validate_template(md_template, self.study_id,
                                                self.columns_restrictions)
        # Retrieving current metadata
        current_map = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))
        current_map = pd.DataFrame.from_dict(current_map, orient='index')

        # simple validations of sample ids and column names
        samples_diff = set(new_map.index).difference(current_map.index)
        if samples_diff:
            raise QiitaDBError('The new template differs from what is stored '
                               'in database by these samples names: %s'
                               % ', '.join(samples_diff))
        columns_diff = set(new_map.columns).difference(current_map.columns)
        if columns_diff:
            raise QiitaDBError('The new template differs from what is stored '
                               'in database by these columns names: %s'
                               % ', '.join(columns_diff))

        # here we are comparing two dataframes following:
        # http://stackoverflow.com/a/17095620/4228285
        current_map.sort(axis=0, inplace=True)
        current_map.sort(axis=1, inplace=True)
        new_map.sort(axis=0, inplace=True)
        new_map.sort(axis=1, inplace=True)
        map_diff = (current_map != new_map).stack()
        map_diff = map_diff[map_diff]
        map_diff.index.names = ['id', 'column']
        changed_cols = map_diff.index.get_level_values('column').unique()

        if not self.can_be_updated(columns=set(changed_cols)):
            raise QiitaDBError('The new template is modifying fields that '
                               'cannot be modified. Try removing the target '
                               'gene fields or deleting the processed data. '
                               'You are trying to modify: %s'
                               % ', '.join(changed_cols))

        for col in changed_cols:
            self.update_category(col, new_map[col].to_dict())

        self.generate_files()
Example #15
 def raw_data(self):
     conn_handler = SQLConnectionHandler()
     result = conn_handler.execute_fetchone(
         "SELECT raw_data_id FROM qiita.prep_template "
         "WHERE prep_template_id=%s", (self.id,))
     if result:
         return result[0]
     return None
Example #16
    def __getitem__(self, key):
        r"""Returns the value of the metadata category `key`

        Parameters
        ----------
        key : str
            The metadata category

        Returns
        -------
        obj
            The value of the metadata category `key`

        Raises
        ------
        KeyError
            If the metadata category `key` does not exist

        See Also
        --------
        get
        """
        conn_handler = SQLConnectionHandler()
        key = key.lower()
        if key in self._get_categories(conn_handler):
            # It's possible that the key is asking for one of the *_id columns
            # that we have to do the translation
            def handler(x):
                return x

            # prevent flake8 from complaining about the function not being
            # used and a redefinition happening in the next few lines
            handler(None)

            if key in self._md_template.translate_cols_dict.values():
                handler = (
                    lambda x: self._md_template.str_cols_handlers[key][x])
                key = "%s_id" % key
            # Check if we have either to query the table with required columns
            # or the dynamic table
            if key in get_table_cols(self._table, conn_handler):
                result = conn_handler.execute_fetchone(
                    "SELECT {0} FROM qiita.{1} WHERE {2}=%s AND "
                    "sample_id=%s".format(key, self._table, self._id_column),
                    (self._md_template.id, self._id))[0]
                return handler(result)
            else:
                return conn_handler.execute_fetchone(
                    "SELECT {0} FROM qiita.{1} WHERE "
                    "sample_id=%s".format(key, self._dynamic_table),
                    (self._id, ))[0]
        else:
            # The key is not available for the sample, so raise a KeyError
            raise KeyError("Metadata category %s does not exists for sample %s"
                           " in template %d" %
                           (key, self._id, self._md_template.id))
Example #17
    def _check_id(self, id_):
        r"""Checks that the MetadataTemplate id_ exists on the database"""
        self._check_subclass()

        conn_handler = SQLConnectionHandler()

        return conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.{0} WHERE "
            "{1}=%s)".format(self._table, self._id_column),
            (id_, ))[0]
Example #18
    def test_init(self):
        obs = SQLConnectionHandler()
        self.assertEqual(obs.admin, "no_admin")
        self.assertEqual(obs.queues, {})
        self.assertTrue(isinstance(obs._connection, connection))
        self.assertEqual(self.conn_handler._user_conn.closed, 0)

        # Let's close the connection and make sure that it gets reopened
        obs.close()
        obs = SQLConnectionHandler()
        self.assertEqual(self.conn_handler._user_conn.closed, 0)
Example #19
    def preprocessing_status(self):
        r"""Tells if the data has been preprocessed or not

        Returns
        -------
        str
            One of {'not_preprocessed', 'preprocessing', 'success', 'failed'}
        """
        conn_handler = SQLConnectionHandler()
        return conn_handler.execute_fetchone(
            "SELECT preprocessing_status FROM qiita.prep_template "
            "WHERE {0}=%s".format(self._id_column), (self.id,))[0]
Example #20
    def metadata_headers():
        """Returns metadata headers available

        Returns
        -------
        list
            Alphabetical list of all metadata headers available
        """
        conn_handler = SQLConnectionHandler()
        return [x[0] for x in
                conn_handler.execute_fetchall(
                "SELECT DISTINCT column_name FROM qiita.study_sample_columns "
                "ORDER BY column_name")]
Example #21
 def setUp(self):
     if self.database:
         self.conn_handler = SQLConnectionHandler()
         # Drop the schema
         self.conn_handler.execute("DROP SCHEMA qiita CASCADE")
         # Create the schema
         with open(LAYOUT_FP, 'U') as f:
             self.conn_handler.execute(f.read())
         # Initialize the database
         with open(INITIALIZE_FP, 'U') as f:
             self.conn_handler.execute(f.read())
         # Populate the database
         with open(POPULATE_FP, 'U') as f:
             self.conn_handler.execute(f.read())
     super(TestHandlerBase, self).setUp()
Example #22
    def metadata_headers():
        """Returns metadata headers available

        Returns
        -------
        list
            Alphabetical list of all metadata headers available
        """
        conn_handler = SQLConnectionHandler()
        return [x[0] for x in
                conn_handler.execute_fetchall(
                "SELECT DISTINCT column_name FROM qiita.study_sample_columns "
                "UNION SELECT column_name FROM information_schema.columns "
                "WHERE table_name = 'required_sample_info' "
                "ORDER BY column_name")]
Example #23
    def test_huge_queue(self):
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        # add tons of inserts to queue
        for x in range(120):
            self.conn_handler.add_to_queue(
                "toy_queue",
                "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
                "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        # add failing insert as final item in queue
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_BADTABLE (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(obs, [])
Example #24
 def raw_data(self, raw_data):
     conn_handler = SQLConnectionHandler()
     sql = """SELECT (
                 SELECT raw_data_id
                 FROM qiita.prep_template
                 WHERE prep_template_id=%s)
             IS NOT NULL"""
     exists = conn_handler.execute_fetchone(sql, (self.id,))[0]
     if exists:
         raise QiitaDBError(
             "Prep template %d already has a raw data associated"
             % self.id)
     sql = """UPDATE qiita.prep_template
              SET raw_data_id = %s
              WHERE prep_template_id = %s"""
     conn_handler.execute(sql, (raw_data.id, self.id))
Example #25
    def __call__(self, searchstr, user):
        """Runs a Study query and returns matching studies and samples

        Parameters
        ----------
        searchstr : str
            Search string to use
        user : User object
            User making the search. Needed for permissions checks.

        Returns
        -------
        dict
            Found samples in format
            {study_id: [[samp_id1, meta1, meta2, ...],
                        [samp_id2, meta1, meta2, ...], ...}
        list
            metadata column names searched for

        Notes
        -----
        Metadata information for each sample is in the same order as the
        metadata columns list returned

        Metadata column names and string searches are case-sensitive
        """
        study_sql, sample_sql, meta_headers = \
            self._parse_study_search_string(searchstr, True)
        conn_handler = SQLConnectionHandler()
        # get all studies containing the metadata headers requested
        study_ids = {x[0] for x in conn_handler.execute_fetchall(study_sql)}
        # strip to only studies user has access to
        if user.level not in {'admin', 'dev', 'superuser'}:
            study_ids = study_ids.intersection(Study.get_by_status('public') |
                                               user.user_studies |
                                               user.shared_studies)

        results = {}
        # run search on each study to get out the matching samples
        for sid in study_ids:
            study_res = conn_handler.execute_fetchall(sample_sql.format(sid))
            if study_res:
                # only add study to results if actually has samples in results
                results[sid] = study_res
        self.results = results
        self.meta_headers = meta_headers
        return results, meta_headers
Example #26
    def study_id(self):
        """Gets the study id with which this prep template is associated

        Returns
        -------
        int
            The ID of the study with which this prep template is associated
        """
        conn = SQLConnectionHandler()
        sql = ("SELECT study_id FROM qiita.study_prep_template "
               "WHERE prep_template_id=%s")
        study_id = conn.execute_fetchone(sql, (self.id,))
        if study_id:
            return study_id[0]
        else:
            raise QiitaDBError("No studies found associated with prep "
                               "template ID %d" % self._id)
Example #27
    def __call__(self, searchstr, user):
        """Runs a Study query and returns matching studies and samples

        Parameters
        ----------
        searchstr : str
            Search string to use
        user : User object
            User making the search. Needed for permissions checks.

        Returns
        -------
        dict
            Found samples in format
            {study_id: [[samp_id1, meta1, meta2, ...],
                        [samp_id2, meta1, meta2, ...], ...}
        list
            metadata column names searched for

        Notes
        -----
        Metadata information for each sample is in the same order as the
        metadata columns list returned

        Metadata column names and string searches are case-sensitive
        """
        study_sql, sample_sql, meta_headers = \
            self._parse_study_search_string(searchstr, True)
        conn_handler = SQLConnectionHandler()
        # get all studies containing the metadata headers requested
        study_ids = {x[0] for x in conn_handler.execute_fetchall(study_sql)}
        # strip to only studies user has access to
        if user.level not in {'admin', 'dev', 'superuser'}:
            study_ids = study_ids.intersection(
                Study.get_by_status('public') + user.user_studies +
                user.shared_studies)

        results = {}
        # run search on each study to get out the matching samples
        for sid in study_ids:
            study_res = conn_handler.execute_fetchall(sample_sql.format(sid))
            if study_res:
                # only add study to results if actually has samples in results
                results[sid] = study_res
        return results, meta_headers
Example #28
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
        # Should raise error
        conn_handler = SQLConnectionHandler()
        sql = ("""
            UPDATE qiita.prep_1 SET run_prefix='test1';
            UPDATE qiita.prep_1 SET run_prefix='test2' WHERE
                sample_id = '1.SKB2.640194';
            UPDATE qiita.prep_1 SET run_prefix='error' WHERE
                sample_id = '1.SKB8.640193';
        """)
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        with self.assertRaises(ValueError):
            _get_preprocess_fasta_cmd(raw_data, prep_template, params)
Example #29
    def _to_dict(self):
        r"""Returns the categories and their values in a dictionary

        Returns
        -------
        dict of {str: str}
            A dictionary of the form {category: value}
        """
        conn_handler = SQLConnectionHandler()
        d = dict(conn_handler.execute_fetchone(
            "SELECT * from qiita.{0} WHERE "
            "sample_id=%s".format(self._dynamic_table),
            (self._id, )))

        # Remove the sample_id, is not part of the metadata
        del d['sample_id']

        return d
Example #30
    def extend(self, md_template):
        """Adds the given sample template to the current one

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids
        """
        conn_handler = SQLConnectionHandler()
        queue_name = "EXTEND_SAMPLE_TEMPLATE_%d" % self.id
        conn_handler.create_queue(queue_name)

        md_template = self._clean_validate_template(md_template, self.study_id,
                                                    SAMPLE_TEMPLATE_COLUMNS)

        self._add_common_extend_steps_to_queue(md_template, conn_handler,
                                               queue_name)
        conn_handler.execute_queue(queue_name)

        self.generate_files()
Example #31
    def to_file(self, fp, samples=None):
        r"""Writes the MetadataTemplate to the file `fp` in tab-delimited
        format

        Parameters
        ----------
        fp : str
            Path to the output file
        samples : set, optional
            If supplied, only the specified samples will be written to the
            file
        """
        conn_handler = SQLConnectionHandler()
        metadata_map = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0} WHERE {1}=%s".format(self._table,
                                                          self._id_column),
            (self.id,)))
        dyn_vals = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))

        for k in metadata_map:
            for key, value in viewitems(self.translate_cols_dict):
                id_ = metadata_map[k][key]
                metadata_map[k][value] = self.str_cols_handlers[key][id_]
                del metadata_map[k][key]
            metadata_map[k].update(dyn_vals[k])
            metadata_map[k].pop('study_id', None)

        # Remove samples that are not in the samples list, if it was supplied
        if samples is not None:
            for sid, d in metadata_map.items():
                if sid not in samples:
                    metadata_map.pop(sid)

        # Write remaining samples to file
        headers = sorted(list(metadata_map.values())[0].keys())
        with open(fp, 'w') as f:
            # First write the headers
            f.write("sample_name\t%s\n" % '\t'.join(headers))
            # Write the values for each sample id
            for sid, d in sorted(metadata_map.items()):
                values = [str(d[h]) for h in headers]
                values.insert(0, sid)
                f.write("%s\n" % '\t'.join(values))
Example #32
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
        # Test that the run prefixes in the prep_template and the file names
        # actually match and raise an error if not
        conn_handler = SQLConnectionHandler()
        sql = ("""
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1,
                5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 19);
            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
            UPDATE qiita.prep_1 SET run_prefix='new' WHERE
                sample_id = '1.SKB8.640193';
        """)
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw_data, prep_template, params)

        obs_cmds = obs_cmd.split('; ')
        # assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
        # working, we only need to test for the commands being run and
        # that n is valid
        self.assertEqual(len(obs_cmds), 9)
        self.assertTrue(obs_cmds[0].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[1].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[2].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[3].startswith('split_libraries.py'))
        self.assertIn('-n 1', obs_cmds[3])
        self.assertTrue(obs_cmds[4].startswith('split_libraries.py'))
        self.assertIn('-n 800000', obs_cmds[4])
        self.assertTrue(obs_cmds[5].startswith('cat'))
        self.assertIn('split_library_log.txt', obs_cmds[5])
        self.assertTrue(obs_cmds[6].startswith('cat'))
        self.assertIn('seqs.fna', obs_cmds[6])
        self.assertEqual(len(obs_cmds[6].split(' ')), 5)
        self.assertTrue(obs_cmds[7].startswith('cat'))
        self.assertIn('seqs_filtered.qual', obs_cmds[7])
        self.assertEqual(len(obs_cmds[7].split(' ')), 5)
Example #33
    def data_type(self, ret_id=False):
        """Returns the data_type or the data_type id

        Parameters
        ----------
        ret_id : bool, optional
            If true, return the id instead of the string, default false.

        Returns
        -------
        str or int
            string value of data_type or data_type_id if ret_id is True
        """
        ret = "_id" if ret_id else ""
        conn_handler = SQLConnectionHandler()
        return conn_handler.execute_fetchone(
            "SELECT d.data_type{0} FROM qiita.data_type d JOIN "
            "qiita.prep_template p ON p.data_type_id = d.data_type_id WHERE "
            "p.prep_template_id=%s".format(ret), (self.id,))[0]
Example #34
    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**', 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(obs, [])
Example #35
    def test_get_qiime_minimal_mapping_single_no_run_prefix(self):
        conn_handler = SQLConnectionHandler()
        sql = """DELETE FROM qiita.prep_columns
                 WHERE prep_template_id = 1 AND column_name = 'run_prefix';
                 ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix"""
        conn_handler.execute(sql)
        prep_template = PrepTemplate(1)
        prep_template.generate_files()
        out_dir = mkdtemp()

        obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
        exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        self.assertTrue(exists(exp_fps[0]))
        # Check the contents of the file
        with open(exp_fps[0], "U") as f:
            self.assertEqual(f.read(), EXP_PREP)
Example #36
class TestHandlerBase(AsyncHTTPTestCase):
    database = False

    def get_app(self):
        BaseHandler.get_current_user = Mock(return_value="*****@*****.**")
        self.app = Application()
        return self.app

    def setUp(self):
        if self.database:
            self.conn_handler = SQLConnectionHandler()
            # Drop the schema
            self.conn_handler.execute("DROP SCHEMA qiita CASCADE")
            # Create the schema
            with open(LAYOUT_FP, 'U') as f:
                self.conn_handler.execute(f.read())
            # Initialize the database
            with open(INITIALIZE_FP, 'U') as f:
                self.conn_handler.execute(f.read())
            # Populate the database
            with open(POPULATE_FP, 'U') as f:
                self.conn_handler.execute(f.read())
        super(TestHandlerBase, self).setUp()

    def tearDown(self):
        if self.database:
            del self.conn_handler

    # helpers from http://www.peterbe.com/plog/tricks-asynchttpclient-tornado
    def get(self, url, data=None, headers=None, doseq=True):
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
            if '?' in url:
                url += '&%s' % data
            else:
                url += '?%s' % data
        return self._fetch(url, 'GET', headers=headers)

    def post(self, url, data, headers=None, doseq=True):
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
        return self._fetch(url, 'POST', data, headers)

    def _fetch(self, url, method, data=None, headers=None):
        self.http_client.fetch(self.get_url(url), self.stop, method=method,
                               body=data, headers=headers)
        return self.wait()
Example #37
class TestHandlerBase(AsyncHTTPTestCase):
    database = False
    conn_handler = SQLConnectionHandler()
    app = Application()

    def get_app(self):
        BaseHandler.get_current_user = Mock(return_value=User("*****@*****.**"))
        return self.app

    def setUp(self):
        if self.database:
            # First, we check that we are not in a production environment
            # It is possible that we are connecting to a production database
            test_db = self.conn_handler.execute_fetchone(
                "SELECT test FROM settings")[0]
            # Or the loaded config file belongs to a production environment
            if not qiita_config.test_environment or not test_db:
                raise RuntimeError("Working in a production environment. Not "
                                   "executing the tests to keep the production"
                                   " database safe.")

            # Drop the schema and rebuild the test database
            drop_and_rebuild_tst_database(self.conn_handler)

        super(TestHandlerBase, self).setUp()

    # helpers from http://www.peterbe.com/plog/tricks-asynchttpclient-tornado
    def get(self, url, data=None, headers=None, doseq=True):
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
            if '?' in url:
                url += '&%s' % data
            else:
                url += '?%s' % data
        return self._fetch(url, 'GET', headers=headers)

    def post(self, url, data, headers=None, doseq=True):
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
        return self._fetch(url, 'POST', data, headers)

    def _fetch(self, url, method, data=None, headers=None):
        self.http_client.fetch(self.get_url(url), self.stop, method=method,
                               body=data, headers=headers)
        return self.wait()
Example #38
def reset_test_database(wrapped_fn):
    """Decorator that drops the qiita schema, rebuilds and repopulates the
    schema with test data, then executes wrapped_fn
    """
    conn_handler = SQLConnectionHandler()

    def decorated_wrapped_fn(*args, **kwargs):
        # Drop the schema
        conn_handler.execute("DROP SCHEMA qiita CASCADE")
        # Create the schema
        with open(LAYOUT_FP, 'U') as f:
            conn_handler.execute(f.read())
        # Initialize the database
        with open(INITIALIZE_FP, 'U') as f:
            conn_handler.execute(f.read())
        # Populate the database
        with open(POPULATE_FP, 'U') as f:
            conn_handler.execute(f.read())
        # Execute the wrapped function
        return wrapped_fn(*args, **kwargs)

    return decorated_wrapped_fn
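A short usage note: as Example #1 above shows, reset_test_database is meant to decorate test fixture methods so the qiita schema is dropped and rebuilt before the wrapped function runs. A minimal sketch of that pattern follows; it assumes reset_test_database is in scope (its import path is not shown in these examples) and uses the SQLConnectionHandler import shown in the later examples.

    # Sketch only -- mirrors the tearDown usage in Example #1; the import path
    # for reset_test_database is an assumption and is therefore not shown.
    from unittest import TestCase

    from qiita_db.sql_connection import SQLConnectionHandler


    class ExampleResetTest(TestCase):
        def setUp(self):
            self.conn_handler = SQLConnectionHandler()

        @reset_test_database
        def tearDown(self):
            # the decorator rebuilds the schema from LAYOUT_FP, INITIALIZE_FP
            # and POPULATE_FP before this body executes
            del self.conn_handler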
Example #39
 def setUp(self):
     super(DecoratedClass, self).setUp()
     self.conn_handler = SQLConnectionHandler()
Example #40
class TestConnHandler(TestCase):
    def test_create_queue(self):
        self.conn_handler.create_queue("toy_queue")
        self.assertEqual(self.conn_handler.list_queues(), ["toy_queue"])

    def test_run_queue(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None, '222-222-2221',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_many(self):
        sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
               "phone) VALUES (%s, %s, %s, %s)")
        sql_args = [
            ('*****@*****.**', 'p1', 'pass1', '111-111'),
            ('*****@*****.**', 'p2', 'pass2', '111-222')
            ]
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", sql, sql_args, many=True)
        self.conn_handler.execute_queue('toy_queue')

        # make sure both users added
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass1', 'p1', None, None, '111-111',
                None, None, None]]
        self.assertEqual(obs, exp)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass2', 'p2', None, None, '111-222',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_last_return(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s RETURNING phone",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, ['222-222-2221'])

    def test_run_queue_placeholders(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s) RETURNING email, password",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s AND password = %s",
            ['{0}', '{1}'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None, '222-222-2221',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**', 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(obs, [])

    def test_huge_queue(self):
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        # add tons of inserts to queue
        for x in range(120):
            self.conn_handler.add_to_queue(
                "toy_queue",
                "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
                "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        # add failing insert as final item in queue
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_BADTABLE (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(obs, [])

    def test_get_temp_queue(self):
        my_queue = self.conn_handler.get_temp_queue()
        self.assertTrue(my_queue in self.conn_handler.list_queues())

        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.qiita_user")
        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.user_level")
        self.conn_handler.execute_queue(my_queue)

        self.assertTrue(my_queue not in self.conn_handler.list_queues())
Example #41
# 23 Nov, 2014
# This patch creates all the qiime mapping files for the existing
# prep templates

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import PrepTemplate

conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

for prep_template_id in conn_handler.execute_fetchall(
        "SELECT prep_template_id FROM qiita.prep_template"):
    prep_template_id = prep_template_id[0]
    pt = PrepTemplate(prep_template_id)
    study_id = pt.study_id

    for _, fpt in pt.get_filepaths():
        pt.create_qiime_mapping_file(fpt)
Example #42
# February 7, 2015
# This patch recreates all the QIIME mapping files to avoid lower/upper case
# problems. See https://github.com/biocore/qiita/issues/799
#
# heavily based on 7.py

from os.path import basename

from skbio.util import flatten

from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import PrepTemplate

conn_handler = SQLConnectionHandler()

sql = "SELECT prep_template_id FROM qiita.prep_template"
all_ids = conn_handler.execute_fetchall(sql)

q_name = 'unlink-bad-mapping-files'
conn_handler.create_queue(q_name)

# remove all the bad mapping files
for prep_template_id in all_ids:

    prep_template_id = prep_template_id[0]
    pt = PrepTemplate(prep_template_id)
    fps = pt.get_filepaths()

    # get the QIIME mapping file, note that the way to figure out what is and
# what's not a qiime mapping file is to check for the existence of the
    # word qiime in the basename of the file path, hacky but that's the way
Example #43
 def postgres_test(**kwargs):
     """Open a connection and query postgres"""
     from qiita_db.sql_connection import SQLConnectionHandler
     c = SQLConnectionHandler()
     return c.execute_fetchone("SELECT 42")[0]