Example #1
    def preprocessed_data(self):
        conn_handler = SQLConnectionHandler()
        prep_datas = conn_handler.execute_fetchall(
            "SELECT preprocessed_data_id FROM "
            "qiita.prep_template_preprocessed_data WHERE prep_template_id=%s",
            (self.id,))
        return [x[0] for x in prep_datas]
Example #2
    def get_filepaths(self):
        r"""Retrieves the list of (filepath_id, filepath)"""
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = SQLConnectionHandler()

        try:
            filepath_ids = conn_handler.execute_fetchall(
                "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
                "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
                "{1}=%s) ORDER BY filepath_id DESC".format(
                    self._filepath_table, self._id_column),
                (self.id, ))
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            raise e

        _, fb = get_mountpoint('templates')[0]
        base_fp = partial(join, fb)

        return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
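A minimal usage sketch for the method above. It assumes a configured qiita_db installation with an existing prep template of id 1; the id and the print loop are illustrative, not taken from the examples.

# Hedged usage sketch: assumes a working qiita_db setup and PrepTemplate(1).
from qiita_db.metadata_template import PrepTemplate

pt = PrepTemplate(1)
# Newest files come first because of the ORDER BY filepath_id DESC above
for filepath_id, filepath in pt.get_filepaths():
    print(filepath_id, filepath)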
Example #3
    def to_dataframe(self):
        """Returns the metadata template as a dataframe

        Returns
        -------
        pandas DataFrame
            The metadata in the template, indexed on sample id
        """
        conn_handler = SQLConnectionHandler()
        cols = get_table_cols(self._table, conn_handler)
        if 'study_id' in cols:
            cols.remove('study_id')
        dyncols = get_table_cols(self._table_name(self._id), conn_handler)
        # remove sample_id from dyncols so not repeated
        dyncols.remove('sample_id')
        # Get all metadata for the template
        sql = """SELECT {0}, {1} FROM qiita.{2} req
            INNER JOIN qiita.{3} dyn on req.sample_id = dyn.sample_id
            WHERE req.{4} = %s""".format(
            ", ".join("req.%s" % c for c in cols),
            ", ".join("dyn.%s" % d for d in dyncols),
            self._table, self._table_name(self._id), self._id_column)
        meta = conn_handler.execute_fetchall(sql, [self._id])
        cols = cols + dyncols

        # Create the dataframe and clean it up a bit
        df = pd.DataFrame((list(x) for x in meta), columns=cols)
        df.set_index('sample_id', inplace=True, drop=True)
        # Turn id cols to value cols
        for col, value in viewitems(self.str_cols_handlers):
            df[col].replace(value, inplace=True)
        df.rename(columns=self.translate_cols_dict, inplace=True)

        return df
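A short usage sketch of the resulting DataFrame; the sample template id is an assumption.

# Illustrative only; assumes SampleTemplate(1) exists in the database.
from qiita_db.metadata_template import SampleTemplate

df = SampleTemplate(1).to_dataframe()
print(df.index.name)        # 'sample_id'
print(df.columns.tolist())  # required columns followed by the dynamic ones
print(df.head())            # one row of metadata per sample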
Example #4
    def status(self):
        """The status of the prep template

        Returns
        -------
        str
            The status of the prep template

        Notes
        -----
        The status of a prep template is inferred from the status of the
        processed data generated from this prep template. If no processed
        data has been generated with this prep template, then the status
        is 'sandbox'.
        """
        conn_handler = SQLConnectionHandler()
        sql = """SELECT processed_data_status
                FROM qiita.processed_data_status pds
                  JOIN qiita.processed_data pd
                    USING (processed_data_status_id)
                  JOIN qiita.preprocessed_processed_data ppd_pd
                    USING (processed_data_id)
                  JOIN qiita.prep_template_preprocessed_data pt_ppd
                    USING (preprocessed_data_id)
                WHERE pt_ppd.prep_template_id=%s"""
        pd_statuses = conn_handler.execute_fetchall(sql, (self._id,))

        return infer_status(pd_statuses)
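The Notes describe how the template status is derived from the statuses of its processed data, falling back to 'sandbox' when nothing has been processed. Below is a minimal standalone sketch of that idea; the real infer_status lives in qiita_db and its exact precedence order is an assumption here.

def infer_status_sketch(pd_statuses):
    """Collapse processed-data statuses into a single template status.

    pd_statuses is a list of single-column rows, e.g. [['private'], ['public']].
    Assumed precedence: public > private > awaiting_approval; with no
    processed data the template stays in 'sandbox'.
    """
    statuses = {row[0] for row in pd_statuses}
    for status in ('public', 'private', 'awaiting_approval'):
        if status in statuses:
            return status
    return 'sandbox'

print(infer_status_sketch([]))                         # sandbox
print(infer_status_sketch([['private'], ['public']]))  # public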
Example #5
    def __call__(self, searchstr, user):
        """Runs a Study query and returns matching studies and samples

        Parameters
        ----------
        searchstr : str
            Search string to use
        user : User object
            User making the search. Needed for permissions checks.

        Returns
        -------
        dict
            Found samples in format
            {study_id: [[samp_id1, meta1, meta2, ...],
                        [samp_id2, meta1, meta2, ...], ...]}
        list
            metadata column names searched for

        Notes
        -----
        Metadata information for each sample is in the same order as the
        metadata columns list returned

        Metadata column names and string searches are case-sensitive
        """
        study_sql, sample_sql, meta_headers = \
            self._parse_study_search_string(searchstr, True)
        conn_handler = SQLConnectionHandler()
        # get all studies containing the metadata headers requested
        study_ids = {x[0] for x in conn_handler.execute_fetchall(study_sql)}
        # strip to only studies user has access to
        if user.level not in {'admin', 'dev', 'superuser'}:
            study_ids = study_ids.intersection(Study.get_by_status('public') |
                                               user.user_studies |
                                               user.shared_studies)

        results = {}
        # run search on each study to get out the matching samples
        for sid in study_ids:
            study_res = conn_handler.execute_fetchall(sample_sql.format(sid))
            if study_res:
                # only add study to results if actually has samples in results
                results[sid] = study_res
        self.results = results
        self.meta_headers = meta_headers
        return results, meta_headers
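A hedged usage sketch for this callable search object. The class and module names (QiitaStudySearch, User), the search string and the email are assumptions used only for illustration.

# Illustrative only; class/module names, search string and email are assumed.
from qiita_db.search import QiitaStudySearch
from qiita_db.user import User

search = QiitaStudySearch()
results, meta_headers = search('host_subject_id = "1001"', User('user@example.com'))
for study_id, samples in results.items():
    for row in samples:
        sample_id, metadata = row[0], row[1:]
        # metadata values follow the same order as meta_headers
        print(study_id, sample_id, dict(zip(meta_headers, metadata)))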
Example #6
    def update(self, md_template):
        r"""Update values in the template

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by sample ids

        Raises
        ------
        QiitaDBError
            If md_template and db do not have the same sample ids
            If md_template and db do not have the same column headers
            If self.can_be_updated is not True
        """
        conn_handler = SQLConnectionHandler()

        # Clean and validate the metadata template given
        new_map = self._clean_validate_template(md_template, self.study_id,
                                                self.columns_restrictions)
        # Retrieving current metadata
        current_map = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))
        current_map = pd.DataFrame.from_dict(current_map, orient='index')

        # simple validations of sample ids and column names
        samples_diff = set(new_map.index).difference(current_map.index)
        if samples_diff:
            raise QiitaDBError('The new template differs from what is stored '
                               'in database by these samples names: %s'
                               % ', '.join(samples_diff))
        columns_diff = set(new_map.columns).difference(current_map.columns)
        if columns_diff:
            raise QiitaDBError('The new template differs from what is stored '
                               'in database by these columns names: %s'
                               % ', '.join(columns_diff))

        # here we are comparing two dataframes following:
        # http://stackoverflow.com/a/17095620/4228285
        current_map.sort(axis=0, inplace=True)
        current_map.sort(axis=1, inplace=True)
        new_map.sort(axis=0, inplace=True)
        new_map.sort(axis=1, inplace=True)
        map_diff = (current_map != new_map).stack()
        map_diff = map_diff[map_diff]
        map_diff.index.names = ['id', 'column']
        changed_cols = map_diff.index.get_level_values('column').unique()

        if not self.can_be_updated(columns=set(changed_cols)):
            raise QiitaDBError('The new template is modifying fields that '
                               'cannot be modified. Try removing the target '
                               'gene fields or deleting the processed data. '
                               'You are trying to modify: %s'
                               % ', '.join(changed_cols))

        for col in changed_cols:
            self.update_category(col, new_map[col].to_dict())

        self.generate_files()
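The comparison above follows the linked Stack Overflow pattern but uses DataFrame.sort, which only exists in old pandas. A standalone sketch of the same "which cells changed" check with sort_index, on toy data and independent of the qiita classes:

import pandas as pd

current = pd.DataFrame({'season': ['winter', 'summer'], 'ph': [7.0, 6.5]},
                       index=['S1', 'S2'])
new = pd.DataFrame({'season': ['winter', 'spring'], 'ph': [7.0, 6.5]},
                   index=['S1', 'S2'])

# Align rows/columns, flag differing cells, keep only the True entries
current = current.sort_index(axis=0).sort_index(axis=1)
new = new.sort_index(axis=0).sort_index(axis=1)
diff = (current != new).stack()
diff = diff[diff]
diff.index.names = ['id', 'column']
print(diff.index.get_level_values('column').unique())  # only 'season' changed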
Example #7
    def __call__(self, searchstr, user):
        """Runs a Study query and returns matching studies and samples

        Parameters
        ----------
        searchstr : str
            Search string to use
        user : User object
            User making the search. Needed for permissions checks.

        Returns
        -------
        dict
            Found samples in format
            {study_id: [[samp_id1, meta1, meta2, ...],
                        [samp_id2, meta1, meta2, ...], ...]}
        list
            metadata column names searched for

        Notes
        -----
        Metadata information for each sample is in the same order as the
        metadata columns list returned

        Metadata column names and string searches are case-sensitive
        """
        study_sql, sample_sql, meta_headers = \
            self._parse_study_search_string(searchstr, True)
        conn_handler = SQLConnectionHandler()
        # get all studies containing the metadata headers requested
        study_ids = {x[0] for x in conn_handler.execute_fetchall(study_sql)}
        # strip to only studies user has access to
        if user.level not in {'admin', 'dev', 'superuser'}:
            study_ids = study_ids.intersection(
                Study.get_by_status('public') + user.user_studies +
                user.shared_studies)

        results = {}
        # run search on each study to get out the matching samples
        for sid in study_ids:
            study_res = conn_handler.execute_fetchall(sample_sql.format(sid))
            if study_res:
                # only add study to results if actually has samples in results
                results[sid] = study_res
        return results, meta_headers
Example #8
    def to_file(self, fp, samples=None):
        r"""Writes the MetadataTemplate to the file `fp` in tab-delimited
        format

        Parameters
        ----------
        fp : str
            Path to the output file
        samples : set, optional
            If supplied, only the specified samples will be written to the
            file
        """
        conn_handler = SQLConnectionHandler()
        metadata_map = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0} WHERE {1}=%s".format(self._table,
                                                          self._id_column),
            (self.id,)))
        dyn_vals = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))

        for k in metadata_map:
            for key, value in viewitems(self.translate_cols_dict):
                id_ = metadata_map[k][key]
                metadata_map[k][value] = self.str_cols_handlers[key][id_]
                del metadata_map[k][key]
            metadata_map[k].update(dyn_vals[k])
            metadata_map[k].pop('study_id', None)

        # Remove samples that are not in the samples list, if it was supplied
        if samples is not None:
            for sid, d in metadata_map.items():
                if sid not in samples:
                    metadata_map.pop(sid)

        # Write remaining samples to file
        headers = sorted(list(metadata_map.values())[0].keys())
        with open(fp, 'w') as f:
            # First write the headers
            f.write("sample_name\t%s\n" % '\t'.join(headers))
            # Write the values for each sample id
            for sid, d in sorted(metadata_map.items()):
                values = [str(d[h]) for h in headers]
                values.insert(0, sid)
                f.write("%s\n" % '\t'.join(values))
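A brief call sketch for to_file; the template id, output paths and sample ids are placeholders.

# Illustrative only; SampleTemplate(1), the paths and sample ids are assumed.
from qiita_db.metadata_template import SampleTemplate

st = SampleTemplate(1)
st.to_file('/tmp/sample_template_full.txt')        # write every sample
st.to_file('/tmp/sample_template_subset.txt',
           samples={'1.sample.1', '1.sample.2'})    # write only these two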
Example #9
    def metadata_headers():
        """Returns metadata headers available

        Returns
        -------
        list
            Alphabetical list of all metadata headers available
        """
        conn_handler = SQLConnectionHandler()
        return [x[0] for x in
                conn_handler.execute_fetchall(
                "SELECT DISTINCT column_name FROM qiita.study_sample_columns "
                "ORDER BY column_name")]
Example #10
    def metadata_headers():
        """Returns metadata headers available

        Returns
        -------
        list
            Alphabetical list of all metadata headers available
        """
        conn_handler = SQLConnectionHandler()
        return [x[0] for x in
                conn_handler.execute_fetchall(
                "SELECT DISTINCT column_name FROM qiita.study_sample_columns "
                "UNION SELECT column_name FROM information_schema.columns "
                "WHERE table_name = 'required_sample_info' "
                "ORDER BY column_name")]
Example #11
    def to_dataframe(self):
        """Returns the metadata template as a dataframe

        Returns
        -------
        pandas DataFrame
            The metadata in the template, indexed on sample id
        """
        conn_handler = SQLConnectionHandler()
        cols = sorted(get_table_cols(self._table_name(self._id)))
        # Get all metadata for the template
        sql = "SELECT {0} FROM qiita.{1}".format(", ".join(cols),
                                                 self._table_name(self.id))
        meta = conn_handler.execute_fetchall(sql, (self._id,))

        # Create the dataframe and clean it up a bit
        df = pd.DataFrame((list(x) for x in meta), columns=cols)
        df.set_index('sample_id', inplace=True, drop=True)

        return df
Example #12
    def qiime_map_fp(self):
        """The QIIME mapping filepath attached to the prep template

        Returns
        -------
        str
            The filepath of the QIIME mapping file
        """
        conn_handler = SQLConnectionHandler()

        sql = """SELECT filepath_id, filepath
                 FROM qiita.filepath
                    JOIN qiita.{0} USING (filepath_id)
                    JOIN qiita.filepath_type USING (filepath_type_id)
                 WHERE {1} = %s AND filepath_type = 'qiime_map'
                 ORDER BY filepath_id DESC""".format(self._filepath_table,
                                                     self._id_column)
        fn = conn_handler.execute_fetchall(sql, (self._id,))[0][1]
        base_dir = get_mountpoint('templates')[0][1]
        return join(base_dir, fn)
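A usage sketch; the snippet above does not show a decorator (several of these extracts appear to have lost them), so exposing qiime_map_fp as a property is an assumption here.

# Illustrative only; assumes PrepTemplate(1) exists, has a QIIME mapping file
# attached, and that qiime_map_fp is exposed as a property.
from qiita_db.metadata_template import PrepTemplate

print(PrepTemplate(1).qiime_map_fp)  # absolute path under the 'templates' mount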
Example #13
    def update(self, md_template):
        r"""Update values in the sample template

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by sample ids

        Raises
        ------
        QiitaDBError
            If md_template and db do not have the same sample ids
            If md_template and db do not have the same column headers
        """
        conn_handler = SQLConnectionHandler()

        # Clean and validate the metadata template given
        new_map = self._clean_validate_template(md_template, self.id,
                                                conn_handler)
        # Retrieving current metadata
        current_map = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0} WHERE {1}=%s".format(self._table,
                                                          self._id_column),
            (self.id,)))
        dyn_vals = self._transform_to_dict(conn_handler.execute_fetchall(
            "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))

        for k in current_map:
            current_map[k].update(dyn_vals[k])
            current_map[k].pop('study_id', None)

        # converting sql results to dataframe
        current_map = pd.DataFrame.from_dict(current_map, orient='index')

        # simple validations of sample ids and column names
        samples_diff = set(
            new_map.index.tolist()) - set(current_map.index.tolist())
        if samples_diff:
            raise QiitaDBError('The new sample template differs from what is '
                               'stored in database by these samples names: %s'
                               % ', '.join(samples_diff))
        columns_diff = set(new_map.columns) - set(current_map.columns)
        if columns_diff:
            raise QiitaDBError('The new sample template differs from what is '
                               'stored in database by these columns names: %s'
                               % ', '.join(columns_diff))

        # here we are comparing two dataframes following:
        # http://stackoverflow.com/a/17095620/4228285
        current_map.sort(axis=0, inplace=True)
        current_map.sort(axis=1, inplace=True)
        new_map.sort(axis=0, inplace=True)
        new_map.sort(axis=1, inplace=True)
        map_diff = (current_map != new_map).stack()
        map_diff = map_diff[map_diff]
        map_diff.index.names = ['id', 'column']
        changed_cols = map_diff.index.get_level_values('column').unique()

        for col in changed_cols:
            self.update_category(col, new_map[col].to_dict())

        self.generate_files()
Example #14
    def create_qiime_mapping_file(self, prep_template_fp):
        """This creates the QIIME mapping file and links it in the db.

        Parameters
        ----------
        prep_template_fp : str
            The prep template filepath that should be concatenated to the
            sample template and used to generate a new QIIME mapping file

        Returns
        -------
        filepath : str
            The filepath of the created QIIME mapping file

        Raises
        ------
        ValueError
            If the prep template is not a subset of the sample template
        """
        rename_cols = {
            'barcode': 'BarcodeSequence',
            'barcodesequence': 'BarcodeSequence',
            'primer': 'LinkerPrimerSequence',
            'linkerprimersequence': 'LinkerPrimerSequence',
            'description': 'Description',
        }

        # getting the latest sample template
        conn_handler = SQLConnectionHandler()
        sql = """SELECT filepath_id, filepath
                 FROM qiita.filepath
                    JOIN qiita.sample_template_filepath
                    USING (filepath_id)
                 WHERE study_id=%s
                 ORDER BY filepath_id DESC"""
        sample_template_fname = conn_handler.execute_fetchall(
            sql, (self.study_id,))[0][1]
        _, fp = get_mountpoint('templates')[0]
        sample_template_fp = join(fp, sample_template_fname)

        # reading files via pandas
        st = load_template_to_dataframe(sample_template_fp)
        pt = load_template_to_dataframe(prep_template_fp)
        st_sample_names = set(st.index)
        pt_sample_names = set(pt.index)

        if not pt_sample_names.issubset(st_sample_names):
            raise ValueError(
                "Prep template is not a sub set of the sample template, files:"
                "%s %s - samples: %s" % (sample_template_fp, prep_template_fp,
                                         str(pt_sample_names-st_sample_names)))

        mapping = pt.join(st, lsuffix="_prep")
        mapping.rename(columns=rename_cols, inplace=True)

        # Get the original mapping columns and readjust the order to comply
        # with QIIME requirements
        cols = mapping.columns.values.tolist()
        cols.remove('BarcodeSequence')
        cols.remove('LinkerPrimerSequence')
        cols.remove('Description')
        new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']
        new_cols.extend(cols)
        new_cols.append('Description')
        mapping = mapping[new_cols]

        # figuring out the filepath for the QIIME map file
        _id, fp = get_mountpoint('templates')[0]
        filepath = join(fp, '%d_prep_%d_qiime_%s.txt' % (self.study_id,
                        self.id, strftime("%Y%m%d-%H%M%S")))

        # Save the mapping file
        mapping.to_csv(filepath, index_label='#SampleID', na_rep='unknown',
                       sep='\t')

        # adding the fp to the object
        self.add_filepath(filepath)

        return filepath
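The column shuffling above pins BarcodeSequence and LinkerPrimerSequence right after #SampleID and pushes Description to the end, as QIIME mapping files require. A standalone sketch of that reorder on a toy frame:

import pandas as pd

# Toy mapping frame; column names mirror the QIIME-required ones used above.
mapping = pd.DataFrame(
    {'Description': ['d1'], 'BarcodeSequence': ['ACGT'],
     'center_name': ['c1'], 'LinkerPrimerSequence': ['GGAC']},
    index=['sample.1'])

cols = mapping.columns.tolist()
for required in ('BarcodeSequence', 'LinkerPrimerSequence', 'Description'):
    cols.remove(required)
new_cols = ['BarcodeSequence', 'LinkerPrimerSequence'] + cols + ['Description']
mapping = mapping[new_cols]
print(mapping.columns.tolist())
# ['BarcodeSequence', 'LinkerPrimerSequence', 'center_name', 'Description']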
Example #15
class TestConnHandler(TestCase):
    def test_create_queue(self):
        self.conn_handler.create_queue("toy_queue")
        self.assertEqual(self.conn_handler.list_queues(), ["toy_queue"])

    def test_run_queue(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None, '222-222-2221',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_many(self):
        sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
               "phone) VALUES (%s, %s, %s, %s)")
        sql_args = [
            ('*****@*****.**', 'p1', 'pass1', '111-111'),
            ('*****@*****.**', 'p2', 'pass2', '111-222')
            ]
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", sql, sql_args, many=True)
        self.conn_handler.execute_queue('toy_queue')

        # make sure both users added
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass1', 'p1', None, None, '111-111',
                None, None, None]]
        self.assertEqual(obs, exp)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass2', 'p2', None, None, '111-222',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_last_return(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s RETURNING phone",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, ['222-222-2221'])

    def test_run_queue_placeholders(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s) RETURNING email, password",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s AND password = %s",
            ['{0}', '{1}'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None, '222-222-2221',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**', 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(obs, [])

    def test_huge_queue(self):
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        # add tons of inserts to queue
        for x in range(120):
            self.conn_handler.add_to_queue(
                "toy_queue",
                "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
                "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        # add failing insert as final item in queue
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_BADTABLE (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(obs, [])

    def test_get_temp_queue(self):
        my_queue = self.conn_handler.get_temp_queue()
        self.assertTrue(my_queue in self.conn_handler.list_queues())

        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.qiita_user")
        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.user_level")
        self.conn_handler.execute_queue(my_queue)

        self.assertTrue(my_queue not in self.conn_handler.list_queues())
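test_run_queue_placeholders exercises the '{0}'/'{1}' mechanism, where argument placeholders are filled with the values returned (via RETURNING) by an earlier statement in the same queue. A small self-contained sketch of that substitution idea; the real logic lives inside SQLConnectionHandler.execute_queue and may differ in detail.

def substitute_placeholders(sql_args, previous_results):
    """Replace string args such as '{0}' with values from the prior statement.

    Mimics, in spirit, how execute_queue feeds RETURNING values into later
    queue entries; this is an illustrative assumption, not the real code.
    """
    substituted = []
    for arg in sql_args:
        if isinstance(arg, str) and arg.startswith('{') and arg.endswith('}'):
            if not previous_results:
                raise ValueError('no results available for substitution')
            substituted.append(previous_results[int(arg[1:-1])])
        else:
            substituted.append(arg)
    return substituted

# '{0}' and '{1}' pick the first and second RETURNING value of the prior insert
print(substitute_placeholders(['{0}', '{1}'], ['user@example.com', 'pass']))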
Example #16
# This patch recreates all the QIIME mapping files to avoid lower/upper case
# problems. See https://github.com/biocore/qiita/issues/799
#
# heavily based on 7.py

from os.path import basename

from skbio.util import flatten

from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import PrepTemplate

conn_handler = SQLConnectionHandler()

sql = "SELECT prep_template_id FROM qiita.prep_template"
all_ids = conn_handler.execute_fetchall(sql)

q_name = 'unlink-bad-mapping-files'
conn_handler.create_queue(q_name)

# remove all the bad mapping files
for prep_template_id in all_ids:

    prep_template_id = prep_template_id[0]
    pt = PrepTemplate(prep_template_id)
    fps = pt.get_filepaths()

    # get the QIIME mapping file; note that the way to figure out what is and
    # what's not a qiime mapping file is to check for the existence of the
    # word qiime in the basename of the file path, hacky but that's the way
    # it is being done in qiita_pet/uimodules/raw_data_tab.py
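The rest of this patch script is not shown above; the sketch below only illustrates the heuristic the comment describes, i.e. treating a filepath as a QIIME mapping file when 'qiime' appears in its basename. The example paths are made up.

from os.path import basename

# Illustrative filepaths following the '%d_prep_%d_qiime_%s.txt' naming scheme
fps = [(10, '/mount/templates/1_prep_1_20141122-101516.txt'),
       (11, '/mount/templates/1_prep_1_qiime_20141122-101516.txt')]

qiime_fps = [(fp_id, fp) for fp_id, fp in fps if 'qiime' in basename(fp)]
print(qiime_fps)  # only the *_qiime_* mapping file is kept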
Example #17
File: 15.py Project: RNAer/qiita
# Feb 11, 2015
# This changes all analysis files to be relative path instead of absolute

from os.path import basename, dirname

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler

conn_handler = SQLConnectionHandler()

filepaths = conn_handler.execute_fetchall(
    'SELECT f.* from qiita.filepath f JOIN qiita.analysis_filepath afp ON '
    'f.filepath_id = afp.filepath_id')

# retrieve relative filepaths as dictionary for matching
mountpoints = {m[1].rstrip('/\\'): m[0] for m in get_mountpoint(
    'analysis', conn_handler=conn_handler, retrieve_all=True)}

for filepath in filepaths:
    filename = basename(filepath['filepath'])
    # find the ID of the analysis filepath used
    mp_id = mountpoints[dirname(filepath['filepath']).rstrip('/\\')]
    conn_handler.execute(
        'UPDATE qiita.filepath SET filepath = %s, data_directory_id = %s WHERE'
        ' filepath_id = %s',
        [filename, mp_id, filepath['filepath_id']])
Example #18
class TestConnHandler(TestCase):
    def test_create_queue(self):
        self.conn_handler.create_queue("toy_queue")
        self.assertEqual(self.conn_handler.list_queues(), ["toy_queue"])

    def test_close(self):
        self.assertEqual(self.conn_handler._user_conn.closed, 0)
        self.conn_handler.close()
        self.assertNotEqual(self.conn_handler._user_conn.closed, 0)

    def test_run_queue(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None, '222-222-2221',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_many(self):
        sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
               "phone) VALUES (%s, %s, %s, %s)")
        sql_args = [
            ('*****@*****.**', 'p1', 'pass1', '111-111'),
            ('*****@*****.**', 'p2', 'pass2', '111-222')
            ]
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", sql, sql_args, many=True)
        self.conn_handler.execute_queue('toy_queue')

        # make sure both users added
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass1', 'p1', None, None, '111-111',
                None, None, None]]
        self.assertEqual(obs, exp)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass2', 'p2', None, None, '111-222',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_last_return(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s RETURNING phone",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, ['222-222-2221'])

    def test_run_queue_placeholders(self):
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s) RETURNING email, password",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s AND password = %s",
            ['{0}', '{1}'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None, '222-222-2221',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**', 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(obs, [])

    def test_huge_queue(self):
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        # add tons of inserts to queue
        for x in range(120):
            self.conn_handler.add_to_queue(
                "toy_queue",
                "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
                "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        # add failing insert as final item in queue
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_BADTABLE (email, name, password) VALUES "
            "(%s, %s, %s)", ['*****@*****.**' % x, 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s", [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")

        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(obs, [])

    def test_get_temp_queue(self):
        my_queue = self.conn_handler.get_temp_queue()
        self.assertTrue(my_queue in self.conn_handler.list_queues())

        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.qiita_user")
        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.user_level")
        self.conn_handler.execute_queue(my_queue)

        self.assertTrue(my_queue not in self.conn_handler.list_queues())
Example #19
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.data import RawData
from qiita_db.util import move_filepaths_to_upload_folder

conn_handler = SQLConnectionHandler()
queue = "PATCH_25"
conn_handler.create_queue(queue)

# the system may contain raw data with no prep template associated with it.
# Retrieve all those raw data ids
sql = """SELECT raw_data_id
         FROM qiita.raw_data
         WHERE raw_data_id NOT IN (
            SELECT DISTINCT raw_data_id FROM qiita.prep_template);"""
rd_ids = [x[0] for x in conn_handler.execute_fetchall(sql)]

# We will delete those RawData. However, if they have files attached, we should
# move them to the uploads folder of the study
sql_detach = """DELETE FROM qiita.study_raw_data
                WHERE raw_data_id = %s AND study_id = %s"""
sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s"
sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
sql_studies = """SELECT study_id FROM qiita.study_raw_data
                 WHERE raw_data_id = %s"""
move_files = []
for rd_id in rd_ids:
    rd = RawData(rd_id)
    filepaths = rd.get_filepaths()
    studies = [s[0] for s in conn_handler.execute_fetchall(sql_studies,
                                                           (rd_id,))]
Example #20
    def create(cls, md_template, raw_data, study, data_type,
               investigation_type=None):
        r"""Creates the metadata template in the database

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by sample ids
        raw_data : RawData
            The raw_data to which the prep template belongs.
        study : Study
            The study to which the prep template belongs.
        data_type : str or int
            The data_type of the prep template
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        A new instance of `cls` to access the PrepTemplate stored in the DB

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        """
        # If the investigation_type is supplied, make sure it is one of
        # the recognized investigation types
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        # Get a connection handler
        conn_handler = SQLConnectionHandler()
        queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
        conn_handler.create_queue(queue_name)

        # Check if the data_type is the id or the string
        if isinstance(data_type, (int, long)):
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type",
                                            conn_handler)
        else:
            data_type_id = convert_to_id(data_type, "data_type", conn_handler)
            data_type_str = data_type

        md_template = cls._clean_validate_template(md_template, study.id,
                                                   data_type_str, conn_handler)

        # Insert the metadata template
        # We need the prep_id for multiple calls below, which currently is not
        # supported by the queue system. Thus, executing this outside the queue
        prep_id = conn_handler.execute_fetchone(
            "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
            "investigation_type) VALUES (%s, %s, %s) RETURNING "
            "prep_template_id", (data_type_id, raw_data.id,
                                 investigation_type))[0]

        cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                                conn_handler, queue_name)

        try:
            conn_handler.execute_queue(queue_name)
        except Exception:
            # Clean up row from qiita.prep_template
            conn_handler.execute(
                "DELETE FROM qiita.prep_template where "
                "{0} = %s".format(cls._id_column), (prep_id,))

            # Check if sample IDs present here but not in sample template
            sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
                   "study_id = %s")
            # Find the prep template sample IDs that are not present in the
            # study's sample IDs
            prep_samples = set(md_template.index.values)
            unknown_samples = prep_samples.difference(
                s[0] for s in conn_handler.execute_fetchall(sql, [study.id]))
            if unknown_samples:
                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample template: '
                    '%s' % ', '.join(unknown_samples))

            # some other error we haven't seen before so raise it
            raise

        pt = cls(prep_id)
        pt.generate_files()

        return pt
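A hedged call sketch for this classmethod. The module paths for RawData and Study, the ids, the data type, the investigation type and the toy template columns are all assumptions for illustration.

# Illustrative only; assumes a configured qiita_db install, an existing
# RawData(1) and Study(1), and that these columns satisfy the prep
# template requirements.
import pandas as pd
from qiita_db.data import RawData
from qiita_db.study import Study
from qiita_db.metadata_template import PrepTemplate

md = pd.DataFrame({'barcode': ['ACGT'], 'primer': ['GGAC'],
                   'center_name': ['c1']}, index=['1.sample.1'])
pt = PrepTemplate.create(md, RawData(1), Study(1), '16S',
                         investigation_type='Amplicon')
print(pt.id)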
Example #21
# 23 Nov, 2014
# This patch creates all the qiime mapping files for the existing
# prep templates

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import PrepTemplate

conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

for prep_template_id in conn_handler.execute_fetchall(
        "SELECT prep_template_id FROM qiita.prep_template"):
    prep_template_id = prep_template_id[0]
    pt = PrepTemplate(prep_template_id)
    study_id = pt.study_id

    for _, fpt in pt.get_filepaths():
        pt.create_qiime_mapping_file(fpt)
Example #22
    def create_qiime_mapping_file(self):
        """This creates the QIIME mapping file and links it in the db.

        Returns
        -------
        filepath : str
            The filepath of the created QIIME mapping file

        Raises
        ------
        ValueError
            If the prep template is not a subset of the sample template
        QiitaDBWarning
            If the QIIME-required columns are not present in the template

        Notes
        -----
        We cannot ensure that the QIIME-required columns are present in the
        metadata map. However, we have to generate a QIIME-compliant mapping
        file. Since the user may need a QIIME mapping file even if these
        QIIME-required columns are missing from the template, we create them
        and populate them with the value XXQIITAXX.
        """
        rename_cols = {
            'barcode': 'BarcodeSequence',
            'primer': 'LinkerPrimerSequence',
            'description': 'Description',
        }

        if 'reverselinkerprimer' in self.categories():
            rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer'
            new_cols = ['BarcodeSequence', 'LinkerPrimerSequence',
                        'ReverseLinkerPrimer']
        else:
            new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']

        # getting the latest sample template
        conn_handler = SQLConnectionHandler()
        sql = """SELECT filepath_id, filepath
                 FROM qiita.filepath
                    JOIN qiita.sample_template_filepath
                    USING (filepath_id)
                 WHERE study_id=%s
                 ORDER BY filepath_id DESC"""
        sample_template_fname = conn_handler.execute_fetchall(
            sql, (self.study_id,))[0][1]
        _, fp = get_mountpoint('templates')[0]
        sample_template_fp = join(fp, sample_template_fname)

        # reading files via pandas
        st = load_template_to_dataframe(sample_template_fp)
        pt = self.to_dataframe()

        st_sample_names = set(st.index)
        pt_sample_names = set(pt.index)

        if not pt_sample_names.issubset(st_sample_names):
            raise ValueError(
                "Prep template is not a sub set of the sample template, files"
                "%s - samples: %s"
                % (sample_template_fp,
                   ', '.join(pt_sample_names-st_sample_names)))

        mapping = pt.join(st, lsuffix="_prep")
        mapping.rename(columns=rename_cols, inplace=True)

        # Pre-populate the QIIME-required columns with the value XXQIITAXX
        index = mapping.index
        placeholder = ['XXQIITAXX'] * len(index)
        missing = []
        for val in viewvalues(rename_cols):
            if val not in mapping:
                missing.append(val)
                mapping[val] = pd.Series(placeholder, index=index)

        if missing:
            warnings.warn(
                "Some columns required to generate a QIIME-compliant mapping "
                "file are not present in the template. A placeholder value "
                "(XXQIITAXX) has been used to populate these columns. Missing "
                "columns: %s" % ', '.join(missing),
                QiitaDBWarning)

        # Get the original mapping columns and readjust the order to comply
        # with QIIME requirements
        cols = mapping.columns.values.tolist()
        cols.remove('BarcodeSequence')
        cols.remove('LinkerPrimerSequence')
        cols.remove('Description')
        new_cols.extend(cols)
        new_cols.append('Description')
        mapping = mapping[new_cols]

        # figuring out the filepath for the QIIME map file
        _id, fp = get_mountpoint('templates')[0]
        filepath = join(fp, '%d_prep_%d_qiime_%s.txt' % (self.study_id,
                        self.id, strftime("%Y%m%d-%H%M%S")))

        # Save the mapping file
        mapping.to_csv(filepath, index_label='#SampleID', na_rep='',
                       sep='\t')

        # adding the fp to the object
        self.add_filepath(
            filepath,
            fp_id=convert_to_id("qiime_map", "filepath_type"))

        return filepath
Example #23
# Nov 22, 2014
# This patch is to create all the prep/sample template files and link them in
# the database so they are present for download

from os.path import join
from time import strftime

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

for study_id in conn_handler.execute_fetchall(
        "SELECT study_id FROM qiita.study"):
    study_id = study_id[0]
    if SampleTemplate.exists(study_id):
        st = SampleTemplate(study_id)
        fp = join(fp_base, '%d_%s.txt' % (study_id, strftime("%Y%m%d-%H%M%S")))
        st.to_file(fp)
        st.add_filepath(fp)

for prep_template_id in conn_handler.execute_fetchall(
        "SELECT prep_template_id FROM qiita.prep_template"):
    prep_template_id = prep_template_id[0]
    pt = PrepTemplate(prep_template_id)
    study_id = pt.study_id

    fp = join(fp_base, '%d_prep_%d_%s.txt' % (pt.study_id, prep_template_id,
              strftime("%Y%m%d-%H%M%S")))
Example #24
# Mar 27, 2015
# Need to re-generate the files, given that some headers have changed

from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

conn_handler = SQLConnectionHandler()

# Get all the sample templates
sql = """SELECT DISTINCT study_id from qiita.study_sample"""
study_ids = {s[0] for s in conn_handler.execute_fetchall(sql)}

for s_id in study_ids:
    SampleTemplate(s_id).generate_files()

# Get all the prep templates
sql = """SELECT prep_template_id from qiita.prep_template"""
prep_ids = {p[0] for p in conn_handler.execute_fetchall(sql)}
for prep_id in prep_ids:
    PrepTemplate(prep_id).generate_files()