Ejemplo n.º 1
0
    def test_get_qiime_minimal_mapping_single_reverse_primer(self):
        conn_handler = SQLConnectionHandler()
        conn_handler
        sql = """INSERT INTO qiita.prep_columns
                        (prep_template_id, column_name, column_type)
                    VALUES (1, 'reverselinkerprimer', 'varchar');
                 ALTER TABLE qiita.prep_1
                    ADD COLUMN reverselinkerprimer varchar;
                 DELETE FROM qiita.prep_columns
                 WHERE prep_template_id = 1 AND column_name = 'run_prefix';
                 ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix;
                 UPDATE qiita.prep_1 SET reverselinkerprimer = %s
                 """
        conn_handler.execute(sql, ('GTGCCAGCM',))
        prep_template = PrepTemplate(1)
        prep_template.generate_files()
        out_dir = mkdtemp()

        obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
        exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        self.assertTrue(exists(exp_fps[0]))
        # Check the contents of the file
        with open(exp_fps[0], "U") as f:
            self.assertEqual(f.read(), EXP_PREP_RLP)
Ejemplo n.º 2
0
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
        # Test that the run prefixes in the prep_template and the file names
        # actually match and raise an error if not
        conn_handler = SQLConnectionHandler()
        sql = ("""
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1,
                5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 19);
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (20, '1_error.sff', 17, 852952723,
                1, 5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 20);
            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
            UPDATE qiita.prep_1 SET run_prefix='new' WHERE
                sample_id = '1.SKB8.640193';
        """)
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        with self.assertRaises(ValueError):
            _get_preprocess_fasta_cmd(raw_data, prep_template, params)
Ejemplo n.º 3
0
    def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
        # Need to alter the run_prefix of one sample so we can test the
        # multiple values
        conn_handler = SQLConnectionHandler()
        sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
               "sample_id = '1.SKM9.640192'")
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw_data, prep_template, params)

        obs_cmds = obs_cmd.split('; ')
        # assumming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
        # working we only need to test for the commands being ran and
        # that n is valid
        self.assertEqual(len(obs_cmds), 8)
        self.assertTrue(obs_cmds[0].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[1].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[2].startswith('split_libraries.py'))
        self.assertIn('-n 1', obs_cmds[2])
        self.assertTrue(obs_cmds[3].startswith('split_libraries.py'))
        self.assertIn('-n 800000', obs_cmds[3])
        self.assertTrue(obs_cmds[4].startswith('cat'))
        self.assertIn('split_library_log.txt', obs_cmds[4])
        self.assertTrue(obs_cmds[5].startswith('cat'))
        self.assertTrue('seqs.fna', obs_cmds[5])
        self.assertTrue(obs_cmds[6].startswith('cat'))
        self.assertIn('seqs_filtered.qual', obs_cmds[6])
Ejemplo n.º 4
0
    def delete(cls, id_):
        r"""Deletes the table from the database

        Parameters
        ----------
        id_ : obj
            The object identifier

        Raises
        ------
        QiitaDBExecutionError
            If the prep template already has a preprocessed data
        QiitaDBUnknownIDError
            If no prep template with id = id_ exists
        """
        table_name = cls._table_name(id_)
        conn_handler = SQLConnectionHandler()

        if not cls.exists(id_):
            raise QiitaDBUnknownIDError(id_, cls.__name__)

        preprocessed_data_exists = conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.prep_template_preprocessed_data"
            " WHERE prep_template_id=%s)", (id_,))[0]

        if preprocessed_data_exists:
            raise QiitaDBExecutionError("Cannot remove prep template %d "
                                        "because a preprocessed data has been"
                                        " already generated using it." % id_)

        # Delete the prep template filepaths
        conn_handler.execute(
            "DELETE FROM qiita.prep_template_filepath WHERE "
            "prep_template_id = %s", (id_, ))

        # Drop the prep_X table
        conn_handler.execute(
            "DROP TABLE qiita.{0}".format(table_name))

        # Remove the rows from common_prep_info
        conn_handler.execute(
            "DELETE FROM qiita.{0} where {1} = %s".format(cls._table,
                                                          cls._id_column),
            (id_,))

        # Remove the rows from prep_columns
        conn_handler.execute(
            "DELETE FROM qiita.{0} where {1} = %s".format(cls._column_table,
                                                          cls._id_column),
            (id_,))

        # Remove the row from prep_template
        conn_handler.execute(
            "DELETE FROM qiita.prep_template where "
            "{0} = %s".format(cls._id_column), (id_,))
Ejemplo n.º 5
0
 def raw_data(self, raw_data):
     conn_handler = SQLConnectionHandler()
     sql = """SELECT (
                 SELECT raw_data_id
                 FROM qiita.prep_template
                 WHERE prep_template_id=%s)
             IS NOT NULL"""
     exists = conn_handler.execute_fetchone(sql, (self.id,))[0]
     if exists:
         raise QiitaDBError(
             "Prep template %d already has a raw data associated"
             % self.id)
     sql = """UPDATE qiita.prep_template
              SET raw_data_id = %s
              WHERE prep_template_id = %s"""
     conn_handler.execute(sql, (raw_data.id, self.id))
Ejemplo n.º 6
0
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
        # Should raise error
        conn_handler = SQLConnectionHandler()
        sql = ("""
            UPDATE qiita.prep_1 SET run_prefix='test1';
            UPDATE qiita.prep_1 SET run_prefix='test2' WHERE
                sample_id = '1.SKB2.640194';
            UPDATE qiita.prep_1 SET run_prefix='error' WHERE
                sample_id = '1.SKB8.640193';
        """)
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        with self.assertRaises(ValueError):
            _get_preprocess_fasta_cmd(raw_data, prep_template, params)
Ejemplo n.º 7
0
    def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
        # Test that the run prefixes in the prep_template and the file names
        # actually match and raise an error if not
        conn_handler = SQLConnectionHandler()
        sql = ("""
            INSERT INTO qiita.filepath (filepath_id, filepath,
                filepath_type_id, checksum, checksum_algorithm_id,
                data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1,
                5);
            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) VALUES
                (3, 19);
            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
            UPDATE qiita.prep_1 SET run_prefix='new' WHERE
                sample_id = '1.SKB8.640193';
        """)
        conn_handler.execute(sql)

        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)

        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw_data, prep_template, params)

        obs_cmds = obs_cmd.split('; ')
        # assumming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
        # working we only need to test for the commands being ran and
        # that n is valid
        self.assertEqual(len(obs_cmds), 9)
        self.assertTrue(obs_cmds[0].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[1].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[2].startswith('process_sff.py'))
        self.assertTrue(obs_cmds[3].startswith('split_libraries.py'))
        self.assertIn('-n 1', obs_cmds[3])
        self.assertTrue(obs_cmds[4].startswith('split_libraries.py'))
        self.assertIn('-n 800000', obs_cmds[4])
        self.assertTrue(obs_cmds[5].startswith('cat'))
        self.assertIn('split_library_log.txt', obs_cmds[5])
        self.assertTrue(obs_cmds[6].startswith('cat'))
        self.assertIn('seqs.fna', obs_cmds[6])
        self.assertEqual(len(obs_cmds[6].split(' ')), 5)
        self.assertTrue(obs_cmds[7].startswith('cat'))
        self.assertIn('seqs_filtered.qual', obs_cmds[7])
        self.assertEqual(len(obs_cmds[7].split(' ')), 5)
Ejemplo n.º 8
0
    def test_get_qiime_minimal_mapping_single_no_run_prefix(self):
        conn_handler = SQLConnectionHandler()
        sql = """DELETE FROM qiita.prep_columns
                 WHERE prep_template_id = 1 AND column_name = 'run_prefix';
                 ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix"""
        conn_handler.execute(sql)
        prep_template = PrepTemplate(1)
        prep_template.generate_files()
        out_dir = mkdtemp()

        obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
        exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        self.assertTrue(exists(exp_fps[0]))
        # Check the contents of the file
        with open(exp_fps[0], "U") as f:
            self.assertEqual(f.read(), EXP_PREP)
Ejemplo n.º 9
0
    def investigation_type(self, investigation_type):
        r"""Update the investigation type

        Parameters
        ----------
        investigation_type : str
            The investigation type to set, should be part of the ENA ontology

        Raises
        ------
        QiitaDBColumnError
            If the investigation type is not a valid ENA ontology
        """
        if investigation_type is not None:
            self.validate_investigation_type(investigation_type)

        conn_handler = SQLConnectionHandler()

        conn_handler.execute(
            "UPDATE qiita.prep_template SET investigation_type = %s "
            "WHERE {0} = %s".format(self._id_column),
            (investigation_type, self.id))
Ejemplo n.º 10
0
class TestHandlerBase(AsyncHTTPTestCase):
    database = False

    def get_app(self):
        BaseHandler.get_current_user = Mock(return_value="*****@*****.**")
        self.app = Application()
        return self.app

    def setUp(self):
        if self.database:
            self.conn_handler = SQLConnectionHandler()
            # Drop the schema
            self.conn_handler.execute("DROP SCHEMA qiita CASCADE")
            # Create the schema
            with open(LAYOUT_FP, 'U') as f:
                self.conn_handler.execute(f.read())
            # Initialize the database
            with open(INITIALIZE_FP, 'U') as f:
                self.conn_handler.execute(f.read())
            # Populate the database
            with open(POPULATE_FP, 'U') as f:
                self.conn_handler.execute(f.read())
        super(TestHandlerBase, self).setUp()

    def tearDown(self):
        if self.database:
            del self.conn_handler

    # helpers from http://www.peterbe.com/plog/tricks-asynchttpclient-tornado
    def get(self, url, data=None, headers=None, doseq=True):
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
            if '?' in url:
                url += '&%s' % data
            else:
                url += '?%s' % data
        return self._fetch(url, 'GET', headers=headers)

    def post(self, url, data, headers=None, doseq=True):
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
        return self._fetch(url, 'POST', data, headers)

    def _fetch(self, url, method, data=None, headers=None):
        self.http_client.fetch(self.get_url(url), self.stop, method=method,
                               body=data, headers=headers)
        return self.wait()
Ejemplo n.º 11
0
    def preprocessing_status(self, state):
        r"""Update the preprocessing status

        Parameters
        ----------
        state : str, {'not_preprocessed', 'preprocessing', 'success', 'failed'}
            The current status of preprocessing

        Raises
        ------
        ValueError
            If the state is not known.
        """
        if (state not in ('not_preprocessed', 'preprocessing', 'success') and
                not state.startswith('failed:')):
            raise ValueError('Unknown state: %s' % state)

        conn_handler = SQLConnectionHandler()

        conn_handler.execute(
            "UPDATE qiita.prep_template SET preprocessing_status = %s "
            "WHERE {0} = %s".format(self._id_column),
            (state, self.id))
Ejemplo n.º 12
0
    def add_filepath(self, filepath, fp_id=None):
        r"""Populates the DB tables for storing the filepath and connects the
        `self` objects with this filepath"""
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = SQLConnectionHandler()
        fp_id = self._fp_id if fp_id is None else fp_id

        try:
            fpp_id = insert_filepaths([(filepath, fp_id)], None,
                                      "templates", "filepath", conn_handler,
                                      move_files=False)[0]
            values = (self._id, fpp_id)
            conn_handler.execute(
                "INSERT INTO qiita.{0} ({1}, filepath_id) "
                "VALUES (%s, %s)".format(
                    self._filepath_table, self._id_column), values)
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            raise e
Ejemplo n.º 13
0
    def create(cls, md_template, raw_data, study, data_type,
               investigation_type=None):
        r"""Creates the metadata template in the database

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids
        raw_data : RawData
            The raw_data to which the prep template belongs to.
        study : Study
            The study to which the prep template belongs to.
        data_type : str or int
            The data_type of the prep template
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        A new instance of `cls` to access to the PrepTemplate stored in the DB

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        """
        # If the investigation_type is supplied, make sure it is one of
        # the recognized investigation types
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        # Get a connection handler
        conn_handler = SQLConnectionHandler()
        queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
        conn_handler.create_queue(queue_name)

        # Check if the data_type is the id or the string
        if isinstance(data_type, (int, long)):
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type",
                                            conn_handler)
        else:
            data_type_id = convert_to_id(data_type, "data_type", conn_handler)
            data_type_str = data_type

        md_template = cls._clean_validate_template(md_template, study.id,
                                                   data_type_str, conn_handler)

        # Insert the metadata template
        # We need the prep_id for multiple calls below, which currently is not
        # supported by the queue system. Thus, executing this outside the queue
        prep_id = conn_handler.execute_fetchone(
            "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
            "investigation_type) VALUES (%s, %s, %s) RETURNING "
            "prep_template_id", (data_type_id, raw_data.id,
                                 investigation_type))[0]

        cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                                conn_handler, queue_name)

        try:
            conn_handler.execute_queue(queue_name)
        except Exception:
            # Clean up row from qiita.prep_template
            conn_handler.execute(
                "DELETE FROM qiita.prep_template where "
                "{0} = %s".format(cls._id_column), (prep_id,))

            # Check if sample IDs present here but not in sample template
            sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
                   "study_id = %s")
            # Get list of study sample IDs, prep template study IDs,
            # and their intersection
            prep_samples = set(md_template.index.values)
            unknown_samples = prep_samples.difference(
                s[0] for s in conn_handler.execute_fetchall(sql, [study.id]))
            if unknown_samples:
                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample template: '
                    '%s' % ', '.join(unknown_samples))

            # some other error we haven't seen before so raise it
            raise

        pt = cls(prep_id)
        pt.generate_files()

        return pt
Ejemplo n.º 14
0
Archivo: 15.py Proyecto: RNAer/qiita
# Feb 11, 2015
# This changes all analysis files to be relative path instead of absolute

from os.path import basename, dirname

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler

conn_handler = SQLConnectionHandler()

filepaths = conn_handler.execute_fetchall(
    'SELECT f.* from qiita.filepath f JOIN qiita.analysis_filepath afp ON '
    'f.filepath_id = afp.filepath_id')

# retrieve relative filepaths as dictionary for matching
mountpoints = {m[1].rstrip('/\\'): m[0] for m in get_mountpoint(
    'analysis', conn_handler=conn_handler, retrieve_all=True)}

for filepath in filepaths:
    filename = basename(filepath['filepath'])
    # find the ID of the analysis filepath used
    mp_id = mountpoints[dirname(filepath['filepath']).rstrip('/\\')]
    conn_handler.execute(
        'UPDATE qiita.filepath SET filepath = %s, data_directory_id = %s WHERE'
        ' filepath_id = %s',
        [filename, mp_id, filepath['filepath_id']])