def test_get_qiime_minimal_mapping_single_reverse_primer(self):
    """A prep template with a reverselinkerprimer column and no
    run_prefix column yields a single QIIME minimal mapping file whose
    contents include the reverse linker primer (EXP_PREP_RLP).
    """
    conn_handler = SQLConnectionHandler()
    # FIX: removed a stray no-op `conn_handler` expression statement
    # that sat between the assignment above and the SQL definition.
    # Add a reverselinkerprimer column and drop run_prefix so the
    # single-file (no prefix) code path is exercised.
    sql = """INSERT INTO qiita.prep_columns
                 (prep_template_id, column_name, column_type)
             VALUES (1, 'reverselinkerprimer', 'varchar');
             ALTER TABLE qiita.prep_1 ADD COLUMN reverselinkerprimer varchar;
             DELETE FROM qiita.prep_columns
                 WHERE prep_template_id = 1 AND column_name = 'run_prefix';
             ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix;
             UPDATE qiita.prep_1 SET reverselinkerprimer = %s
          """
    conn_handler.execute(sql, ('GTGCCAGCM',))
    prep_template = PrepTemplate(1)
    prep_template.generate_files()
    out_dir = mkdtemp()

    obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
    exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)
    # Check that the file exists
    self.assertTrue(exists(exp_fps[0]))
    # Check the contents of the file
    with open(exp_fps[0], "U") as f:
        self.assertEqual(f.read(), EXP_PREP_RLP)
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
    """A run prefix matching more than one raw file raises ValueError."""
    # Register two extra sff filepaths for raw data 3 and remap the
    # prep template prefixes so that the prefix 'new' is ambiguous:
    # it matches both '1_new.sff' and (via the prep rows) nothing unique.
    handler = SQLConnectionHandler()
    setup_sql = ("""
        INSERT INTO qiita.filepath (filepath_id, filepath,
            filepath_type_id, checksum, checksum_algorithm_id,
            data_directory_id)
        VALUES (19, '1_new.sff', 17, 852952723, 1, 5);
        INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
        VALUES (3, 19);
        INSERT INTO qiita.filepath (filepath_id, filepath,
            filepath_type_id, checksum, checksum_algorithm_id,
            data_directory_id)
        VALUES (20, '1_error.sff', 17, 852952723, 1, 5);
        INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
        VALUES (3, 20);
        UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
        UPDATE qiita.prep_1 SET run_prefix='new'
            WHERE sample_id = '1.SKB8.640193';
        """)
    handler.execute(setup_sql)

    data = RawData(3)
    parameters = Preprocessed454Params(1)
    template = PrepTemplate(1)

    with self.assertRaises(ValueError):
        _get_preprocess_fasta_cmd(data, template, parameters)
def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
    """Two distinct run prefixes produce per-prefix process_sff and
    split_libraries invocations followed by cat merge commands.
    """
    # Need to alter the run_prefix of one sample so we can test the
    # multiple values
    conn_handler = SQLConnectionHandler()
    sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
           "sample_id = '1.SKM9.640192'")
    conn_handler.execute(sql)

    raw_data = RawData(3)
    params = Preprocessed454Params(1)
    prep_template = PrepTemplate(1)

    obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
        raw_data, prep_template, params)
    obs_cmds = obs_cmd.split('; ')

    # assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
    # working we only need to test for the commands being ran and
    # that n is valid
    self.assertEqual(len(obs_cmds), 8)
    self.assertTrue(obs_cmds[0].startswith('process_sff.py'))
    self.assertTrue(obs_cmds[1].startswith('process_sff.py'))
    self.assertTrue(obs_cmds[2].startswith('split_libraries.py'))
    self.assertIn('-n 1', obs_cmds[2])
    self.assertTrue(obs_cmds[3].startswith('split_libraries.py'))
    self.assertIn('-n 800000', obs_cmds[3])
    self.assertTrue(obs_cmds[4].startswith('cat'))
    self.assertIn('split_library_log.txt', obs_cmds[4])
    self.assertTrue(obs_cmds[5].startswith('cat'))
    # BUG FIX: was assertTrue('seqs.fna', obs_cmds[5]) -- a non-empty
    # string is always truthy, so that check could never fail.  The
    # intended assertion is membership, i.e. assertIn.
    self.assertIn('seqs.fna', obs_cmds[5])
    self.assertTrue(obs_cmds[6].startswith('cat'))
    self.assertIn('seqs_filtered.qual', obs_cmds[6])
def delete(cls, id_):
    r"""Deletes the table from the database

    Removes all traces of the prep template `id_`: its filepath links,
    its dynamic prep_X table, and its rows in the common info, column
    registry and prep_template tables, in that order (children before
    parents, so foreign-key constraints are satisfied).

    Parameters
    ----------
    id_ : obj
        The object identifier

    Raises
    ------
    QiitaDBExecutionError
        If the prep template already has a preprocessed data
    QiitaDBUnknownIDError
        If no prep template with id = id_ exists
    """
    table_name = cls._table_name(id_)
    conn_handler = SQLConnectionHandler()

    if not cls.exists(id_):
        raise QiitaDBUnknownIDError(id_, cls.__name__)

    # Deleting a prep template that already produced preprocessed data
    # would orphan that data, so refuse to proceed in that case.
    preprocessed_data_exists = conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.prep_template_preprocessed_data"
        " WHERE prep_template_id=%s)", (id_,))[0]

    if preprocessed_data_exists:
        raise QiitaDBExecutionError("Cannot remove prep template %d "
                                    "because a preprocessed data has been"
                                    " already generated using it." % id_)

    # Delete the prep template filepaths
    conn_handler.execute(
        "DELETE FROM qiita.prep_template_filepath WHERE "
        "prep_template_id = %s", (id_, ))

    # Drop the prep_X table
    conn_handler.execute(
        "DROP TABLE qiita.{0}".format(table_name))

    # Remove the rows from common_prep_info
    conn_handler.execute(
        "DELETE FROM qiita.{0} where {1} = %s".format(cls._table,
                                                      cls._id_column),
        (id_,))

    # Remove the rows from prep_columns
    conn_handler.execute(
        "DELETE FROM qiita.{0} where {1} = %s".format(cls._column_table,
                                                      cls._id_column),
        (id_,))

    # Remove the row from prep_template
    conn_handler.execute(
        "DELETE FROM qiita.prep_template where "
        "{0} = %s".format(cls._id_column), (id_,))
def raw_data(self, raw_data):
    """Associate `raw_data` with this prep template.

    Raises
    ------
    QiitaDBError
        If the prep template already has a raw data linked to it.
    """
    handler = SQLConnectionHandler()
    # A NULL raw_data_id means no association yet; the IS NOT NULL
    # wrapper collapses the subquery to a single boolean.
    check_sql = """SELECT (
            SELECT raw_data_id
            FROM qiita.prep_template
            WHERE prep_template_id=%s)
        IS NOT NULL"""
    already_linked = handler.execute_fetchone(check_sql, (self.id,))[0]
    if already_linked:
        raise QiitaDBError(
            "Prep template %d already has a raw data associated" %
            self.id)
    update_sql = """UPDATE qiita.prep_template
                    SET raw_data_id = %s
                    WHERE prep_template_id = %s"""
    handler.execute(update_sql, (raw_data.id, self.id))
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
    """Run prefixes that match no raw file name raise ValueError."""
    # Point the prep template at prefixes ('test1', 'test2', 'error')
    # that do not correspond to the registered sff file names.
    handler = SQLConnectionHandler()
    setup_sql = ("""
        UPDATE qiita.prep_1 SET run_prefix='test1';
        UPDATE qiita.prep_1 SET run_prefix='test2'
            WHERE sample_id = '1.SKB2.640194';
        UPDATE qiita.prep_1 SET run_prefix='error'
            WHERE sample_id = '1.SKB8.640193';
        """)
    handler.execute(setup_sql)

    data = RawData(3)
    parameters = Preprocessed454Params(1)
    template = PrepTemplate(1)

    with self.assertRaises(ValueError):
        _get_preprocess_fasta_cmd(data, template, parameters)
def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
    """Run prefixes that match the raw file names build a 9-step command."""
    # Register a third sff file and remap the prefixes so that both
    # 'preprocess_test' and 'new' match registered file names.
    handler = SQLConnectionHandler()
    setup_sql = ("""
        INSERT INTO qiita.filepath (filepath_id, filepath,
            filepath_type_id, checksum, checksum_algorithm_id,
            data_directory_id)
        VALUES (19, '1_new.sff', 17, 852952723, 1, 5);
        INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
        VALUES (3, 19);
        UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
        UPDATE qiita.prep_1 SET run_prefix='new'
            WHERE sample_id = '1.SKB8.640193';
        """)
    handler.execute(setup_sql)

    data = RawData(3)
    parameters = Preprocessed454Params(1)
    template = PrepTemplate(1)

    cmd, output_dir = _get_preprocess_fasta_cmd(data, template, parameters)
    commands = cmd.split('; ')

    # assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
    # working we only need to test for the commands being ran and
    # that n is valid
    self.assertEqual(len(commands), 9)
    for idx in (0, 1, 2):
        self.assertTrue(commands[idx].startswith('process_sff.py'))
    self.assertTrue(commands[3].startswith('split_libraries.py'))
    self.assertIn('-n 1', commands[3])
    self.assertTrue(commands[4].startswith('split_libraries.py'))
    self.assertIn('-n 800000', commands[4])
    self.assertTrue(commands[5].startswith('cat'))
    self.assertIn('split_library_log.txt', commands[5])
    self.assertTrue(commands[6].startswith('cat'))
    self.assertIn('seqs.fna', commands[6])
    self.assertEqual(len(commands[6].split(' ')), 5)
    self.assertTrue(commands[7].startswith('cat'))
    self.assertIn('seqs_filtered.qual', commands[7])
    self.assertEqual(len(commands[7].split(' ')), 5)
def test_get_qiime_minimal_mapping_single_no_run_prefix(self):
    """Without a run_prefix column a single mapping file is produced."""
    # Drop the run_prefix column so the single-file code path runs.
    handler = SQLConnectionHandler()
    handler.execute(
        """DELETE FROM qiita.prep_columns
           WHERE prep_template_id = 1 AND column_name = 'run_prefix';
           ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix""")

    template = PrepTemplate(1)
    template.generate_files()
    out_dir = mkdtemp()

    observed = _get_qiime_minimal_mapping(template, out_dir)
    expected = [join(out_dir, 'prep_1_MMF.txt')]

    # The single expected filepath is returned ...
    self.assertEqual(observed, expected)
    # ... the file was actually written ...
    self.assertTrue(exists(expected[0]))
    # ... and its contents are the expected minimal mapping
    with open(expected[0], "U") as f:
        self.assertEqual(f.read(), EXP_PREP)
def investigation_type(self, investigation_type):
    r"""Update the investigation type

    Parameters
    ----------
    investigation_type : str
        The investigation type to set, should be part of the ENA ontology

    Raises
    ------
    QiitaDBColumnError
        If the investigation type is not a valid ENA ontology
    """
    # None clears the value, so only non-None values need validation
    if investigation_type is not None:
        self.validate_investigation_type(investigation_type)
    handler = SQLConnectionHandler()
    update_sql = ("UPDATE qiita.prep_template SET investigation_type = %s "
                  "WHERE {0} = %s").format(self._id_column)
    handler.execute(update_sql, (investigation_type, self.id))
class TestHandlerBase(AsyncHTTPTestCase):
    """Base class for Tornado handler tests.

    Mocks the current user, and — when `database` is True — rebuilds the
    qiita schema from scratch (layout, initialize, populate) before each
    test.  Also provides blocking `get`/`post` helpers around the async
    HTTP client.
    """
    # Subclasses that touch the DB set this to True to get a fresh
    # schema per test
    database = False

    def get_app(self):
        # Mock the logged-in user so handlers don't need a real session
        BaseHandler.get_current_user = Mock(return_value="*****@*****.**")
        self.app = Application()
        return self.app

    def setUp(self):
        if self.database:
            self.conn_handler = SQLConnectionHandler()
            # Drop the schema
            self.conn_handler.execute("DROP SCHEMA qiita CASCADE")
            # Create the schema
            with open(LAYOUT_FP, 'U') as f:
                self.conn_handler.execute(f.read())
            # Initialize the database
            with open(INITIALIZE_FP, 'U') as f:
                self.conn_handler.execute(f.read())
            # Populate the database
            with open(POPULATE_FP, 'U') as f:
                self.conn_handler.execute(f.read())
        super(TestHandlerBase, self).setUp()

    def tearDown(self):
        if self.database:
            del self.conn_handler

    # helpers from http://www.peterbe.com/plog/tricks-asynchttpclient-tornado
    def get(self, url, data=None, headers=None, doseq=True):
        # Encode dict data into the query string before fetching
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
            if '?' in url:
                url += '&%s' % data
            else:
                url += '?%s' % data
        return self._fetch(url, 'GET', headers=headers)

    def post(self, url, data, headers=None, doseq=True):
        # Dict data is form-encoded and sent as the request body
        if data is not None:
            if isinstance(data, dict):
                data = urlencode(data, doseq=doseq)
        return self._fetch(url, 'POST', data, headers)

    def _fetch(self, url, method, data=None, headers=None):
        # Issue the async fetch and block until the IOLoop delivers it
        self.http_client.fetch(self.get_url(url), self.stop, method=method,
                               body=data, headers=headers)
        return self.wait()
def preprocessing_status(self, state):
    r"""Update the preprocessing status

    Parameters
    ----------
    state : str
        One of 'not_preprocessed', 'preprocessing', 'success', or any
        string starting with 'failed:' (carrying the failure message).

    Raises
    ------
    ValueError
        If the state is not one of the recognized values.
    """
    recognized = state in ('not_preprocessed', 'preprocessing', 'success')
    if not (recognized or state.startswith('failed:')):
        raise ValueError('Unknown state: %s' % state)

    handler = SQLConnectionHandler()
    handler.execute(
        "UPDATE qiita.prep_template SET preprocessing_status = %s "
        "WHERE {0} = %s".format(self._id_column),
        (state, self.id))
def add_filepath(self, filepath, fp_id=None):
    r"""Populates the DB tables for storing the filepath and connects the
    `self` objects with this filepath

    Parameters
    ----------
    filepath : str
        Path of the file to register
    fp_id : int, optional
        Filepath type id to register under; defaults to the class'
        `_fp_id`

    Raises
    ------
    Exception
        Any error during insertion is logged via LogEntry and then
        re-raised.
    """
    # Check that this function has been called from a subclass
    self._check_subclass()

    # Check if the connection handler has been provided. Create a new
    # one if not.
    conn_handler = SQLConnectionHandler()
    fp_id = self._fp_id if fp_id is None else fp_id

    try:
        fpp_id = insert_filepaths([(filepath, fp_id)], None, "templates",
                                  "filepath", conn_handler,
                                  move_files=False)[0]
        values = (self._id, fpp_id)
        conn_handler.execute(
            "INSERT INTO qiita.{0} ({1}, filepath_id) "
            "VALUES (%s, %s)".format(
                self._filepath_table, self._id_column), values)
    except Exception as e:
        LogEntry.create('Runtime', str(e),
                        info={self.__class__.__name__: self.id})
        # FIX: bare `raise` preserves the original traceback; the
        # previous `raise e` re-raised from this line, discarding the
        # real failure location (Python 2 semantics).
        raise
def create(cls, md_template, raw_data, study, data_type,
           investigation_type=None):
    r"""Creates the metadata template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples Ids
    raw_data : RawData
        The raw_data to which the prep template belongs to.
    study : Study
        The study to which the prep template belongs to.
    data_type : str or int
        The data_type of the prep template
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` to access to the PrepTemplate stored in
    the DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    QiitaDBExecutionError
        If the template contains sample ids that are not present in
        the study's sample template
    """
    # If the investigation_type is supplied, make sure it is one of
    # the recognized investigation types
    if investigation_type is not None:
        cls.validate_investigation_type(investigation_type)

    # Get a connection handler
    conn_handler = SQLConnectionHandler()
    queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
    conn_handler.create_queue(queue_name)

    # Check if the data_type is the id or the string
    if isinstance(data_type, (int, long)):
        data_type_id = data_type
        data_type_str = convert_from_id(data_type, "data_type",
                                        conn_handler)
    else:
        data_type_id = convert_to_id(data_type, "data_type",
                                     conn_handler)
        data_type_str = data_type

    md_template = cls._clean_validate_template(md_template, study.id,
                                               data_type_str,
                                               conn_handler)

    # Insert the metadata template
    # We need the prep_id for multiple calls below, which currently is
    # not supported by the queue system. Thus, executing this outside
    # the queue
    prep_id = conn_handler.execute_fetchone(
        "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
        "investigation_type) VALUES (%s, %s, %s) RETURNING "
        "prep_template_id", (data_type_id, raw_data.id,
                             investigation_type))[0]

    cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                            conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except Exception:
        # Clean up row from qiita.prep_template (it was inserted outside
        # the queue, so the failed queue did not roll it back)
        conn_handler.execute(
            "DELETE FROM qiita.prep_template where "
            "{0} = %s".format(cls._id_column), (prep_id,))

        # Check if sample IDs present here but not in sample template
        sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
               "study_id = %s")
        # Get list of study sample IDs, prep template study IDs,
        # and their intersection
        prep_samples = set(md_template.index.values)
        unknown_samples = prep_samples.difference(
            s[0] for s in conn_handler.execute_fetchall(sql, [study.id]))
        if unknown_samples:
            raise QiitaDBExecutionError(
                'Samples found in prep template but not sample template: '
                '%s' % ', '.join(unknown_samples))

        # some other error we haven't seen before so raise it
        raise

    pt = cls(prep_id)
    pt.generate_files()
    return pt
# Feb 11, 2015
# This changes all analysis files to be relative path instead of absolute

from os.path import basename, dirname

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler

handler = SQLConnectionHandler()

# Every filepath row that is linked to an analysis
analysis_fps = handler.execute_fetchall(
    'SELECT f.* from qiita.filepath f JOIN qiita.analysis_filepath afp ON '
    'f.filepath_id = afp.filepath_id')

# Map each analysis mount point (trailing slashes stripped) to its id so
# the absolute directory portion of a path can be matched back to a
# data_directory_id
mount_ids = {}
for mount in get_mountpoint('analysis', conn_handler=handler,
                            retrieve_all=True):
    mount_ids[mount[1].rstrip('/\\')] = mount[0]

for fp_row in analysis_fps:
    # Keep only the file name; the directory becomes a mount-point id
    name_only = basename(fp_row['filepath'])
    # find the ID of the analysis filepath used
    directory_id = mount_ids[dirname(fp_row['filepath']).rstrip('/\\')]
    handler.execute(
        'UPDATE qiita.filepath SET filepath = %s, data_directory_id = %s WHERE'
        ' filepath_id = %s',
        [name_only, directory_id, fp_row['filepath_id']])