def preprocessed_data(self):
    """Return the preprocessed data ids generated from this prep template.

    Returns
    -------
    list of int
        The ids of the preprocessed data linked to this prep template
    """
    conn_handler = SQLConnectionHandler()
    sql = ("SELECT preprocessed_data_id FROM "
           "qiita.prep_template_preprocessed_data WHERE prep_template_id=%s")
    rows = conn_handler.execute_fetchall(sql, (self.id,))
    return [row[0] for row in rows]
def get_filepaths(self):
    r"""Retrieves the list of (filepath_id, filepath)"""
    # Only subclasses define _filepath_table/_id_column; fail fast otherwise
    self._check_subclass()

    conn_handler = SQLConnectionHandler()
    sql = ("SELECT filepath_id, filepath FROM qiita.filepath WHERE "
           "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
           "{1}=%s) ORDER BY filepath_id DESC").format(
               self._filepath_table, self._id_column)
    try:
        filepath_ids = conn_handler.execute_fetchall(sql, (self.id, ))
    except Exception as e:
        # Record the failure in the run-time log before propagating it
        LogEntry.create('Runtime', str(e),
                        info={self.__class__.__name__: self.id})
        raise e

    # Filepaths are stored relative to the 'templates' mountpoint
    _, base_dir = get_mountpoint('templates')[0]
    make_path = partial(join, base_dir)
    return [(fpid, make_path(fp)) for fpid, fp in filepath_ids]
def to_dataframe(self):
    """Returns the metadata template as a dataframe

    Returns
    -------
    pandas DataFrame
        The metadata in the template, indexed on sample id
    """
    conn_handler = SQLConnectionHandler()
    cols = get_table_cols(self._table, conn_handler)
    if 'study_id' in cols:
        cols.remove('study_id')
    dyncols = get_table_cols(self._table_name(self._id), conn_handler)
    # sample_id exists in both tables; drop the duplicate from dyncols
    dyncols.remove('sample_id')

    # Fetch required and dynamic columns joined on sample_id
    sql = """SELECT {0}, {1} FROM qiita.{2} req INNER JOIN qiita.{3} dyn
             on req.sample_id = dyn.sample_id
             WHERE req.{4} = %s""".format(
        ", ".join("req.%s" % c for c in cols),
        ", ".join("dyn.%s" % d for d in dyncols),
        self._table, self._table_name(self._id), self._id_column)
    meta = conn_handler.execute_fetchall(sql, [self._id])
    all_cols = cols + dyncols

    # Build the dataframe and tidy it up
    df = pd.DataFrame((list(row) for row in meta), columns=all_cols)
    df.set_index('sample_id', inplace=True, drop=True)
    # Map stored ids back to their human-readable values
    for col, id_to_value in viewitems(self.str_cols_handlers):
        df[col].replace(id_to_value, inplace=True)
    df.rename(columns=self.translate_cols_dict, inplace=True)
    return df
def status(self):
    """The status of the prep template

    Returns
    -------
    str
        The status of the prep template

    Notes
    -----
    The status of a prep template is inferred from the statuses of the
    processed data generated from it. If no processed data has been
    generated with this prep template, the status is 'sandbox'.
    """
    conn_handler = SQLConnectionHandler()
    sql = """SELECT processed_data_status
            FROM qiita.processed_data_status pds
              JOIN qiita.processed_data pd
                USING (processed_data_status_id)
              JOIN qiita.preprocessed_processed_data ppd_pd
                USING (processed_data_id)
              JOIN qiita.prep_template_preprocessed_data pt_ppd
                USING (preprocessed_data_id)
            WHERE pt_ppd.prep_template_id=%s"""
    statuses = conn_handler.execute_fetchall(sql, (self._id,))
    # infer_status handles the empty case (no processed data -> sandbox)
    return infer_status(statuses)
def __call__(self, searchstr, user):
    """Runs a Study query and returns matching studies and samples

    Parameters
    ----------
    searchstr : str
        Search string to use
    user : User object
        User making the search. Needed for permissions checks.

    Returns
    -------
    dict
        Found samples in format
        {study_id: [[samp_id1, meta1, meta2, ...],
                    [samp_id2, meta1, meta2, ...], ...}
    list
        metadata column names searched for

    Notes
    -----
    Metadata information for each sample is in the same order as the
    metadata columns list returned

    Metadata column names and string searches are case-sensitive
    """
    study_sql, sample_sql, meta_headers = \
        self._parse_study_search_string(searchstr, True)
    conn_handler = SQLConnectionHandler()

    # all studies containing the requested metadata headers
    study_ids = {row[0] for row in conn_handler.execute_fetchall(study_sql)}
    # non-privileged users only see public, owned, or shared studies
    if user.level not in {'admin', 'dev', 'superuser'}:
        accessible = (Study.get_by_status('public') |
                      user.user_studies | user.shared_studies)
        study_ids = study_ids.intersection(accessible)

    results = {}
    # run the per-study search, keeping only studies with matching samples
    for sid in study_ids:
        matches = conn_handler.execute_fetchall(sample_sql.format(sid))
        if matches:
            results[sid] = matches
    self.results = results
    self.meta_headers = meta_headers
    return results, meta_headers
def update(self, md_template):
    r"""Update values in the template

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples Ids

    Raises
    ------
    QiitaDBError
        If md_template and db do not have the same sample ids
        If md_template and db do not have the same column headers
        If self.can_be_updated is not True
    """
    conn_handler = SQLConnectionHandler()

    # Clean and validate the metadata template given
    new_map = self._clean_validate_template(md_template, self.study_id,
                                            self.columns_restrictions)
    # Retrieving current metadata
    current_map = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))
    current_map = pd.DataFrame.from_dict(current_map, orient='index')

    # simple validations of sample ids and column names
    samples_diff = set(new_map.index).difference(current_map.index)
    if samples_diff:
        raise QiitaDBError('The new template differs from what is stored '
                           'in database by these samples names: %s'
                           % ', '.join(samples_diff))
    columns_diff = set(new_map.columns).difference(current_map.columns)
    if columns_diff:
        raise QiitaDBError('The new template differs from what is stored '
                           'in database by these columns names: %s'
                           % ', '.join(columns_diff))

    # here we are comparing two dataframes following:
    # http://stackoverflow.com/a/17095620/4228285
    # Both frames are sorted along both axes so the element-wise
    # comparison below lines up the same (sample, column) cells
    current_map.sort(axis=0, inplace=True)
    current_map.sort(axis=1, inplace=True)
    new_map.sort(axis=0, inplace=True)
    new_map.sort(axis=1, inplace=True)
    map_diff = (current_map != new_map).stack()
    # keep only the cells that actually differ (boolean mask on itself)
    map_diff = map_diff[map_diff]
    map_diff.index.names = ['id', 'column']
    changed_cols = map_diff.index.get_level_values('column').unique()

    # refuse the update if any changed column is not updatable
    if not self.can_be_updated(columns=set(changed_cols)):
        raise QiitaDBError('The new template is modifying fields that '
                           'cannot be modified. Try removing the target '
                           'gene fields or deleting the processed data. '
                           'You are trying to modify: %s'
                           % ', '.join(changed_cols))

    # apply the changes column by column, then regenerate on-disk files
    for col in changed_cols:
        self.update_category(col, new_map[col].to_dict())

    self.generate_files()
def __call__(self, searchstr, user):
    """Runs a Study query and returns matching studies and samples

    Parameters
    ----------
    searchstr : str
        Search string to use
    user : User object
        User making the search. Needed for permissions checks.

    Returns
    -------
    dict
        Found samples in format
        {study_id: [[samp_id1, meta1, meta2, ...],
                    [samp_id2, meta1, meta2, ...], ...}
    list
        metadata column names searched for

    Notes
    -----
    Metadata information for each sample is in the same order as the
    metadata columns list returned

    Metadata column names and string searches are case-sensitive
    """
    study_sql, sample_sql, meta_headers = \
        self._parse_study_search_string(searchstr, True)
    conn_handler = SQLConnectionHandler()
    # get all studies containing the metadata headers requested
    study_ids = {x[0] for x in conn_handler.execute_fetchall(study_sql)}
    # strip to only studies user has access to
    if user.level not in {'admin', 'dev', 'superuser'}:
        # BUG FIX: these are sets, which do not support `+`; combine them
        # with set union `|` (matching the sibling implementation of this
        # search elsewhere in the file)
        study_ids = study_ids.intersection(
            Study.get_by_status('public') | user.user_studies |
            user.shared_studies)

    results = {}
    # run search on each study to get out the matching samples
    for sid in study_ids:
        study_res = conn_handler.execute_fetchall(sample_sql.format(sid))
        if study_res:
            # only add study to results if actually has samples in results
            results[sid] = study_res
    return results, meta_headers
def to_file(self, fp, samples=None):
    r"""Writes the MetadataTemplate to the file `fp` in tab-delimited
    format

    Parameters
    ----------
    fp : str
        Path to the output file
    samples : set, optional
        If supplied, only the specified samples will be written to the
        file
    """
    conn_handler = SQLConnectionHandler()
    metadata_map = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0} WHERE {1}=%s".format(self._table,
                                                      self._id_column),
        (self.id,)))
    dyn_vals = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))

    for k in metadata_map:
        # Replace each translated *_id column by its string value
        for key, value in viewitems(self.translate_cols_dict):
            id_ = metadata_map[k][key]
            metadata_map[k][value] = self.str_cols_handlers[key][id_]
            del metadata_map[k][key]
        # Merge in the dynamic (per-study) columns; study_id is internal
        metadata_map[k].update(dyn_vals[k])
        metadata_map[k].pop('study_id', None)

    # Remove samples that are not in the samples list, if it was supplied
    if samples is not None:
        # BUG FIX: iterate over a snapshot of the keys -- popping from the
        # dict while iterating dict.items() raises RuntimeError on Python 3
        for sid in list(metadata_map):
            if sid not in samples:
                metadata_map.pop(sid)

    # Write remaining samples to file
    # NOTE(review): assumes at least one sample remains after filtering;
    # an empty metadata_map raises IndexError here -- confirm callers
    # guarantee a non-empty selection
    headers = sorted(list(metadata_map.values())[0].keys())
    with open(fp, 'w') as f:
        # First write the headers
        f.write("sample_name\t%s\n" % '\t'.join(headers))
        # Write the values for each sample id
        for sid, d in sorted(metadata_map.items()):
            values = [str(d[h]) for h in headers]
            values.insert(0, sid)
            f.write("%s\n" % '\t'.join(values))
def metadata_headers():
    """Returns metadata headers available

    Returns
    -------
    list
        Alphabetical list of all metadata headers available
    """
    conn_handler = SQLConnectionHandler()
    sql = ("SELECT DISTINCT column_name FROM qiita.study_sample_columns "
           "ORDER BY column_name")
    return [row[0] for row in conn_handler.execute_fetchall(sql)]
def metadata_headers():
    """Returns metadata headers available

    Returns
    -------
    list
        Alphabetical list of all metadata headers available
    """
    conn_handler = SQLConnectionHandler()
    # Dynamic columns plus the fixed columns of required_sample_info
    sql = ("SELECT DISTINCT column_name FROM qiita.study_sample_columns "
           "UNION SELECT column_name FROM information_schema.columns "
           "WHERE table_name = 'required_sample_info' "
           "ORDER BY column_name")
    return [row[0] for row in conn_handler.execute_fetchall(sql)]
def to_dataframe(self):
    """Returns the metadata template as a dataframe

    Returns
    -------
    pandas DataFrame
        The metadata in the template, indexed on sample id
    """
    conn_handler = SQLConnectionHandler()
    cols = sorted(get_table_cols(self._table_name(self._id)))
    # Get all metadata for the template
    sql = "SELECT {0} FROM qiita.{1}".format(", ".join(cols),
                                             self._table_name(self.id))
    # BUG FIX: the query contains no %s placeholders, so no parameters may
    # be passed; passing (self._id,) makes the driver fail with
    # "not all arguments converted during string formatting"
    meta = conn_handler.execute_fetchall(sql)

    # Create the dataframe and clean it up a bit
    df = pd.DataFrame((list(x) for x in meta), columns=cols)
    df.set_index('sample_id', inplace=True, drop=True)

    return df
def qiime_map_fp(self):
    """The QIIME mapping filepath attached to the prep template

    Returns
    -------
    str
        The filepath of the QIIME mapping file
    """
    conn_handler = SQLConnectionHandler()
    # Newest mapping file first; take the top row's filepath
    sql = """SELECT filepath_id, filepath
             FROM qiita.filepath
                JOIN qiita.{0} USING (filepath_id)
                JOIN qiita.filepath_type USING (filepath_type_id)
             WHERE {1} = %s AND filepath_type = 'qiime_map'
             ORDER BY filepath_id DESC""".format(self._filepath_table,
                                                 self._id_column)
    fname = conn_handler.execute_fetchall(sql, (self._id,))[0][1]
    base_dir = get_mountpoint('templates')[0][1]
    return join(base_dir, fname)
def update(self, md_template):
    r"""Update values in the sample template

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples Ids

    Raises
    ------
    QiitaDBError
        If md_template and db do not have the same sample ids
        If md_template and db do not have the same column headers
    """
    conn_handler = SQLConnectionHandler()

    # Clean and validate the metadata template given
    new_map = self._clean_validate_template(md_template, self.id,
                                            conn_handler)
    # Retrieving current metadata
    current_map = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0} WHERE {1}=%s".format(self._table,
                                                      self._id_column),
        (self.id,)))
    dyn_vals = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))

    # merge dynamic columns into the fixed ones; study_id is internal
    for k in current_map:
        current_map[k].update(dyn_vals[k])
        current_map[k].pop('study_id', None)

    # converting sql results to dataframe
    current_map = pd.DataFrame.from_dict(current_map, orient='index')

    # simple validations of sample ids and column names
    samples_diff = set(
        new_map.index.tolist()) - set(current_map.index.tolist())
    if samples_diff:
        raise QiitaDBError('The new sample template differs from what is '
                           'stored in database by these samples names: %s'
                           % ', '.join(samples_diff))
    columns_diff = set(new_map.columns) - set(current_map.columns)
    if columns_diff:
        raise QiitaDBError('The new sample template differs from what is '
                           'stored in database by these columns names: %s'
                           % ', '.join(columns_diff))

    # here we are comparing two dataframes following:
    # http://stackoverflow.com/a/17095620/4228285
    # Sort both frames along both axes so the element-wise comparison
    # lines up the same (sample, column) cells
    current_map.sort(axis=0, inplace=True)
    current_map.sort(axis=1, inplace=True)
    new_map.sort(axis=0, inplace=True)
    new_map.sort(axis=1, inplace=True)
    map_diff = (current_map != new_map).stack()
    # keep only the cells that actually changed
    map_diff = map_diff[map_diff]
    map_diff.index.names = ['id', 'column']
    changed_cols = map_diff.index.get_level_values('column').unique()

    # apply the changes column by column, then regenerate on-disk files
    for col in changed_cols:
        self.update_category(col, new_map[col].to_dict())

    self.generate_files()
def create_qiime_mapping_file(self, prep_template_fp):
    """This creates the QIIME mapping file and links it in the db.

    Parameters
    ----------
    prep_template_fp : str
        The prep template filepath that should be concatenated to the
        sample template go used to generate a new QIIME mapping file

    Returns
    -------
    filepath : str
        The filepath of the created QIIME mapping file

    Raises
    ------
    ValueError
        If the prep template is not a subset of the sample template
    """
    # Column names QIIME expects in a mapping file, keyed by the
    # lower-cased names used in the templates
    rename_cols = {
        'barcode': 'BarcodeSequence',
        'barcodesequence': 'BarcodeSequence',
        'primer': 'LinkerPrimerSequence',
        'linkerprimersequence': 'LinkerPrimerSequence',
        'description': 'Description',
    }

    # getting the latest sample template
    conn_handler = SQLConnectionHandler()
    sql = """SELECT filepath_id, filepath
             FROM qiita.filepath
             JOIN qiita.sample_template_filepath
             USING (filepath_id)
             WHERE study_id=%s
             ORDER BY filepath_id DESC"""
    sample_template_fname = conn_handler.execute_fetchall(
        sql, (self.study_id,))[0][1]
    _, fp = get_mountpoint('templates')[0]
    sample_template_fp = join(fp, sample_template_fname)

    # reading files via pandas
    st = load_template_to_dataframe(sample_template_fp)
    pt = load_template_to_dataframe(prep_template_fp)
    st_sample_names = set(st.index)
    pt_sample_names = set(pt.index)

    # every prep sample must exist in the sample template
    if not pt_sample_names.issubset(st_sample_names):
        raise ValueError(
            "Prep template is not a sub set of the sample template, files:"
            "%s %s - samples: %s" % (sample_template_fp, prep_template_fp,
                                     str(pt_sample_names-st_sample_names)))

    # join on sample id; prep columns win the suffix on name clashes
    mapping = pt.join(st, lsuffix="_prep")
    mapping.rename(columns=rename_cols, inplace=True)

    # Gets the orginal mapping columns and readjust the order to comply
    # with QIIME requirements (BarcodeSequence and LinkerPrimerSequence
    # first after the sample id, Description last)
    cols = mapping.columns.values.tolist()
    cols.remove('BarcodeSequence')
    cols.remove('LinkerPrimerSequence')
    cols.remove('Description')
    new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']
    new_cols.extend(cols)
    new_cols.append('Description')
    mapping = mapping[new_cols]

    # figuring out the filepath for the QIIME map file
    _id, fp = get_mountpoint('templates')[0]
    filepath = join(fp, '%d_prep_%d_qiime_%s.txt' % (self.study_id,
                    self.id, strftime("%Y%m%d-%H%M%S")))

    # Save the mapping file
    mapping.to_csv(filepath, index_label='#SampleID', na_rep='unknown',
                   sep='\t')

    # adding the fp to the object
    self.add_filepath(filepath)

    return filepath
class TestConnHandler(TestCase):
    """Exercises the SQLConnectionHandler queue machinery.

    Queues accumulate SQL statements that are executed together by
    execute_queue, rolling back as a unit on failure.

    NOTE(review): several literal email strings below look redacted
    ('*****@*****.**'); some are used as %-format targets, so the
    original format specifiers appear to have been lost -- confirm
    against the upstream test suite.
    """

    def test_create_queue(self):
        # A created queue must show up in list_queues
        self.conn_handler.create_queue("toy_queue")
        self.assertEqual(self.conn_handler.list_queues(), ["toy_queue"])

    def test_run_queue(self):
        # Queue an INSERT followed by an UPDATE; execute_queue returns []
        # when no statement RETURNs anything
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        # Verify both statements were committed
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                '222-222-2221', None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_many(self):
        # many=True executes the same statement once per arg tuple
        sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
               "phone) VALUES (%s, %s, %s, %s)")
        sql_args = [
            ('*****@*****.**', 'p1', 'pass1', '111-111'),
            ('*****@*****.**', 'p2', 'pass2', '111-222')
        ]
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", sql, sql_args, many=True)
        self.conn_handler.execute_queue('toy_queue')
        # make sure both users added
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass1', 'p1', None, None, '111-111',
                None, None, None]]
        self.assertEqual(obs, exp)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass2', 'p2', None, None, '111-222',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_last_return(self):
        # execute_queue returns the RETURNING values of the last statement
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s RETURNING phone",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, ['222-222-2221'])

    def test_run_queue_placeholders(self):
        # '{0}'/'{1}' placeholders in later statements are substituted with
        # the RETURNING values of earlier statements in the same queue
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s) RETURNING email, password",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s AND password = %s",
            ['{0}', '{1}'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                '222-222-2221', None, None, None]]
        self.assertEqual(obs, exp)

    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
            "(%s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass'])
        # NOTE(review): [{0}, {1}] builds set literals, unlike the string
        # placeholders '{0}'/'{1}' used above -- presumably intentional to
        # trigger the failure path, but confirm against upstream
        self.conn_handler.add_to_queue(
            "toy_queue",
            "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s",
            [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")
        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(obs, [])

    def test_huge_queue(self):
        # A long queue must roll back as a unit when a late statement fails
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        # add tons of inserts to queue
        for x in range(120):
            self.conn_handler.add_to_queue(
                "toy_queue",
                "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
                "(%s, %s, %s)",
                ['*****@*****.**' % x, 'Toy', 'pass'])
        # add failing insert as final item in queue
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_BADTABLE (email, name, password) VALUES "
            "(%s, %s, %s)",
            ['*****@*****.**' % x, 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue",
            "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s",
            [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")
        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(obs, [])

    def test_get_temp_queue(self):
        # Temp queues exist only until they are executed
        my_queue = self.conn_handler.get_temp_queue()
        self.assertTrue(my_queue in self.conn_handler.list_queues())

        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.qiita_user")
        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.user_level")
        self.conn_handler.execute_queue(my_queue)

        self.assertTrue(my_queue not in self.conn_handler.list_queues())
# This patch recreates all the QIIME mapping files to avoid lower/upper case # problems. See https://github.com/biocore/qiita/issues/799 # # heavily based on 7.py from os.path import basename from skbio.util import flatten from qiita_db.sql_connection import SQLConnectionHandler from qiita_db.metadata_template import PrepTemplate conn_handler = SQLConnectionHandler() sql = "SELECT prep_template_id FROM qiita.prep_template" all_ids = conn_handler.execute_fetchall(sql) q_name = 'unlink-bad-mapping-files' conn_handler.create_queue(q_name) # remove all the bad mapping files for prep_template_id in all_ids: prep_template_id = prep_template_id[0] pt = PrepTemplate(prep_template_id) fps = pt.get_filepaths() # get the QIIME mapping file, note that the way to figure out what is and # what's not a qiime mapping file is to check for the existance of the # word qiime in the basename of the file path, hacky but that's the way # it is being done in qiita_pet/uimodules/raw_data_tab.py
# Feb 11, 2015 # This changes all analysis files to be relative path instead of absolute from os.path import basename, dirname from qiita_db.util import get_mountpoint from qiita_db.sql_connection import SQLConnectionHandler conn_handler = SQLConnectionHandler() filepaths = conn_handler.execute_fetchall( 'SELECT f.* from qiita.filepath f JOIN qiita.analysis_filepath afp ON ' 'f.filepath_id = afp.filepath_id') # retrieve relative filepaths as dictionary for matching mountpoints = {m[1].rstrip('/\\'): m[0] for m in get_mountpoint( 'analysis', conn_handler=conn_handler, retrieve_all=True)} for filepath in filepaths: filename = basename(filepath['filepath']) # find the ID of the analysis filepath used mp_id = mountpoints[dirname(filepath['filepath']).rstrip('/\\')] conn_handler.execute( 'UPDATE qiita.filepath SET filepath = %s, data_directory_id = %s WHERE' ' filepath_id = %s', [filename, mp_id, filepath['filepath_id']])
class TestConnHandler(TestCase):
    """Exercises the SQLConnectionHandler queue machinery and connection
    lifecycle.

    Queues accumulate SQL statements that are executed together by
    execute_queue, rolling back as a unit on failure.

    NOTE(review): several literal email strings below look redacted
    ('*****@*****.**'); some are used as %-format targets, so the
    original format specifiers appear to have been lost -- confirm
    against the upstream test suite.
    """

    def test_create_queue(self):
        # A created queue must show up in list_queues
        self.conn_handler.create_queue("toy_queue")
        self.assertEqual(self.conn_handler.list_queues(), ["toy_queue"])

    def test_close(self):
        # psycopg2 convention: closed == 0 means the connection is open
        self.assertEqual(self.conn_handler._user_conn.closed, 0)
        self.conn_handler.close()
        self.assertNotEqual(self.conn_handler._user_conn.closed, 0)

    def test_run_queue(self):
        # Queue an INSERT followed by an UPDATE; execute_queue returns []
        # when no statement RETURNs anything
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        # Verify both statements were committed
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                '222-222-2221', None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_many(self):
        # many=True executes the same statement once per arg tuple
        sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
               "phone) VALUES (%s, %s, %s, %s)")
        sql_args = [
            ('*****@*****.**', 'p1', 'pass1', '111-111'),
            ('*****@*****.**', 'p2', 'pass2', '111-222')
        ]
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", sql, sql_args, many=True)
        self.conn_handler.execute_queue('toy_queue')
        # make sure both users added
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass1', 'p1', None, None, '111-111',
                None, None, None]]
        self.assertEqual(obs, exp)
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 5, 'pass2', 'p2', None, None, '111-222',
                None, None, None]]
        self.assertEqual(obs, exp)

    def test_run_queue_last_return(self):
        # execute_queue returns the RETURNING values of the last statement
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s RETURNING phone",
            ['*****@*****.**'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, ['222-222-2221'])

    def test_run_queue_placeholders(self):
        # '{0}'/'{1}' placeholders in later statements are substituted with
        # the RETURNING values of earlier statements in the same queue
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue", "INSERT INTO qiita.qiita_user (email, name, password,"
            "phone) VALUES (%s, %s, %s, %s) RETURNING email, password",
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        self.conn_handler.add_to_queue(
            "toy_queue", "UPDATE qiita.qiita_user SET user_level_id = 1, "
            "phone = '222-222-2221' WHERE email = %s AND password = %s",
            ['{0}', '{1}'])
        obs = self.conn_handler.execute_queue("toy_queue")
        self.assertEqual(obs, [])
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        exp = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                '222-222-2221', None, None, None]]
        self.assertEqual(obs, exp)

    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
            "(%s, %s, %s)",
            ['*****@*****.**', 'Toy', 'pass'])
        # NOTE(review): [{0}, {1}] builds set literals, unlike the string
        # placeholders '{0}'/'{1}' used above -- presumably intentional to
        # trigger the failure path, but confirm against upstream
        self.conn_handler.add_to_queue(
            "toy_queue",
            "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s",
            [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")
        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(obs, [])

    def test_huge_queue(self):
        # A long queue must roll back as a unit when a late statement fails
        self.conn_handler = SQLConnectionHandler()
        self.conn_handler.create_queue("toy_queue")
        # add tons of inserts to queue
        for x in range(120):
            self.conn_handler.add_to_queue(
                "toy_queue",
                "INSERT INTO qiita.qiita_user (email, name, password) VALUES "
                "(%s, %s, %s)",
                ['*****@*****.**' % x, 'Toy', 'pass'])
        # add failing insert as final item in queue
        self.conn_handler.add_to_queue(
            "toy_queue",
            "INSERT INTO qiita.qiita_BADTABLE (email, name, password) VALUES "
            "(%s, %s, %s)",
            ['*****@*****.**' % x, 'Toy', 'pass'])
        self.conn_handler.add_to_queue(
            "toy_queue",
            "UPDATE qiita.qiita_user SET user_level_id = 1 "
            "WHERE email = %s and password = %s",
            [{0}, {1}])
        with self.assertRaises(QiitaDBExecutionError):
            self.conn_handler.execute_queue("toy_queue")
        # make sure roll back correctly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(obs, [])

    def test_get_temp_queue(self):
        # Temp queues exist only until they are executed
        my_queue = self.conn_handler.get_temp_queue()
        self.assertTrue(my_queue in self.conn_handler.list_queues())

        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.qiita_user")
        self.conn_handler.add_to_queue(my_queue,
                                       "SELECT * from qiita.user_level")
        self.conn_handler.execute_queue(my_queue)

        self.assertTrue(my_queue not in self.conn_handler.list_queues())
# Patch: delete RawData objects that have no prep template attached,
# moving any files they own back to the study upload folder.

from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.data import RawData
from qiita_db.util import move_filepaths_to_upload_folder

conn_handler = SQLConnectionHandler()

# queue so the deletions commit (or roll back) together
queue = "PATCH_25"
conn_handler.create_queue(queue)

# the system may contain raw data with no prep template associated to it.
# Retrieve all those raw data ids
sql = """SELECT raw_data_id
         FROM qiita.raw_data
         WHERE raw_data_id NOT IN (
            SELECT DISTINCT raw_data_id FROM qiita.prep_template);"""
rd_ids = [x[0] for x in conn_handler.execute_fetchall(sql)]

# We will delete those RawData. However, if they have files attached, we should
# move them to the uploads folder of the study
sql_detach = """DELETE FROM qiita.study_raw_data
                WHERE raw_data_id = %s AND study_id = %s"""
sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s"
sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
sql_studies = """SELECT study_id FROM qiita.study_raw_data
                 WHERE raw_data_id = %s"""
move_files = []
for rd_id in rd_ids:
    rd = RawData(rd_id)
    filepaths = rd.get_filepaths()
    studies = [s[0] for s in conn_handler.execute_fetchall(sql_studies,
                                                           (rd_id,))]
    # NOTE(review): this excerpt ends here -- the statements that use
    # sql_detach/sql_unlink/sql_delete, populate move_files, and execute
    # the queue are not visible in this chunk; confirm against the full
    # patch file
def create(cls, md_template, raw_data, study, data_type,
           investigation_type=None):
    r"""Creates the metadata template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples Ids
    raw_data : RawData
        The raw_data to which the prep template belongs to.
    study : Study
        The study to which the prep template belongs to.
    data_type : str or int
        The data_type of the prep template
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` to access to the PrepTemplate stored in the DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    """
    # If the investigation_type is supplied, make sure it is one of
    # the recognized investigation types
    if investigation_type is not None:
        cls.validate_investigation_type(investigation_type)

    # Get a connection handler
    conn_handler = SQLConnectionHandler()
    queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
    conn_handler.create_queue(queue_name)

    # Check if the data_type is the id or the string
    if isinstance(data_type, (int, long)):
        data_type_id = data_type
        data_type_str = convert_from_id(data_type, "data_type",
                                        conn_handler)
    else:
        data_type_id = convert_to_id(data_type, "data_type", conn_handler)
        data_type_str = data_type

    md_template = cls._clean_validate_template(md_template, study.id,
                                               data_type_str, conn_handler)

    # Insert the metadata template
    # We need the prep_id for multiple calls below, which currently is not
    # supported by the queue system. Thus, executing this outside the queue
    prep_id = conn_handler.execute_fetchone(
        "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
        "investigation_type) VALUES (%s, %s, %s) RETURNING "
        "prep_template_id", (data_type_id, raw_data.id,
                             investigation_type))[0]

    cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                            conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except Exception:
        # Clean up row from qiita.prep_template
        conn_handler.execute(
            "DELETE FROM qiita.prep_template where "
            "{0} = %s".format(cls._id_column), (prep_id,))

        # Check if sample IDs present here but not in sample template
        sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
               "study_id = %s")
        # Get list of study sample IDs, prep template study IDs,
        # and their intersection
        prep_samples = set(md_template.index.values)
        unknown_samples = prep_samples.difference(
            s[0] for s in conn_handler.execute_fetchall(sql, [study.id]))
        if unknown_samples:
            # translate the queue failure into a clearer error message
            raise QiitaDBExecutionError(
                'Samples found in prep template but not sample template: '
                '%s' % ', '.join(unknown_samples))

        # some other error we haven't seen before so raise it
        raise

    pt = cls(prep_id)
    pt.generate_files()
    return pt
# 23 Nov, 2014
# This patch creates all the qiime mapping files for the existing
# prep templates
from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import PrepTemplate

conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

# regenerate a QIIME mapping file for every filepath of every prep template
rows = conn_handler.execute_fetchall(
    "SELECT prep_template_id FROM qiita.prep_template")
for row in rows:
    pt = PrepTemplate(row[0])
    study_id = pt.study_id
    for _, fpt in pt.get_filepaths():
        pt.create_qiime_mapping_file(fpt)
def create_qiime_mapping_file(self):
    """This creates the QIIME mapping file and links it in the db.

    Returns
    -------
    filepath : str
        The filepath of the created QIIME mapping file

    Raises
    ------
    ValueError
        If the prep template is not a subset of the sample template
    QiitaDBWarning
        If the QIIME-required columns are not present in the template

    Notes
    -----
    We cannot ensure that the QIIME-required columns are present in the
    metadata map. However, we have to generate a QIIME-compliant mapping
    file. Since the user may need a QIIME mapping file, but not these
    QIIME-required columns, we are going to create them and populate them
    with the value XXQIITAXX.
    """
    # Map Qiita column names to their QIIME-required counterparts
    rename_cols = {
        'barcode': 'BarcodeSequence',
        'primer': 'LinkerPrimerSequence',
        'description': 'Description',
    }

    if 'reverselinkerprimer' in self.categories():
        rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer'
        new_cols = ['BarcodeSequence', 'LinkerPrimerSequence',
                    'ReverseLinkerPrimer']
    else:
        new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']

    # getting the latest sample template
    # (ORDER BY filepath_id DESC + [0][1] picks the newest filepath)
    conn_handler = SQLConnectionHandler()
    sql = """SELECT filepath_id, filepath
             FROM qiita.filepath
             JOIN qiita.sample_template_filepath
             USING (filepath_id)
             WHERE study_id=%s
             ORDER BY filepath_id DESC"""
    sample_template_fname = conn_handler.execute_fetchall(
        sql, (self.study_id,))[0][1]
    _, fp = get_mountpoint('templates')[0]
    sample_template_fp = join(fp, sample_template_fname)

    # reading files via pandas
    st = load_template_to_dataframe(sample_template_fp)
    pt = self.to_dataframe()

    st_sample_names = set(st.index)
    pt_sample_names = set(pt.index)

    # Every prep sample must exist in the sample template, otherwise the
    # join below would silently produce NaN rows
    if not pt_sample_names.issubset(st_sample_names):
        raise ValueError(
            "Prep template is not a sub set of the sample template, files"
            "%s - samples: %s" % (sample_template_fp,
                                  ', '.join(pt_sample_names -
                                            st_sample_names)))

    # Prep columns that clash with sample columns get a "_prep" suffix
    mapping = pt.join(st, lsuffix="_prep")
    mapping.rename(columns=rename_cols, inplace=True)

    # Pre-populate the QIIME-required columns with the value XXQIITAXX
    index = mapping.index
    placeholder = ['XXQIITAXX'] * len(index)
    missing = []
    for val in viewvalues(rename_cols):
        if val not in mapping:
            missing.append(val)
            mapping[val] = pd.Series(placeholder, index=index)

    if missing:
        warnings.warn(
            "Some columns required to generate a QIIME-compliant mapping "
            "file are not present in the template. A placeholder value "
            "(XXQIITAXX) has been used to populate these columns. Missing "
            "columns: %s" % ', '.join(missing), QiitaDBWarning)

    # Gets the orginal mapping columns and readjust the order to comply
    # with QIIME requirements (Barcode/Primer first, Description last)
    cols = mapping.columns.values.tolist()
    cols.remove('BarcodeSequence')
    cols.remove('LinkerPrimerSequence')
    cols.remove('Description')
    new_cols.extend(cols)
    new_cols.append('Description')
    mapping = mapping[new_cols]

    # figuring out the filepath for the QIIME map file
    # (timestamp in the name keeps successive generations distinct)
    _id, fp = get_mountpoint('templates')[0]
    filepath = join(fp, '%d_prep_%d_qiime_%s.txt' % (self.study_id,
                    self.id, strftime("%Y%m%d-%H%M%S")))

    # Save the mapping file
    mapping.to_csv(filepath, index_label='#SampleID', na_rep='',
                   sep='\t')

    # adding the fp to the object
    self.add_filepath(
        filepath, fp_id=convert_to_id("qiime_map", "filepath_type"))

    return filepath
# Nov 22, 2014 # This patch is to create all the prep/sample template files and link them in # the database so they are present for download from os.path import join from time import strftime from qiita_db.util import get_mountpoint from qiita_db.sql_connection import SQLConnectionHandler from qiita_db.metadata_template import SampleTemplate, PrepTemplate conn_handler = SQLConnectionHandler() _id, fp_base = get_mountpoint('templates')[0] for study_id in conn_handler.execute_fetchall( "SELECT study_id FROM qiita.study"): study_id = study_id[0] if SampleTemplate.exists(study_id): st = SampleTemplate(study_id) fp = join(fp_base, '%d_%s.txt' % (study_id, strftime("%Y%m%d-%H%M%S"))) st.to_file(fp) st.add_filepath(fp) for prep_template_id in conn_handler.execute_fetchall( "SELECT prep_template_id FROM qiita.prep_template"): prep_template_id = prep_template_id[0] pt = PrepTemplate(prep_template_id) study_id = pt.study_id fp = join(fp_base, '%d_prep_%d_%s.txt' % (pt.study_id, prep_template_id, strftime("%Y%m%d-%H%M%S")))
# Mar 27, 2015 # Need to re-generate the files, given that some headers have changed from qiita_db.sql_connection import SQLConnectionHandler from qiita_db.metadata_template import SampleTemplate, PrepTemplate conn_handler = SQLConnectionHandler() # Get all the sample templates sql = """SELECT DISTINCT study_id from qiita.study_sample""" study_ids = {s[0] for s in conn_handler.execute_fetchall(sql)} for s_id in study_ids: SampleTemplate(s_id).generate_files() # Get all the prep templates sql = """SELECT prep_template_id from qiita.prep_template""" prep_ids = {p[0] for p in conn_handler.execute_fetchall(sql)} for prep_id in prep_ids: PrepTemplate(prep_id).generate_files()