def test_create_templates_from_qiime_mapping_file_reverse_linker(self):
    with TRN:
        TRN.add("SELECT last_value FROM "
                "qiita.prep_template_prep_template_id_seq")
        curr_id = TRN.execute_fetchflatten()[0]
    obs_st, obs_pt = create_templates_from_qiime_mapping_file(
        StringIO(QIIME_MAP_WITH_REVERSE_LINKER_PRIMER),
        self.new_study, "16S")

    # Be green: clean the environment
    for template in [obs_st, obs_pt]:
        for _, fp in template.get_filepaths():
            self._clean_up_files.append(fp)

    self.assertEqual(obs_st.id, self.new_study.id)
    self.assertEqual(obs_pt.id, curr_id + 1)

    # Check that each template has the correct columns
    exp = {"physical_specimen_location", "physical_specimen_remaining",
           "dna_extracted", "sample_type", "host_subject_id", "latitude",
           "longitude", "taxon_id", "scientific_name",
           "collection_timestamp", "description"}
    self.assertEqual(set(obs_st.categories()), exp)

    exp = {"barcode", "primer", "center_name", "run_prefix", "platform",
           "library_construction_protocol", "instrument_model",
           "experiment_design_description", "reverselinkerprimer"}
    self.assertEqual(set(obs_pt.categories()), exp)
def preprocessed_data(self):
    with TRN:
        sql = """SELECT preprocessed_data_id
                 FROM qiita.prep_template_preprocessed_data
                 WHERE prep_template_id = %s"""
        TRN.add(sql, [self.id])
        return TRN.execute_fetchflatten()
def test_full_query_and_insertion(self):
    # let's archive different values from different jobs
    with TRN:
        # command_id 3 -> close reference picking;
        # processing_job_status_id 3 -> success
        sql = """SELECT processing_job_id
                 FROM qiita.processing_job
                 WHERE command_id = 3 AND processing_job_status_id = 3"""
        TRN.add(sql)
        jobs = TRN.execute_fetchflatten()

        # this is so we can also test the parent merging scheme
        # command_id 1 -> split libraries
        sql = """UPDATE qiita.command_parameter
                 SET check_biom_merge = True
                 WHERE command_id = 1
                     AND parameter_name = 'barcode_type'"""
        TRN.add(sql)
        TRN.execute()

    exp_all_features = {}
    for j in jobs:
        featureA = 'AA - %s' % j
        featureB = 'BB - %s' % j

        # testing that nothing is there
        data = {'job_id': j, 'features': [featureA, featureB]}
        obs = self.post(
            '/qiita_db/archive/observations/', headers=self.header,
            data=data)
        exp = {}
        self.assertEqual(obs.code, 200)
        self.assertEqual(loads(obs.body), exp)

        # inserting and testing insertion
        data = {'path': j,
                'value': dumps({featureA: 'CA', featureB: 'CB'})}
        obs = self.patch(
            '/qiita_db/archive/observations/', headers=self.header,
            data=data)
        exp = {featureA: 'CA', featureB: 'CB'}
        self.assertEqual(obs.code, 200)
        self.assertEqual(loads(obs.body), exp)

        exp_all_features[featureA] = 'CA'
        exp_all_features[featureB] = 'CB'

    # testing retrieval of all features
    obs = Archive.retrieve_feature_values()
    self.assertEqual(obs, exp_all_features)

    # this merging scheme doesn't exist so the result should be empty
    obs = Archive.retrieve_feature_values(archive_merging_scheme='')
    self.assertEqual(obs, {})

    obs = Archive.retrieve_feature_values(
        archive_merging_scheme='Pick closed-reference OTUs | Split '
                               'libraries FASTQ (barcode_type: golay_12)')
    self.assertEqual(obs, exp_all_features)
def test_execute_fetchflatten(self):
    with TRN:
        sql = """INSERT INTO qiita.test_table (str_column, int_column)
                 VALUES (%s, %s)"""
        args = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
        TRN.add(sql, args, many=True)

        sql = "SELECT str_column, int_column FROM qiita.test_table"
        TRN.add(sql)

        sql = "SELECT int_column FROM qiita.test_table"
        TRN.add(sql)
        obs = TRN.execute_fetchflatten()
        self.assertEqual(obs, [1, 2, 3])

        sql = "SELECT 42"
        TRN.add(sql)
        obs = TRN.execute_fetchflatten(idx=3)
        self.assertEqual(obs, ["insert1", 1, "insert2", 2, "insert3", 3])
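# A minimal sketch of the flattening behavior the test above exercises,
# assuming TRN queues queries in order: with many=True each parameter set
# counts as its own query, so the three INSERTs occupy indices 0-2, the
# two-column SELECT is index 3 (hence idx=3 returns it interleaved row by
# row) and the single-column SELECT is index 4, the default "latest" result.
with TRN:
    TRN.add("SELECT 1, 2 UNION ALL SELECT 3, 4")
    # rows [(1, 2), (3, 4)] flatten row-by-row into a single list
    flat = TRN.execute_fetchflatten()
    assert flat == [1, 2, 3, 4]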
def _get_sample_ids(self):
    r"""Returns all the available samples for the metadata template

    Returns
    -------
    set of str
        The set of all available sample ids
    """
    with TRN:
        sql = "SELECT sample_id FROM qiita.{0} WHERE {1}=%s".format(
            self._table, self._id_column)
        TRN.add(sql, [self._id])
        return set(TRN.execute_fetchflatten())
def metadata_headers():
    """Returns metadata headers available

    Returns
    -------
    list
        Alphabetical list of all metadata headers available
    """
    with TRN:
        sql = """SELECT DISTINCT column_name
                 FROM qiita.study_sample_columns
                 ORDER BY column_name"""
        TRN.add(sql)
        return TRN.execute_fetchflatten()
def __call__(self, searchstr, user):
    """Runs a Study query and returns matching studies and samples

    Parameters
    ----------
    searchstr : str
        Search string to use
    user : User object
        User making the search. Needed for permissions checks.

    Returns
    -------
    dict
        Found samples in format
        {study_id: [[samp_id1, meta1, meta2, ...],
                    [samp_id2, meta1, meta2, ...], ...]}
    list
        metadata column names searched for

    Notes
    -----
    Metadata information for each sample is in the same order as the
    metadata columns list returned

    Metadata column names and string searches are case-sensitive
    """
    with TRN:
        study_sql, sample_sql, meta_headers = \
            self._parse_study_search_string(searchstr, True)

        # get all studies containing the metadata headers requested
        TRN.add(study_sql)
        study_ids = set(TRN.execute_fetchflatten())

        # strip to only the studies the user has access to
        if user.level not in {'admin', 'dev', 'superuser'}:
            study_ids = study_ids.intersection(
                Study.get_by_status('public') | user.user_studies |
                user.shared_studies)

        results = {}
        # run the search on each study to get the matching samples
        for sid in study_ids:
            TRN.add(sample_sql.format(sid))
            study_res = TRN.execute_fetchindex()
            if study_res:
                # only add the study to the results if it actually has
                # samples in the results
                results[sid] = study_res
        self.results = results
        self.meta_headers = meta_headers
        return results, meta_headers
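# A hypothetical usage sketch of the search call above; the class name, the
# search string and the user email are assumptions for illustration, not
# taken from the snippet itself:
searcher = QiitaStudySearch()
results, headers = searcher('sample_type = "stool"', User('test@foo.bar'))
for study_id, rows in results.items():
    # each row is [sample_id, meta1, meta2, ...], ordered like `headers`
    print(study_id, len(rows))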
def is_test_environment():
    """Checks if Qiita is running in a test environment

    Returns
    -------
    bool
        Whether Qiita is running in a test environment or not

    Notes
    -----
    Qiita is running in a test environment if:
    - It is connected to a test database, AND
    - The config file indicates that this is a test environment
    """
    # Check that we are not in a production environment
    with TRN:
        TRN.add("SELECT test FROM settings")
        test_db = TRN.execute_fetchflatten()[0]
    return qiita_config.test_environment and test_db
# February 7, 2015
# This patch recreates all the QIIME mapping files to avoid lower/upper case
# problems. See https://github.com/biocore/qiita/issues/799
#
# heavily based on 7.py

from os.path import basename

from qiita_db.sql_connection import TRN
from qiita_db.metadata_template import PrepTemplate

with TRN:
    sql = "SELECT prep_template_id FROM qiita.prep_template"
    TRN.add(sql)
    all_ids = TRN.execute_fetchflatten()

    # remove all the bad mapping files
    for prep_template_id in all_ids:
        pt = PrepTemplate(prep_template_id)
        fps = pt.get_filepaths()

        # get the QIIME mapping file; note that the way to figure out what
        # is and what is not a QIIME mapping file is to check for the
        # existence of the word "qiime" in the basename of the file path.
        # Hacky, but that's the way it is being done in
        # qiita_pet/uimodules/raw_data_tab.py
        mapping_files = [f for f in fps if '_qiime_' in basename(f[1])]

        table = 'prep_template_filepath'
        column = 'prep_template_id'

        # unlink all the qiime mapping files for this prep template object
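        # A hedged sketch of the unlink-and-recreate step the comment above
        # announces (the remainder of the original patch is not shown; the
        # DELETE statement is an assumption built from the `table` and
        # `column` names defined above):
        for fp_id, _ in mapping_files:
            sql = """DELETE FROM qiita.{0}
                     WHERE {1} = %s AND filepath_id = %s""".format(
                table, column)
            TRN.add(sql, [prep_template_id, fp_id])

        # regenerate a clean mapping file from the remaining template files
        for _, fpt in [f for f in fps if '_qiime_' not in basename(f[1])]:
            pt.create_qiime_mapping_file(fpt)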
from os.path import getsize, join, dirname, abspath, exists

# compute_checksum and get_filepath_information are assumed to come from
# qiita_db.util, as elsewhere in the code base
from qiita_db.util import compute_checksum, get_filepath_information
from qiita_db.sql_connection import TRN


def calculate(finfo):
    try:
        size = getsize(finfo['fullpath'])
    except (FileNotFoundError, PermissionError):
        # return the full file info so the failure can be identified
        return finfo, None, None
    checksum = compute_checksum(finfo['fullpath'])
    return finfo['filepath_id'], checksum, size


# get all filepaths and their filepath information; takes ~10 min
with TRN:
    TRN.add("SELECT filepath_id FROM qiita.filepath")
    files = []
    for fid in TRN.execute_fetchflatten():
        files.append(get_filepath_information(fid))

# just get the filepath ids that haven't been processed; the format of this
# file is filepath_id[tab]checksum[tab]filesize
fpath = join(dirname(abspath(__file__)), '74.py.cache.tsv')
processed = []
if exists(fpath):
    with open(fpath, 'r') as f:
        processed = [int(line.split('\t')[0])
                     for line in f.read().split('\n') if line != '']
files_curr = [f for f in files if f['filepath_id'] not in processed]

# let's use 20 processors and in each iteration use 120 files
fids = 120
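# A hedged sketch of the parallel step the comment above describes (the
# original patch's exact loop is not shown; using concurrent.futures is an
# assumption):
from concurrent.futures import ProcessPoolExecutor

with ProcessPoolExecutor(max_workers=20) as executor:
    for i in range(0, len(files_curr), fids):
        batch = files_curr[i:i + fids]
        with open(fpath, 'a') as f:
            for fid, checksum, size in executor.map(calculate, batch):
                if checksum is None:
                    # file was missing or unreadable; skip it
                    continue
                f.write('%s\t%s\t%s\n' % (fid, checksum, size))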
from qiita_db.sql_connection import TRN

with TRN:
    sql = """SELECT DISTINCT table_name
             FROM information_schema.columns
             WHERE table_name LIKE '%_bk'"""
    TRN.add(sql)
    tables = ['qiita.%s' % t for t in TRN.execute_fetchflatten()]

# drop the backup tables in chunks so a single DROP statement doesn't grow
# too large
chunk_size = 200
for i in range(0, len(tables), chunk_size):
    chunk = tables[i:chunk_size + i]
    sql = "DROP TABLE %s" % ', '.join(chunk)
    with TRN:
        TRN.add(sql)
        TRN.execute()
        TRN.commit()
@classmethod
def create(cls, md_template, study, data_type, investigation_type=None):
    r"""Creates the metadata template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    study : Study
        The study to which the prep template belongs.
    data_type : str or int
        The data_type of the prep template
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` to access to the PrepTemplate stored in the DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    """
    with TRN:
        # If the investigation_type is supplied, make sure it is one of
        # the recognized investigation types
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        # Check if the data_type is the id or the string
        if isinstance(data_type, (int, long)):
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type")
        else:
            data_type_id = convert_to_id(data_type, "data_type")
            data_type_str = data_type

        pt_cols = PREP_TEMPLATE_COLUMNS
        if data_type_str in TARGET_GENE_DATA_TYPES:
            pt_cols = deepcopy(PREP_TEMPLATE_COLUMNS)
            pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)

        md_template = cls._clean_validate_template(md_template, study.id,
                                                   pt_cols)

        # Insert the metadata template
        sql = """INSERT INTO qiita.prep_template
                    (data_type_id, investigation_type)
                 VALUES (%s, %s)
                 RETURNING prep_template_id"""
        TRN.add(sql, [data_type_id, investigation_type])
        prep_id = TRN.execute_fetchlast()

        try:
            cls._common_creation_steps(md_template, prep_id)
        except Exception:
            # Check if sample IDs are present here but not in the sample
            # template: get the study sample IDs and intersect them with
            # the prep template sample IDs
            sql = """SELECT sample_id FROM qiita.study_sample
                     WHERE study_id = %s"""
            TRN.add(sql, [study.id])
            prep_samples = set(md_template.index.values)
            unknown_samples = prep_samples.difference(
                TRN.execute_fetchflatten())
            if unknown_samples:
                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample '
                    'template: %s' % ', '.join(unknown_samples))

            # some other error we haven't seen before, so raise it
            raise

        # Link the prep template with the study
        sql = """INSERT INTO qiita.study_prep_template
                    (study_id, prep_template_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [study.id, prep_id])

        TRN.execute()

        pt = cls(prep_id)
        pt.generate_files()

        return pt
# Nov 28, 2017 (only in py file)
# Adding a new command into Qiita/Alpha: delete_analysis

from qiita_db.software import Software, Command
from qiita_db.sql_connection import TRN

# Create the delete_analysis command
Command.create(Software.from_name_and_version('Qiita', 'alpha'),
               'delete_analysis', 'Deletes a full analysis',
               {'analysis_id': ['integer', None]})

# Make sure that all validate commands have the "analysis" parameter
with TRN:
    # Get all validate commands that are missing the analysis parameter
    sql = """SELECT command_id
             FROM qiita.software_command sc
             WHERE name = 'Validate' AND NOT (
                SELECT EXISTS(SELECT *
                              FROM qiita.command_parameter
                              WHERE parameter_name = 'analysis'
                                  AND command_id = sc.command_id));"""
    TRN.add(sql)

    # use placeholders here: the values are supplied per command through
    # sql_params below, so they must not be hardcoded in the SQL
    sql = """INSERT INTO qiita.command_parameter
                (command_id, parameter_name, parameter_type, required,
                 default_value, name_order, check_biom_merge)
             VALUES (%s, %s, %s, %s, %s, %s, %s)"""
    sql_params = [[cmd_id, 'analysis', 'analysis', False, None, None, False]
                  for cmd_id in TRN.execute_fetchflatten()]
    TRN.add(sql, sql_params, many=True)
    TRN.execute()
def postgres_test(**kwargs):
    """Open a connection and query postgres"""
    from qiita_db.sql_connection import TRN

    with TRN:
        TRN.add("SELECT 42")
        return TRN.execute_fetchflatten()[0]
# Mar 27, 2015
# Need to re-generate the files, given that some headers have changed

from qiita_db.sql_connection import TRN
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

with TRN:
    # Get all the sample templates
    TRN.add("SELECT DISTINCT study_id FROM qiita.study_sample")
    study_ids = TRN.execute_fetchflatten()

    for s_id in study_ids:
        SampleTemplate(s_id).generate_files()

    # Get all the prep templates
    TRN.add("SELECT DISTINCT prep_template_id FROM qiita.prep_template")
    prep_ids = TRN.execute_fetchflatten()

    for prep_id in prep_ids:
        PrepTemplate(prep_id).generate_files()
from qiita_db.sql_connection import TRN

# Due to the size of these changes we will process each table in its own
# transaction rather than doing everything in a single one
with TRN:
    # select all sample and prep table names
    sql = """SELECT DISTINCT table_name
             FROM information_schema.columns
             WHERE (table_name LIKE 'sample_%'
                    OR table_name LIKE 'prep_%')
                AND table_name NOT LIKE '%template%'"""
    TRN.add(sql)
    all_tables = TRN.execute_fetchflatten()

for table in all_tables:
    with TRN:
        sql = """SELECT column_name
                 FROM information_schema.columns
                 WHERE table_name = %s
                 ORDER BY column_name"""
        TRN.add(sql, [table])
        for column in TRN.execute_fetchflatten():
            sql = "ALTER TABLE qiita.%s ALTER COLUMN %s TYPE VARCHAR" % (
                table, column)
            TRN.add(sql)
        TRN.execute()
# NA_VALUES, FALSE_VALUES and TRUE_VALUES are assumed to come from
# qiita_db.metadata_template.constants, as elsewhere in the code base
from qiita_db.metadata_template.constants import (NA_VALUES, FALSE_VALUES,
                                                  TRUE_VALUES)
from qiita_db.sql_connection import TRN

nans = tuple(NA_VALUES)
false_vals = tuple(FALSE_VALUES)
true_vals = tuple(TRUE_VALUES)

st_update = set()
pr_update = set()

with TRN:
    sql = r"""SELECT table_name
              FROM information_schema.tables
              WHERE table_schema = 'qiita'
                  AND (table_name SIMILAR TO 'sample\_[0-9]+'
                       OR table_name SIMILAR TO 'prep\_[0-9]+')"""
    TRN.add(sql)
    tables = TRN.execute_fetchflatten()

    cols_sql = """SELECT column_name
                  FROM information_schema.columns
                  WHERE table_name = %s
                      AND data_type = 'character varying'"""
    alter_sql = """ALTER TABLE qiita.{0}
                   ALTER COLUMN {1} TYPE bool
                   USING CASE
                       WHEN {1} IN %s THEN FALSE
                       WHEN {1} IN %s THEN TRUE
                   END"""
    null_sql = "UPDATE qiita.{0} SET {1} = NULL WHERE {1} IN %s"
    ssc_update_sql = """UPDATE qiita.study_sample_columns
                        SET column_type = 'bool'
                        WHERE study_id = %s AND column_name = %s"""
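    # A hedged sketch of how the statements above are likely applied (the
    # original loop is not shown; the check for which varchar columns
    # qualify as boolean is an assumption):
    for table in tables:
        TRN.add(cols_sql, [table])
        for col in TRN.execute_fetchflatten():
            # only convert a column if every value is a recognized
            # boolean or NA string
            TRN.add("SELECT DISTINCT {0} FROM qiita.{1}".format(col, table))
            vals = set(TRN.execute_fetchflatten())
            vals.discard(None)
            if vals and vals.issubset(set(nans + false_vals + true_vals)):
                TRN.add(null_sql.format(table, col), [nans])
                TRN.add(alter_sql.format(table, col),
                        [false_vals, true_vals])
    TRN.execute()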
# make the RawData to be effectively just a container for the raw files,
# which is how it was acting previously.

from qiita_db.sql_connection import TRN
from qiita_db.data import RawData
from qiita_db.util import move_filepaths_to_upload_folder

with TRN:
    # the system may contain raw data with no prep template associated to
    # it. Retrieve all those raw data ids
    sql = """SELECT raw_data_id
             FROM qiita.raw_data
             WHERE raw_data_id NOT IN (
                SELECT DISTINCT raw_data_id FROM qiita.prep_template);"""
    TRN.add(sql)
    rd_ids = TRN.execute_fetchflatten()

    # We will delete those RawData. However, if they have files attached,
    # we should move them to the uploads folder of the study
    sql_detach = """DELETE FROM qiita.study_raw_data
                    WHERE raw_data_id = %s AND study_id = %s"""
    sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s"
    sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
    sql_studies = """SELECT study_id FROM qiita.study_raw_data
                     WHERE raw_data_id = %s"""
    move_files = []
    for rd_id in rd_ids:
        rd = RawData(rd_id)
        filepaths = rd.get_filepaths()
        TRN.add(sql_studies, [rd_id])
        studies = TRN.execute_fetchflatten()
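        # A hedged sketch of how the statements prepared above are likely
        # applied (the rest of the original patch is not shown):
        if filepaths:
            # remember the files so they can be moved to the study's
            # uploads folder once the links are gone
            move_files.append((studies[0], filepaths))
        for study_id in studies:
            TRN.add(sql_detach, [rd_id, study_id])
        TRN.add(sql_unlink, [rd_id])
        TRN.add(sql_delete, [rd_id])
    TRN.execute()

    for study_id, filepaths in move_files:
        move_filepaths_to_upload_folder(study_id, filepaths)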
# 23 Nov, 2014
# This patch creates all the qiime mapping files for the existing
# prep templates

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import TRN
from qiita_db.metadata_template import PrepTemplate

with TRN:
    _id, fp_base = get_mountpoint('templates')[0]

    TRN.add("SELECT prep_template_id FROM qiita.prep_template")
    for prep_template_id in TRN.execute_fetchflatten():
        pt = PrepTemplate(prep_template_id)
        study_id = pt.study_id

        for _, fpt in pt.get_filepaths():
            pt.create_qiime_mapping_file(fpt)
import pandas as pd

from os.path import join, dirname, abspath, exists

from qiita_db.sql_connection import TRN

with TRN:
    sql = """SELECT filepath_id FROM qiita.filepath"""
    TRN.add(sql)
    fids = TRN.execute_fetchflatten()

fpath = join(dirname(abspath(__file__)), 'support_files', 'patches',
             'python_patches', '74.py.cache.tsv')
if not exists(fpath):
    raise ValueError("%s doesn't exist, have you run step 1?" % fpath)

df = pd.read_csv(fpath, sep='\t', index_col=0, dtype=str,
                 names=['filepath_id', 'checksum', 'fp_size'])
cache = df.to_dict('index')

args = []
for fid in fids:
    # the cache keys are strings because the tsv was read with dtype=str,
    # while the database returns integer ids
    key = str(fid)
    if key not in cache:
        print('missing: %d' % fid)
    else:
        args.append([cache[key]['fp_size'], cache[key]['checksum'], fid])

with TRN:
    sql = """UPDATE qiita.filepath
             SET fp_size = %s, checksum = %s
             WHERE filepath_id = %s"""
    TRN.add(sql, args, many=True)
    TRN.execute()