def delete_sample_template(self, study, user, callback):
    """Delete sample template

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    template_id = int(self.get_argument('sample_template_id'))

    try:
        SampleTemplate.delete(template_id)
    except Exception as e:
        # surface the failure to the user rather than crashing the handler
        msg = "Couldn't remove %d sample template: %s" % (
            template_id, str(e))
        msg_level = "danger"
    else:
        msg = ("Sample template %d has been deleted from study: "
               "<b><i>%s</i></b>" % (template_id, study.title))
        msg_level = "success"

    callback((msg, msg_level, 'study_information_tab', None, None))
def sample_template_columns_get_req(study_id, column, user):
    """Returns the columns of the sample template

    Parameters
    ----------
    study_id: int
        The study to retrieve the sample information summary
    column: str
        The column of interest, if None send all columns
    user: qiita_db.user
        The user performing the request

    Returns
    -------
    list of str
        The result of the search

    Raises
    ------
    HTTPError
        404 If the sample template doesn't exist
    """
    # Verify study access and that the sample template is present
    sample_template_checks(study_id, user, check_exists=True)

    template = SampleTemplate(study_id)
    if column is None:
        # no specific column requested: return every category name
        return template.categories()
    return list(template.get_category(column).values())
def update_sample_template(self, study, user, callback):
    """Update a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done

    Raises
    ------
    HTTPError
        If the sample template file does not exists
    """
    # If we reached this function the "sample_template" argument must be
    # defined; otherwise tornado raises its own error
    sample_template = self.get_argument('sample_template')

    # Optimistic defaults; overwritten below on warning or failure
    msg = "The sample template '%s' has been updated" % sample_template
    msg_level = "success"

    # Resolve the template's path inside the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    fp_rsp = join(base_fp, str(study.id), sample_template)

    if not exists(fp_rsp):
        # The file does not exist, fail nicely
        raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)

    try:
        with warnings.catch_warnings(record=True) as warns:
            # extend with any new columns/samples, then update existing
            # values, and finally remove the uploaded file
            st = SampleTemplate(study.id)
            df = load_template_to_dataframe(fp_rsp)
            st.extend(df)
            st.update(df)
            remove(fp_rsp)

            # collapse all warnings into one message; this is discarded
            # if an exception is raised below
            if warns:
                msg = '\n'.join(set(str(w.message) for w in warns))
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        msg = html_error_message % ('updating the sample template:',
                                    basename(fp_rsp), str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))
def sample_template_overview_handler_get_request(study_id, user):
    """Return the sample-template overview information for a study

    Parameters
    ----------
    study_id : int
        The study whose sample template overview is requested
    user : qiita_db.user.User
        The user performing the request

    Returns
    -------
    dict of {str: object}
        Keys: 'exists', 'uploaded_files', 'data_types', 'user_can_edit',
        'job', 'download_id', 'old_files', 'num_samples', 'num_columns'
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    # Check if the sample template exists
    exists = SampleTemplate.exists(study_id)

    # The following information should always be provided:
    # The files that have been uploaded to the system and can be a
    # sample template file
    files = [f for _, f, _ in get_files_from_uploads_folders(study_id)
             if f.endswith(('txt', 'tsv', 'xlsx'))]

    # If there is a job associated with the sample information, the job id
    job = None
    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    if job_info:
        job = loads(job_info)['job_id']

    # Specific information if it exists or not:
    data_types = []
    st_fp_id = None
    old_files = []
    num_samples = 0
    num_cols = 0

    if exists:
        # If it exists we need to provide:
        # The id of the sample template file so the user can download it
        # and the list of old filepaths
        st = SampleTemplate(study_id)
        all_st_files = st.get_filepaths()
        # The current sample template file is the first one in the list
        # (pop(0)) and we are interested only in the id ([0])
        st_fp_id = all_st_files.pop(0)[0]
        # For the old filepaths we are only interested in their basename
        old_files = [basename(fp) for _, fp in all_st_files]
        # The number of samples - this is a space efficient way of counting
        # the number of samples. Doing len(list(st.keys())) creates a list
        # that we are not using
        num_samples = sum(1 for _ in st.keys())
        # The number of columns
        num_cols = len(st.categories())
    else:
        # It doesn't exist, we also need to provide the data_types in case
        # the user uploads a QIIME mapping file
        data_types = sorted(data_types_get_req()['data_types'])

    return {'exists': exists,
            'uploaded_files': files,
            'data_types': data_types,
            'user_can_edit': Study(study_id).can_edit(user),
            'job': job,
            'download_id': st_fp_id,
            'old_files': old_files,
            'num_samples': num_samples,
            'num_columns': num_cols}
def tearDown(self):
    """Remove temporary files and delete the study created by the tests."""
    for path in self._clean_up_files:
        if exists(path):
            remove(path)

    sid = self.new_study.id
    # prep templates must be deleted before the sample template and study
    for prep in self.new_study.prep_templates():
        PrepTemplate.delete(prep.id)
    if SampleTemplate.exists(sid):
        SampleTemplate.delete(sid)
    Study.delete(sid)
def create_sample_template(fp, study, is_mapping_file, data_type=None):
    """Creates a sample template

    Parameters
    ----------
    fp : str
        The file path to the template file
    study : qiita_db.study.Study
        The study to add the sample template to
    is_mapping_file : bool
        Whether `fp` contains a mapping file or a sample template
    data_type : str, optional
        If `is_mapping_file` is True, the data type of the prep template
        to be created

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}
    """
    # Imports are local because this code runs inside the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.sample_template import SampleTemplate
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_ware.metadata_pipeline import (
        create_templates_from_qiime_mapping_file)

    status, msg = 'success', ''
    try:
        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp, study,
                                                         data_type)
            else:
                SampleTemplate.create(load_template_to_dataframe(fp),
                                      study)
            remove(fp)

            # fold all warnings into a single message; note this is
            # ignored if an exception was raised above
            if warns:
                msg = '\n'.join(set(str(w.message) for w in warns))
                status = 'warning'
    except Exception as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        status = 'danger'
        msg = str(e)

    return {'status': status, 'message': msg}
def get(self, prep_template_id):
    """Serve the sample information restricted to a prep template's samples."""
    prep_id = int(prep_template_id)
    prep = PrepTemplate(prep_id)
    study_id = prep.study_id
    self._check_permissions(study_id)

    # restrict the sample info to the samples present in the prep template
    sample_info = SampleTemplate(study_id)
    text = sample_info.to_dataframe(samples=list(prep)).to_csv(
        None, sep='\t')
    self._finish_generate_files(
        'sample_information_from_prep_%s.tsv' % prep_id, text)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Render the VAMPS submission page for the given artifact.

    Shared by multiple handler entry points to avoid duplication of code.
    Requires an admin-level user; raises HTTPError 404 if the artifact
    does not exist and 403 if the user is not an admin.
    """
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = Artifact(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(
            404, "Artifact %d does not exist!" % preprocessed_data_id)
    else:
        # only admins may reach the VAMPS submission page
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(
                403, "No permissions of admin, "
                "get/VAMPSSubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)
    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.categories()))]

    # exactly one demultiplexed file is expected; anything else is an error
    demux = [
        path for _, path, ftype in preprocessed_data.get_filepaths()
        if ftype == 'preprocessed_demux'
    ]
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = ("Study appears to have multiple demultiplexed files!")
        msg_level = 'danger'
    elif demux_length == 1:
        demux_file = demux[0]
        demux_file_stats = demux_stats(demux_file)
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    self.render('vamps_submission.html',
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id)
def display_template(self, study, user, msg, msg_level, full_access,
                     top_tab=None, sub_tab=None, prep_tab=None):
    """Render the study description page; shared to avoid code duplication."""
    sample_template_exists = SampleTemplate.exists(study.id)

    if sample_template_exists:
        # approval is only allowed when no required columns are missing
        st = SampleTemplate(study.id)
        missing_cols = st.check_restrictions(
            [SAMPLE_TEMPLATE_COLUMNS['qiita_main']])
        allow_approval = len(missing_cols) == 0
        approval_deny_msg = (
            "Processed data approval request is disabled due to missing "
            "columns in the sample template: %s" % ', '.join(missing_cols))
    else:
        allow_approval = False
        approval_deny_msg = ""

    # The general information of the study can be changed if the study is
    # not public or if the user is an admin, in which case they can always
    # modify the information of the study
    show_edit_btn = study.status != 'public' or user.level == 'admin'

    # Make the error message suitable for html
    msg = msg.replace('\n', "<br/>")

    self.render('study_description.html',
                message=msg,
                level=msg_level,
                study=study,
                study_title=study.title,
                study_alias=study.info['study_alias'],
                show_edit_btn=show_edit_btn,
                show_data_tabs=sample_template_exists,
                full_access=full_access,
                allow_approval=allow_approval,
                approval_deny_msg=approval_deny_msg,
                top_tab=top_tab,
                sub_tab=sub_tab,
                prep_tab=prep_tab)
def sample_template_checks(study_id, user, check_exists=False):
    """Performs different checks and raises errors if any of the checks fail

    Parameters
    ----------
    study_id : int
        The study id
    user : qiita_db.user.User
        The user trying to access the study
    check_exists : bool, optional
        If true, check if the sample template exists

    Raises
    ------
    HTTPError
        404 if the study does not exist
        403 if the user does not have access to the study
        404 if check_exists == True and the sample template doesn't exist
    """
    # Guard 1: the study must exist
    try:
        study = Study(int(study_id))
    except QiitaDBUnknownIDError:
        raise HTTPError(404, reason='Study does not exist')

    # Guard 2: the user must have access to it
    if not study.has_access(user):
        raise HTTPError(403, reason='User does not have access to study')

    # Guard 3 (optional): the sample template must exist
    if check_exists and not SampleTemplate.exists(study_id):
        raise HTTPError(
            404,
            reason="Study %s doesn't have sample information" % study_id)
def sample_template_samples_get_req(samp_id, user_id):
    """Returns list of samples in the sample template

    Parameters
    ----------
    samp_id : int or str typecastable to int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'samples': list of str}
        samples is list of samples in the template
    """
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return exists

    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    # iterating the template yields its sample ids
    samples = sorted(x for x in SampleTemplate(int(samp_id)))
    return {'status': 'success', 'message': '', 'samples': samples}
def get(self, study_id):
    """Generate the EBI sample accessions file for the given study."""
    sid = int(study_id)
    self._check_permissions(sid)

    accessions = SampleTemplate(sid).ebi_sample_accessions
    self._generate_files('sample_accession', accessions,
                         'ebi_sample_accessions_study_%s.tsv' % sid)
def sample_template_patch_request(user_id, req_op, req_path, req_value=None,
                                  req_from=None):
    """Modifies an attribute of the artifact

    Parameters
    ----------
    user_id : str
        The id of the user performing the patch operation
    req_op : str
        The operation to perform on the artifact
    req_path : str
        The prep information and attribute to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Returns
    -------
    dict of {str, str}
        A dictionary with the following keys:
        - status: str, whether if the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
    """
    # guard clause: only 'remove' is implemented
    if req_op != 'remove':
        return {'status': 'error',
                'message': 'Operation "%s" not supported. '
                           'Current supported operations: remove' % req_op}

    req_path = [v for v in req_path.split('/') if v]
    if len(req_path) != 3:
        return {'status': 'error', 'message': 'Incorrect path parameter'}
    st_id, attribute, attr_id = req_path

    # Check if the user actually has access to the template.
    # FIX: cast the id to int before constructing the template, matching
    # how SampleTemplate is instantiated everywhere else in this module.
    st = SampleTemplate(int(st_id))
    access_error = check_access(st.study_id, user_id)
    if access_error:
        return access_error

    # Offload the deletion of the sample or column to the cluster
    job_id = safe_submit(user_id, delete_sample_or_column, SampleTemplate,
                         int(st_id), attribute, attr_id)
    # Store the job id attaching it to the sample template id
    r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % st_id,
                 dumps({'job_id': job_id}))

    return {'status': 'success', 'message': ''}
def test_update_sample_template(self):
    """The update job succeeds, adds the new column and stores an alert."""
    # build a minimal template file adding one new column/value
    fd, fp = mkstemp(suffix=".txt")
    close(fd)
    with open(fp, 'w') as f:
        f.write("sample_name\tnew_col\n1.SKD6.640190\tnew_value")
    self._clean_up_files.append(fp)

    job = self._create_job('update_sample_template', {
        'study': 1,
        'template_fp': fp
    })
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    # the new column value made it into the sample template
    self.assertEqual(
        SampleTemplate(1)['1.SKD6.640190']['new_col'], 'new_value')

    # the redis alert attached to the template describes the change
    obs = r_client.get("sample_template_1")
    self.assertIsNotNone(obs)
    obs = loads(obs)
    self.assertCountEqual(obs, ['job_id', 'alert_type', 'alert_msg'])
    self.assertEqual(obs['job_id'], job.id)
    self.assertEqual(obs['alert_type'], 'warning')
    self.assertIn(
        'The following columns have been added to the existing '
        'template: new_col', obs['alert_msg'])
    # making sure that the error name is not in the messages
    self.assertNotIn('QiitaDBWarning', obs['alert_msg'])
def test_delete_sample_template(self):
    """Deletion fails with attached prep templates and succeeds without."""
    # Error case: study 1 has prep templates attached
    job = self._create_job('delete_sample_template', {'study': 1})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn(
        "Sample template cannot be erased because there are "
        "prep templates associated", job.log.msg)

    # Success case: build a fresh study with a single-sample template
    info = {
        "timeseries_type_id": '1',
        "metadata_complete": 'true',
        "mixs_compliant": 'true',
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "TDST",
        "study_description": "Test delete sample template",
        "study_abstract": "Test delete sample template",
        "principal_investigator_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'),
                         "Delete Sample Template test", info)
    metadata = pd.DataFrame.from_dict(
        {
            'Sample1': {
                'physical_specimen_location': 'location1',
                'physical_specimen_remaining': 'true',
                'dna_extracted': 'true',
                'sample_type': 'type1',
                'collection_timestamp': '2014-05-29 12:24:15',
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 1',
                'latitude': '42.42',
                'longitude': '41.41',
                'taxon_id': '9606',
                'scientific_name': 'h**o sapiens'
            }
        },
        orient='index',
        dtype=str)
    SampleTemplate.create(metadata, study)

    # deleting the fresh template (no prep templates) must succeed
    job = self._create_job('delete_sample_template', {'study': study.id})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertFalse(SampleTemplate.exists(study.id))
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Render the VAMPS submission page for the given artifact.

    Shared by multiple handler entry points to avoid duplication of code.
    Requires an admin-level user; raises HTTPError 404 if the artifact
    does not exist and 403 if the user is not an admin.
    """
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = Artifact(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404,
                        "Artifact %d does not exist!" % preprocessed_data_id)
    else:
        # only admins may reach the VAMPS submission page
        user = self.current_user
        if user.level != "admin":
            raise HTTPError(403, "No permissions of admin, "
                                 "get/VAMPSSubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)
    stats = [
        ("Number of samples", len(prep_template)),
        ("Number of metadata headers", len(sample_template.categories())),
    ]

    # exactly one demultiplexed file is expected; anything else is an error
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == "preprocessed_demux"]
    demux_length = len(demux)

    if not demux_length:
        msg = "Study does not appear to have demultiplexed " "sequences associated"
        msg_level = "danger"
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = "danger"
    elif demux_length == 1:
        demux_file = demux[0]
        demux_file_stats = demux_stats(demux_file)
        stats.append(("Number of sequences", demux_file_stats.n))
        msg_level = "success"

    self.render(
        "vamps_submission.html",
        study_title=study.title,
        stats=stats,
        message=msg,
        study_id=study.id,
        level=msg_level,
        preprocessed_data_id=preprocessed_data_id,
    )
def get(self, message="", msg_level=None):
    """Render the study listing page for the current user."""
    # gather every user email except the current user's
    all_emails_except_current = yield Task(self._get_all_emails)
    all_emails_except_current.remove(self.current_user.id)

    # searchable metadata: sample template headers plus study columns
    avail_meta = (SampleTemplate.metadata_headers() +
                  get_table_cols("study"))

    self.render('list_studies.html',
                availmeta=avail_meta,
                all_emails_except_current=all_emails_except_current,
                message=message,
                msg_level=msg_level)
def sample_template_summary_get_req(study_id, user):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    study_id: int
        The study to retrieve the sample information summary
    user: qiita_db.user
        The user performing the request

    Returns
    -------
    dict of {str: object}
        Keys are metadata categories and the values are list of tuples.
        Each tuple is an observed value in the category and the number of
        times it's seen.

    Raises
    ------
    HTTPError
        404 If the sample template doesn't exist
    """
    # Check if the current user has access to the study and if the sample
    # template exists
    sample_template_checks(study_id, user, check_exists=True)

    st = SampleTemplate(study_id)
    df = st.to_dataframe()

    # Drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    res = {}
    for column in df.columns:
        counts = df[column].value_counts()
        # BUG FIX: the previous sort key called the Python 2-only builtin
        # `unicode(x, errors='ignore')`, which raises NameError on
        # Python 3; str() gives natsorted a text key for any value type.
        res[str(column)] = [
            (str(key), counts[key])
            for key in natsorted(counts.index, key=lambda x: str(x))]

    return res
def update_sample_template(study_id, fp):
    """Updates a sample template

    Parameters
    ----------
    study_id : int
        Study id whose template is going to be updated
    fp : str
        The file path to the template file

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}
    """
    # Imports are local because this code runs inside the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_db.metadata_template.sample_template import SampleTemplate

    status, msg = 'success', ''
    try:
        with warnings.catch_warnings(record=True) as warns:
            # extend with any new columns/samples, then update existing
            # values, and finally remove the uploaded file
            st = SampleTemplate(study_id)
            df = load_template_to_dataframe(fp)
            st.extend(df)
            st.update(df)
            remove(fp)

            # fold all warnings into a single message; note this is
            # ignored if an exception was raised above
            if warns:
                msg = '\n'.join(set(str(w.message) for w in warns))
                status = 'warning'
    except Exception as e:
        status = 'danger'
        msg = str(e)

    return {'status': status, 'message': msg}
def test_get_lat_longs_EMP_portal(self):
    """get_lat_longs returns the sample coordinates for an EMP study."""
    # build a fresh study attached to the EMP portal
    info = {
        'timeseries_type_id': 1,
        'lab_person_id': None,
        'principal_investigator_id': 3,
        'metadata_complete': False,
        'mixs_compliant': True,
        'study_description': 'desc',
        'study_alias': 'alias',
        'study_abstract': 'abstract'}
    study = Study.create(User('*****@*****.**'), 'test_study_1',
                         efo=[1], info=info)
    Portal('EMP').add_studies([study.id])

    # single sample with known latitude/longitude
    md = {
        'my.sample': {
            'physical_specimen_location': 'location1',
            'physical_specimen_remaining': True,
            'dna_extracted': True,
            'sample_type': 'type1',
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 4',
            'str_column': 'Value for sample 4',
            'int_column': 4,
            'latitude': 42.42,
            'longitude': 41.41,
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens'}
    }
    md_ext = pd.DataFrame.from_dict(md, orient='index')
    SampleTemplate.create(md_ext, study)

    qiita_config.portal = 'EMP'
    obs = get_lat_longs()
    exp = [[42.42, 41.41]]
    # FIX: assertItemsEqual is Python 2 only (removed in Python 3);
    # assertCountEqual is its Python 3 name, already used by sibling tests
    self.assertCountEqual(obs, exp)
def sample_template_filepaths_get_req(study_id, user_id):
    """Returns all the filepaths attached to the sample template

    Parameters
    ----------
    study_id : int
        The current study object id
    user_id : str
        The current user object id

    Returns
    -------
    dict
        Filepaths in the form
        {'status': status,
         'message': msg,
         'filepaths': filepaths}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        filepaths is a list of tuple of int and str
        All files in the sample template, as [(id, URL), ...]
    """
    exists = _check_sample_template_exists(int(study_id))
    if exists['status'] != 'success':
        return exists

    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    # the template may disappear between the exists check and here
    try:
        template = SampleTemplate(int(study_id))
    except QiitaDBUnknownIDError as e:
        return {'status': 'error', 'message': str(e)}

    return {'status': 'success',
            'message': '',
            'filepaths': template.get_filepaths()}
def sample_template_overview_handler_get_request(study_id, user):
    """Return the sample-template overview information for a study

    Parameters
    ----------
    study_id : int
        The study whose sample template overview is requested
    user : qiita_db.user.User
        The user performing the request

    Returns
    -------
    dict of {str: object}
        Keys: 'exists', 'uploaded_files', 'data_types', 'user_can_edit',
        'job', 'download_id', 'old_files', 'num_samples', 'num_columns'
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    # Check if the sample template exists
    exists = SampleTemplate.exists(study_id)

    # The following information should always be provided:
    # The files that have been uploaded to the system and can be a
    # sample template file
    files = [
        f for _, f in get_files_from_uploads_folders(study_id)
        if f.endswith(('txt', 'tsv'))
    ]

    # If there is a job associated with the sample information, the job id
    job = None
    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    if job_info:
        job = loads(job_info)['job_id']

    # Specific information if it exists or not:
    data_types = []
    st_fp_id = None
    old_files = []
    num_samples = 0
    num_cols = 0

    if exists:
        # If it exists we need to provide:
        # The id of the sample template file so the user can download it
        # and the list of old filepaths
        st = SampleTemplate(study_id)
        all_st_files = st.get_filepaths()
        # The current sample template file is the first one in the list
        # (pop(0)) and we are interested only in the id ([0])
        st_fp_id = all_st_files.pop(0)[0]
        # For the old filepaths we are only interested in their basename
        old_files = [basename(fp) for _, fp in all_st_files]
        # The number of samples - this is a space efficient way of counting
        # the number of samples. Doing len(list(st.keys())) creates a list
        # that we are not using
        num_samples = sum(1 for _ in st.keys())
        # The number of columns
        num_cols = len(st.categories())
    else:
        # It doesn't exist, we also need to provide the data_types in case
        # the user uploads a QIIME mapping file
        data_types = sorted(data_types_get_req()['data_types'])

    return {
        'exists': exists,
        'uploaded_files': files,
        'data_types': data_types,
        'user_can_edit': Study(study_id).can_edit(user),
        'job': job,
        'download_id': st_fp_id,
        'old_files': old_files,
        'num_samples': num_samples,
        'num_columns': num_cols
    }
def test_delete_sample_template(self):
    """Deletion fails with attached prep templates and succeeds without."""
    # Error case: study 1 has prep templates attached
    job = self._create_job('delete_sample_template', {'study': 1})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn("Sample template cannot be erased because there are "
                  "prep templates associated", job.log.msg)

    # Success case: build a fresh study with a single-sample template
    info = {"timeseries_type_id": '1',
            "metadata_complete": 'true',
            "mixs_compliant": 'true',
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "study_alias": "TDST",
            "study_description": "Test delete sample template",
            "study_abstract": "Test delete sample template",
            "principal_investigator_id": StudyPerson(1)}
    study = Study.create(User('*****@*****.**'),
                         "Delete Sample Template test", info)
    metadata = pd.DataFrame.from_dict(
        {'Sample1': {'physical_specimen_location': 'location1',
                     'physical_specimen_remaining': 'true',
                     'dna_extracted': 'true',
                     'sample_type': 'type1',
                     'collection_timestamp': '2014-05-29 12:24:15',
                     'host_subject_id': 'NotIdentified',
                     'Description': 'Test Sample 1',
                     'latitude': '42.42',
                     'longitude': '41.41',
                     'taxon_id': '9606',
                     'scientific_name': 'h**o sapiens'}},
        orient='index', dtype=str)
    SampleTemplate.create(metadata, study)

    # deleting the fresh template (no prep templates) must succeed
    job = self._create_job('delete_sample_template', {'study': study.id})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertFalse(SampleTemplate.exists(study.id))
def sample_template_filepaths_get_req(study_id, user_id):
    """Returns all the filepaths attached to the sample template

    Parameters
    ----------
    study_id : int
        The current study object id
    user_id : str
        The current user object id

    Returns
    -------
    dict
        Filepaths in the form
        {'status': status,
         'message': msg,
         'filepaths': filepaths}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        filepaths is a list of tuple of int and str
        All files in the sample template, as [(id, URL), ...]
    """
    exists = _check_sample_template_exists(int(study_id))
    if exists['status'] != 'success':
        return exists
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error
    # the template may disappear between the exists check and here
    try:
        template = SampleTemplate(int(study_id))
    except QiitaDBUnknownIDError as e:
        return {'status': 'error', 'message': str(e)}

    return {'status': 'success',
            'message': '',
            'filepaths': template.get_filepaths()
            }
def sample_template_get_req(samp_id, user_id):
    """Gets the json of the full sample template

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': msg,
         'template': dict of {str: {str: object, ...}, ...}

        template is dictionary where the keys access_error the metadata
        samples and the values are a dictionary of column and value.
        Format {sample: {column: value, ...}, ...}
    """
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return exists
    access_error = check_access(int(samp_id), user_id)
    if access_error:
        return access_error

    template = SampleTemplate(int(samp_id))
    # NOTE(review): this second check looks redundant — a sample
    # template's study_id equals its own id, so the access was already
    # verified above. Confirm before removing.
    access_error = check_access(template.study_id, user_id)
    if access_error:
        return access_error
    df = template.to_dataframe()
    return {
        'status': 'success',
        'message': '',
        'template': df.to_dict(orient='index')
    }
def sample_template_category_get_req(category, samp_id, user_id):
    """Returns dict of values for each sample in the given category

    Parameters
    ----------
    category : str
        Metadata category to get values for
    samp_id : int or str typecastable to int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns information in the form
        {'status': str,
         'message': str,
         'values': dict of {str: object}}
    """
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return exists

    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    st = SampleTemplate(int(samp_id))
    try:
        values = st.get_category(category)
    except QiitaDBColumnError:
        # unknown category: report it instead of raising
        return {'status': 'error',
                'message': 'Category %s does not exist in sample template'
                           % category}
    return {'status': 'success', 'message': '', 'values': values}
def sample_template_category_get_req(category, samp_id, user_id):
    """Returns dict of values for each sample in the given category

    Parameters
    ----------
    category : str
        Metadata category to get values for
    samp_id : int or str typecastable to int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns information in the form
        {'status': str,
         'message': str,
         'values': dict of {str: object}}
    """
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return exists
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    st = SampleTemplate(int(samp_id))
    try:
        values = st.get_category(category)
    except QiitaDBColumnError:
        # unknown category: report it instead of raising
        return {'status': 'error',
                'message': 'Category %s does not exist in sample template'
                % category}

    return {'status': 'success',
            'message': '',
            'values': values}
def delete_sample_template(study_id):
    """Delete a sample template

    Parameters
    ----------
    study_id : int
        Study id whose template is going to be deleted

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}
    """
    # Import is local because this code runs inside the ipython workers
    from qiita_db.metadata_template.sample_template import SampleTemplate

    status, msg = 'success', ''
    try:
        SampleTemplate.delete(study_id)
    except Exception as e:
        # report the failure to the caller instead of raising
        status = 'danger'
        msg = str(e)

    return {'status': status, 'message': msg}
def sample_template_get_req(samp_id, user_id):
    """Gets the json of the full sample template

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': msg,
         'template': dict of {str: {str: object, ...}, ...}

        template is dictionary where the keys access_error the metadata
        samples and the values are a dictionary of column and value.
        Format {sample: {column: value, ...}, ...}
    """
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return exists
    access_error = check_access(int(samp_id), user_id)
    if access_error:
        return access_error

    template = SampleTemplate(int(samp_id))
    # NOTE(review): this second check looks redundant — a sample
    # template's study_id equals its own id, so the access was already
    # verified above. Confirm before removing.
    access_error = check_access(template.study_id, user_id)
    if access_error:
        return access_error
    df = template.to_dataframe()
    return {'status': 'success',
            'message': '',
            'template': df.to_dict(orient='index')}
def _check_sample_template_exists(samp_id):
    """Make sure a sample template exists in the system

    Parameters
    ----------
    samp_id : int or str castable to int
        SampleTemplate id to check

    Returns
    -------
    dict
        {'status': status,
         'message': msg}
    """
    if SampleTemplate.exists(int(samp_id)):
        return {'status': 'success', 'message': ''}
    return {'status': 'error',
            'message': 'Sample template %d does not exist' % int(samp_id)}
def _check_sample_template_exists(samp_id):
    """Make sure a sample template exists in the system

    Parameters
    ----------
    samp_id : int or str castable to int
        SampleTemplate id to check

    Returns
    -------
    dict
        {'status': status,
         'message': msg}
    """
    # status is 'error' when the template id is unknown, 'success' otherwise
    if not SampleTemplate.exists(int(samp_id)):
        return {'status': 'error',
                'message': 'Sample template %d does not exist' % int(samp_id)
                }
    return {'status': 'success', 'message': ''}
def test_delete_sample_or_column(self):
    """delete_sample_or_column jobs remove samples/columns correctly."""
    st = SampleTemplate(1)

    # Delete a sample template column
    job = self._create_job(
        'delete_sample_or_column', {
            'obj_class': 'SampleTemplate',
            'obj_id': 1,
            'sample_or_col': 'columns',
            'name': 'season_environment'
        })
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('season_environment', st.categories())

    # Delete a sample template sample - need to add one
    # sample that we will remove
    npt.assert_warns(
        QiitaDBWarning, st.extend,
        pd.DataFrame.from_dict({'Sample1': {
            'taxon_id': '9606'
        }}, orient='index', dtype=str))
    self.assertIn('1.Sample1', st.keys())
    job = self._create_job(
        'delete_sample_or_column', {
            'obj_class': 'SampleTemplate',
            'obj_id': 1,
            'sample_or_col': 'samples',
            'name': '1.Sample1'
        })
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('1.Sample1', st.keys())

    # Delete a prep template column
    pt = PrepTemplate(1)
    job = self._create_job(
        'delete_sample_or_column', {
            'obj_class': 'PrepTemplate',
            'obj_id': 1,
            'sample_or_col': 'columns',
            'name': 'target_subfragment'
        })
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('target_subfragment', pt.categories())

    # Delete a prep template sample
    metadata = pd.DataFrame.from_dict(
        {
            '1.SKB8.640193': {
                'barcode': 'GTCCGCAAGTTA',
                'primer': 'GTGCCAGCMGCCGCGGTAA'
            },
            '1.SKD8.640184': {
                'barcode': 'CGTAGAGCTCTC',
                'primer': 'GTGCCAGCMGCCGCGGTAA'
            }
        },
        orient='index',
        dtype=str)
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                          Study(1), "16S")
    job = self._create_job(
        'delete_sample_or_column', {
            'obj_class': 'PrepTemplate',
            'obj_id': pt.id,
            'sample_or_col': 'samples',
            'name': '1.SKD8.640184'
        })
    private_task(job.id)
    # BUG FIX: this job's success was never asserted (every other job in
    # this test checks it), so a silent failure would go unnoticed
    self.assertEqual(job.status, 'success')
    self.assertNotIn('1.SKD8.640184', pt.keys())

    # Test exceptions
    job = self._create_job(
        'delete_sample_or_column', {
            'obj_class': 'UnknownClass',
            'obj_id': 1,
            'sample_or_col': 'columns',
            'name': 'column'
        })
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn(
        'Unknown value "UnknownClass". Choose between '
        '"SampleTemplate" and "PrepTemplate"', job.log.msg)

    job = self._create_job(
        'delete_sample_or_column', {
            'obj_class': 'SampleTemplate',
            'obj_id': 1,
            'sample_or_col': 'unknown',
            'name': 'column'
        })
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn(
        'Unknown value "unknown". Choose between "samples" '
        'and "columns"', job.log.msg)
def get(self):
    """Serve a study/preparation download as an nginx-accelerated zip.

    Query arguments (all optional strings, validated below):
      data      -- what to download: 'raw', 'biom', 'sample_information'
                   or 'prep_information'
      study_id  -- study to download from (required for study-level data)
      prep_id   -- preparation to download (for 'prep_information')
      data_type -- optional data type filter; must be a known data type

    Raises HTTPError 422 on any invalid parameter combination, and 404
    when the study is not public.
    """
    data = self.get_argument("data", None)
    study_id = self.get_argument("study_id", None)
    prep_id = self.get_argument("prep_id", None)
    data_type = self.get_argument("data_type", None)
    dtypes = get_data_types().keys()
    templates = ['sample_information', 'prep_information']
    valid_data = ['raw', 'biom'] + templates
    # accumulates (path, path, checksum, size) tuples for nginx
    to_download = []

    # --- parameter validation -------------------------------------------
    if data is None or (study_id is None and prep_id is None) or \
            data not in valid_data:
        raise HTTPError(422, reason='You need to specify both data (the '
                        'data type you want to download - %s) and '
                        'study_id or prep_id' % '/'.join(valid_data))
    elif data_type is not None and data_type not in dtypes:
        raise HTTPError(422, reason='Not a valid data_type. Valid types '
                        'are: %s' % ', '.join(dtypes))
    # NOTE(review): this branch is unreachable — the first condition above
    # already raises whenever both study_id and prep_id are None
    elif data in templates and prep_id is None and study_id is None:
        raise HTTPError(422, reason='If downloading a sample or '
                        'preparation file you need to define study_id or'
                        ' prep_id')
    elif data in templates:
        # --- information-file download (sample or prep template) --------
        if data_type is not None:
            raise HTTPError(422, reason='If requesting an information '
                            'file you cannot specify the data_type')
        elif prep_id is not None and data == 'prep_information':
            fname = 'preparation_information_%s' % prep_id
            prep_id = int(prep_id)
            try:
                infofile = PrepTemplate(prep_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(
                    422, reason='Preparation information does not exist')
        elif study_id is not None and data == 'sample_information':
            fname = 'sample_information_%s' % study_id
            study_id = int(study_id)
            try:
                infofile = SampleTemplate(study_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(
                    422, reason='Sample information does not exist')
        else:
            raise HTTPError(422, reason='Review your parameters, not a '
                            'valid combination')
        # most recent filepath first (sort='descending'), take it
        x = retrieve_filepaths(
            infofile._filepath_table, infofile._id_column, infofile.id,
            sort='descending')[0]
        # strip the base dir (plus its trailing separator) so nginx gets
        # a path relative to the files root
        basedir_len = len(get_db_files_base_dir()) + 1
        fp = x['fp'][basedir_len:]
        to_download.append((fp, fp, str(x['checksum']),
                            str(x['fp_size'])))
        self._write_nginx_file_list(to_download)
        zip_fn = '%s_%s.zip' % (
            fname, datetime.now().strftime('%m%d%y-%H%M%S'))
        self._set_nginx_headers(zip_fn)
    else:
        # --- raw / biom artifact download -------------------------------
        study_id = int(study_id)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(422, reason='Study does not exist')
        else:
            public_raw_download = study.public_raw_download
            if study.status != 'public':
                raise HTTPError(404, reason='Study is not public. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            elif data == 'raw' and not public_raw_download:
                raise HTTPError(422, reason='No raw data access. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            else:
                # raw data: root artifacts only (no parents)
                artifacts = [a for a in study.artifacts(dtype=data_type)
                             if not a.parents]
                # bioms: override with BIOM-typed artifacts
                if data == 'biom':
                    artifacts = study.artifacts(
                        dtype=data_type, artifact_type='BIOM')
                for a in artifacts:
                    # only expose artifacts that are themselves public
                    if a.visibility != 'public':
                        continue
                    to_download.extend(self._list_artifact_files_nginx(a))
                if not to_download:
                    raise HTTPError(422, reason='Nothing to download. If '
                                    'this is a mistake contact: '
                                    '*****@*****.**')
                else:
                    self._write_nginx_file_list(to_download)
                    zip_fn = 'study_%d_%s_%s.zip' % (
                        study_id, data, datetime.now().strftime(
                            '%m%d%y-%H%M%S'))
                    self._set_nginx_headers(zip_fn)
    self.finish()
def render(self, study):
    """Render the study information tab for the given study."""
    info = study.info

    # Build the publications string: one link per non-None identifier in
    # each (doi, pmid) pair
    pub_links = []
    for doi, pmid in study.publications:
        if doi is not None:
            pub_links.append(doi_linkifier([doi]))
        if pmid is not None:
            pub_links.append(pubmed_linkifier([pmid]))
    publications = ", ".join(pub_links)

    pi = StudyPerson(info['principal_investigator_id'])
    pi_link = study_person_linkifier((pi.email, pi.name))

    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

    # Retrieve the files from the uploads folder, so the user can choose
    # the sample template of the study. Only 'txt'/'tsv' files qualify.
    files = [f for _, f in get_files_from_uploads_folders(str(study.id))
             if f.endswith(('txt', 'tsv'))]

    # All filepaths of the sample template, if one exists
    if SampleTemplate.exists(study.id):
        sample_templates = SampleTemplate(study.id).get_filepaths()
    else:
        sample_templates = []

    # Check if the request came from a local source
    is_local_request = is_localhost(self.request.headers['host'])

    # The template can only be chosen while the study is sandboxed, or by
    # an admin at any time
    show_select_sample = (
        study.status == 'sandbox' or self.current_user.level == 'admin')

    # EBI information
    ebi_status = study.ebi_submission_status
    ebi_accession = study.ebi_study_accession
    if ebi_accession:
        ebi_accession = EBI_LINKIFIER.format(ebi_accession)

    return self.render_string(
        "study_description_templates/study_information_tab.html",
        abstract=info['study_abstract'],
        description=info['study_description'],
        id=study.id,
        publications=publications,
        principal_investigator=pi_link,
        number_samples_promised=info['number_samples_promised'],
        number_samples_collected=info['number_samples_collected'],
        metadata_complete=info['metadata_complete'],
        show_select_sample=show_select_sample,
        files=files,
        study_id=study.id,
        sample_templates=sample_templates,
        is_local_request=is_local_request,
        data_types=data_types,
        ebi_status=ebi_status,
        ebi_accession=ebi_accession)
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing"""
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", info)

    # The three samples differ only in collection day and description
    sample_md = {
        'Sample%d' % day: {
            'collection_timestamp': datetime(2015, 6, day, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens',
            'Description': 'Test Sample %d' % day
        }
        for day in (1, 2, 3)
    }
    SampleTemplate.create(
        pd.DataFrame.from_dict(sample_md, orient='index', dtype=str),
        study)

    # The three preps differ only in barcode and design description
    barcodes = ('CGTAGAGCTCTC', 'CGTAGAGCTCTA', 'CGTAGAGCTCTT')
    prep_md = {
        'Sample%d' % (i + 1): {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': barcodes[i],
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value %d" % (i + 1)
        }
        for i in range(3)
    }
    pt = PrepTemplate.create(
        pd.DataFrame.from_dict(prep_md, orient='index', dtype=str),
        study, "16S", 'Metagenomics')

    # Write the demultiplexed sequences and wrap them in an artifact
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)

    return Artifact.create([(demux_fp, 6)], "Demultiplexed",
                           prep_template=pt)
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing"""
    # ignoring warnings generated when adding templates
    simplefilter("ignore")
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", [1],
                         info)

    # Samples differ only in collection day and description number
    sample_md = {}
    for day in range(1, 4):
        sample_md['Sample%d' % day] = {
            'collection_timestamp': datetime(2015, 6, day, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens',
            'Description': 'Test Sample %d' % day}
    metadata = pd.DataFrame.from_dict(sample_md, orient='index',
                                      dtype=str)
    SampleTemplate.create(metadata, study)

    # Preps differ only in barcode and design description number
    prep_md = {}
    for num, barcode in enumerate(
            ('CGTAGAGCTCTC', 'CGTAGAGCTCTA', 'CGTAGAGCTCTT'), start=1):
        prep_md['Sample%d' % num] = {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': barcode,
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value %d" % num}
    metadata = pd.DataFrame.from_dict(prep_md, orient='index', dtype=str)
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')

    # Write the demultiplexed sequences and wrap them in an artifact
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)

    ppd = Artifact.create(
        [(demux_fp, 6)], "Demultiplexed", prep_template=pt)
    return ppd
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing"""
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1),
    }
    study = Study.create(User("*****@*****.**"), "Test EBI study", [1],
                         info)

    # Three samples, identical except for collection day and description
    sample_md = {}
    for day in (1, 2, 3):
        sample_md["Sample%d" % day] = {
            "collection_timestamp": datetime(2015, 6, day, 7, 0, 0),
            "physical_specimen_location": "location1",
            "taxon_id": 9606,
            "scientific_name": "h**o sapiens",
            "Description": "Test Sample %d" % day,
        }
    SampleTemplate.create(
        pd.DataFrame.from_dict(sample_md, orient="index", dtype=str),
        study)

    # Three preps, identical except for barcode and design description
    prep_md = {}
    for idx, bc in enumerate(
            ("CGTAGAGCTCTC", "CGTAGAGCTCTA", "CGTAGAGCTCTT"), start=1):
        prep_md["Sample%d" % idx] = {
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": bc,
            "center_name": "KnightLab",
            "platform": "ILLUMINA",
            "instrument_model": "Illumina MiSeq",
            "library_construction_protocol": "Protocol ABC",
            "experiment_design_description": "Random value %d" % idx,
        }
    pt = PrepTemplate.create(
        pd.DataFrame.from_dict(prep_md, orient="index", dtype=str),
        study, "16S", "Metagenomics")

    # Write the demultiplexed sequences and wrap them in an artifact
    fna_fp = join(self.temp_dir, "seqs.fna")
    demux_fp = join(self.temp_dir, "demux.seqs")
    with open(fna_fp, "w") as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, "w") as f:
        to_hdf5(fna_fp, f)

    return Artifact.create([(demux_fp, 6)], "Demultiplexed",
                           prep_template=pt)
def test_sample_template_handler_patch_request(self):
    """Patch requests validate args, then queue remove/replace jobs."""
    user = User('*****@*****.**')
    # Test user doesn't have access
    with self.assertRaisesRegexp(HTTPError,
                                 'User does not have access to study'):
        sample_template_handler_patch_request(
            User('*****@*****.**'), "remove",
            "/1/columns/season_environment/")

    # Test study doesn't exist
    with self.assertRaisesRegexp(HTTPError, 'Study does not exist'):
        sample_template_handler_patch_request(
            user, "remove", "/10000/columns/season_environment/")

    # Test sample template doesn't exist
    new_study = self._create_study('Patching test')
    with self.assertRaisesRegexp(HTTPError,
                                 "Study %s doesn't have sample information"
                                 % new_study.id):
        sample_template_handler_patch_request(
            user, "remove",
            "/%s/columns/season_environment/" % new_study.id)

    # Test wrong operation value
    with self.assertRaisesRegexp(
            HTTPError, 'Operation add not supported. Current supported '
            'operations: remove.'):
        sample_template_handler_patch_request(
            user, 'add', '/1/columns/season_environment')

    # Test wrong path parameter < 2
    with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, 'ignored', '1')

    # TESTS FOR OPERATION: remove
    # Test wrong path parameter
    with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(
            user, 'remove', '/1/season_environment/')

    # Add sample information to the new study so we can delete one column
    # without affecting the other tests
    md = pd.DataFrame.from_dict(
        {'Sample1': {'col1': 'val1', 'col2': 'val2'}},
        orient='index', dtype=str)
    st = SampleTemplate.create(md, new_study)

    # Test success
    obs = sample_template_handler_patch_request(
        user, "remove", "/%s/columns/col2/" % new_study.id)
    # BUG FIX: dict.keys() is a view on Python 3, never equal to a list;
    # materialize it so the assertion actually tests the keys
    self.assertEqual(list(obs.keys()), ['job'])
    job_info = r_client.get('sample_template_%s' % new_study.id)
    self.assertIsNotNone(job_info)
    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
    self.assertNotIn('col2', st.categories())

    # TESTS FOR OPERATION: replace
    # Test incorrect path parameter with replace
    with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, "replace", "/1/")

    # Test attribute not found
    with self.assertRaisesRegexp(HTTPError, 'Attribute name not found'):
        sample_template_handler_patch_request(user, "replace", "/1/name")

    # Test missing value
    with self.assertRaisesRegexp(HTTPError,
                                 'Value is required when updating sample '
                                 'information'):
        sample_template_handler_patch_request(user, "replace", "/1/data")

    # Test file doesn't exist
    with self.assertRaisesRegexp(HTTPError, 'Filepath not found'):
        sample_template_handler_patch_request(user, "replace", "/1/data",
                                              req_value='DoesNotExist')

    # Test success
    obs = sample_template_handler_patch_request(
        user, "replace", "/1/data", req_value='uploaded_file.txt')
    # BUG FIX: same view-vs-list comparison as above
    self.assertEqual(list(obs.keys()), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)
    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'info': dict of {str: object}}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info dictionary contains the keys as the metadata categories
        and the values are list of tuples. Each tuple is an observed
        value in the category and the number of times it's seen.
        Format {num_samples: value,
                category: [(val1, count1), (val2, count2), ...], ...}
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    processing, alert_type, alert_msg = \
        get_sample_template_processing_status(samp_id)

    # Without a template there is nothing to summarize; still report the
    # processing/alert state so the UI can render correctly
    if _check_sample_template_exists(int(samp_id))['status'] != 'success':
        return {'status': 'success',
                'message': '',
                'num_samples': 0,
                'num_columns': 0,
                'editable': not processing,
                'alert_type': alert_type,
                'alert_message': alert_msg,
                'stats': {}}

    template = SampleTemplate(int(samp_id))
    df = template.to_dataframe()
    # Shape is captured before dropping study_id, matching what the
    # template actually stores
    n_samples, n_columns = df.shape
    editable = (Study(template.study_id).can_edit(User(user_id))
                and not processing)

    # drop the samp_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    stats = {}
    for col in df.columns:
        counts = df[col].value_counts()
        stats[str(col)] = [(str(key), counts[key])
                           for key in natsorted(counts.index)]

    return {'status': 'success',
            'message': '',
            'num_samples': n_samples,
            'num_columns': n_columns,
            'editable': editable,
            'alert_type': alert_type,
            'alert_message': alert_msg,
            'stats': stats}
def test_delete_sample_or_column(self):
    """delete_sample_or_column jobs remove samples/columns correctly."""
    st = SampleTemplate(1)
    # Delete a sample template column
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'SampleTemplate', 'obj_id': 1,
                            'sample_or_col': 'columns',
                            'name': 'season_environment'})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('season_environment', st.categories())

    # Delete a sample template sample - need to add one
    # sample that we will remove
    npt.assert_warns(
        QiitaDBWarning, st.extend,
        pd.DataFrame.from_dict({'Sample1': {'taxon_id': '9606'}},
                               orient='index', dtype=str))
    self.assertIn('1.Sample1', st.keys())
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'SampleTemplate', 'obj_id': 1,
                            'sample_or_col': 'samples',
                            'name': '1.Sample1'})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('1.Sample1', st.keys())

    # Delete a prep template column
    pt = PrepTemplate(1)
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'PrepTemplate', 'obj_id': 1,
                            'sample_or_col': 'columns',
                            'name': 'target_subfragment'})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('target_subfragment', pt.categories())

    # Delete a prep template sample
    metadata = pd.DataFrame.from_dict(
        {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'},
         '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'}},
        orient='index', dtype=str)
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                          Study(1), "16S")
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'PrepTemplate', 'obj_id': pt.id,
                            'sample_or_col': 'samples',
                            'name': '1.SKD8.640184'})
    private_task(job.id)
    # BUG FIX: this job's success was never asserted (every other job in
    # this test checks it), so a silent failure would go unnoticed
    self.assertEqual(job.status, 'success')
    self.assertNotIn('1.SKD8.640184', pt.keys())

    # Test exceptions
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'UnknownClass', 'obj_id': 1,
                            'sample_or_col': 'columns', 'name': 'column'})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn('Unknown value "UnknownClass". Choose between '
                  '"SampleTemplate" and "PrepTemplate"', job.log.msg)

    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'SampleTemplate', 'obj_id': 1,
                            'sample_or_col': 'unknown', 'name': 'column'})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn('Unknown value "unknown". Choose between "samples" '
                  'and "columns"', job.log.msg)
# a few notes: just getting the preps with duplicated values; ignoring # column 'sample_id' and tables 'study_sample', 'prep_template', # 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s AND table_name LIKE 'sample_%%' AND table_name NOT IN ( 'prep_template', 'prep_template_sample') GROUP BY table_name""" # note that we are looking for those columns with duplicated names in # the headers TRN.add(sql, [tuple(set(cols_sample))]) for table, columns in viewitems(dict(TRN.execute_fetchindex())): # [1] the format is table_# so taking the # st = SampleTemplate(int(table.split('_')[1])) # getting just the columns of interest st_df = st.to_dataframe()[columns] # converting to datetime for col in columns: st_df[col] = st_df[col].apply(transform_date) st.update(st_df) if cols_prep: with TRN: # a few notes: just getting the preps with duplicated values; ignoring # column 'sample_id' and tables 'study_sample', 'prep_template', # 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s
def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'info': dict of {str: object}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info dictionary contains the keys as the metadata categories
        and the values are list of tuples. Each tuple is an observed value in
        the category and the number of times its seen.
        Format {num_samples: value,
                category: [(val1, count1), (val2, count2), ...], ...}
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    # Resolve the processing/alert state of any queued template job from
    # redis. Three cases:
    #   1. entry with a live job_id -> inspect that job's redis record
    #   2. entry without a job_id   -> a finished job left its outcome here
    #   3. no entry at all          -> nothing queued, no alert
    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % samp_id)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if job_id:
            redis_info = loads(r_client.get(job_id))
            processing = redis_info['status_msg'] == 'Running'
            if processing:
                alert_type = 'info'
                alert_msg = 'This sample template is currently being processed'
            elif redis_info['status_msg'] == 'Success':
                alert_type = redis_info['return']['status']
                alert_msg = redis_info['return']['message'].replace('\n',
                                                                    '</br>')
                # Cache the finished job's outcome (job_id cleared) so the
                # next request hits the fast path below
                payload = {'job_id': None,
                           'status': alert_type,
                           'message': alert_msg}
                r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % samp_id,
                             dumps(payload))
            else:
                # Job finished unsuccessfully; surface its status/message
                alert_type = redis_info['return']['status']
                alert_msg = redis_info['return']['message'].replace('\n',
                                                                    '</br>')
        else:
            # Outcome of an already-finished job was cached by a previous
            # request
            processing = False
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')
    else:
        processing = False
        alert_type = ''
        alert_msg = ''

    # No template yet: return an empty summary but keep the alert state so
    # the UI can still show the job outcome
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return {'status': 'success',
                'message': '',
                'num_samples': 0,
                'num_columns': 0,
                'editable': not processing,
                'alert_type': alert_type,
                'alert_message': alert_msg,
                'stats': {}}

    template = SampleTemplate(int(samp_id))
    df = template.to_dataframe()

    # Editing is blocked while a job is running or when the user lacks
    # edit rights on the study
    editable = (Study(template.study_id).can_edit(User(user_id)) and not
                processing)

    # NOTE: shape is captured before the study_id column drop below
    out = {'status': 'success',
           'message': '',
           'num_samples': df.shape[0],
           'num_columns': df.shape[1],
           'editable': editable,
           'alert_type': alert_type,
           'alert_message': alert_msg,
           'stats': {}}

    # drop the samp_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)
    # Per-category value counts, naturally sorted by observed value
    for column in df.columns:
        counts = df[column].value_counts()
        out['stats'][str(column)] = [(str(key), counts[key])
                                     for key in natsorted(counts.index)]
    return out
# a few notes: just getting the preps with duplicated values; ignoring # column 'sample_id' and tables 'study_sample', 'prep_template', # 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s AND table_name LIKE 'sample_%%' AND table_name NOT IN ( 'prep_template', 'prep_template_sample') GROUP BY table_name""" # note that we are looking for those columns with duplicated names in # the headers TRN.add(sql, [tuple(set(cols_sample))]) for table, columns in dict(TRN.execute_fetchindex()).items(): # [1] the format is table_# so taking the # st = SampleTemplate(int(table.split('_')[1])) # getting just the columns of interest st_df = st.to_dataframe()[columns] # converting to datetime for col in columns: st_df[col] = st_df[col].apply(transform_date) st.update(st_df) if cols_prep: with TRN: # a few notes: just getting the preps with duplicated values; ignoring # column 'sample_id' and tables 'study_sample', 'prep_template', # 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s
def test_delete_sample_or_column(self):
    """delete_sample_or_column removes samples/columns and reports errors."""
    st = SampleTemplate(1)
    # Delete a sample template column
    obs = delete_sample_or_column(SampleTemplate, 1, "columns",
                                  "season_environment")
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    self.assertNotIn('season_environment', st.categories())

    # Delete a sample template sample - need to add one sample that we
    # will remove
    npt.assert_warns(
        QiitaDBWarning, st.extend,
        pd.DataFrame.from_dict({'Sample1': {'taxon_id': '9606'}},
                               orient='index', dtype=str))
    self.assertIn('1.Sample1', st.keys())
    obs = delete_sample_or_column(SampleTemplate, 1, "samples",
                                  "1.Sample1")
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    self.assertNotIn('1.Sample1', st.keys())

    # Delete a prep template column
    pt = PrepTemplate(2)
    obs = delete_sample_or_column(PrepTemplate, 2, "columns",
                                  "target_subfragment")
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    self.assertNotIn('target_subfragment', pt.categories())

    # Delete a prep template sample
    metadata = pd.DataFrame.from_dict(
        {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'},
         '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'}},
        orient='index', dtype=str)
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                          Study(1), "16S")
    obs = delete_sample_or_column(PrepTemplate, pt.id, "samples",
                                  '1.SKD8.640184')
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    # BUG FIX: a deleted *sample* must be absent from keys(); the old
    # check against categories() was vacuously true because sample ids
    # never appear among the metadata categories
    self.assertNotIn('1.SKD8.640184', pt.keys())

    # Exception
    obs = delete_sample_or_column(PrepTemplate, 2, "samples",
                                  "1.SKM9.640192")
    exp = {'status': "danger",
           'message': "Prep info file '2' has files attached, you cannot "
                      "delete samples."}
    self.assertEqual(obs, exp)

    # No "samples" or "columns"
    obs = delete_sample_or_column(PrepTemplate, 2, "not_samples", "NOP")
    exp = {'status': 'danger',
           'message': 'Unknown value "not_samples". Choose between '
                      '"samples" and "columns"'}
    self.assertEqual(obs, exp)
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the required QIIME mapping file columns is missing
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s" % ', '.join(missing))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        # yields every column name declared across all restrictions
        for restriction in viewvalues(restriction_set):
            for cols in viewkeys(restriction.columns):
                yield cols

    pt_cols = set(col for col in _col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(
            col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    # BUG FIX: DataFrame.ix was deprecated in pandas 0.20 and removed in
    # pandas 1.0; label-based column selection uses .loc with a list key
    st_md = qiime_map.loc[:, list(st_cols)]
    pt_md = qiime_map.loc[:, list(pt_cols)]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
def process_sample_template(self, study, user, callback):
    """Process a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done

    Raises
    ------
    HTTPError
        If the sample template file does not exists
    """
    # If we are on this function, the arguments "sample_template" and
    # "data_type" must be defined. If not, let tornado raise its error
    sample_template = self.get_argument('sample_template')
    data_type = self.get_argument('data_type')

    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the sample template in the uploads folder
    fp_rsp = join(base_fp, str(study.id), sample_template)

    if not exists(fp_rsp):
        # The file does not exist, fail nicely
        raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

    # Define here the message and message level in case of success
    msg = "The sample template '%s' has been added" % sample_template
    msg_level = "success"
    is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

    try:
        # A QIIME mapping file also creates a prep template, so a data type
        # is mandatory in that case
        if is_mapping_file and not data_type:
            raise ValueError("Please, choose a data type if uploading a "
                             "QIIME mapping file")

        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp_rsp, study,
                                                         int(data_type))
            else:
                SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                      study)
            # the uploaded file is consumed on success
            remove(fp_rsp)

            # join all the warning messages into one. Note that this
            # info will be ignored if an exception is raised
            if warns:
                msg = '; '.join([convert_text_html(str(w.message))
                                 for w in warns])
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError, QiitaDBError,
            QiitaWareError) as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        error_msg = ('parsing the QIIME mapping file'
                     if is_mapping_file
                     else 'parsing the sample template')
        msg = html_error_message % (error_msg, basename(fp_rsp), str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    # NOTE(review): the last three tuple elements appear to be tab/extra
    # info slots (cf. the delete handler's 'study_information_tab');
    # unused here -- confirm against the callback's consumer
    callback((msg, msg_level, None, None, None))
def sample_template_summary_get_req(samp_id, user_id):
    """Return a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Keys: status, message, num_samples, num_columns, editable,
        alert_type, alert_message and stats. 'stats' maps each metadata
        category to a list of (observed value, count) tuples, natural-sorted
        by value. On an access error, the access error dict is returned
        instead.
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    processing, alert_type, alert_msg = \
        get_sample_template_processing_status(samp_id)

    if _check_sample_template_exists(int(samp_id))['status'] != 'success':
        # No sample information file yet: report an empty summary
        return {'status': 'success',
                'message': '',
                'num_samples': 0,
                'num_columns': 0,
                'editable': not processing,
                'alert_type': alert_type,
                'alert_message': alert_msg,
                'stats': {}}

    template = SampleTemplate(int(samp_id))
    metadata = template.to_dataframe()
    # shape is taken before dropping the internal study_id column, so the
    # reported num_columns includes it (matches previous behavior)
    n_samples, n_columns = metadata.shape
    editable = (Study(template.study_id).can_edit(User(user_id)) and
                not processing)

    # drop the internal study_id bookkeeping column, if present, so it is
    # not summarized
    if 'study_id' in metadata.columns:
        metadata.drop('study_id', axis=1, inplace=True)

    stats = {}
    for category in metadata.columns:
        counts = metadata[category].value_counts()
        ordered = natsorted(counts.index,
                            key=lambda x: unicode(x, errors='ignore'))
        stats[str(category)] = [(str(val), counts[val]) for val in ordered]

    return {'status': 'success',
            'message': '',
            'num_samples': n_samples,
            'num_columns': n_columns,
            'editable': editable,
            'alert_type': alert_type,
            'alert_message': alert_msg,
            'stats': stats}
# Patch snippet: find prep-template dynamic tables that contain columns
# whose names also appear as sample-template headers, so the patch can
# deal with the name collisions.
with TRN:
    # a few notes: just getting the preps with duplicated values; ignoring
    # column 'sample_id' and tables 'study_sample', 'prep_template',
    # 'prep_template_sample'
    sql = """SELECT table_name, array_agg(column_name::text)
             FROM information_schema.columns
             WHERE column_name IN %s
                 AND column_name != 'sample_id'
                 AND table_name LIKE 'prep_%%'
                 AND table_name NOT IN (
                     'prep_template', 'prep_template_sample')
             GROUP BY table_name"""

    # note that we are looking for those columns with duplicated names in
    # the headers
    headers = set(PrepTemplate.metadata_headers()) & \
        set(SampleTemplate.metadata_headers())

    if headers:
        TRN.add(sql, [tuple(headers)])
        # table_name -> list of overlapping column names
        overlapping = dict(TRN.execute_fetchindex())
    else:
        # no shared headers at all, nothing to inspect
        overlapping = None

if overlapping is not None:
    # finding actual duplicates
    for table_name, cols in viewitems(overlapping):
        # leaving print so when we patch in the main system we know that
        # nothing was renamed or deal with that
        print table_name
        with TRN:
            for c in cols:
                # NOTE(review): the body of this per-column loop is not
                # visible in this chunk of the file (truncated here);
                # cannot document what is done with each column `c`
def test_delete_sample_or_column(self):
    """Exercises delete_sample_or_column on sample and prep templates."""
    st = SampleTemplate(1)

    # Delete a sample template column
    obs = delete_sample_or_column(SampleTemplate, 1, "columns",
                                  "season_environment")
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    self.assertNotIn('season_environment', st.categories())

    # Delete a sample template sample - need to add one sample that we
    # will remove
    npt.assert_warns(
        QiitaDBWarning, st.extend,
        pd.DataFrame.from_dict({'Sample1': {'taxon_id': '9606'}},
                               orient='index', dtype=str))
    self.assertIn('1.Sample1', st.keys())
    obs = delete_sample_or_column(SampleTemplate, 1, "samples",
                                  "1.Sample1")
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    self.assertNotIn('1.Sample1', st.keys())

    # Delete a prep template column
    pt = PrepTemplate(2)
    obs = delete_sample_or_column(PrepTemplate, 2, "columns",
                                  "target_subfragment")
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    self.assertNotIn('target_subfragment', pt.categories())

    # Delete a prep template sample
    metadata = pd.DataFrame.from_dict(
        {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'},
         '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'}},
        orient='index', dtype=str)
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                          Study(1), "16S")
    obs = delete_sample_or_column(PrepTemplate, pt.id, "samples",
                                  '1.SKD8.640184')
    exp = {'status': "success", 'message': ""}
    self.assertEqual(obs, exp)
    # BUG FIX: the deleted id is a *sample*, so it must be absent from
    # keys() (sample ids), not categories() (column names) -- the old
    # assertion against categories() was vacuously true
    self.assertNotIn('1.SKD8.640184', pt.keys())

    # Exception
    obs = delete_sample_or_column(PrepTemplate, 2, "samples",
                                  "1.SKM9.640192")
    exp = {'status': "danger",
           'message': "Prep info file '2' has files attached, you cannot "
                      "delete samples."}
    self.assertEqual(obs, exp)

    # No "samples" or "columns"
    obs = delete_sample_or_column(PrepTemplate, 2, "not_samples", "NOP")
    exp = {'status': 'danger',
           'message': 'Unknown value "not_samples". Choose between '
                      '"samples" and "columns"'}
    self.assertEqual(obs, exp)
def sample_template_patch_request(user_id, req_op, req_path, req_value=None,
                                  req_from=None):
    """Patch the sample template by removing a sample or a column

    Parameters
    ----------
    user_id : str
        The id of the user performing the patch operation
    req_op : str
        The operation to perform; only 'remove' is supported
    req_path : str
        The sample template element to patch, as
        study_id/row_id/columns/column_name or
        study_id/row_id/samples/sample_id
    req_value : str, optional
        The value that needs to be modified (unused by 'remove')
    req_from : str, optional
        The original path of the element (unused by 'remove')

    Returns
    -------
    dict of {str, str}
        A dictionary with the following keys:
        - status: str, whether if the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
        - row_id: the row id echoed back (0 on an unsupported operation)
    """
    if req_op != 'remove':
        return {'status': 'error',
                'message': 'Operation "%s" not supported. '
                           'Current supported operations: remove' % req_op,
                'row_id': 0}

    # Expected path formats:
    #   column: study_id/row_id/columns/column_name
    #   sample: study_id/row_id/samples/sample_id
    path_elements = [v for v in req_path.split('/') if v]
    if len(path_elements) != 4:
        return {'status': 'error', 'message': 'Incorrect path parameter'}

    st_id, row_id, attribute, attr_id = path_elements

    # Check if the user actually has access to the template
    st = SampleTemplate(st_id)
    access_error = check_access(st.study_id, user_id)
    if access_error:
        return access_error

    # Offload the actual deletion to the internal Qiita plugin job
    qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
    cmd = qiita_plugin.get_command('delete_sample_or_column')
    params = Parameters.load(
        cmd, values_dict={'obj_class': 'SampleTemplate',
                          'obj_id': int(st_id),
                          'sample_or_col': attribute,
                          'name': attr_id})
    job = ProcessingJob.create(User(user_id), params)

    # Store the job id attaching it to the sample template id
    r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % st_id,
                 dumps({'job_id': job.id}))
    job.submit()

    return {'status': 'success', 'message': '', 'row_id': row_id}