def delete_sample_template(self, study, user, callback):
    """Delete the sample template of a study.

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        Callback invoked with the results once the processing is done
    """
    template_id = int(self.get_argument('sample_template_id'))
    try:
        SampleTemplate.delete(template_id)
    except Exception as e:
        # Surface any deletion failure to the user instead of crashing
        msg = "Couldn't remove %d sample template: %s" % (
            template_id, str(e))
        msg_level = "danger"
    else:
        msg = ("Sample template %d has been deleted from study: "
               "<b><i>%s</i></b>" % (template_id, study.title))
        msg_level = "success"
    callback((msg, msg_level, 'study_information_tab', None, None))
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id

    Returns
    -------
    bool
        True if VAMPS acknowledged the upload, False otherwise

    Raises
    ------
    ValueError
        If this data is already being submitted or was submitted
        successfully before
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz with the metadata and preprocessed sequences
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')

    targz.close()

    # submitting
    # BUG FIX: the format string had only three placeholders for four
    # values (the password field lost its '%s'), which raised
    # "not all arguments converted during string formatting" before
    # curl was ever invoked.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS answers with this exact (empty) page on success
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def update_sample_template(self, study, user, callback):
    """Update a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        Callback invoked with the results once the processing is done

    Raises
    ------
    HTTPError
        If the sample template file does not exists
    """
    # Tornado raises its own error if "sample_template" is missing, so it
    # is safe to fetch the argument unconditionally here
    sample_template = self.get_argument('sample_template')

    # Message and level reported when everything goes well
    msg = "The sample template '%s' has been updated" % sample_template
    msg_level = "success"

    # Resolve the template path inside the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    fp_rsp = join(base_fp, str(study.id), sample_template)

    if not exists(fp_rsp):
        # Fail nicely when the file is missing
        raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)

    try:
        with warnings.catch_warnings(record=True) as caught:
            # deleting previous uploads and inserting new one
            template = SampleTemplate(study.id)
            new_data = load_template_to_dataframe(fp_rsp)
            template.extend(new_data)
            template.update(new_data)
            remove(fp_rsp)

            # Collapse all distinct warning messages into a single one;
            # this is discarded if an exception is raised
            if caught:
                msg = '\n'.join(set(str(w.message) for w in caught))
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
        # Something went wrong while processing the template; show the
        # error to the user so they can fix it
        msg = html_error_message % ('updating the sample template:',
                                    basename(fp_rsp), str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id

    Returns
    -------
    bool
        True if VAMPS acknowledged the upload, False otherwise

    Raises
    ------
    ValueError
        If this data is already being submitted or was submitted
        successfully before
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz with the metadata and preprocessed sequences
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(
        targz_folder,
        '%d_%d_%d.tgz' % (study.id, prep_template.id, preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')

    targz.close()

    # submitting
    # BUG FIX: the format string had only three placeholders for four
    # values (the password field lost its '%s'), which raised
    # "not all arguments converted during string formatting" before
    # curl was ever invoked.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS answers with this exact (empty) page on success
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def update_sample_template(self, study, user, callback):
    """Update a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        Callback invoked with the results once the processing is done

    Raises
    ------
    HTTPError
        If the sample template file does not exists
    """
    # Tornado raises its own error if "sample_template" is missing, so it
    # is safe to fetch the argument unconditionally here
    sample_template = self.get_argument('sample_template')

    # Message and level reported when everything goes well
    msg = "The sample template '%s' has been updated" % sample_template
    msg_level = "success"

    # Resolve the template path inside the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    fp_rsp = join(base_fp, str(study.id), sample_template)

    if not exists(fp_rsp):
        # Fail nicely when the file is missing
        raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)

    try:
        with warnings.catch_warnings(record=True) as caught:
            # deleting previous uploads and inserting new one
            template = SampleTemplate(study.id)
            template.update(load_template_to_dataframe(fp_rsp))

            # Collapse all warning messages into a single one; this is
            # discarded if an exception is raised
            if caught:
                msg = '; '.join([str(w.message) for w in caught])
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
        # Something went wrong while processing the template; show the
        # error to the user so they can fix it
        msg = html_error_message % ('updating the sample template:',
                                    basename(fp_rsp), str(e))
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))
def remove_add_study_template(self, raw_data, study_id, fp_rsp):
    """Replace prep templates, raw data, and sample template with a new one
    """
    # Drop every existing prep template attached to the study's raw data
    for raw_data_id in raw_data():
        rd = RawData(raw_data_id)
        for pt_id in rd.prep_templates:
            if PrepTemplate.exists(pt_id):
                PrepTemplate.delete(pt_id)

    # Drop the current sample template, if any, before creating a new one
    if SampleTemplate.exists(study_id):
        SampleTemplate.delete(study_id)

    SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                          Study(study_id))
    remove(fp_rsp)
def render(self, study):
    """Render the study information tab for the given study."""
    info = study.info
    pi = StudyPerson(info['principal_investigator_id'])

    # Retrieve the files from the uploads folder, so the user can choose
    # the sample template of the study; only 'txt'/'tsv' files qualify
    files = [f for _, f in get_files_from_uploads_folders(str(study.id))
             if f.endswith(('txt', 'tsv'))]

    # All filepaths of the sample template, or an empty list if the study
    # has no sample template yet
    if SampleTemplate.exists(study.id):
        sample_templates = SampleTemplate(study.id).get_filepaths()
    else:
        sample_templates = []

    # Check if the request came from a local source
    is_local_request = is_localhost(self.request.headers['host'])

    # Sample template selection is only available while the study is
    # sandboxed, unless the current user is an admin
    show_select_sample = (
        study.status == 'sandbox' or self.current_user.level == 'admin')

    # EBI information; linkify the accession when present
    ebi_accession = study.ebi_study_accession
    if ebi_accession:
        ebi_accession = EBI_LINKIFIER.format(ebi_accession)

    return self.render_string(
        "study_description_templates/study_information_tab.html",
        abstract=info['study_abstract'],
        description=info['study_description'],
        id=study.id,
        pmids=", ".join(pubmed_linkifier([p]) for p in study.pmids),
        principal_investigator=study_person_linkifier((pi.email, pi.name)),
        number_samples_promised=info['number_samples_promised'],
        number_samples_collected=info['number_samples_collected'],
        metadata_complete=info['metadata_complete'],
        show_select_sample=show_select_sample,
        files=files,
        study_id=study.id,
        sample_templates=sample_templates,
        is_local_request=is_local_request,
        data_types=sorted(viewitems(get_data_types()), key=itemgetter(1)),
        ebi_status=study.ebi_submission_status,
        ebi_accession=ebi_accession)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)

    # Only admins may reach the VAMPS submission page
    user = self.current_user
    if user.level != "admin":
        raise HTTPError(403, "No permissions of admin, "
                        "get/VAMPSSubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [("Number of samples", len(prep_template)),
             ("Number of metadata headers",
              len(sample_template.categories()))]

    # There must be exactly one demultiplexed file for the submission
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == "preprocessed_demux"]
    n_demux = len(demux)
    if not n_demux:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = "danger"
    elif n_demux > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = "danger"
    else:
        stats.append(("Number of sequences", demux_stats(demux[0]).n))
        msg_level = "success"

    self.render("vamps_submission.html",
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id)
def render(self, study):
    """Render the study information tab for the given study."""
    info = study.info
    pi = StudyPerson(info['principal_investigator_id'])

    # Retrieve the files from the uploads folder, so the user can choose
    # the sample template of the study
    files = [f for _, f in get_files_from_uploads_folders(str(study.id))]

    # All filepaths of the sample template, or an empty list if the study
    # has no sample template yet
    if SampleTemplate.exists(study.id):
        sample_templates = SampleTemplate(study.id).get_filepaths()
    else:
        sample_templates = []

    # Sample template selection is only available while the study is
    # sandboxed, unless the current user is an admin
    show_select_sample = (
        study.status == 'sandbox' or self.current_user.level == 'admin')

    return self.render_string(
        "study_description_templates/study_information_tab.html",
        abstract=info['study_abstract'],
        description=info['study_description'],
        pmids=", ".join(pubmed_linkifier([p]) for p in study.pmids),
        principal_investigator=study_person_linkifier((pi.email, pi.name)),
        number_samples_promised=info['number_samples_promised'],
        number_samples_collected=info['number_samples_collected'],
        metadata_complete=info['metadata_complete'],
        show_select_sample=show_select_sample,
        files=files,
        study_id=study.id,
        sample_templates=sample_templates,
        is_local_request=self._is_local())
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)

    # Only admins may reach the EBI submission page
    user = User(self.current_user)
    if user.level != 'admin':
        raise HTTPError(403, "No permissions of admin, "
                        "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.metadata_headers()))]

    # There must be exactly one demultiplexed file for the submission
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    n_demux = len(demux)
    if not n_demux:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif n_demux > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        stats.append(('Number of sequences', demux_stats(demux[0]).n))
        msg_level = 'success'

    self.render('ebi_submission.html', user=self.current_user,
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)

    # Only admins may reach the EBI submission page
    user = self.current_user
    if user.level != 'admin':
        raise HTTPError(403, "No permissions of admin, "
                        "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.metadata_headers()))]

    # There must be exactly one demultiplexed file for the submission
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    n_demux = len(demux)
    if not n_demux:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif n_demux > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        stats.append(('Number of sequences', demux_stats(demux[0]).n))
        msg_level = 'success'

    self.render('ebi_submission.html',
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def test_to_file(self):
    """to file writes a tab delimited file with all the metadata"""
    fd, fp = mkstemp()
    close(fd)
    template = SampleTemplate.create(self.metadata, self.new_study)
    template.to_file(fp)
    self._clean_up_files.append(fp)
    # The written file must match the expected template verbatim
    with open(fp, 'U') as f:
        self.assertEqual(f.read(), EXP_SAMPLE_TEMPLATE)
def get(self, message="", msg_level=None):
    """Render the studies listing for the current user."""
    emails = yield Task(self._get_all_emails)
    # The current user should not appear in the share-with list
    emails.remove(self.current_user.id)
    avail_meta = (SampleTemplate.metadata_headers() +
                  get_table_cols("study"))
    self.render('list_studies.html', availmeta=avail_meta,
                all_emails_except_current=emails,
                message=message, msg_level=msg_level)
def display_template(self, study, user, msg, msg_level, full_access,
                     top_tab=None, sub_tab=None, prep_tab=None):
    """Simple function to avoid duplication of code"""
    has_sample_template = SampleTemplate.exists(study.id)
    if has_sample_template:
        template = SampleTemplate(study.id)
        # Approval is only allowed when no required columns are missing
        missing = template.check_restrictions(
            [SAMPLE_TEMPLATE_COLUMNS["qiita_main"]])
        allow_approval = len(missing) == 0
        approval_deny_msg = (
            "Processed data approval request is disabled due to missing "
            "columns in the sample template: %s" % ", ".join(missing))
    else:
        allow_approval = False
        approval_deny_msg = ""

    # The general information of the study can be changed if the study is
    # not public or if the user is an admin, in which case they can always
    # modify the information of the study
    show_edit_btn = study.status != "public" or user.level == "admin"

    self.render("study_description.html",
                # Make the error message suitable for html
                message=msg.replace("\n", "<br/>"),
                level=msg_level,
                study=study,
                study_title=study.title,
                study_alias=study.info["study_alias"],
                show_edit_btn=show_edit_btn,
                show_data_tabs=has_sample_template,
                full_access=full_access,
                allow_approval=allow_approval,
                approval_deny_msg=approval_deny_msg,
                top_tab=top_tab,
                sub_tab=sub_tab,
                prep_tab=prep_tab)
def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type,
                              is_mapping_file):
    """Replace prep templates, raw data, and sample template with a new one
    """
    # A QIIME mapping file cannot be processed without a data type
    if is_mapping_file and data_type == "":
        raise ValueError("Please, choose a data type if uploading a QIIME "
                         "mapping file")

    # Drop every existing prep template attached to the study's raw data
    for raw_data_id in raw_data():
        rd = RawData(raw_data_id)
        for pt_id in rd.prep_templates:
            if PrepTemplate.exists(pt_id):
                PrepTemplate.delete(pt_id)

    # Drop the current sample template, if any, before creating a new one
    if SampleTemplate.exists(study_id):
        SampleTemplate.delete(study_id)

    if is_mapping_file:
        create_templates_from_qiime_mapping_file(fp_rsp, Study(study_id),
                                                 int(data_type))
    else:
        SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                              Study(study_id))
    remove(fp_rsp)
def setUp(self):
    """Build the Sample fixture and expected categories for the tests."""
    self.sample_template = SampleTemplate(1)
    self.sample_id = 'SKB8.640193'
    self.tester = Sample(self.sample_id, self.sample_template)
    # Every metadata category expected for this sample
    self.exp_categories = {
        'physical_location', 'has_physical_specimen', 'has_extracted_data',
        'sample_type', 'required_sample_info_status_id',
        'collection_timestamp', 'host_subject_id', 'description',
        'season_environment', 'assigned_from_geo', 'texture', 'taxon_id',
        'depth', 'host_taxid', 'common_name', 'water_content_soil',
        'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude',
        'env_biome', 'country', 'ph', 'anonymized_name', 'tot_org_carb',
        'description_duplicate', 'env_feature', 'latitude', 'longitude'}
def get(self):
    """Render the study search page for an existing analysis."""
    user = self.current_user
    analysis = Analysis(int(self.get_argument("aid")))
    # make sure user has access to the analysis
    check_analysis_access(user, analysis)

    # get the dictionaries of selected samples and data types
    selproc_data, selsamples = self._selected_parser(analysis)

    self.render('search_studies.html', aid=analysis.id,
                selsamples=selsamples, selproc_data=selproc_data,
                counts={}, fullcounts={}, searchmsg="", query="",
                results={},
                availmeta=SampleTemplate.metadata_headers() +
                get_table_cols("study"))
def test_create(self):
    """Creates a new SampleTemplate"""
    st = SampleTemplate.create(self.metadata, self.new_study)
    # The returned object has the correct id
    self.assertEqual(st.id, 2)

    # The relevant rows to required_sample_info have been added.
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
    # study_id sample_id physical_location has_physical_specimen
    # has_extracted_data sample_type required_sample_info_status_id
    # collection_timestamp host_subject_id description
    # NOTE: the last two values per row are latitude and longitude
    exp = [[2, "Sample1", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 1", 42.42, 41.41],
           [2, "Sample2", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 2", 4.2, 1.1],
           [2, "Sample3", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 3", 4.8, 4.41]]
    self.assertEqual(obs, exp)

    # The relevant rows have been added to the study_sample_columns
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
    # study_id, column_name, column_type
    exp = [[2, "str_column", "varchar"]]
    self.assertEqual(obs, exp)

    # The new table exists
    self.assertTrue(exists_table("sample_2", self.conn_handler))

    # The new table hosts the correct values
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.sample_2")
    # sample_id, str_column
    exp = [['Sample1', "Value for sample 1"],
           ['Sample2', "Value for sample 2"],
           ['Sample3', "Value for sample 3"]]
    self.assertEqual(obs, exp)
def test_metadata_map_from_sample_and_prep_templates(self): obs = metadata_map_from_sample_and_prep_templates( SampleTemplate(1), PrepTemplate(1)) # We don't test the specific values as this would blow up the size # of this file as the amount of lines would go to ~1000 # 27 samples self.assertEqual(len(obs), 27) self.assertTrue( all(obs.index == pd.Index([ u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189', u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193', u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198', u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191', u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199', u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187', u'SKM7.640188', u'SKM8.640201', u'SKM9.640192' ], dtype='object'))) self.assertTrue( all(obs.columns == pd.Index([ u'tot_org_carb', u'common_name', u'has_extracted_data', u'water_content_soil', u'env_feature', u'assigned_from_geo', u'altitude', u'env_biome', u'texture', u'has_physical_specimen', u'description_duplicate', u'physical_location', u'latitude', u'ph', u'host_taxid', u'elevation', u'description', u'collection_timestamp', u'taxon_id', u'samp_salinity', u'host_subject_id', u'sample_type', u'season_environment', u'required_sample_info_status_id', u'temp', u'country', u'longitude', u'tot_nitro', u'depth', u'anonymized_name', u'experiment_center', u'center_name', u'run_center', u'run_prefix', u'data_type_id', u'target_gene', u'sequencing_meth', u'run_date', u'pcr_primers', u'linkerprimersequence', u'platform', u'library_construction_protocol', u'experiment_design_description', u'study_center', u'center_project_name', u'sample_center', u'samp_size', u'illumina_technology', u'experiment_title', u'emp_status_id', u'target_subfragment', u'barcodesequence' ], dtype='object')))
def test_create(self):
    """Creates a new SampleTemplate"""
    st = SampleTemplate.create(self.metadata, self.new_study)
    # The returned object has the correct id
    self.assertEqual(st.id, 2)

    # The relevant rows to required_sample_info have been added.
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
    # study_id sample_id physical_location has_physical_specimen
    # has_extracted_data sample_type required_sample_info_status_id
    # collection_timestamp host_subject_id description
    exp = [[2, "Sample1", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 1"],
           [2, "Sample2", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 2"],
           [2, "Sample3", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 3"]]
    self.assertEqual(obs, exp)

    # The relevant rows have been added to the study_sample_columns
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
    # study_id, column_name, column_type
    exp = [[2, "str_column", "varchar"]]
    self.assertEqual(obs, exp)

    # The new table exists
    self.assertTrue(exists_table("sample_2", self.conn_handler))

    # The new table hosts the correct values
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.sample_2")
    # sample_id, str_column
    exp = [['Sample1', "Value for sample 1"],
           ['Sample2', "Value for sample 2"],
           ['Sample3', "Value for sample 3"]]
    self.assertEqual(obs, exp)
def display_template(self, study, user, msg, msg_level, full_access,
                     top_tab=None, sub_tab=None, prep_tab=None):
    """Simple function to avoid duplication of code"""
    # Editing the general info is allowed on non-public studies, or
    # always when the user is an admin
    show_edit_btn = study.status != 'public' or user.level == 'admin'
    has_sample_template = SampleTemplate.exists(study.id)

    self.render('study_description.html', message=msg, level=msg_level,
                study=study, study_title=study.title,
                study_alias=study.info['study_alias'],
                show_edit_btn=show_edit_btn,
                show_data_tabs=has_sample_template,
                full_access=full_access, top_tab=top_tab,
                sub_tab=sub_tab, prep_tab=prep_tab)
from os.path import join
from time import strftime

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

# Dump every sample and prep template to a timestamped file in the
# 'templates' mountpoint and register that file with the template.
conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

for row in conn_handler.execute_fetchall(
        "SELECT study_id FROM qiita.study"):
    study_id = row[0]
    if SampleTemplate.exists(study_id):
        st = SampleTemplate(study_id)
        fp = join(fp_base,
                  '%d_%s.txt' % (study_id, strftime("%Y%m%d-%H%M%S")))
        st.to_file(fp)
        st.add_filepath(fp)

for row in conn_handler.execute_fetchall(
        "SELECT prep_template_id FROM qiita.prep_template"):
    prep_template_id = row[0]
    pt = PrepTemplate(prep_template_id)
    fp = join(fp_base,
              '%d_prep_%d_%s.txt' % (pt.study_id, prep_template_id,
                                     strftime("%Y%m%d-%H%M%S")))
    pt.to_file(fp)
    pt.add_filepath(fp)
def test_init(self):
    """Init successfully instantiates the object"""
    st = SampleTemplate(1)
    # BUG FIX: the original used assertTrue(st.id, 1), where ``1`` is the
    # failure *message* argument, so any truthy id (e.g. 2) would pass.
    # assertEqual actually checks the value.
    self.assertEqual(st.id, 1)
def process_sample_template(self, study, user, callback):
    """Process a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        Callback invoked with the results once the processing is done

    Raises
    ------
    HTTPError
        If the sample template file does not exists
    """
    # Tornado raises its own error if "sample_template" or "data_type"
    # are missing, so it is safe to fetch them unconditionally here
    sample_template = self.get_argument('sample_template')
    data_type = self.get_argument('data_type')

    # Resolve the template path inside the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    fp_rsp = join(base_fp, str(study.id), sample_template)

    if not exists(fp_rsp):
        # Fail nicely when the file is missing
        raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

    # Message and level reported when everything goes well
    msg = "The sample template '%s' has been added" % sample_template
    msg_level = "success"
    is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

    try:
        # A QIIME mapping file cannot be processed without a data type;
        # raising inside the try routes the error through the handler
        # below so the user sees it in the UI
        if is_mapping_file and not data_type:
            raise ValueError("Please, choose a data type if uploading a "
                             "QIIME mapping file")

        with warnings.catch_warnings(record=True) as caught:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp_rsp, study,
                                                         int(data_type))
            else:
                SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                      study)
            remove(fp_rsp)

            # Collapse all warning messages into a single one; this is
            # discarded if an exception is raised
            if caught:
                msg = '; '.join([convert_text_html(str(w.message))
                                 for w in caught])
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError, QiitaDBError,
            QiitaWareError) as e:
        # Something went wrong while processing the template; show the
        # error to the user so they can fix it
        error_msg = ('parsing the QIIME mapping file' if is_mapping_file
                     else 'parsing the sample template')
        msg = html_error_message % (error_msg, basename(fp_rsp), str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))
def _extend_sample_template(self, st_id, fp_rpt):
    """Extend sample template ``st_id`` with the metadata file ``fp_rpt``."""
    template = SampleTemplate(st_id)
    template.extend(load_template_to_dataframe(fp_rpt))
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple of (str, str)
        The study accession and the submission accession; both are None
        when ``send`` is False

    Raises
    ------
    ValueError
        If the data was already submitted/submitting, or the investigation
        type is not a recognized ENA term
    IOError
        If the submission output folder already exists
    ComputeError
        If the EBI submission itself fails
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # BUG FIX: the '%s' placeholder was never filled in, so the error
        # message did not report which investigation type was unrecognized
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of "
                         "the user-defined terms in the ENA ontology"
                         % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype
                 in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux'][0]

        # Keep track of which files were actually in the demux file so that
        # we can write those rows to the prep and samples templates
        demux_samples = set()
        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(
                    demux_fh, list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info['study_abstract'], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
def test_table_name(self):
    """_table_name builds the per-study dynamic table name"""
    self.assertEqual(
        SampleTemplate._table_name(self.test_study), "sample_1")
def test_create_duplicate(self):
    """create raises if the study already has a SampleTemplate"""
    # Study 1 ships with a sample template, so a second create must fail
    self.assertRaises(QiitaDBDuplicateError, SampleTemplate.create,
                      self.metadata, self.test_study)
def test_exists_false(self):
    """exists is False for a study with no SampleTemplate attached"""
    obs = SampleTemplate.exists(self.new_study)
    self.assertFalse(obs)
def test_exists_true(self):
    """exists is True for a study whose SampleTemplate is in the DB"""
    obs = SampleTemplate.exists(self.test_study)
    self.assertTrue(obs)
def test_init_wrong_template(self):
    """PrepSample rejects a SampleTemplate passed where a PrepTemplate
    is expected"""
    self.assertRaises(IncompetentQiitaDeveloperError, PrepSample,
                      'SKB8.640193', SampleTemplate(1))
def test_exists(self):
    """exists cannot be invoked on the BaseSample base class"""
    self.assertRaises(IncompetentQiitaDeveloperError, BaseSample.exists,
                      'SKM7.640188', SampleTemplate(1))
def setUp(self):
    """Builds the fixtures shared by the SampleTemplate tests.

    Creates: a metadata frame for three new samples, a handle on the
    pre-populated test study (id 1), a brand-new study with no sample
    template, and the expected sample ids of the populated template.
    """
    # Per-sample metadata covering every required_sample_info column plus
    # one dynamic column ('str_column')
    metadata_dict = {
        'Sample1': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status_id': 1,
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'str_column': 'Value for sample 1',
            'latitude': 42.42,
            'longitude': 41.41
        },
        'Sample2': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status_id': 1,
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 2',
            'str_column': 'Value for sample 2',
            'latitude': 4.2,
            'longitude': 1.1
        },
        'Sample3': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status_id': 1,
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 3',
            'str_column': 'Value for sample 3',
            'latitude': 4.8,
            'longitude': 4.41
        },
    }
    # orient='index' -> sample ids become the frame's index
    self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.test_study = Study(1)
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    # Fresh study with no sample template, used by the create()/exists()
    # negative-path tests
    self.new_study = Study.create(User('*****@*****.**'),
                                  "Fried Chicken Microbiome", [1], info)
    self.tester = SampleTemplate(1)
    self.exp_sample_ids = {
        'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
        'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
        'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
        'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
        'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
        'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
        'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
    }
    # Files registered here are removed in tearDown
    self._clean_up_files = []
def test_init(self):
    """BaseSample cannot be instantiated directly (abstract base)"""
    self.assertRaises(IncompetentQiitaDeveloperError, BaseSample,
                      'SKM7.640188', SampleTemplate(1))
def test_create_duplicate_header(self):
    """create raises when two headers collide case-insensitively"""
    # 'STR_COLUMN' clashes with the existing 'str_column' column
    self.metadata['STR_COLUMN'] = pd.Series([''] * 3,
                                            index=self.metadata.index)
    self.assertRaises(QiitaDBDuplicateHeaderError, SampleTemplate.create,
                      self.metadata, self.new_study)
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the mandatory QIIME columns (BarcodeSequence,
        LinkerPrimerSequence, Description) is missing from the file
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # ReverseLinkerPrimer is optional in QIIME mapping files
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s" % ', '.join(missing))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template by walking the
    # restriction sets; everything else goes to the sample template
    def _col_iterator(restriction_set):
        for restriction in viewvalues(restriction_set):
            for cols in viewkeys(restriction.columns):
                yield cols

    pt_cols = set(col for col in _col_iterator(PREP_TEMPLATE_COLUMNS))

    # data_type may come in as an id; normalize to its string name
    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, (int, long)) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(
            col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    # NOTE(review): DataFrame.ix is removed in modern pandas; this code
    # targets the py2-era pandas the rest of the file uses (.loc would be
    # the modern equivalent)
    st_md = qiime_map.ix[:, st_cols]
    pt_md = qiime_map.ix[:, pt_cols]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
def post(self):
    """Dispatches the analysis sample-selection form.

    Handles four actions: 'create' (new analysis pre-filled with example
    studies), 'search' (run a metadata query), 'select' / 'deselect'
    (add or remove samples from the analysis), then renders the search
    page with the updated state.
    """
    user = self.current_user
    action = self.get_argument("action")
    # set required template variables
    results = {}
    meta_headers = []
    counts = {}
    fullcounts = {}
    query = ""
    searchmsg = ""
    selsamples = {}
    selproc_data = {}
    # get analysis and selected samples if exists, or create if necessary
    if action == "create":
        name = self.get_argument('name')
        description = self.get_argument('description')
        analysis = Analysis.create(User(user), name, description)
        analysis_id = analysis.id
        # set to second step since this page is second step in workflow
        analysis.step = SELECT_SAMPLES
        # fill example studies by running query for specific studies
        search = QiitaStudySearch()
        def_query = 'study_id = 1 OR study_id = 2 OR study_id = 3'
        results, meta_headers = search(def_query, user)
        results, counts, fullcounts = self._parse_search_results(
            results, selsamples, meta_headers)
    else:
        # Existing analysis: verify the user may touch it before loading
        analysis_id = int(self.get_argument("analysis-id"))
        check_analysis_access(User(user), analysis_id)
        analysis = Analysis(analysis_id)
        selproc_data, selsamples = self._selected_parser(analysis)
    # run through action requested
    if action == "search":
        search = QiitaStudySearch()
        query = str(self.get_argument("query"))
        try:
            results, meta_headers = search(query, user)
        except ParseException:
            searchmsg = "Malformed search query, please read search help."
        except QiitaDBIncompatibleDatatypeError as e:
            # NOTE(review): ''.join(e) assumes the exception is iterable
            # over strings — confirm against the exception's definition
            searchmsg = ''.join(e)
        if not results and not searchmsg:
            searchmsg = "No results found."
        else:
            results, counts, fullcounts = self._parse_search_results(
                results, selsamples, meta_headers)
    elif action == "select":
        analysis.add_samples(self._parse_form_select())
        # rebuild the selected from database to reflect changes
        selproc_data, selsamples = self._selected_parser(analysis)
    elif action == "deselect":
        proc_data, samples = self._parse_form_deselect()
        if proc_data:
            analysis.remove_samples(proc_data=proc_data)
        if samples:
            analysis.remove_samples(samples=samples)
        if not proc_data and not samples:
            searchmsg = "Must select samples to remove from analysis!"
        # rebuild the selected from database to reflect changes
        selproc_data, selsamples = self._selected_parser(analysis)
    self.render('search_studies.html', user=user, aid=analysis_id,
                results=results, meta_headers=meta_headers,
                selsamples=selsamples, selproc_data=selproc_data,
                counts=counts, fullcounts=fullcounts, searchmsg=searchmsg,
                query=query,
                availmeta=SampleTemplate.metadata_headers() +
                get_table_cols("study"))
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Renders the EBI submission page for a preprocessed data.

    Parameters
    ----------
    preprocessed_data_id : int or str castable to int
        The preprocessed data to display
    msg : str
        Message to show the user (may be overridden by validation below)
    msg_level : str
        Bootstrap alert level for msg ('success', 'danger', ...)

    Raises
    ------
    HTTPError
        404 if the preprocessed data does not exist; 403 if the current
        user is not an admin
    """
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                             preprocessed_data_id)
    else:
        # EBI submission is restricted to admins
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                                 "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.categories()))]

    # Exactly one demultiplexed file is required for a valid submission
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = ("Study appears to have multiple demultiplexed files!")
        msg_level = 'danger'
    elif demux_length == 1:
        demux_file = demux[0]
        demux_file_stats = demux_stats(demux_file)
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    # Check if the templates have all the required columns for EBI
    pt_missing_cols = prep_template.check_restrictions(
        [PREP_TEMPLATE_COLUMNS['EBI']])
    st_missing_cols = sample_template.check_restrictions(
        [SAMPLE_TEMPLATE_COLUMNS['EBI']])
    allow_submission = (len(pt_missing_cols) == 0 and
                        len(st_missing_cols) == 0)
    if not allow_submission:
        # Build an HTML explanation of why submission is disabled
        msg_list = ["Submission to EBI disabled due to missing columns:"]
        if len(pt_missing_cols) > 0:
            msg_list.append("Columns missing in prep template: %s"
                            % ', '.join(pt_missing_cols))
        if len(st_missing_cols) > 0:
            msg_list.append("Columns missing in sample template: %s"
                            % ', '.join(st_missing_cols))
        ebi_disabled_msg = "<br/>".join(msg_list)
    else:
        ebi_disabled_msg = None

    self.render('ebi_submission.html',
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type,
                allow_submission=allow_submission,
                ebi_disabled_msg=ebi_disabled_msg)
class TestSampleTemplate(TestCase):
    """Tests the SampleTemplate class"""

    def setUp(self):
        # Metadata for three new samples, used by the create() tests
        metadata_dict = {
            'Sample1': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 1',
                'str_column': 'Value for sample 1',
                'latitude': 42.42,
                'longitude': 41.41
            },
            'Sample2': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 2',
                'str_column': 'Value for sample 2',
                'latitude': 4.2,
                'longitude': 1.1
            },
            'Sample3': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 3',
                'str_column': 'Value for sample 3',
                'latitude': 4.8,
                'longitude': 4.41
            },
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        # Study 1 is pre-populated with a sample template in the test DB
        self.test_study = Study(1)
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        # Fresh study with no sample template, for create()/exists() tests
        self.new_study = Study.create(User('*****@*****.**'),
                                      "Fried Chicken Microbiome", [1], info)
        self.tester = SampleTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
        }
        self._clean_up_files = []

    def tearDown(self):
        # Remove any files created by the tests
        for f in self._clean_up_files:
            remove(f)

    def test_init_unknown_error(self):
        """Init raises an error if the id is not known"""
        with self.assertRaises(QiitaDBUnknownIDError):
            SampleTemplate(2)

    def test_init(self):
        """Init successfully instantiates the object"""
        st = SampleTemplate(1)
        # NOTE(review): assertTrue(st.id, 1) treats 1 as the msg argument;
        # assertEqual(st.id, 1) was probably intended
        self.assertTrue(st.id, 1)

    def test_table_name(self):
        """Table name return the correct string"""
        obs = SampleTemplate._table_name(self.test_study)
        self.assertEqual(obs, "sample_1")

    def test_create_duplicate(self):
        """Create raises an error when creating a duplicated SampleTemplate"""
        with self.assertRaises(QiitaDBDuplicateError):
            SampleTemplate.create(self.metadata, self.test_study)

    def test_create_duplicate_header(self):
        """Create raises an error when duplicate headers are present"""
        # 'STR_COLUMN' collides case-insensitively with 'str_column'
        self.metadata['STR_COLUMN'] = pd.Series(['', '', ''],
                                                index=self.metadata.index)
        with self.assertRaises(QiitaDBDuplicateHeaderError):
            SampleTemplate.create(self.metadata, self.new_study)

    def test_create(self):
        """Creates a new SampleTemplate"""
        st = SampleTemplate.create(self.metadata, self.new_study)
        # The returned object has the correct id
        self.assertEqual(st.id, 2)
        # The relevant rows to required_sample_info have been added.
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description
        exp = [[
            2, "Sample1", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 1", 42.42, 41.41
        ], [
            2, "Sample2", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 2", 4.2, 1.1
        ], [
            2, "Sample3", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 3", 4.8, 4.41
        ]]
        self.assertEqual(obs, exp)
        # The relevant rows have been added to the study_sample_columns
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        # study_id, column_name, column_type
        exp = [[2, "str_column", "varchar"]]
        self.assertEqual(obs, exp)
        # The new table exists
        self.assertTrue(exists_table("sample_2", self.conn_handler))
        # The new table hosts the correct values
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        # sample_id, str_column
        exp = [['Sample1', "Value for sample 1"],
               ['Sample2', "Value for sample 2"],
               ['Sample3', "Value for sample 3"]]
        self.assertEqual(obs, exp)

    def test_exists_true(self):
        """Exists returns true when the SampleTemplate already exists"""
        self.assertTrue(SampleTemplate.exists(self.test_study))

    def test_exists_false(self):
        """Exists returns false when the SampleTemplate does not exists"""
        self.assertFalse(SampleTemplate.exists(self.new_study))

    def test_get_sample_ids(self):
        """get_sample_ids returns the correct set of sample ids"""
        obs = self.tester._get_sample_ids(self.conn_handler)
        self.assertEqual(obs, self.exp_sample_ids)

    def test_len(self):
        """Len returns the correct number of sample ids"""
        self.assertEqual(len(self.tester), 27)

    def test_getitem(self):
        """Get item returns the correct sample object"""
        obs = self.tester['SKM7.640188']
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_getitem_error(self):
        """Get item raises an error if key does not exists"""
        with self.assertRaises(KeyError):
            self.tester['Not_a_Sample']

    def test_setitem(self):
        """setitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            self.tester['SKM7.640188'] = Sample('SKM7.640188', self.tester)

    def test_delitem(self):
        """delitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            del self.tester['SKM7.640188']

    def test_iter(self):
        """iter returns an iterator over the sample ids"""
        obs = self.tester.__iter__()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_contains_true(self):
        """contains returns true if the sample id exists"""
        self.assertTrue('SKM7.640188' in self.tester)

    def test_contains_false(self):
        """contains returns false if the sample id does not exists"""
        self.assertFalse('Not_a_Sample' in self.tester)

    def test_keys(self):
        """keys returns an iterator over the sample ids"""
        obs = self.tester.keys()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_values(self):
        """values returns an iterator over the values"""
        obs = self.tester.values()
        self.assertTrue(isinstance(obs, Iterable))
        exp = {
            Sample('SKB1.640202', self.tester),
            Sample('SKB2.640194', self.tester),
            Sample('SKB3.640195', self.tester),
            Sample('SKB4.640189', self.tester),
            Sample('SKB5.640181', self.tester),
            Sample('SKB6.640176', self.tester),
            Sample('SKB7.640196', self.tester),
            Sample('SKB8.640193', self.tester),
            Sample('SKB9.640200', self.tester),
            Sample('SKD1.640179', self.tester),
            Sample('SKD2.640178', self.tester),
            Sample('SKD3.640198', self.tester),
            Sample('SKD4.640185', self.tester),
            Sample('SKD5.640186', self.tester),
            Sample('SKD6.640190', self.tester),
            Sample('SKD7.640191', self.tester),
            Sample('SKD8.640184', self.tester),
            Sample('SKD9.640182', self.tester),
            Sample('SKM1.640183', self.tester),
            Sample('SKM2.640199', self.tester),
            Sample('SKM3.640197', self.tester),
            Sample('SKM4.640180', self.tester),
            Sample('SKM5.640177', self.tester),
            Sample('SKM6.640187', self.tester),
            Sample('SKM7.640188', self.tester),
            Sample('SKM8.640201', self.tester),
            Sample('SKM9.640192', self.tester)
        }
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs), key=lambda x: x.id),
                        sorted(exp, key=lambda x: x.id)):
            self.assertEqual(o, e)

    def test_items(self):
        """items returns an iterator over the (key, value) tuples"""
        obs = self.tester.items()
        self.assertTrue(isinstance(obs, Iterable))
        exp = [('SKB1.640202', Sample('SKB1.640202', self.tester)),
               ('SKB2.640194', Sample('SKB2.640194', self.tester)),
               ('SKB3.640195', Sample('SKB3.640195', self.tester)),
               ('SKB4.640189', Sample('SKB4.640189', self.tester)),
               ('SKB5.640181', Sample('SKB5.640181', self.tester)),
               ('SKB6.640176', Sample('SKB6.640176', self.tester)),
               ('SKB7.640196', Sample('SKB7.640196', self.tester)),
               ('SKB8.640193', Sample('SKB8.640193', self.tester)),
               ('SKB9.640200', Sample('SKB9.640200', self.tester)),
               ('SKD1.640179', Sample('SKD1.640179', self.tester)),
               ('SKD2.640178', Sample('SKD2.640178', self.tester)),
               ('SKD3.640198', Sample('SKD3.640198', self.tester)),
               ('SKD4.640185', Sample('SKD4.640185', self.tester)),
               ('SKD5.640186', Sample('SKD5.640186', self.tester)),
               ('SKD6.640190', Sample('SKD6.640190', self.tester)),
               ('SKD7.640191', Sample('SKD7.640191', self.tester)),
               ('SKD8.640184', Sample('SKD8.640184', self.tester)),
               ('SKD9.640182', Sample('SKD9.640182', self.tester)),
               ('SKM1.640183', Sample('SKM1.640183', self.tester)),
               ('SKM2.640199', Sample('SKM2.640199', self.tester)),
               ('SKM3.640197', Sample('SKM3.640197', self.tester)),
               ('SKM4.640180', Sample('SKM4.640180', self.tester)),
               ('SKM5.640177', Sample('SKM5.640177', self.tester)),
               ('SKM6.640187', Sample('SKM6.640187', self.tester)),
               ('SKM7.640188', Sample('SKM7.640188', self.tester)),
               ('SKM8.640201', Sample('SKM8.640201', self.tester)),
               ('SKM9.640192', Sample('SKM9.640192', self.tester))]
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs)), sorted(exp)):
            self.assertEqual(o, e)

    def test_get(self):
        """get returns the correct sample object"""
        obs = self.tester.get('SKM7.640188')
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_get_none(self):
        """get returns none if the sample id is not present"""
        self.assertTrue(self.tester.get('Not_a_Sample') is None)

    def test_to_file(self):
        """to file writes a tab delimited file with all the metadata"""
        fd, fp = mkstemp()
        close(fd)
        st = SampleTemplate.create(self.metadata, self.new_study)
        st.to_file(fp)
        self._clean_up_files.append(fp)
        with open(fp, 'U') as f:
            obs = f.read()
        self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
def test_retrieve_dropped_samples(self):
    """dropped_samples reports samples excluded from the biom tables."""
    # Create and populate second study to do test with
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    metadata_dict = {
        'SKB8.640193': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'str_column': 'Value for sample 1',
            'latitude': 42.42,
            'longitude': 41.41
        },
        'SKD8.640184': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 2',
            'str_column': 'Value for sample 2',
            'latitude': 4.2,
            'longitude': 1.1
        },
        'SKB7.640196': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 3',
            'str_column': 'Value for sample 3',
            'latitude': 4.8,
            'longitude': 4.41
        },
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    Study.create(User("*****@*****.**"), "Test study 2", [1], info)
    SampleTemplate.create(metadata, Study(2))
    # Attach an existing biom table to the new study as processed data
    mp = get_mountpoint("processed_data")[0][1]
    study_fp = join(mp, "2_study_1001_closed_reference_otu_table.biom")
    ProcessedData.create("processed_params_uclust", 1, [(study_fp, 6)],
                         study=Study(2), data_type="16S")
    # Link the analysis to three samples of the second study directly in
    # the DB so dropped_samples has something to detect
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES "
        "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
        "(1,2,'2.SKB7.640196')")
    # Build biom tables keeping only a subset of the linked samples
    samples = {
        1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
        2: ['2.SKB8.640193', '2.SKD8.640184']
    }
    self.analysis._build_biom_tables(samples, 10000,
                                     conn_handler=self.conn_handler)
    # Samples linked to the analysis but absent from the built tables
    exp = {1: {'1.SKM4.640180', '1.SKM9.640192'}, 2: {'2.SKB7.640196'}}
    self.assertEqual(self.analysis.dropped_samples, exp)
def test_metadata_stats_from_sample_and_prep_templates(self):
    """Each returned category matches the expected summary stats"""
    obs = metadata_stats_from_sample_and_prep_templates(
        SampleTemplate(1), PrepTemplate(1))
    for category in obs:
        self.assertEqual(obs[category], SUMMARY_STATS[category])
def display_template(self, study, user, msg, msg_level, top_tab=None,
                     sub_tab=None, prep_tab=None):
    """Renders the study description page with the right action buttons.

    Decides, from the study status and the user level, which of the
    edit/upload/approval buttons are visible, then renders
    study_description.html.
    """
    # getting the RawData and its prep templates
    available_raw_data = yield Task(self.get_raw_data, study.raw_data())
    available_prep_templates = yield Task(self.get_prep_templates,
                                          available_raw_data)

    # set variable holding if we have files attached to all raw data or not
    raw_files = True if available_raw_data else False
    for r in available_raw_data:
        if not r.get_filepaths():
            raw_files = False

    # set variable holding if we have all prep templates or not
    if available_prep_templates:
        # NOTE(review): _test negates each value, so prep_templates is True
        # only when every raw data has NO prep templates — this looks
        # inverted with respect to the comment above; confirm intent
        _test = lambda item: not item
        prep_templates = all(
            [_test(val) for val in viewvalues(available_prep_templates)])
    else:
        prep_templates = False

    study_status = study.status
    user_level = user.level
    sample_template_exists = SampleTemplate.exists(study.id)

    # The general information of the study can be changed if the study is
    # not public or if the user is an admin, in which case they can always
    # modify the information of the study
    show_edit_btn = study_status != 'public' or user_level == 'admin'

    # Files can be added to a study only if the study is sandboxed
    # or if the user is the admin
    show_upload_btn = study_status == 'sandbox' or user_level == 'admin'

    # The request approval, approve study and make public buttons are
    # mutually exclusive. Only one of them will be shown, depending on the
    # current status of the study
    btn_to_show = None
    if (study_status == 'sandbox' and qiita_config.require_approval
            and sample_template_exists and raw_files and prep_templates):
        # The request approval button only appears if the study is
        # sandboxed, the qiita_config specifies that the approval should
        # be requested and the sample template, raw files and prep
        # prep templates have been added to the study
        btn_to_show = 'request_approval'
    elif (user_level == 'admin' and study_status == 'awaiting_approval'
            and qiita_config.require_approval):
        # The approve study button only appears if the user is an admin,
        # the study is waiting approval and the qiita config requires
        # study approval
        btn_to_show = 'approve_study'
    elif study_status == 'private':
        # The make public button only appers if the study is private
        btn_to_show = 'make_public'

    # The revert to sandbox button only appears if the study is not
    # sandboxed or public
    show_revert_btn = study_status not in {'sandbox', 'public'}

    self.render('study_description.html',
                message=msg,
                level=msg_level,
                user=self.current_user,
                study=study,
                study_title=study.title,
                study_alias=study.info['study_alias'],
                show_edit_btn=show_edit_btn,
                show_upload_btn=show_upload_btn,
                show_revert_btn=show_revert_btn,
                btn_to_show=btn_to_show,
                show_data_tabs=sample_template_exists,
                top_tab=top_tab,
                sub_tab=sub_tab,
                prep_tab=prep_tab)
def test_eq_false_type(self):
    """Samples bound to templates of different types are not equal"""
    other = Sample(self.sample_id, SampleTemplate(1))
    is_equal = self.tester == other
    self.assertFalse(is_equal)
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Raises
    ------
    ValueError
        If the data is already being (or was) submitted, or if its
        investigation type is not recognized by the ENA ontology
    ComputeError
        If EBI rejects the XML submission

    Notes
    -----
    If fastq_dir_fp is passed, it must not contain any empty files, or
    gzipped empty files
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ("submitting", "success"):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status("submitting")

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id("ENA", "ontology"))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = "Other"
        new_investigation_type = current_type
    else:
        # This should never happen
        # Fix: the message had a '%s' placeholder but the offending term was
        # never interpolated, so the raised error hid what the bad type was
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology" % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == "preprocessed_demux"][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                wrote_sequences = False
                with gzopen(sample_fp, "w") as fh:
                    for record in iterator:
                        fh.write(record)
                        wrote_sequences = True

                if not wrote_sequences:
                    # EBI rejects empty (gzipped) files; drop read-less samples
                    remove(sample_fp)

    output_dir = fastq_dir_fp + "_submission"

    samp_fp = join(fastq_dir_fp, "sample_metadata.txt")
    prep_fp = join(fastq_dir_fp, "prep_metadata.txt")

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp("study.xml")
    sample_fp = get_output_fp("sample.xml")
    experiment_fp = get_output_fp("experiment.xml")
    run_fp = get_output_fp("run.xml")
    submission_fp = get_output_fp("submission.xml")

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError("The output folder already exists: %s" % output_dir)

    with open(samp_fp, "U") as st, open(prep_fp, "U") as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info["study_abstract"], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status("failed")
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status("success", study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
class TestSampleTemplate(TestCase):
    """Tests the SampleTemplate class.

    Uses study 1 (already in the test database) as the existing template
    and creates a second study so SampleTemplate.create can be exercised
    without clashing with existing data.
    """

    def setUp(self):
        # Three synthetic samples; they differ only in Description and
        # str_column so create/to_file output is easy to predict.
        metadata_dict = {
            'Sample1': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 1',
                        'str_column': 'Value for sample 1'},
            'Sample2': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 2',
                        'str_column': 'Value for sample 2'},
            'Sample3': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 3',
                        'str_column': 'Value for sample 3'}
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.test_study = Study(1)
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        # Fresh study with no sample template attached yet
        self.new_study = Study.create(User('*****@*****.**'),
                                      "Fried Chicken Microbiome", [1], info)
        self.tester = SampleTemplate(1)
        # The 27 sample ids known to belong to study 1 in the test DB
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
        self._clean_up_files = []

    def tearDown(self):
        # Remove any temp files registered by the tests
        for f in self._clean_up_files:
            remove(f)

    def test_init_unknown_error(self):
        """Init raises an error if the id is not known"""
        with self.assertRaises(QiitaDBUnknownIDError):
            SampleTemplate(2)

    def test_init(self):
        """Init successfully instantiates the object"""
        st = SampleTemplate(1)
        self.assertTrue(st.id, 1)

    def test_table_name(self):
        """Table name return the correct string"""
        obs = SampleTemplate._table_name(self.test_study)
        self.assertEqual(obs, "sample_1")

    def test_create_duplicate(self):
        """Create raises an error when creating a duplicated SampleTemplate"""
        with self.assertRaises(QiitaDBDuplicateError):
            SampleTemplate.create(self.metadata, self.test_study)

    def test_create_duplicate_header(self):
        """Create raises an error when duplicate headers are present"""
        # STR_COLUMN collides case-insensitively with str_column
        self.metadata['STR_COLUMN'] = pd.Series(['', '', ''],
                                                index=self.metadata.index)
        with self.assertRaises(QiitaDBDuplicateHeaderError):
            SampleTemplate.create(self.metadata, self.new_study)

    def test_create(self):
        """Creates a new SampleTemplate"""
        st = SampleTemplate.create(self.metadata, self.new_study)
        # The returned object has the correct id
        self.assertEqual(st.id, 2)
        # The relevant rows to required_sample_info have been added.
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description
        exp = [[2, "Sample1", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 1"],
               [2, "Sample2", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 2"],
               [2, "Sample3", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 3"]]
        self.assertEqual(obs, exp)
        # The relevant rows have been added to the study_sample_columns
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        # study_id, column_name, column_type
        exp = [[2, "str_column", "varchar"]]
        self.assertEqual(obs, exp)
        # The new table exists
        self.assertTrue(exists_table("sample_2", self.conn_handler))
        # The new table hosts the correct values
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        # sample_id, str_column
        exp = [['Sample1', "Value for sample 1"],
               ['Sample2', "Value for sample 2"],
               ['Sample3', "Value for sample 3"]]
        self.assertEqual(obs, exp)

    def test_exists_true(self):
        """Exists returns true when the SampleTemplate already exists"""
        self.assertTrue(SampleTemplate.exists(self.test_study))

    def test_exists_false(self):
        """Exists returns false when the SampleTemplate does not exists"""
        self.assertFalse(SampleTemplate.exists(self.new_study))

    def test_get_sample_ids(self):
        """get_sample_ids returns the correct set of sample ids"""
        obs = self.tester._get_sample_ids(self.conn_handler)
        self.assertEqual(obs, self.exp_sample_ids)

    def test_len(self):
        """Len returns the correct number of sample ids"""
        self.assertEqual(len(self.tester), 27)

    def test_getitem(self):
        """Get item returns the correct sample object"""
        obs = self.tester['SKM7.640188']
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_getitem_error(self):
        """Get item raises an error if key does not exists"""
        with self.assertRaises(KeyError):
            self.tester['Not_a_Sample']

    def test_setitem(self):
        """setitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            self.tester['SKM7.640188'] = Sample('SKM7.640188', self.tester)

    def test_delitem(self):
        """delitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            del self.tester['SKM7.640188']

    def test_iter(self):
        """iter returns an iterator over the sample ids"""
        obs = self.tester.__iter__()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_contains_true(self):
        """contains returns true if the sample id exists"""
        self.assertTrue('SKM7.640188' in self.tester)

    def test_contains_false(self):
        """contains returns false if the sample id does not exists"""
        self.assertFalse('Not_a_Sample' in self.tester)

    def test_keys(self):
        """keys returns an iterator over the sample ids"""
        obs = self.tester.keys()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_values(self):
        """values returns an iterator over the values"""
        obs = self.tester.values()
        self.assertTrue(isinstance(obs, Iterable))
        exp = {Sample('SKB1.640202', self.tester),
               Sample('SKB2.640194', self.tester),
               Sample('SKB3.640195', self.tester),
               Sample('SKB4.640189', self.tester),
               Sample('SKB5.640181', self.tester),
               Sample('SKB6.640176', self.tester),
               Sample('SKB7.640196', self.tester),
               Sample('SKB8.640193', self.tester),
               Sample('SKB9.640200', self.tester),
               Sample('SKD1.640179', self.tester),
               Sample('SKD2.640178', self.tester),
               Sample('SKD3.640198', self.tester),
               Sample('SKD4.640185', self.tester),
               Sample('SKD5.640186', self.tester),
               Sample('SKD6.640190', self.tester),
               Sample('SKD7.640191', self.tester),
               Sample('SKD8.640184', self.tester),
               Sample('SKD9.640182', self.tester),
               Sample('SKM1.640183', self.tester),
               Sample('SKM2.640199', self.tester),
               Sample('SKM3.640197', self.tester),
               Sample('SKM4.640180', self.tester),
               Sample('SKM5.640177', self.tester),
               Sample('SKM6.640187', self.tester),
               Sample('SKM7.640188', self.tester),
               Sample('SKM8.640201', self.tester),
               Sample('SKM9.640192', self.tester)}
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs), key=lambda x: x.id),
                        sorted(exp, key=lambda x: x.id)):
            self.assertEqual(o, e)

    def test_items(self):
        """items returns an iterator over the (key, value) tuples"""
        obs = self.tester.items()
        self.assertTrue(isinstance(obs, Iterable))
        exp = [('SKB1.640202', Sample('SKB1.640202', self.tester)),
               ('SKB2.640194', Sample('SKB2.640194', self.tester)),
               ('SKB3.640195', Sample('SKB3.640195', self.tester)),
               ('SKB4.640189', Sample('SKB4.640189', self.tester)),
               ('SKB5.640181', Sample('SKB5.640181', self.tester)),
               ('SKB6.640176', Sample('SKB6.640176', self.tester)),
               ('SKB7.640196', Sample('SKB7.640196', self.tester)),
               ('SKB8.640193', Sample('SKB8.640193', self.tester)),
               ('SKB9.640200', Sample('SKB9.640200', self.tester)),
               ('SKD1.640179', Sample('SKD1.640179', self.tester)),
               ('SKD2.640178', Sample('SKD2.640178', self.tester)),
               ('SKD3.640198', Sample('SKD3.640198', self.tester)),
               ('SKD4.640185', Sample('SKD4.640185', self.tester)),
               ('SKD5.640186', Sample('SKD5.640186', self.tester)),
               ('SKD6.640190', Sample('SKD6.640190', self.tester)),
               ('SKD7.640191', Sample('SKD7.640191', self.tester)),
               ('SKD8.640184', Sample('SKD8.640184', self.tester)),
               ('SKD9.640182', Sample('SKD9.640182', self.tester)),
               ('SKM1.640183', Sample('SKM1.640183', self.tester)),
               ('SKM2.640199', Sample('SKM2.640199', self.tester)),
               ('SKM3.640197', Sample('SKM3.640197', self.tester)),
               ('SKM4.640180', Sample('SKM4.640180', self.tester)),
               ('SKM5.640177', Sample('SKM5.640177', self.tester)),
               ('SKM6.640187', Sample('SKM6.640187', self.tester)),
               ('SKM7.640188', Sample('SKM7.640188', self.tester)),
               ('SKM8.640201', Sample('SKM8.640201', self.tester)),
               ('SKM9.640192', Sample('SKM9.640192', self.tester))]
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs)), sorted(exp)):
            self.assertEqual(o, e)

    def test_get(self):
        """get returns the correct sample object"""
        obs = self.tester.get('SKM7.640188')
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_get_none(self):
        """get returns none if the sample id is not present"""
        self.assertTrue(self.tester.get('Not_a_Sample') is None)

    def test_to_file(self):
        """to file writes a tab delimited file with all the metadata"""
        fd, fp = mkstemp()
        close(fd)
        st = SampleTemplate.create(self.metadata, self.new_study)
        st.to_file(fp)
        self._clean_up_files.append(fp)
        with open(fp, 'U') as f:
            obs = f.read()
        self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
def post(self):
    """Handle the analysis sample-search page form submissions.

    The ``action`` form argument selects the branch:

    - ``create``: make a new Analysis, advance it to the sample-selection
      step and pre-populate results from a default query over studies 1-3.
    - ``search``: run the user-supplied query against studies the user
      can access.
    - ``select`` / ``deselect``: add samples to / remove samples from an
      existing analysis, then re-read the selection from the database.

    In every case the ``search_studies.html`` template is rendered with
    the (possibly empty) results, counts and selection state.
    """
    user = self.current_user
    action = self.get_argument("action")
    # set required template variables
    results = {}
    meta_headers = []
    counts = {}
    fullcounts = {}
    query = ""
    searchmsg = ""
    selsamples = {}
    selproc_data = {}
    # get analysis and selected samples if exists, or create if necessary
    if action == "create":
        name = self.get_argument('name')
        description = self.get_argument('description')
        analysis = Analysis.create(user, name, description)
        analysis_id = analysis.id
        # set to second step since this page is second step in workflow
        analysis.step = SELECT_SAMPLES
        # fill example studies by running query for specific studies
        search = QiitaStudySearch()
        def_query = 'study_id = 1 OR study_id = 2 OR study_id = 3'
        results, meta_headers = search(def_query, user)
        results, counts, fullcounts = self._parse_search_results(
            results, selsamples, meta_headers)
    else:
        # Existing analysis: verify the user may touch it before loading
        # the current selection
        analysis_id = int(self.get_argument("analysis-id"))
        analysis = Analysis(analysis_id)
        check_analysis_access(user, analysis)
        selproc_data, selsamples = self._selected_parser(analysis)
    # run through action requested
    if action == "search":
        search = QiitaStudySearch()
        query = str(self.get_argument("query"))
        try:
            results, meta_headers = search(query, user)
        except ParseException:
            searchmsg = "Malformed search query, please read search help."
        except QiitaDBIncompatibleDatatypeError as e:
            # NOTE(review): ''.join(e) relies on the exception being
            # iterable over strings — confirm against the exception class
            searchmsg = ''.join(e)
        if not results and not searchmsg:
            searchmsg = "No results found."
        else:
            results, counts, fullcounts = self._parse_search_results(
                results, selsamples, meta_headers)
    elif action == "select":
        analysis.add_samples(self._parse_form_select())
        # rebuild the selected from database to reflect changes
        selproc_data, selsamples = self._selected_parser(analysis)
    elif action == "deselect":
        proc_data, samples = self._parse_form_deselect()
        if proc_data:
            analysis.remove_samples(proc_data=proc_data)
        if samples:
            analysis.remove_samples(samples=samples)
        if not proc_data and not samples:
            searchmsg = "Must select samples to remove from analysis!"
        # rebuild the selected from database to reflect changes
        selproc_data, selsamples = self._selected_parser(analysis)
    self.render('search_studies.html', user=user, aid=analysis_id,
                results=results, meta_headers=meta_headers,
                selsamples=selsamples, selproc_data=selproc_data,
                counts=counts, fullcounts=fullcounts,
                searchmsg=searchmsg, query=query,
                availmeta=SampleTemplate.metadata_headers() +
                get_table_cols("study"))
def setUp(self):
    """Build the shared fixtures: a 3-sample metadata frame, the existing
    test study (id 1), a brand-new study, and the expected sample ids."""
    # The three samples share every field except Description / str_column,
    # so build them from a common base plus per-sample overrides.
    base_sample = {'physical_location': 'location1',
                   'has_physical_specimen': True,
                   'has_extracted_data': True,
                   'sample_type': 'type1',
                   'required_sample_info_status_id': 1,
                   'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                   'host_subject_id': 'NotIdentified'}
    per_sample = {'Sample1': ('Test Sample 1', 'Value for sample 1'),
                  'Sample2': ('Test Sample 2', 'Value for sample 2'),
                  'Sample3': ('Test Sample 3', 'Value for sample 3')}
    metadata_dict = {}
    for sample_id, (description, str_value) in per_sample.items():
        row = dict(base_sample)
        row['Description'] = description
        row['str_column'] = str_value
        metadata_dict[sample_id] = row
    self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.test_study = Study(1)
    info = dict(
        timeseries_type_id=1,
        metadata_complete=True,
        mixs_compliant=True,
        number_samples_collected=25,
        number_samples_promised=28,
        portal_type_id=3,
        study_alias="FCM",
        study_description="Microbiome of people who eat nothing but "
                          "fried chicken",
        study_abstract="Exploring how a high fat diet changes the "
                       "gut microbiome",
        emp_person_id=StudyPerson(2),
        principal_investigator_id=StudyPerson(3),
        lab_person_id=StudyPerson(1))
    # A fresh study with no sample template attached yet
    self.new_study = Study.create(User('*****@*****.**'),
                                  "Fried Chicken Microbiome", [1], info)
    self.tester = SampleTemplate(1)
    # All 27 sample ids present in the test database for study 1
    self.exp_sample_ids = {
        'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
        'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
        'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
        'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
        'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
        'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
        'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
    self._clean_up_files = []
def display_template(self, study, user, msg, msg_level, top_tab=None,
                     sub_tab=None, prep_tab=None):
    """Render the study-description page (shared by several handlers).

    Parameters
    ----------
    study : Study
        The study being displayed
    user : User
        The user viewing the page
    msg : str
        Message to show in the page banner
    msg_level : str
        Bootstrap alert level for the message (e.g. 'success', 'danger')
    top_tab, sub_tab, prep_tab : optional
        Which tabs to pre-select on the rendered page
    """
    # getting the RawData and its prep templates
    available_raw_data = yield Task(self.get_raw_data, study.raw_data())
    available_prep_templates = yield Task(self.get_prep_templates,
                                          available_raw_data)
    # set variable holding if we have files attached to all raw data or not
    raw_files = True if available_raw_data else False
    for r in available_raw_data:
        if not r.get_filepaths():
            raw_files = False
    # set variable holding if we have all prep templates or not
    if available_prep_templates:
        def _test(item):
            return not item
        # NOTE(review): _test returns True for EMPTY values, so
        # prep_templates is True only when every raw data has NO prep
        # templates — that looks inverted relative to the comment above;
        # confirm the intended semantics against the request_approval flow.
        prep_templates = all(
            [_test(val) for val in viewvalues(available_prep_templates)])
    else:
        prep_templates = False
    study_status = study.status
    user_level = user.level
    sample_template_exists = SampleTemplate.exists(study.id)
    # The general information of the study can be changed if the study is
    # not public or if the user is an admin, in which case they can always
    # modify the information of the study
    show_edit_btn = study_status != 'public' or user_level == 'admin'
    # Files can be added to a study only if the study is sandboxed
    # or if the user is the admin
    show_upload_btn = study_status == 'sandbox' or user_level == 'admin'
    # The request approval, approve study and make public buttons are
    # mutually exclusive. Only one of them will be shown, depending on the
    # current status of the study
    btn_to_show = None
    if (study_status == 'sandbox' and qiita_config.require_approval
            and sample_template_exists and raw_files and prep_templates):
        # The request approval button only appears if the study is
        # sandboxed, the qiita_config specifies that the approval should
        # be requested and the sample template, raw files and prep
        # prep templates have been added to the study
        btn_to_show = 'request_approval'
    elif (user_level == 'admin' and
            study_status == 'awaiting_approval' and
            qiita_config.require_approval):
        # The approve study button only appears if the user is an admin,
        # the study is waiting approval and the qiita config requires
        # study approval
        btn_to_show = 'approve_study'
    elif study_status == 'private':
        # The make public button only appers if the study is private
        btn_to_show = 'make_public'
    # The revert to sandbox button only appears if the study is not
    # sandboxed or public
    show_revert_btn = study_status not in {'sandbox', 'public'}
    self.render('study_description.html', message=msg, level=msg_level,
                study=study, study_title=study.title,
                study_alias=study.info['study_alias'],
                show_edit_btn=show_edit_btn,
                show_upload_btn=show_upload_btn,
                show_revert_btn=show_revert_btn, btn_to_show=btn_to_show,
                show_data_tabs=sample_template_exists,
                top_tab=top_tab, sub_tab=sub_tab, prep_tab=prep_tab)
def test_stats_from_df(self): obs = stats_from_df(dataframe_from_template(SampleTemplate(1))) for k in obs: self.assertEqual(obs[k], SUMMARY_STATS[k])
def test_retrieve_dropped_samples(self):
    """dropped_samples reports the samples excluded from the biom tables."""
    # Create and populate second study to do test with
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    metadata_dict = {
        'SKB8.640193': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 1',
                        'str_column': 'Value for sample 1',
                        'latitude': 42.42,
                        'longitude': 41.41},
        'SKD8.640184': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 2',
                        'str_column': 'Value for sample 2',
                        'latitude': 4.2,
                        'longitude': 1.1},
        'SKB7.640196': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 3',
                        'str_column': 'Value for sample 3',
                        'latitude': 4.8,
                        'longitude': 4.41},
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    Study.create(User("*****@*****.**"), "Test study 2", [1], info)
    SampleTemplate.create(metadata, Study(2))
    # Attach a processed-data biom table from the test mountpoint to the
    # new study
    mp = get_mountpoint("processed_data")[0][1]
    study_fp = join(mp, "2_study_1001_closed_reference_otu_table.biom")
    ProcessedData.create("processed_params_uclust", 1, [(study_fp, 6)],
                         study=Study(2), data_type="16S")
    # Register the three new samples as part of analysis 1
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES "
        "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
        "(1,2,'2.SKB7.640196')")
    # Build the biom tables with a subset of the registered samples so
    # some of them are dropped (2.SKB7.640196 deliberately left out)
    samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
               2: ['2.SKB8.640193', '2.SKD8.640184']}
    self.analysis._build_biom_tables(samples, 10000,
                                     conn_handler=self.conn_handler)
    exp = {1: {'1.SKM4.640180', '1.SKM9.640192'},
           2: {'2.SKB7.640196'}}
    self.assertEqual(self.analysis.dropped_samples, exp)
def test_init_unknown_error(self): """Init raises an error if the id is not known""" with self.assertRaises(QiitaDBUnknownIDError): SampleTemplate(2)