def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id

    Returns
    -------
    bool
        True if VAMPS returned the expected upload page (submission
        succeeded), False otherwise

    Raises
    ------
    ValueError
        If the preprocessed data is already being (or was already)
        submitted to VAMPS
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')

    targz.close()

    # submitting
    # BUG FIX: the command template previously had only 3 '%s'
    # placeholders for 4 arguments, which raised a TypeError before the
    # curl call could ever run. The password placeholder is restored so
    # every argument is consumed.
    cmd = ("curl -F user=%s -F pass=%s -F uploadFile=@%s -F "
           "press=UploadFile %s"
           % (qiita_config.vamps_user, qiita_config.vamps_pass,
              targz_fp, qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS answers a fixed HTML page on a successful upload
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def update_sample_template(self, study, user, callback): """Update a sample template from the POST method Parameters ---------- study : Study The current study object user : User The current user object callback : function The callback function to call with the results once the processing is done Raises ------ HTTPError If the sample template file does not exists """ # If we are on this function, the argument "sample_template" must # defined. If not, let tornado raise its error sample_template = self.get_argument('sample_template') # Define here the message and message level in case of success msg = "The sample template '%s' has been updated" % sample_template msg_level = "success" # Get the uploads folder _, base_fp = get_mountpoint("uploads")[0] # Get the path of the sample template in the uploads folder fp_rsp = join(base_fp, str(study.id), sample_template) if not exists(fp_rsp): # The file does not exist, fail nicely raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp) try: with warnings.catch_warnings(record=True) as warns: # deleting previous uploads and inserting new one st = SampleTemplate(study.id) st.update(load_template_to_dataframe(fp_rsp)) # join all the warning messages into one. Note that this info # will be ignored if an exception is raised if warns: msg = '; '.join([str(w.message) for w in warns]) msg_level = 'warning' except (TypeError, QiitaDBColumnError, QiitaDBExecutionError, QiitaDBDuplicateError, IOError, ValueError, KeyError, CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e: # Some error occurred while processing the sample template # Show the error to the user so they can fix the template msg = html_error_message % ('updating the sample template:', basename(fp_rsp), str(e)) msg_level = "danger" callback((msg, msg_level, None, None, None))
def setUp(self):
    """Build a Sample connected to sample template 1 for the tests."""
    self.sample_template = SampleTemplate(1)
    self.sample_id = 'SKB8.640193'
    self.tester = Sample(self.sample_id, self.sample_template)
    # Metadata categories expected for this sample
    self.exp_categories = set([
        'physical_location', 'has_physical_specimen', 'has_extracted_data',
        'sample_type', 'required_sample_info_status_id',
        'collection_timestamp', 'host_subject_id', 'description',
        'season_environment', 'assigned_from_geo', 'texture', 'taxon_id',
        'depth', 'host_taxid', 'common_name', 'water_content_soil',
        'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude',
        'env_biome', 'country', 'ph', 'anonymized_name', 'tot_org_carb',
        'description_duplicate', 'env_feature', 'latitude', 'longitude'])
def test_metadata_map_from_sample_and_prep_templates(self):
    """The merged metadata map has the expected samples and columns."""
    obs = metadata_map_from_sample_and_prep_templates(
        SampleTemplate(1), PrepTemplate(1))

    # We don't test the specific values as this would blow up the size
    # of this file as the amount of lines would go to ~1000
    exp_index = pd.Index([
        u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
        u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
        u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
        u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
        u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
        u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
        u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'], dtype='object')
    exp_columns = pd.Index([
        u'tot_org_carb', u'common_name', u'has_extracted_data',
        u'water_content_soil', u'env_feature', u'assigned_from_geo',
        u'altitude', u'env_biome', u'texture', u'has_physical_specimen',
        u'description_duplicate', u'physical_location', u'latitude',
        u'ph', u'host_taxid', u'elevation', u'description',
        u'collection_timestamp', u'taxon_id', u'samp_salinity',
        u'host_subject_id', u'sample_type', u'season_environment',
        u'required_sample_info_status_id', u'temp', u'country',
        u'longitude', u'tot_nitro', u'depth', u'anonymized_name',
        u'experiment_center', u'center_name', u'run_center',
        u'run_prefix', u'data_type_id', u'target_gene',
        u'sequencing_meth', u'run_date', u'pcr_primers',
        u'linkerprimersequence', u'platform',
        u'library_construction_protocol',
        u'experiment_design_description', u'study_center',
        u'center_project_name', u'sample_center', u'samp_size',
        u'illumina_technology', u'experiment_title', u'emp_status_id',
        u'target_subfragment', u'barcodesequence'], dtype='object')

    # 27 samples
    self.assertEqual(len(obs), 27)
    self.assertTrue(all(obs.index == exp_index))
    self.assertTrue(all(obs.columns == exp_columns))
def render(self, study):
    """Render the study information tab for `study`."""
    info = study.info
    abstract = info['study_abstract']
    description = info['study_description']
    metadata_complete = info['metadata_complete']
    number_samples_promised = info['number_samples_promised']
    number_samples_collected = info['number_samples_collected']

    # Linkified PMIDs and principal investigator for display
    pmids = ", ".join(pubmed_linkifier([pmid]) for pmid in study.pmids)
    princ_inv = StudyPerson(info['principal_investigator_id'])
    pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))

    # Retrieve the files from the uploads folder, so the user can choose
    # the sample template of the study
    files = [f for _, f in get_files_from_uploads_folders(str(study.id))]

    # If the sample template exists, retrieve all its filepaths;
    # otherwise just pass an empty list
    if SampleTemplate.exists(study.id):
        sample_templates = SampleTemplate(study.id).get_filepaths()
    else:
        sample_templates = []

    # Check if the request came from a local source
    is_local_request = self._is_local()

    # The user can choose the sample template only if the study is
    # sandboxed or the current user is an admin
    show_select_sample = (study.status == 'sandbox'
                          or self.current_user.level == 'admin')

    return self.render_string(
        "study_description_templates/study_information_tab.html",
        abstract=abstract,
        description=description,
        pmids=pmids,
        principal_investigator=pi_link,
        number_samples_promised=number_samples_promised,
        number_samples_collected=number_samples_collected,
        metadata_complete=metadata_complete,
        show_select_sample=show_select_sample,
        files=files,
        study_id=study.id,
        sample_templates=sample_templates,
        is_local_request=is_local_request)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                             preprocessed_data_id)

    # Only admins may reach the EBI submission page
    user = self.current_user
    if user.level != 'admin':
        raise HTTPError(403, "No permissions of admin, "
                             "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)
    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.metadata_headers()))]

    # Exactly one demultiplexed file is required for a submission
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']

    if not demux:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif len(demux) > 1:
        msg = ("Study appears to have multiple demultiplexed files!")
        msg_level = 'danger'
    else:
        demux_file_stats = demux_stats(demux[0])
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    self.render('ebi_submission.html',
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def test_metadata_stats_from_sample_and_prep_templates(self):
    """Each computed summary stat matches the expected SUMMARY_STATS."""
    obs = metadata_stats_from_sample_and_prep_templates(
        SampleTemplate(1), PrepTemplate(1))
    for category in obs:
        self.assertEqual(obs[category], SUMMARY_STATS[category])
def test_exists(self):
    """exists should raise an error if called from the base class"""
    template = SampleTemplate(1)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        BaseSample.exists('SKM7.640188', template)
def test_init(self):
    """BaseSample init should raise an error (it's a base class)"""
    template = SampleTemplate(1)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        BaseSample('SKM7.640188', template)
from os.path import join
from time import strftime

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

# Dump every existing sample template to the templates mountpoint and
# register the generated file in the database.
for row in conn_handler.execute_fetchall(
        "SELECT study_id FROM qiita.study"):
    study_id = row[0]
    if not SampleTemplate.exists(study_id):
        continue
    st = SampleTemplate(study_id)
    fp = join(fp_base,
              '%d_%s.txt' % (study_id, strftime("%Y%m%d-%H%M%S")))
    st.to_file(fp)
    st.add_filepath(fp)

# Do the same for every prep template.
for row in conn_handler.execute_fetchall(
        "SELECT prep_template_id FROM qiita.prep_template"):
    prep_template_id = row[0]
    pt = PrepTemplate(prep_template_id)
    fp = join(fp_base,
              '%d_prep_%d_%s.txt' % (pt.study_id, prep_template_id,
                                     strftime("%Y%m%d-%H%M%S")))
    pt.to_file(fp)
    pt.add_filepath(fp)
def test_init_unknown_error(self):
    """Init raises an error if the id is not known"""
    # Study id 2 is not in the test database
    unknown_id = 2
    with self.assertRaises(QiitaDBUnknownIDError):
        SampleTemplate(unknown_id)
def setUp(self):
    """Build the metadata fixture, a new study and the tester template."""
    # Three-sample metadata fixture; note the mixed-case 'Description'
    # and the user-defined 'str_column' used by the update/extend tests
    metadata_dict = {
        'Sample1': {'physical_location': 'location1',
                    'has_physical_specimen': True,
                    'has_extracted_data': True,
                    'sample_type': 'type1',
                    'required_sample_info_status_id': 1,
                    'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                    'host_subject_id': 'NotIdentified',
                    'Description': 'Test Sample 1',
                    'str_column': 'Value for sample 1',
                    'latitude': 42.42,
                    'longitude': 41.41},
        'Sample2': {'physical_location': 'location1',
                    'has_physical_specimen': True,
                    'has_extracted_data': True,
                    'sample_type': 'type1',
                    'required_sample_info_status_id': 1,
                    'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                    'host_subject_id': 'NotIdentified',
                    'Description': 'Test Sample 2',
                    'str_column': 'Value for sample 2',
                    'latitude': 4.2,
                    'longitude': 1.1},
        'Sample3': {'physical_location': 'location1',
                    'has_physical_specimen': True,
                    'has_extracted_data': True,
                    'sample_type': 'type1',
                    'required_sample_info_status_id': 1,
                    'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                    'host_subject_id': 'NotIdentified',
                    'Description': 'Test Sample 3',
                    'str_column': 'Value for sample 3',
                    'latitude': 4.8,
                    'longitude': 4.41},
    }
    # orient='index' makes each dict key a row (sample) in the frame
    self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

    self.test_study = Study(1)
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    # A brand-new study so create() tests don't clash with Study(1)
    self.new_study = Study.create(User('*****@*****.**'),
                                  "Fried Chicken Microbiome", [1], info)
    self.tester = SampleTemplate(1)
    # Sample ids expected to be present in SampleTemplate(1)
    self.exp_sample_ids = {
        'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
        'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
        'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
        'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
        'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
        'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
        'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
    # Files created during a test that tearDown should remove
    self._clean_up_files = []
def test_eq_false_type(self):
    """Equality returns false if types are not equal"""
    other_sample = Sample(self.sample_id, SampleTemplate(1))
    self.assertFalse(self.tester == other_sample)
def test_init_wrong_template(self):
    """Raises an error if using a SampleTemplate instead of PrepTemplate"""
    sample_template = SampleTemplate(1)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PrepSample('SKB8.640193', sample_template)
def _extend_sample_template(self, st_id, fp_rpt):
    """Extend sample template `st_id` with the rows from the file at
    `fp_rpt`."""
    template = SampleTemplate(st_id)
    template.extend(load_template_to_dataframe(fp_rpt))
def test_stats_from_df(self):
    """Stats computed from the template dataframe match SUMMARY_STATS."""
    df = dataframe_from_template(SampleTemplate(1))
    obs = stats_from_df(df)
    for category in obs:
        self.assertEqual(obs[category], SUMMARY_STATS[category])
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    (str, str)
        The study accession and the submission accession; both are None
        when ``send`` is False

    Raises
    ------
    ValueError
        If the preprocessed data has already been submitted, or if the
        prep template investigation type is not a recognized ENA term
    IOError
        If the submission output folder already exists
    ComputeError
        If EBI rejects the submission
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # BUG FIX: the message contained a '%s' placeholder that was never
        # interpolated; it now reports the offending term
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of "
                         "the user-defined terms in the ENA ontology"
                         % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux'][0]

        # Keep track of which samples were actually in the demux file so
        # that we can write those rows to the prep and sample templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info['study_abstract'], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
def test_init(self):
    """Init successfully instantiates the object"""
    st = SampleTemplate(1)
    # BUG FIX: was assertTrue(st.id, 1), which only checks truthiness
    # (the second argument is the failure *message*, not a comparison)
    self.assertEqual(st.id, 1)