def test_dataframe_from_template(self):
    """to_dataframe must return all 27 samples with the expected
    sample index and metadata columns."""
    template = PrepTemplate(1)
    obs = template.to_dataframe()
    # 27 samples
    self.assertEqual(len(obs), 27)
    # BUG FIX: the original used assertTrue(set(...), {...}), which
    # treats the second argument as the failure *message* and passes
    # for any non-empty set; assertEqual performs the intended
    # set comparison.
    self.assertEqual(set(obs.index), {
        u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
        u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
        u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
        u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
        u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
        u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
        u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'})
    self.assertEqual(set(obs.columns), {
        u'tot_org_carb', u'common_name', u'has_extracted_data',
        u'required_sample_info_status', u'water_content_soil',
        u'env_feature', u'assigned_from_geo', u'altitude', u'env_biome',
        u'texture', u'has_physical_specimen', u'description_duplicate',
        u'physical_location', u'latitude', u'ph', u'host_taxid',
        u'elevation', u'description', u'collection_timestamp',
        u'taxon_id', u'samp_salinity', u'host_subject_id',
        u'sample_type', u'season_environment', u'temp', u'country',
        u'longitude', u'tot_nitro', u'depth', u'anonymized_name',
        u'target_subfragment', u'sample_center', u'samp_size',
        u'run_date', u'experiment_center', u'pcr_primers',
        u'center_name', u'barcodesequence', u'run_center',
        u'run_prefix', u'library_construction_protocol', u'emp_status',
        u'linkerprimersequence', u'experiment_design_description',
        u'target_gene', u'center_project_name', u'illumina_technology',
        u'sequencing_meth', u'platform', u'experiment_title',
        u'study_center'})
def delete_prep_template(self, study, user, callback):
    """Delete the selected prep template

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    pt_id = int(self.get_argument('prep_template_id'))
    # keep the template selected in the UI unless the deletion succeeds
    selected = pt_id
    try:
        PrepTemplate.delete(pt_id)
    except Exception as e:
        msg = "Couldn't remove prep template: %s" % str(e)
        msg_level = "danger"
    else:
        msg = "Prep template %d has been deleted" % pt_id
        msg_level = "success"
        selected = None
    callback((msg, msg_level, 'prep_template_tab', selected, None))
def test_prep_template_post_req(self):
    # POST a new prep template; a blank name should fall back to the
    # default "Prep information <id>" (asserted at the end).
    obs = prep_template_post_req(1, '*****@*****.**', 'update.txt',
                                 '16S', name=" ")
    exp = {'status': 'warning', 'message': [
        ('Some columns required to generate a QIIME-compliant '
         'mapping file are not present in the template. A '
         'placeholder value (XXQIITAXX) has been used to populate '
         'these columns. Missing columns: BarcodeSequence, '
         'LinkerPrimerSequence'),
        ('Some functionality will be disabled due to missing '
         'columns:'),
        ('\tDemultiplexing with multiple input files disabled.: '
         'barcode, primer, run_prefix;'),
        '\tDemultiplexing disabled.: barcode;',
        ('\tEBI submission disabled: center_name, '
         'experiment_design_description, instrument_model, '
         'library_construction_protocol, platform.'),
        ('See the Templates tutorial for a description of these '
         'fields.')],
        'file': 'update.txt',
        'id': 'ignored in test'}
    # the order of the warning lines is not deterministic, so compare
    # them as unordered collections (assertItemsEqual: Python 2 API)
    self.assertItemsEqual(obs['message'].split('\n'), exp['message'])
    self.assertEqual(obs['status'], exp['status'])
    self.assertEqual(obs['file'], exp['file'])
    self.assertIsInstance(obs['id'], int)
    # Make sure new prep template added
    prep = PrepTemplate(obs['id'])
    self.assertEqual(prep.data_type(), '16S')
    self.assertEqual([x for x in prep.keys()], ['1.SKD6.640190'])
    self.assertEqual([x._to_dict() for x in prep.values()],
                     [{'new_col': 'new_value'}])
    self.assertEqual(prep.name, "Prep information %s" % prep.id)
def test_prep_template_post_req(self):
    # POST a new prep template without a name and check the warnings
    # about already-prefixed sample names and missing columns.
    obs = prep_template_post_req(1, '*****@*****.**', 'update.txt',
                                 '16S')
    exp = {'status': 'warning', 'message': [
        'Sample names were already prefixed with the study id.',
        ('Some columns required to generate a QIIME-compliant '
         'mapping file are not present in the template. A '
         'placeholder value (XXQIITAXX) has been used to populate '
         'these columns. Missing columns: BarcodeSequence, '
         'LinkerPrimerSequence'),
        ('Some functionality will be disabled due to missing '
         'columns:'),
        ('\tDemultiplexing with multiple input files disabled.: '
         'barcode, primer, run_prefix;'),
        '\tDemultiplexing disabled.: barcode, primer;',
        ('\tEBI submission disabled: center_name, '
         'experiment_design_description, instrument_model, '
         'library_construction_protocol, platform, primer.'),
        ('See the Templates tutorial for a description of these '
         'fields.')],
        'file': 'update.txt',
        'id': 'ignored in test'}
    # warning line order is not deterministic; compare unordered
    # (assertItemsEqual: Python 2 API)
    self.assertItemsEqual(obs['message'].split('\n'), exp['message'])
    self.assertEqual(obs['status'], exp['status'])
    self.assertEqual(obs['file'], exp['file'])
    self.assertIsInstance(obs['id'], int)
    # Make sure new prep template added
    prep = PrepTemplate(obs['id'])
    self.assertEqual(prep.data_type(), '16S')
    self.assertEqual([x for x in prep.keys()], ['1.SKD6.640190'])
    self.assertEqual([x._to_dict() for x in prep.values()],
                     [{'new_col': 'new_value'}])
def prep_template_samples_get_req(prep_id, user_id):
    """Returns list of samples in the prep template

    Parameters
    ----------
    prep_id : int or str typecastable to int
        PrepTemplate id to get info for
    user_id : str
        User requesting the prep template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'samples': list of str}
        samples is list of samples in the template
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    # Reuse the already-constructed template instead of instantiating a
    # second PrepTemplate (the original re-built it, costing a redundant
    # object construction); iterating a template yields its sample ids.
    return {'status': 'success',
            'message': '',
            'samples': sorted(x for x in prep)
            }
def prep_template_delete_req(prep_id, user_id):
    """Delete the prep template

    Parameters
    ----------
    prep_id : int
        The prep template to update
    user_id : str
        The current user object id

    Returns
    -------
    dict of str
        {'status': status, 'message': message}
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    msg = ''
    status = 'success'
    try:
        PrepTemplate.delete(prep.id)
    except Exception as e:
        # deletion may fail for several reasons (e.g. attached
        # artifacts); any failure is surfaced as an error payload
        # rather than an exception to the caller
        msg = str(e)
        status = 'error'
    return {'status': status,
            'message': msg}
def prep_template_get_req(prep_id, user_id):
    """Gets the json of the full prep template

    Parameters
    ----------
    prep_id : int
        PrepTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': message,
         'template': {sample: {column: value, ...}, ...}
    """
    check = _check_prep_template_exists(int(prep_id))
    if check['status'] != 'success':
        return check
    template = PrepTemplate(int(prep_id))
    denied = check_access(template.study_id, user_id)
    if denied:
        return denied
    # one entry per sample, each mapping column -> value
    contents = template.to_dataframe().to_dict(orient='index')
    return {'status': 'success',
            'message': '',
            'template': contents}
def prep_template_filepaths_get_req(prep_id, user_id):
    """Returns all filepaths attached to a prep template

    Parameters
    ----------
    prep_id : int
        The current prep template id
    user_id : int
        The current user object id

    Returns
    -------
    dict of objects
        {'status': status,
         'message': message,
         'filepaths': [(filepath_id, filepath), ...]}
    """
    check = _check_prep_template_exists(int(prep_id))
    if check['status'] != 'success':
        return check
    template = PrepTemplate(int(prep_id))
    denied = check_access(template.study_id, user_id)
    if denied:
        return denied
    return {'status': 'success',
            'message': '',
            'filepaths': template.get_filepaths()}
def prep_template_delete_req(prep_id, user_id):
    """Delete the prep template

    Parameters
    ----------
    prep_id : int
        The prep template to update
    user_id : str
        The current user object id

    Returns
    -------
    dict of str
        {'status': status, 'message': message}
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    msg = ''
    status = 'success'
    try:
        PrepTemplate.delete(prep.id)
    except Exception as e:
        # deletion may fail (e.g. artifacts still attached); wrap the
        # underlying error in a user-facing message
        msg = ("Couldn't remove prep template: %s" % str(e))
        status = 'error'
    return {'status': status,
            'message': msg}
def tearDown(self):
    """Remove temporary files and dismantle the study built in setUp."""
    for path in self._clean_up_files:
        if exists(path):
            remove(path)
    sid = self.new_study.id
    # prep templates must be removed before the sample template/study
    for prep in self.new_study.prep_templates():
        PrepTemplate.delete(prep.id)
    if SampleTemplate.exists(sid):
        SampleTemplate.delete(sid)
    Study.delete(sid)
def update_investigation_type(self, study, user, callback):
    """Updates the investigation type of a prep template

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    msg = "investigation type successfully updated"
    msg_level = "success"
    ppd_id = int(self.get_argument('ppd_id'))
    prep_id = self.get_argument('prep_id')
    # the investigation type may come from any of three form fields:
    # a predefined value, a user-defined one, or a brand-new one
    edit_investigation_type = self.get_argument('edit-investigation-type',
                                                None)
    edit_user_defined_investigation_type = self.get_argument(
        'edit-user-defined-investigation-type', None)
    edit_new_investigation_type = self.get_argument(
        'edit-new-investigation-type', None)
    pt = PrepTemplate(prep_id)
    investigation_type = self._process_investigation_type(
        edit_investigation_type, edit_user_defined_investigation_type,
        edit_new_investigation_type)
    try:
        pt.investigation_type = investigation_type
    except QiitaDBColumnError as e:
        # value was rejected by the DB layer (not a valid type)
        msg = html_error_message % (", invalid investigation type: ",
                                    investigation_type, str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"
    # ppd_id == 0 signals the request originated from the prep template
    # tab; otherwise it came from the preprocessed data tab
    if ppd_id == 0:
        top_tab = "prep_template_tab"
        sub_tab = prep_id
        prep_tab = None
    else:
        top_tab = "preprocessed_data_tab"
        sub_tab = ppd_id
        prep_tab = None
    callback((msg, msg_level, top_tab, sub_tab, prep_tab))
def prep_template_summary_get_req(prep_id, user_id):
    """Get the summarized prep template data for each metadata column

    Parameters
    ----------
    prep_id : int
        PrepTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        Dictionary object where the keys are the metadata categories
        and the values are list of tuples. Each tuple is an observed
        value in the category and the number of times its seen.
        Format {'status': status,
                'message': message,
                'num_samples': value,
                'category': [(val1, count1), (val2, count2), ...],
                'editable': bool}
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    # only users who can edit the study may edit its prep templates
    editable = Study(prep.study_id).can_edit(User(user_id))
    df = prep.to_dataframe()
    out = {
        'num_samples': df.shape[0],
        'summary': [],
        'status': 'success',
        'message': '',
        'editable': editable
    }
    cols = sorted(list(df.columns))
    for column in cols:
        # dropna=False so missing values are counted as a category too
        counts = df[column].value_counts(dropna=False)
        out['summary'].append(
            (str(column), [(str(key), counts[key])
                           for key in natsorted(counts.index)]))
    return out
def test_post_valid(self):
    # stage two dummy FASTQ files in the study's upload directory
    dontcare, uploads_dir = get_mountpoint('uploads')[0]
    foo_fp = os.path.join(uploads_dir, '1', 'foo.txt')
    bar_fp = os.path.join(uploads_dir, '1', 'bar.txt')
    with open(foo_fp, 'w') as fp:
        fp.write("@x\nATGC\n+\nHHHH\n")
    with open(bar_fp, 'w') as fp:
        fp.write("@x\nATGC\n+\nHHHH\n")
    # create a preparation to attach the new artifact to
    prep = StringIO(EXP_PREP_TEMPLATE.format(1))
    prep_table = load_template_to_dataframe(prep)
    response = self.post('/api/v1/study/1/preparation?data_type=16S',
                         data=prep_table.T.to_dict(),
                         headers=self.headers, asjson=True)
    prepid = json_decode(response.body)['id']
    uri = '/api/v1/study/1/preparation/%d/artifact' % prepid
    # 1 -> fwd or rev sequences in fastq
    # 3 -> barcodes
    body = {
        'artifact_type': 'FASTQ',
        'filepaths': [['foo.txt', 1], ['bar.txt', 'raw_barcodes']],
        'artifact_name': 'a name is a name'
    }
    response = self.post(uri, data=body, headers=self.headers,
                         asjson=True)
    self.assertEqual(response.code, 201)
    # the artifact id reported by the API must match the one now
    # attached to the preparation
    obs = json_decode(response.body)['id']
    prep_instance = PrepTemplate(prepid)
    exp = prep_instance.artifact.id
    self.assertEqual(obs, exp)
def prep_template_jobs_get_req(prep_id, user_id):
    """Returns graph of all artifacts created from the prep base artifact

    Parameters
    ----------
    prep_id : int
        Prep template ID to get graph for
    user_id : str
        User making the request

    Returns
    -------
    dict with the jobs information

    Notes
    -----
    Nodes are identified by the corresponding Artifact ID.
    """
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    # redis holds info about the latest job run against this template
    job_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    result = {}
    if job_info:
        # defaultdict with '' so absent keys render as empty strings
        job_info = defaultdict(lambda: '', loads(job_info))
        job_id = job_info['job_id']
        job = ProcessingJob(job_id)
        result[job.id] = {'status': job.status,
                          'step': job.step,
                          'error': job.log.msg if job.log else ""}
    return result
def post(self, study_id, prep_id):
    """Create a new artifact from uploaded files and attach it to the
    given preparation of the given study."""
    study = self.safe_get_study(study_id)
    if study is None:
        return
    prep_id = to_int(prep_id)
    try:
        p = PrepTemplate(prep_id)
    except QiitaDBUnknownIDError:
        self.fail('Preparation not found', 404)
        return
    # the preparation must belong to the study in the URL
    if p.study_id != study.id:
        self.fail('Preparation ID not associated with the study', 409)
        return
    artifact_deets = json_decode(self.request.body)
    _, upload = get_mountpoint('uploads')[0]
    # filepaths in the request body are relative to the study's
    # upload directory
    base = os.path.join(upload, study_id)
    filepaths = [(os.path.join(base, fp), fp_type)
                 for fp, fp_type in artifact_deets['filepaths']]
    try:
        art = Artifact.create(filepaths,
                              artifact_deets['artifact_type'],
                              artifact_deets['artifact_name'],
                              p)
    except QiitaError as e:
        self.fail(str(e), 406)
        return
    self.write({'id': art.id})
    self.set_status(201)
    self.finish()
def test_copy_artifact(self):
    # Failure test: prep template 1 already has an artifact, so the
    # copy must error out
    job = self._create_job('copy_artifact', {
        'artifact': 1,
        'prep_template': 1
    })
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn("Prep template 1 already has an artifact associated",
                  job.log.msg)
    # Success test: a freshly created prep has no artifact attached,
    # so copying onto it should succeed
    metadata_dict = {
        'SKB8.640193': {
            'center_name': 'ANL',
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'GTCCGCAAGTTA',
            'run_prefix': "s_G1_L001_sequences",
            'platform': 'Illumina',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'AAAA',
            'experiment_design_description': 'BBBB'
        }
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    prep = PrepTemplate.create(metadata, Study(1), "16S")
    job = self._create_job('copy_artifact', {
        'artifact': 1,
        'prep_template': prep.id
    })
    private_task(job.id)
    self.assertEqual(job.status, 'success')
def test_copy_artifact(self):
    # Failure test: prep template 1 already has an artifact, so the
    # copy must error out
    job = self._create_job('copy_artifact',
                           {'artifact': 1, 'prep_template': 1})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn("Prep template 1 already has an artifact associated",
                  job.log.msg)
    # Success test: a freshly created prep has no artifact, so the
    # copy should succeed
    metadata_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    prep = PrepTemplate.create(metadata, Study(1), "16S")
    job = self._create_job('copy_artifact',
                           {'artifact': 1, 'prep_template': prep.id})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
def prep_template_summary_get_req(prep_id, user_id):
    """Get the summarized prep template data for each metadata column

    Parameters
    ----------
    prep_id : int
        PrepTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        Dictionary object where the keys are the metadata categories
        and the values are list of tuples. Each tuple is an observed
        value in the category and the number of times its seen.
        Format {'status': status,
                'message': message,
                'num_samples': value,
                'category': [(val1, count1), (val2, count2), ...],
                'editable': bool}
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    # only users who can edit the study may edit its prep templates
    editable = Study(prep.study_id).can_edit(User(user_id))
    df = prep.to_dataframe()
    out = {'num_samples': df.shape[0],
           'summary': [],
           'status': 'success',
           'message': '',
           'editable': editable}
    cols = sorted(list(df.columns))
    for column in cols:
        # NOTE(review): NaN values are excluded from these counts (no
        # dropna=False, unlike the other summary variant) -- confirm
        # that dropping missing values is intended here
        counts = df[column].value_counts()
        out['summary'].append(
            (str(column), [(str(key), counts[key])
                           for key in natsorted(counts.index)]))
    return out
def remove_add_prep_template(self, fp_rpt, study, data_type_id,
                             investigation_type):
    """add prep templates"""
    # load the template file, create the prep, then drop the file
    template_df = load_template_to_dataframe(fp_rpt)
    new_template = PrepTemplate.create(
        template_df, study, _to_int(data_type_id),
        investigation_type=investigation_type)
    remove(fp_rpt)
    return new_template.id
def test_create_raw_data(self):
    """Creating raw data on a prep that already has an artifact must
    fail with a danger status."""
    filepaths = {'raw_barcodes': 'uploaded_file.txt',
                 'raw_forward_seqs': 'update.txt'}
    expected = {'status': 'danger',
                'message': "Error creating artifact: Prep template 1 already "
                           "has an artifact associated"}
    observed = create_raw_data("FASTQ", PrepTemplate(1), filepaths,
                               name="New name")
    self.assertEqual(observed, expected)
def test_submit_EBI_step_2_failure(self):
    """A VALIDATE submission of the demux files must raise ComputeError."""
    demux = self.write_demux_files(PrepTemplate(1), True)
    submission_id = demux.id
    with self.assertRaises(ComputeError):
        submit_EBI(submission_id, 'VALIDATE', True)
    # ignore_errors=True: cleanup of the scratch dir is best-effort
    rmtree(join(self.base_fp, '%d_ebi_submission' % submission_id), True)
def prep_template_graph_get_req(prep_id, user_id):
    """Returns graph of all artifacts created from the prep base artifact

    Parameters
    ----------
    prep_id : int
        Prep template ID to get graph for
    user_id : str
        User making the request

    Returns
    -------
    dict of lists of tuples
        A dictionary containing the edge list representation of the graph,
        and the node labels. Formatted as:
        {'status': status,
         'message': message,
         'edge_list': [(0, 1), (0, 2)...],
         'node_labels': [(0, 'label0'), (1, 'label1'), ...]}

    Notes
    -----
    Nodes are identified by the corresponding Artifact ID.
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    # We should filter for only the public artifacts if the user
    # doesn't have full access to the study
    full_access = Study(prep.study_id).can_edit(User(user_id))
    artifact = prep.artifact
    # no artifact yet: return an empty graph rather than failing
    if artifact is None:
        return {'edges': [], 'nodes': [],
                'status': 'success', 'message': ''}
    G = artifact.descendants_with_jobs
    nodes, edges, wf_id = get_network_nodes_edges(G, full_access)
    # nodes returns [node_type, node_name, element_id]; here we are looking
    # for the node_type == artifact, and check by the element/artifact_id if
    # it's being deleted
    artifacts_being_deleted = [a[2] for a in nodes
                               if a[0] == 'artifact' and
                               Artifact(a[2]).being_deleted_by is not None]
    return {'edges': edges,
            'nodes': nodes,
            'workflow': wf_id,
            'status': 'success',
            'artifacts_being_deleted': artifacts_being_deleted,
            'message': ''}
def test_prep_template_post_req(self):
    # POST a new prep; a blank name falls back to the default
    # "Prep information <id>" (checked at the end)
    obs = prep_template_post_req(1, '*****@*****.**', 'update.txt',
                                 '16S', name=" ")
    exp = {
        'status': 'warning',
        'message': [
            'Both a converter and dtype were specified for column '
            'sample_name - only the converter will be used', 'Some '
            'functionality will be disabled due to missing columns:',
            '\tEBI submission disabled: center_name, '
            'experiment_design_description, instrument_model, '
            'library_construction_protocol, platform;',
            '\tDemultiplexing disabled.: barcode;', '\tDemultiplexing '
            'with multiple input files disabled.: barcode, primer, '
            'run_prefix.', 'See the Templates tutorial for a '
            'description of these fields.', 'Some columns required to '
            'generate a QIIME-compliant mapping file are not present '
            'in the template. A placeholder value (XXQIITAXX) '
            'has been used to populate these columns. Missing columns: '
            'BarcodeSequence, LinkerPrimerSequence'
        ],
        'file': 'update.txt',
        'id': 'ignored in test'
    }
    # warning line order is not deterministic; compare unordered
    self.assertCountEqual(obs['message'].split('\n'), exp['message'])
    self.assertEqual(obs['status'], exp['status'])
    self.assertEqual(obs['file'], exp['file'])
    self.assertIsInstance(obs['id'], int)
    # Make sure new prep template added
    prep = PrepTemplate(obs['id'])
    self.assertEqual(prep.data_type(), '16S')
    self.assertEqual([x for x in prep.keys()], ['1.SKD6.640190'])
    self.assertEqual([x._to_dict() for x in prep.values()], [{
        'new_col': 'new_value'
    }])
    self.assertEqual(prep.name, "Prep information %s" % prep.id)
def test_post(self):
    """POSTing a template file must create the next prep template id."""
    expected_id = get_count('qiita.prep_template') + 1
    form = {'study_id': '1',
            'data-type': '16S',
            'prep-file': 'new_template.txt'}
    response = self.post('/prep_template/', form)
    self.assertEqual(response.code, 200)
    # Check that the new prep template has been created
    self.assertTrue(PrepTemplate.exists(expected_id))
def test_post_valid_study(self):
    # create a preparation through the REST API and compare the stored
    # template against the submitted one
    prep = StringIO(EXP_PREP_TEMPLATE.format(1))
    prep_table = load_template_to_dataframe(prep)
    response = self.post('/api/v1/study/1/preparation?data_type=16S',
                         data=prep_table.T.to_dict(),
                         headers=self.headers, asjson=True)
    self.assertEqual(response.code, 201)
    exp = json_decode(response.body)
    exp_prep = PrepTemplate(exp['id']).to_dataframe()
    prep_table.index.name = 'sample_id'
    # sort columns to be comparable
    prep_table = prep_table[sorted(prep_table.columns.tolist())]
    exp_prep = exp_prep[sorted(exp_prep.columns.tolist())]
    # qiita_prep_id is added server-side, so it can't be compared
    exp_prep.drop('qiita_prep_id', axis=1, inplace=True)
    # NOTE(review): pd.util.testing is deprecated/removed in modern
    # pandas (use pd.testing) -- fine if the pinned pandas ships it
    pd.util.testing.assert_frame_equal(prep_table, exp_prep)
def test_copy_raw_data(self):
    """Copying raw data onto a prep with an existing artifact must fail."""
    observed = copy_raw_data(PrepTemplate(1), 1)
    expected = {
        'status': 'danger',
        'message': "Error creating artifact: Prep template 1 already "
                   "has an artifact associated"
    }
    self.assertEqual(observed, expected)
def get(self, prep_template_id):
    """Serve the EBI experiment accessions file for a prep template."""
    prep_id = int(prep_template_id)
    template = PrepTemplate(prep_id)
    study_id = template.study_id
    # user must be allowed to see this study before generating files
    self._check_permissions(study_id)
    filename = ('ebi_experiment_accessions_study_%s_prep_%s.tsv'
                % (study_id, prep_id))
    self._generate_files('experiment_accession',
                         template.ebi_experiment_accessions, filename)
def test_submit_EBI_parse_EBI_reply_failure(self):
    """submit_EBI must raise ComputeError carrying the EBI failure text."""
    demux = self.write_demux_files(PrepTemplate(1))
    with self.assertRaises(ComputeError) as ctx:
        submit_EBI(demux.id, 'VALIDATE', True)
    message = str(ctx.exception)
    for fragment in ('EBI Submission failed! Log id:',
                     'The EBI submission failed:',
                     'Failed to validate run xml, error: Expected element'):
        self.assertIn(fragment, message)
def test_submit_EBI_parse_EBI_reply_failure(self):
    """Failed validation raises ComputeError; scratch dir is removed."""
    demux = self.write_demux_files(PrepTemplate(1))
    submission_id = demux.id
    with self.assertRaises(ComputeError) as ctx:
        submit_EBI(submission_id, 'VALIDATE', True)
    message = str(ctx.exception)
    self.assertIn('EBI Submission failed! Log id:', message)
    self.assertIn('The EBI submission failed:', message)
    # ignore_errors=True: cleanup of the scratch dir is best-effort
    rmtree(join(self.base_fp, '%d_ebi_submission' % submission_id), True)
def post(self):
    """Kick off the default workflow for the requested prep template."""
    prep_id = self.get_argument('prep_id')
    error_message = None
    workflow_id = None
    try:
        workflow = PrepTemplate(prep_id).add_default_workflow(
            self.current_user)
        workflow_id = workflow.id
    except Exception as error:
        # report any failure back to the client instead of raising
        error_message = str(error)
    self.write({'data': workflow_id, 'msg_error': error_message})
def update_prep_template(prep_id, fp):
    """Updates a prep template

    Parameters
    ----------
    prep_id : int
        Prep template id to be updated
    fp : str
        The file path to the template file

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}
    """
    # local imports -- presumably this runs in a worker/task context
    # where importing at call time is preferred; TODO confirm
    import warnings
    from os import remove
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_db.metadata_template.prep_template import PrepTemplate
    msg = ''
    status = 'success'
    prep = PrepTemplate(prep_id)
    try:
        # record any warnings raised while extending/updating so they
        # can be reported back to the caller as a single message
        with warnings.catch_warnings(record=True) as warns:
            df = load_template_to_dataframe(fp)
            # add new samples/columns first, then refresh existing values
            prep.extend(df)
            prep.update(df)
            remove(fp)
        if warns:
            # deduplicate repeated warning messages
            msg = '\n'.join(set(str(w.message) for w in warns))
            status = 'warning'
    except Exception as e:
        status = 'danger'
        msg = str(e)
    return {'status': status, 'message': msg}
def get(self, prep_template_id):
    """Serve the sample information restricted to this prep's samples."""
    prep_id = int(prep_template_id)
    template = PrepTemplate(prep_id)
    study_id = template.study_id
    self._check_permissions(study_id)
    sample_info = SampleTemplate(study_id)
    # limit the sample information to the samples present in this prep
    tsv = sample_info.to_dataframe(samples=list(template)).to_csv(
        None, sep='\t')
    self._finish_generate_files(
        'sample_information_from_prep_%s.tsv' % prep_id, tsv)
def prep_template_graph_get_req(prep_id, user_id):
    """Returns graph of all artifacts created from the prep base artifact

    Parameters
    ----------
    prep_id : int
        Prep template ID to get graph for
    user_id : str
        User making the request

    Returns
    -------
    dict of lists of tuples
        A dictionary containing the edge list representation of the graph,
        and the node labels. Formatted as:
        {'status': status,
         'message': message,
         'edge_list': [(0, 1), (0, 2)...],
         'node_labels': [(0, 'label0'), (1, 'label1'), ...]}

    Notes
    -----
    Nodes are identified by the corresponding Artifact ID.
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error
    # We should filter for only the public artifacts if the user
    # doesn't have full access to the study
    full_access = Study(prep.study_id).can_edit(User(user_id))
    G = prep.artifact.descendants
    node_labels = [(n.id, ' - '.join([n.name, n.artifact_type]))
                   for n in G.nodes()
                   if full_access or n.visibility == 'public']
    node_ids = [id_ for id_, label in node_labels]
    # keep only edges whose endpoints both survived the visibility filter
    edge_list = [(n.id, m.id) for n, m in G.edges()
                 if n.id in node_ids and m.id in node_ids]
    return {
        'status': 'success',
        'message': '',
        'edge_list': edge_list,
        'node_labels': node_labels
    }
def test_prep_template_post_req(self):
    # the new id is predictable: one more than the current row count
    new_id = get_count('qiita.prep_template') + 1
    obs = prep_template_post_req(1, '*****@*****.**', 'update.txt',
                                 '16S')
    exp = {'status': 'warning',
           'message': 'Sample names were already prefixed with the study '
                      'id.\nSome functionality will be disabled due to '
                      'missing columns:\n\tDemultiplexing with multiple '
                      'input files disabled. If your raw data includes '
                      'multiple raw input files, you will not be able to '
                      'preprocess your raw data: barcode, primer, '
                      'run_prefix;\n\tDemultiplexing disabled. You will '
                      'not be able to preprocess your raw data: barcode, '
                      'primer;\n\tEBI submission disabled: center_name, '
                      'experiment_design_description, instrument_model, '
                      'library_construction_protocol, platform, primer.'
                      '\nSee the Templates tutorial for a description of '
                      'these fields.\nSome columns required to generate a '
                      'QIIME-compliant mapping file are not present in the'
                      ' template. A placeholder value (XXQIITAXX) has been'
                      ' used to populate these columns. Missing columns: '
                      'BarcodeSequence, LinkerPrimerSequence',
           'file': 'update.txt',
           'id': new_id}
    # warning lines are unordered; compare them as multisets
    # (assertItemsEqual: Python 2 unittest API)
    self.assertItemsEqual(obs['message'].split('\n'),
                          exp['message'].split('\n'))
    self.assertEqual(obs['status'], exp['status'])
    self.assertEqual(obs['file'], exp['file'])
    self.assertEqual(obs['id'], exp['id'])
    # Make sure new prep template added
    prep = PrepTemplate(new_id)
    self.assertEqual(prep.data_type(), '16S')
    self.assertEqual([x for x in prep.keys()], ['1.SKD6.640190'])
    self.assertEqual([x._to_dict() for x in prep.values()],
                     [{'new_col': 'new_value'}])
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = Artifact(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(
            404, "Artifact %d does not exist!" % preprocessed_data_id)
    else:
        # VAMPS submission pages are admin-only
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(
                403, "No permissions of admin, "
                "get/VAMPSSubmitHandler: %s!" % user.id)
    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)
    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.categories()))]
    # collect the demultiplexed files attached to the artifact; there
    # must be exactly one for the submission to be possible
    demux = [
        path for _, path, ftype in preprocessed_data.get_filepaths()
        if ftype == 'preprocessed_demux'
    ]
    demux_length = len(demux)
    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = ("Study appears to have multiple demultiplexed files!")
        msg_level = 'danger'
    elif demux_length == 1:
        demux_file = demux[0]
        demux_file_stats = demux_stats(demux_file)
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'
    self.render('vamps_submission.html',
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id)
def test_prep_template_jobs_get_req(self):
    # Create a new template:
    metadata = pd.DataFrame.from_dict(
        {
            'SKD6.640190': {
                'center_name': 'ANL',
                'target_subfragment': 'V4',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 1',
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': 'GTCCGCAAGTTA',
                'run_prefix': "s_G1_L001_sequences",
                'platform': 'Illumina',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'AAAA',
                'experiment_design_description': 'BBBB'
            }
        },
        orient='index', dtype=str)
    pt = PrepTemplate.create(metadata, Study(1), '16S')
    # Check that it returns an empty dictionary when there are no jobs
    # attached to the prep template
    self.assertEqual(prep_template_jobs_get_req(pt.id, '*****@*****.**'),
                     {})
    # Create a job on the template
    prep_template_patch_req('*****@*****.**', 'remove',
                            '/%s/10/columns/target_subfragment/' % pt.id)
    # To ensure a deterministic result, wait until the job is completed
    self._wait_for_parallel_job('prep_template_%s' % pt.id)
    obs = prep_template_jobs_get_req(pt.id, '*****@*****.**')
    self.assertEqual(len(obs), 1)
    self.assertCountEqual(obs.values(), [{
        'error': '',
        'status': 'success',
        'step': None
    }])
    # a user without access to the study gets an error payload
    obs = prep_template_jobs_get_req(pt.id, '*****@*****.**')
    exp = {
        'status': 'error',
        'message': 'User does not have access to study'
    }
    self.assertEqual(obs, exp)
def _check_prep_template_exists(prep_id):
    """Make sure a prep template exists in the system

    Parameters
    ----------
    prep_id : int or str castable to int
        PrepTemplate id to check

    Returns
    -------
    dict
        {'status': status, 'message': msg}
    """
    pid = int(prep_id)
    if PrepTemplate.exists(pid):
        return {'status': 'success', 'message': ''}
    return {'status': 'error',
            'message': 'Prep template %d does not exist' % pid}
def post(self, study_id, *args, **kwargs):
    """Create a new preparation for the given study from the JSON body.

    Responds 201 with {'id': <prep id>} on success, 406 on a
    QiitaError from template creation.
    """
    data_type = self.get_argument('data_type')
    investigation_type = self.get_argument('investigation_type', None)
    # FIX: the original rebound study_id to the Study *object*,
    # shadowing the ID string with a value of a different type; use a
    # distinct name so each variable keeps one meaning.
    study = self.safe_get_study(study_id)
    if study is None:
        # safe_get_study already wrote the error response
        return
    # rows keyed by sample id, columns are the metadata fields
    data = pd.DataFrame.from_dict(json_decode(self.request.body),
                                  orient='index')
    try:
        p = PrepTemplate.create(data, study, data_type,
                                investigation_type)
    except QiitaError as e:
        self.fail(str(e), 406)
        return
    self.write({'id': p.id})
    self.set_status(201)
    self.finish()
def test_prep_template_jobs_get_req(self):
    # Create a new template:
    metadata = pd.DataFrame.from_dict(
        {'SKD6.640190': {'center_name': 'ANL',
                         'target_subfragment': 'V4',
                         'center_project_name': 'Test Project',
                         'ebi_submission_accession': None,
                         'EMP_status': 'EMP',
                         'str_column': 'Value for sample 1',
                         'primer': 'GTGCCAGCMGCCGCGGTAA',
                         'barcode': 'GTCCGCAAGTTA',
                         'run_prefix': "s_G1_L001_sequences",
                         'platform': 'ILLUMINA',
                         'instrument_model': 'Illumina MiSeq',
                         'library_construction_protocol': 'AAAA',
                         'experiment_design_description': 'BBBB'}},
        orient='index', dtype=str)
    pt = PrepTemplate.create(metadata, Study(1), '16S')
    # Check that it returns an empty dictionary when there are no jobs
    # attached to the prep template
    self.assertEqual(prep_template_jobs_get_req(pt.id, '*****@*****.**'),
                     {})
    # Create a job on the template
    prep_template_patch_req(
        '*****@*****.**', 'remove',
        '/%s/10/columns/target_subfragment/' % pt.id)
    # To ensure a deterministic result, wait until the job is completed
    self._wait_for_parallel_job('prep_template_%s' % pt.id)
    obs = prep_template_jobs_get_req(pt.id, '*****@*****.**')
    self.assertEqual(len(obs), 1)
    # BUG FIX: comparing dict.values() directly against a list only
    # works on Python 2 (where values() returns a list); wrap it in
    # list() so the comparison is valid on both Python 2 and 3.
    self.assertEqual(list(obs.values()), [{'error': '',
                                           'status': 'success',
                                           'step': None}])
    # a user without access to the study gets an error payload
    obs = prep_template_jobs_get_req(pt.id, '*****@*****.**')
    exp = {'status': 'error',
           'message': 'User does not have access to study'}
    self.assertEqual(obs, exp)
def prep_template_patch_req(user_id, req_op, req_path, req_value=None,
                            req_from=None):
    """Modifies an attribute of the prep template

    Parameters
    ----------
    user_id : str
        The id of the user performing the patch operation
    req_op : str
        The operation to perform on the prep information
    req_path : str
        The prep information and attribute to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Returns
    -------
    dict of {str, str}
        A dictionary with the following keys:
        - status: str, whether if the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
    """
    # Only the JSON-PATCH 'replace' operation is supported by this version
    if req_op == 'replace':
        # Split the path, dropping empty components from leading/trailing '/'
        req_path = [v for v in req_path.split('/') if v]
        # The structure of the path should be /prep_id/attribute_to_modify/
        # so if we don't have those 2 elements, we should return an error
        if len(req_path) != 2:
            return {'status': 'error',
                    'message': 'Incorrect path parameter'}
        prep_id = int(req_path[0])
        attribute = req_path[1]

        # Check if the user actually has access to the prep template
        prep = PrepTemplate(prep_id)
        access_error = check_access(prep.study_id, user_id)
        if access_error:
            return access_error

        status = 'success'
        msg = ''
        if attribute == 'investigation_type':
            prep.investigation_type = req_value
        elif attribute == 'data':
            # req_value is a filename in the uploads folder; validate it
            fp = check_fp(prep.study_id, req_value)
            if fp['status'] != 'success':
                return fp
            fp = fp['file']
            # The update runs asynchronously; remember the job id in redis
            # so the UI can poll for progress
            job_id = safe_submit(user_id, update_prep_template, prep_id, fp)
            r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep_id, job_id)
        else:
            # We don't understand the attribute so return an error
            return {'status': 'error',
                    'message': 'Attribute "%s" not found. '
                               'Please, check the path parameter' % attribute}

        return {'status': status, 'message': msg}
    else:
        return {'status': 'error',
                'message': 'Operation "%s" not supported. '
                           'Current supported operations: replace' % req_op}
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, whether the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
        - name: str, the name of the prep template
        - files: list of str, the files available to update the prep
          template
        - download_prep: int, the filepath_id of the prep file
        - download_qiime, int, the filepath_id of the qiime mapping file
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: str, the investigation type of the template
        - ontology: str, dict of {str, list of str} containing the
          information of the ENA ontology
        - artifact_attached: bool, whether the template has an artifact
          attached
        - study_id: int, the study id of the template
    """
    # Currently there is no name attribute, but it will be soon
    name = "Prep information %d" % prep_id
    pt = PrepTemplate(prep_id)
    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    # Only plain-text template candidates from the uploads folder
    files = [f for _, f in get_files_from_uploads_folders(study_id)
             if f.endswith(('.txt', '.tsv'))]

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    # Retrieve the information to download the prep template and QIIME
    # mapping file. See issue https://github.com/biocore/qiita/issues/1675
    download_prep = []
    download_qiime = []
    for fp_id, fp in pt.get_filepaths():
        if 'qiime' in basename(fp):
            download_qiime.append(fp_id)
        else:
            download_prep.append(fp_id)
    # NOTE(review): unconditional [0] raises IndexError when either list is
    # empty (no prep / no QIIME mapping file) — related to issue 1675.
    download_prep = download_prep[0]
    download_qiime = download_qiime[0]

    ontology = _get_ENA_ontology()

    # Consult redis for an in-flight or finished update job on this template
    job_id = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if job_id:
        redis_info = loads(r_client.get(job_id))
        processing = redis_info['status_msg'] == 'Running'
        if processing:
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'
        else:
            alert_type = redis_info['return']['status']
            # HTML line breaks for display in the alert box
            alert_msg = redis_info['return']['message'].replace('\n',
                                                                '</br>')
    else:
        processing = False
        alert_type = ''
        alert_msg = ''

    # Editing is blocked while an update job is running
    editable = Study(study_id).can_edit(User(user_id)) and not processing

    return {'status': 'success',
            'message': '',
            'name': name,
            'files': files,
            'download_prep': download_prep,
            'download_qiime': download_qiime,
            'num_samples': num_samples,
            'num_columns': num_columns,
            'investigation_type': investigation_type,
            'ontology': ontology,
            'artifact_attached': artifact_attached,
            'study_id': study_id,
            'editable': editable,
            'data_type': pt.data_type(),
            'alert_type': alert_type,
            'alert_message': alert_msg}
def update_prep_template(self, study, user, callback): """Update a prep template from the POST method Parameters ---------- study : Study The current study object user : User The current user object callback : function The callback function to call with the results once the processing is done Raises ------ HTTPError If the prep template file does not exists """ # If we are on this function, the arguments "prep_template_id", # "update_prep_template_file" must defined. If not, let tornado # raise its error pt_id = int(self.get_argument('prep_template_id')) prep_template = self.get_argument('update_prep_template_file') # Define here the message and message level in case of success msg = "The prep template '%s' has been updated" % prep_template msg_level = "success" # Get the uploads folder _, base_fp = get_mountpoint("uploads")[0] # Get the path of the prep template in the uploads folder fp = join(base_fp, str(study.id), prep_template) if not exists(fp): # The file does not exist, fail nicely # Using 400 because we want the user to get the error in the GUI raise HTTPError(400, "This file doesn't exist: %s" % fp) try: with warnings.catch_warnings(record=True) as warns: pt = PrepTemplate(pt_id) df = load_template_to_dataframe(fp) pt.extend(df) pt.update(df) remove(fp) # join all the warning messages into one. Note that this info # will be ignored if an exception is raised if warns: msg = '\n'.join(set(str(w.message) for w in warns)) msg_level = 'warning' except (TypeError, QiitaDBColumnError, QiitaDBExecutionError, QiitaDBDuplicateError, IOError, ValueError, KeyError, CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e: # Some error occurred while processing the sample template # Show the error to the user so they can fix the template msg = html_error_message % ('updating the prep template:', basename(fp), str(e)) msg = convert_text_html(msg) msg_level = "danger" callback((msg, msg_level, 'prep_template_tab', pt_id, None))
def test_delete_sample_or_column(self):
    """Exercise the delete_sample_or_column private task on both template
    classes, for both columns and samples, plus its error branches."""
    st = SampleTemplate(1)

    # Delete a sample template column
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'SampleTemplate', 'obj_id': 1,
                            'sample_or_col': 'columns',
                            'name': 'season_environment'})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('season_environment', st.categories())

    # Delete a sample template sample - need to add one
    # sample that we will remove
    npt.assert_warns(
        QiitaDBWarning, st.extend,
        pd.DataFrame.from_dict({'Sample1': {'taxon_id': '9606'}},
                               orient='index', dtype=str))
    self.assertIn('1.Sample1', st.keys())
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'SampleTemplate', 'obj_id': 1,
                            'sample_or_col': 'samples',
                            'name': '1.Sample1'})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('1.Sample1', st.keys())

    # Delete a prep template column
    pt = PrepTemplate(1)
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'PrepTemplate', 'obj_id': 1,
                            'sample_or_col': 'columns',
                            'name': 'target_subfragment'})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertNotIn('target_subfragment', pt.categories())

    # Delete a prep template sample
    metadata = pd.DataFrame.from_dict(
        {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'},
         '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC',
                           'primer': 'GTGCCAGCMGCCGCGGTAA'}},
        orient='index', dtype=str)
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                          Study(1), "16S")
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'PrepTemplate', 'obj_id': pt.id,
                            'sample_or_col': 'samples',
                            'name': '1.SKD8.640184'})
    private_task(job.id)
    # NOTE(review): unlike the cases above, job.status is not asserted here
    self.assertNotIn('1.SKD8.640184', pt.keys())

    # Test exceptions
    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'UnknownClass', 'obj_id': 1,
                            'sample_or_col': 'columns', 'name': 'column'})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn('Unknown value "UnknownClass". Choose between '
                  '"SampleTemplate" and "PrepTemplate"', job.log.msg)

    job = self._create_job('delete_sample_or_column',
                           {'obj_class': 'SampleTemplate', 'obj_id': 1,
                            'sample_or_col': 'unknown', 'name': 'column'})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn('Unknown value "unknown". Choose between "samples" '
                  'and "columns"', job.log.msg)
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by
    artifact_type

    It retrieves the files uploaded for the given study and tries to do a
    guess on how those files should be added to the artifact of the given
    type. Uses information on the prep template to try to do a better
    guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int}
        where 'status' is a string specifying whether the query is
        successfull, 'message' is a human-readable description of the
        error (optional), 'remaining' is the list of files that could not
        be categorized, 'file_types' is a list of the available filetypes,
        if it is required or not and the list of categorized files for the
        given artifact type and 'num_prefixes' is the number of different
        run prefix values in the given prep template.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    # Sanity check: a prep id from a different study is a programming error
    if pt.study_id != study_id:
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (pt.study_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()
    # True when any supported type is a raw_* type (SFF excluded); only
    # then is run_prefix-based grouping attempted
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            to_remove = []
            for fid, f in uploaded:
                if f.startswith(p):
                    sfiles[p].append(f)
                    to_remove.append((fid, f))
            # Each file is claimed by at most one (longest-first) prefix
            uploaded = [x for x in uploaded if x not in to_remove]
        inuse = [y for x in sfiles.values() for y in x]
        remaining.extend([f for _, f in uploaded if f not in inuse])
        supp_file_types_len = len(supp_file_types)

        for k, v in viewitems(sfiles):
            len_files = len(v)
            # if the number of files in the k group is larger than the
            # available columns add to the remaining group, if not put
            # them in the selected group
            if len_files > supp_file_types_len:
                remaining.extend(v)
                message.append("'%s' has %d matches." % (k, len_files))
            else:
                v.sort()
                selected.append(v)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # get file_types, format: filetype, required, list of files
    file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # he wants to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message
               else '\n'.join(['Check these run_prefix:'] + message))

    return {'status': 'success',
            'message': message,
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
def prep_template_patch_req(user_id, req_op, req_path, req_value=None,
                            req_from=None):
    """Modifies an attribute of the prep template

    Parameters
    ----------
    user_id : str
        The id of the user performing the patch operation
    req_op : str
        The operation to perform on the prep information
    req_path : str
        The prep information and attribute to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Returns
    -------
    dict of {str, str, str}
        A dictionary with the following keys:
        - status: str, whether if the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
        - row_id: str, the row_id that we tried to delete
    """
    # Split the path, dropping empty components from leading/trailing '/'
    req_path = [v for v in req_path.split('/') if v]
    if req_op == 'replace':
        # The structure of the path should be /prep_id/attribute_to_modify/
        # so if we don't have those 2 elements, we should return an error
        if len(req_path) != 2:
            return {'status': 'error',
                    'message': 'Incorrect path parameter'}
        prep_id = int(req_path[0])
        attribute = req_path[1]

        # Check if the user actually has access to the prep template
        prep = PrepTemplate(prep_id)
        access_error = check_access(prep.study_id, user_id)
        if access_error:
            return access_error

        status = 'success'
        msg = ''
        if attribute == 'investigation_type':
            prep.investigation_type = req_value
        elif attribute == 'data':
            # req_value is a filename in the uploads folder; validate it
            fp = check_fp(prep.study_id, req_value)
            if fp['status'] != 'success':
                return fp
            fp = fp['file']
            # Launch the update through the internal Qiita plugin so the
            # work happens asynchronously; track the job id in redis
            qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
            cmd = qiita_plugin.get_command('update_prep_template')
            params = Parameters.load(
                cmd, values_dict={'prep_template': prep_id,
                                  'template_fp': fp})
            job = ProcessingJob.create(User(user_id), params, True)
            r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep_id,
                         dumps({'job_id': job.id}))
            job.submit()
        elif attribute == 'name':
            prep.name = req_value.strip()
        else:
            # We don't understand the attribute so return an error
            return {'status': 'error',
                    'message': 'Attribute "%s" not found. '
                               'Please, check the path parameter' % attribute}

        return {'status': status, 'message': msg}
    elif req_op == 'remove':
        # The structure of the path should be:
        # /prep_id/row_id/{columns|samples}/name
        if len(req_path) != 4:
            return {'status': 'error',
                    'message': 'Incorrect path parameter'}
        prep_id = int(req_path[0])
        row_id = req_path[1]
        attribute = req_path[2]
        attr_id = req_path[3]

        # Check if the user actually has access to the study
        pt = PrepTemplate(prep_id)
        access_error = check_access(pt.study_id, user_id)
        if access_error:
            return access_error

        qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
        cmd = qiita_plugin.get_command('delete_sample_or_column')
        params = Parameters.load(
            cmd, values_dict={'obj_class': 'PrepTemplate',
                              'obj_id': prep_id,
                              'sample_or_col': attribute,
                              'name': attr_id})
        job = ProcessingJob.create(User(user_id), params, True)
        # Store the job id attaching it to the sample template id
        r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep_id,
                     dumps({'job_id': job.id}))
        job.submit()
        return {'status': 'success', 'message': '', 'row_id': row_id}
    else:
        return {'status': 'error',
                'message': 'Operation "%s" not supported. '
                           'Current supported operations: replace, '
                           'remove' % req_op,
                'row_id': '0'}
st_df = st.to_dataframe()[columns] # converting to datetime for col in columns: st_df[col] = st_df[col].apply(transform_date) st.update(st_df) if cols_prep: with TRN: # a few notes: just getting the preps with duplicated values; ignoring # column 'sample_id' and tables 'study_sample', 'prep_template', # 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s AND table_name LIKE 'prep_%%' AND table_name NOT IN ( 'prep_template', 'prep_template_sample') GROUP BY table_name""" # note that we are looking for those columns with duplicated names in # the headers TRN.add(sql, [tuple(set(cols_prep))]) for table, columns in viewitems(dict(TRN.execute_fetchindex())): # [1] the format is table_# so taking the # pt = PrepTemplate(int(table.split('_')[1])) # getting just the columns of interest pt_df = pt.to_dataframe()[columns] # converting to datetime for col in columns: pt_df[col] = pt_df[col].apply(transform_date) pt.update(pt_df)
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing"""
    # ignoring warnings generated when adding templates
    simplefilter("ignore")
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", [1], info)
    # Three samples with complete EBI-required sample metadata
    # NOTE(review): several string literals below ('h**o sapiens', the
    # email) look redacted in this copy — confirm against the repository.
    metadata_dict = {
        'Sample1': {'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 1'},
        'Sample2': {'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 2'},
        'Sample3': {'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 3'}
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    SampleTemplate.create(metadata, study)
    # Matching prep metadata (one barcode per sample)
    metadata_dict = {
        'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTC',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 1"},
        'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTA',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 2"},
        'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTT',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 3"},
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    # File here is presumably h5py.File — the demux data is written as HDF5
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)

    ppd = Artifact.create(
        [(demux_fp, 6)], "Demultiplexed", prep_template=pt)
    return ppd
def test_prep_template_patch_req(self):
    """Exercise every replace/remove branch of prep_template_patch_req,
    including all of its error responses."""
    metadata = pd.DataFrame.from_dict(
        {'SKD6.640190': {'center_name': 'ANL',
                         'target_subfragment': 'V4',
                         'center_project_name': 'Test Project',
                         'ebi_submission_accession': None,
                         'EMP_status': 'EMP',
                         'str_column': 'Value for sample 1',
                         'primer': 'GTGCCAGCMGCCGCGGTAA',
                         'barcode': 'GTCCGCAAGTTA',
                         'run_prefix': "s_G1_L001_sequences",
                         'platform': 'ILLUMINA',
                         'instrument_model': 'Illumina MiSeq',
                         'library_construction_protocol': 'AAAA',
                         'experiment_design_description': 'BBBB'}},
        orient='index', dtype=str)
    pt = PrepTemplate.create(metadata, Study(1), '16S')

    # Update investigation type
    obs = prep_template_patch_req(
        '*****@*****.**', 'replace', '/%s/investigation_type' % pt.id,
        'Cancer Genomics')
    exp = {'status': 'success', 'message': ''}
    self.assertEqual(obs, exp)
    self.assertEqual(pt.investigation_type, 'Cancer Genomics')

    # Update prep template data (asynchronous: track job via redis key)
    obs = prep_template_patch_req(
        '*****@*****.**', 'replace', '/%s/data' % pt.id, 'update.txt')
    self.assertEqual(obs, exp)
    obs = r_client.get('prep_template_%s' % pt.id)
    self.assertIsNotNone(obs)
    self._wait_for_parallel_job('prep_template_%s' % pt.id)

    # Delete a prep template column
    obs = prep_template_patch_req(
        '*****@*****.**', 'remove',
        '/%s/10/columns/target_subfragment/' % pt.id)
    exp = {'status': 'success', 'message': '', 'row_id': '10'}
    self.assertEqual(obs, exp)
    self._wait_for_parallel_job('prep_template_%s' % pt.id)
    self.assertNotIn('target_subfragment', pt.categories())

    # Change the name of the prep template (value is stripped)
    obs = prep_template_patch_req(
        '*****@*****.**', 'replace', '/%s/name' % pt.id, ' My New Name ')
    exp = {'status': 'success', 'message': ''}
    self.assertEqual(obs, exp)
    self.assertEqual(pt.name, 'My New Name')

    # Test all the errors
    # Operation not supported
    obs = prep_template_patch_req(
        '*****@*****.**', 'add', '/1/investigation_type',
        'Cancer Genomics')
    exp = {'status': 'error',
           'message': 'Operation "add" not supported. '
                      'Current supported operations: replace, remove',
           'row_id': '0'}
    self.assertEqual(obs, exp)
    # Incorrect path parameter
    obs = prep_template_patch_req(
        '*****@*****.**', 'replace', '/investigation_type',
        'Cancer Genomics')
    exp = {'status': 'error',
           'message': 'Incorrect path parameter'}
    self.assertEqual(obs, exp)
    # Incorrect attribute
    obs = prep_template_patch_req(
        '*****@*****.**', 'replace', '/1/other_attribute',
        'Cancer Genomics')
    exp = {'status': 'error',
           'message': 'Attribute "other_attribute" not found. '
                      'Please, check the path parameter'}
    self.assertEqual(obs, exp)
    # User doesn't have access (presumably an unprivileged user — email
    # addresses are redacted in this copy)
    obs = prep_template_patch_req(
        '*****@*****.**', 'replace', '/%s/investigation_type' % pt.id,
        'Cancer Genomics')
    exp = {'status': 'error',
           'message': 'User does not have access to study'}
    self.assertEqual(obs, exp)
    # File does not exists
    obs = prep_template_patch_req(
        '*****@*****.**', 'replace', '/1/data', 'unknown_file.txt')
    exp = {'status': 'error',
           'message': 'file does not exist',
           'file': 'unknown_file.txt'}
    self.assertEqual(obs, exp)
# DB-patch script fragment (Python 2 — note the print statement below):
# finds prep_* tables whose column names also appear as sample-template
# headers, so duplicated metadata columns can be dealt with.
with TRN:
    # a few notes: just getting the preps with duplicated values; ignoring
    # column 'sample_id' and tables 'study_sample', 'prep_template',
    # 'prep_template_sample'
    sql = """SELECT table_name, array_agg(column_name::text)
                FROM information_schema.columns
                WHERE column_name IN %s
                    AND column_name != 'sample_id'
                    AND table_name LIKE 'prep_%%'
                    AND table_name NOT IN (
                        'prep_template', 'prep_template_sample')
                GROUP BY table_name"""
    # note that we are looking for those columns with duplicated names in
    # the headers
    headers = set(PrepTemplate.metadata_headers()) & \
        set(SampleTemplate.metadata_headers())

    if headers:
        TRN.add(sql, [tuple(headers)])
        overlapping = dict(TRN.execute_fetchindex())
    else:
        overlapping = None

if overlapping is not None:
    # finding actual duplicates
    for table_name, cols in viewitems(overlapping):
        # leaving print so when we patch in the main system we know that
        # nothing was renamed or deal with that
        print table_name
        # NOTE(review): the body of this transaction continues beyond this
        # chunk of the file
        with TRN:
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, whether the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
        - name: str, the name of the prep template
        - files: list of str, the files available to update the prep
          template
        - download_prep: int, the filepath_id of the prep file
        - download_qiime, int, the filepath_id of the qiime mapping file
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: str, the investigation type of the template
        - ontology: str, dict of {str, list of str} containing the
          information of the ENA ontology
        - artifact_attached: bool, whether the template has an artifact
          attached
        - study_id: int, the study id of the template
    """
    pt = PrepTemplate(prep_id)
    name = pt.name

    # Initialize variables here
    processing = False
    alert_type = ''
    alert_msg = ''
    # Look up any update job previously attached to this template in redis
    job_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)

    if job_info:
        # defaultdict('') so missing keys yield empty strings below
        job_info = defaultdict(lambda: '', loads(job_info))
        job_id = job_info['job_id']
        job = ProcessingJob(job_id)
        job_status = job.status
        processing = job_status not in ('success', 'error')
        if processing:
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'
        elif job_status == 'error':
            alert_type = 'danger'
            # HTML line breaks for display in the alert box
            alert_msg = job.log.msg.replace('\n', '</br>')
        else:
            alert_type = job_info['alert_type']
            alert_msg = job_info['alert_msg'].replace('\n', '</br>')

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    # Only plain-text template candidates from the uploads folder
    files = [f for _, f, _ in get_files_from_uploads_folders(study_id)
             if f.endswith(('.txt', '.tsv'))]

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    download_prep_id = None
    download_qiime_id = None
    other_filepaths = []
    # First prep file and first QIIME mapping file win; the rest are
    # reported by basename in other_filepaths
    for fp_id, fp in pt.get_filepaths():
        fp = basename(fp)
        if 'qiime' in fp:
            if download_qiime_id is None:
                download_qiime_id = fp_id
        else:
            if download_prep_id is None:
                download_prep_id = fp_id
            else:
                other_filepaths.append(fp)

    ontology = _get_ENA_ontology()

    # Editing is blocked while an update job is running
    editable = Study(study_id).can_edit(User(user_id)) and not processing

    return {'status': 'success',
            'message': '',
            'name': name,
            'files': files,
            'download_prep_id': download_prep_id,
            'download_qiime_id': download_qiime_id,
            'other_filepaths': other_filepaths,
            'num_samples': num_samples,
            'num_columns': num_columns,
            'investigation_type': investigation_type,
            'ontology': ontology,
            'artifact_attached': artifact_attached,
            'study_id': study_id,
            'editable': editable,
            'data_type': pt.data_type(),
            'alert_type': alert_type,
            'is_submitted_to_ebi': pt.is_submitted_to_ebi,
            'alert_message': alert_msg}
def prep_template_post_req(study_id, user_id, prep_template, data_type,
                           investigation_type=None,
                           user_defined_investigation_type=None,
                           new_investigation_type=None, name=None):
    """Adds a prep template to the system

    Parameters
    ----------
    study_id : int
        Study to attach the prep template to
    user_id : str
        User adding the prep template
    prep_template : str
        Filepath to the prep template being added
    data_type : str
        Data type of the processed samples
    investigation_type: str, optional
        Existing investigation type to attach to the prep template
    user_defined_investigation_type: str, optional
        Existing user added investigation type to attach to the prep
        template
    new_investigation_type: str, optional
        Investigation type to add to the system
    name : str, optional
        The name of the new prep template

    Returns
    -------
    dict of str
        {'status': status,
         'message': message,
         'file': prep_template,
         'id': id}
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error
    fp_rpt = check_fp(study_id, prep_template)
    if fp_rpt['status'] != 'success':
        # Unknown filepath, so return the error message
        return fp_rpt
    fp_rpt = fp_rpt['file']

    # Add new investigation type if needed
    investigation_type = _process_investigation_type(
        investigation_type, user_defined_investigation_type,
        new_investigation_type)

    msg = ''
    status = 'success'
    prep = None
    # Collapse a whitespace-only name to None so no empty name is stored
    # NOTE(review): name == '' skips this branch and stays '', while
    # name == '  ' becomes None — confirm that asymmetry is intended.
    if name:
        name = name if name.strip() else None

    try:
        # Capture warnings so they can be reported instead of the default
        # success message
        with warnings.catch_warnings(record=True) as warns:
            # deleting previous uploads and inserting new one
            prep = PrepTemplate.create(
                load_template_to_dataframe(fp_rpt), Study(study_id),
                data_type, investigation_type=investigation_type, name=name)
            remove(fp_rpt)

            # join all the warning messages into one. Note that this info
            # will be ignored if an exception is raised
            if warns:
                msg = '\n'.join(set(str(w.message) for w in warns))
                status = 'warning'
    except Exception as e:
        # Some error occurred while processing the prep template
        # Show the error to the user so he can fix the template
        status = 'error'
        msg = str(e)
    info = {'status': status,
            'message': msg,
            'file': prep_template,
            'id': prep.id if prep is not None else None}
    return info