def prep_template_graph_get_req(prep_id, user_id):
    """Return the network of artifacts derived from a prep template.

    Parameters
    ----------
    prep_id : int
        Prep template ID to get graph for
    user_id : str
        User making the request

    Returns
    -------
    dict
        {'status': status, 'message': message, 'nodes': list,
         'edges': list, 'workflow': workflow id or None}

    Notes
    -----
    Nodes are identified by the corresponding Artifact ID.
    """
    check = _check_prep_template_exists(int(prep_id))
    if check['status'] != 'success':
        return check

    template = PrepTemplate(int(prep_id))
    error = check_access(template.study_id, user_id)
    if error:
        return error

    # Users without edit rights on the study only see public artifacts
    can_edit = Study(template.study_id).can_edit(User(user_id))

    root = template.artifact
    if root is None:
        # No artifact attached yet: empty graph
        return {'edges': [], 'nodes': [],
                'status': 'success', 'message': ''}

    graph = root.descendants_with_jobs
    nodes, edges, wf_id = get_network_nodes_edges(graph, can_edit)

    return {'edges': edges,
            'nodes': nodes,
            'workflow': wf_id,
            'status': 'success',
            'message': ''}
def test_update_preprocessed_data_from_cmd(self):
    # Runs update_preprocessed_data_from_cmd against study 1 and verifies
    # both the returned filepaths and the recomputed checksums in the DB.
    exp_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
    exp_fps = exp_ppd.get_filepaths()
    # The original paths must exist, but they're not included in the test
    # so create them here
    for _, fp, _ in exp_fps:
        with open(fp, 'w') as f:
            f.write("")
    # The update is expected to add one new filepath (the split-library log)
    next_fp_id = get_count('qiita.filepath') + 1
    exp_fps.append(
        (next_fp_id,
         join(self.db_ppd_dir, "%s_split_library_log.txt" % exp_ppd.id),
         'log'))
    ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)
    # Check that the modified preprocessed data is the correct one
    self.assertEqual(ppd.id, exp_ppd.id)
    # Check that the filepaths returned are correct
    # We need to sort the list returned from the db because the ordering
    # on that list is based on db modification time, rather than id
    obs_fps = sorted(ppd.get_filepaths())
    self.assertEqual(obs_fps, exp_fps)
    # Check that the checksums have been updated
    sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"
    # Checksum of the fasta file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[0][0], ))[0]
    self.assertEqual(obs_checksum, '3532748626')
    # Checksum of the fastq file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[1][0], ))[0]
    self.assertEqual(obs_checksum, '2958832064')
    # Checksum of the demux file
    # The checksum is generated dynamically, so the checksum changes
    # We are going to test that the checksum is not the one that was
    # before, which corresponds to an empty file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[2][0], ))[0]
    self.assertTrue(isinstance(obs_checksum, str))
    self.assertNotEqual(obs_checksum, '852952723')
    self.assertTrue(len(obs_checksum) > 0)
    # Checksum of the log file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[3][0], ))[0]
    self.assertEqual(obs_checksum, '626839734')
def study_tags_patch_request(user_id, study_id, req_op, req_path,
                             req_value=None, req_from=None):
    """Modifies an attribute of the artifact

    Parameters
    ----------
    user_id : int
        The id of the user performing the patch operation
    study_id : int
        The id of the study on which we will be performing the patch
        operation
    req_op : str
        The operation to perform on the study
    req_path : str
        The attribute to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Returns
    -------
    dict of {str, str}
        A dictionary with the following keys:
        - status: str, whether if the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
    """
    # Only the 'replace' operation is supported for study tags
    if req_op != 'replace':
        return {'status': 'error',
                'message': 'Operation "%s" not supported. '
                           'Current supported operations: replace' % req_op}

    parts = [segment for segment in req_path.split('/') if segment]
    if len(parts) != 1:
        return {'status': 'error',
                'message': 'Incorrect path parameter'}
    attribute = parts[0]

    # Check if the user actually has access to the study
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    if attribute != 'tags':
        # We don't understand the attribute so return an error
        return {'status': 'error',
                'message': 'Attribute "%s" not found. '
                           'Please, check the path parameter' % attribute}

    message = Study(study_id).update_tags(User(user_id), req_value)
    return {'status': 'success', 'message': message}
def test_download_raw_data(self):
    # it's possible that one of the tests is deleting the raw data
    # so we will make sure that the files exists so this test passes
    study = Study(1)
    all_files = [x['fp'] for a in study.artifacts() for x in a.filepaths]
    for fp in all_files:
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('')
    response = self.get('/download_raw_data/1')
    self.assertEqual(response.code, 200)
    # nginx mod_zip file list: "<crc> <size> <internal path> <zip name>"
    # per line; templates have dynamic timestamps so those use regexes
    exp = (
        '2125826711 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
        'raw_data/1_s_G1_L001_sequences.fastq.gz\n'
        '2125826711 58 /protected/raw_data/'
        '1_s_G1_L001_sequences_barcodes.fastq.gz '
        'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n'
        '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt '
        'mapping_files/1_mapping_file.txt\n'
        '1756512010 1093210 /protected/BIOM/7/biom_table.biom '
        'BIOM/7/biom_table.biom\n'
        '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
        'mapping_files/7_mapping_file.txt\n')
    self.assertRegex(response.body.decode('ascii'), exp)
    # Unknown study id -> not allowed
    response = self.get('/download_study_bioms/200')
    self.assertEqual(response.code, 405)
    # changing user so we can test the failures
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)
    # now, let's make sure that when artifacts are public AND the
    # public_raw_download any user can download the files
    study.public_raw_download = True
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)
    # 7 is an uploaded biom, which should now be available but as it's a
    # biom, only the prep info file will be retrieved
    Artifact(7).visibility = 'public'
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = (
        '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
        'mapping_files/7_mapping_file.txt\n')
    self.assertRegex(response.body.decode('ascii'), exp)
def submit_EBI_from_files(study_id, sample_template, prep_template,
                          fastq_dir_fp, output_dir_fp, investigation_type,
                          action, send):
    """EBI submission from files

    Parameters
    ----------
    study_id : int
        The study id
    sample_template : File
        The file handler of the sample template file
    prep_template : File
        The file handler of the prep template file
    fastq_dir_fp : str
        The fastq filepath
    output_dir_fp : str
        The output directory
    investigation_type : str
        The investigation type string
    action : str
        The action to perform with this data
    send : bool
        True to actually send the files
    """
    study = Study(study_id)
    study_id_str = str(study_id)

    # Refuse to clobber the output of a previous submission
    if isdir(output_dir_fp):
        raise ValueError('The output folder already exists: %s'
                         % output_dir_fp)
    makedirs(output_dir_fp)

    # Per-submission XML filepaths inside the output directory
    out_path = partial(join, output_dir_fp)
    study_fp = out_path('study.xml')
    sample_fp = out_path('sample.xml')
    experiment_fp = out_path('experiment.xml')
    run_fp = out_path('run.xml')
    submission_fp = out_path('submission.xml')

    submission = EBISubmission.from_templates_and_per_sample_fastqs(
        study_id_str, study.title, study.info['study_abstract'],
        investigation_type, sample_template, prep_template, fastq_dir_fp)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        submission.send_xml()
def test_insert_processed_data_target_gene(self):
    # Builds a minimal preprocessed-data record plus a fake sortmerna
    # output directory, then checks _insert_processed_data_target_gene
    # copies everything into the processed_data mountpoint and registers
    # a new row in qiita.processed_data.
    fd, fna_fp = mkstemp(suffix='_seqs.fna')
    close(fd)
    fd, qual_fp = mkstemp(suffix='_seqs.qual')
    close(fd)
    filepaths = [
        (fna_fp, convert_to_id('preprocessed_fasta', 'filepath_type')),
        (qual_fp, convert_to_id('preprocessed_fastq', 'filepath_type'))
    ]
    preprocessed_data = PreprocessedData.create(
        Study(1), "preprocessed_sequence_illumina_params", 1, filepaths,
        data_type="18S")
    params = ProcessedSortmernaParams(1)
    pick_dir = mkdtemp()
    path_builder = partial(join, pick_dir)
    db_path_builder = partial(join, get_mountpoint('processed_data')[0][1])
    # Create a placeholder for the otu table
    with open(path_builder('otu_table.biom'), 'w') as f:
        f.write('\n')
    # Create a placeholder for the directory
    mkdir(path_builder('sortmerna_picked_otus'))
    # Create the log file
    fd, fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
    close(fd)
    with open(fp, 'w') as f:
        f.write('\n')
    _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)
    new_id = get_count('qiita.processed_data')
    # Check that the files have been copied
    db_files = [
        db_path_builder("%s_otu_table.biom" % new_id),
        db_path_builder("%s_sortmerna_picked_otus" % new_id),
        db_path_builder("%s_%s" % (new_id, basename(fp)))
    ]
    for fp in db_files:
        self.assertTrue(exists(fp))
    # Check that a new preprocessed data has been created
    self.assertTrue(
        self.conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
            "processed_data_id=%s)", (new_id, ))[0])
def study_prep_get_req(study_id, user_id):
    """Gives a summary of each prep template attached to the study

    Parameters
    ----------
    study_id : int
        Study id to get prep template info for
    user_id : str
        User id requesting the prep templates

    Returns
    -------
    dict of list of dict
        prep template information separated by data type, in the form
        {data_type: [{prep 1 info dict}, ....], ...}
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    # Can only pass ids over API, so need to instantiate object
    study = Study(int(study_id))
    editable = study.can_edit(User(user_id))

    prep_info = defaultdict(list)
    for dtype in study.data_types:
        for prep in study.prep_templates(dtype):
            # Users without edit rights only see public prep templates
            if prep.status != 'public' and not editable:
                continue
            entry = {'name': 'PREP %d NAME' % prep.id,
                     'id': prep.id,
                     'status': prep.status}
            start_artifact = prep.artifact
            if start_artifact is None:
                entry['start_artifact'] = None
                entry['start_artifact_id'] = None
                entry['youngest_artifact'] = None
                entry['ebi_experiment'] = False
            else:
                youngest = prep.artifact.youngest_artifact
                entry['start_artifact'] = start_artifact.artifact_type
                entry['start_artifact_id'] = start_artifact.id
                entry['youngest_artifact'] = '%s - %s' % (
                    youngest.name, youngest.artifact_type)
                # True when at least one sample has an EBI accession
                entry['ebi_experiment'] = any(
                    v is not None
                    for _, v in viewitems(prep.ebi_experiment_accessions))
            prep_info[dtype].append(entry)
    return {'status': 'success', 'message': '', 'info': prep_info}
def test_patch_sample_ids_already_exist(self):
    # PATCHing already-existing sample ids should update only the named
    # samples and leave all other samples untouched.
    body = _sample_creator(['1.SKM8.640201', '1.SKM3.640197'])
    response = self.patch('/api/v1/study/1/samples', headers=self.headers,
                          data=body, asjson=True)
    self.assertEqual(response.code, 200)
    df = Study(1).sample_template.to_dataframe()
    self.assertEqual(df.loc['1.SKM8.640201']['elevation'], '1')
    self.assertEqual(df.loc['1.SKM3.640197']['elevation'], '1')
    # make sure we didn't touch other samples
    self.assertNotEqual(df.loc['1.SKM4.640180']['elevation'], '1')
def get(self):
    # Serve a zip of a public study's public raw/biom files, optionally
    # filtered by data_type, by emitting an nginx mod_zip file list.
    data = self.get_argument("data", None)
    study_id = self.get_argument("study_id", None)
    data_type = self.get_argument("data_type", None)
    dtypes = get_data_types().keys()
    if data is None or study_id is None or data not in ('raw', 'biom'):
        raise HTTPError(422, reason='You need to specify both data (the '
                        'data type you want to download - raw/biom) and '
                        'study_id')
    elif data_type is not None and data_type not in dtypes:
        raise HTTPError(422, reason='Not a valid data_type. Valid types '
                        'are: %s' % ', '.join(dtypes))
    else:
        study_id = int(study_id)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(422, reason='Study does not exist')
        else:
            public_raw_download = study.public_raw_download
            if study.status != 'public':
                raise HTTPError(404, reason='Study is not public. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            elif data == 'raw' and not public_raw_download:
                # raw downloads additionally require the study-level
                # public_raw_download flag
                raise HTTPError(422, reason='No raw data access. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            else:
                # Only expose artifacts that are themselves public
                to_download = []
                for a in study.artifacts(
                        dtype=data_type,
                        artifact_type='BIOM' if data == 'biom' else None):
                    if a.visibility != 'public':
                        continue
                    to_download.extend(self._list_artifact_files_nginx(a))
                if not to_download:
                    raise HTTPError(422, reason='Nothing to download. If '
                                    'this is a mistake contact: '
                                    '*****@*****.**')
                else:
                    self._write_nginx_file_list(to_download)
                    # Timestamped zip name so repeated downloads don't
                    # collide
                    zip_fn = 'study_%d_%s_%s.zip' % (
                        study_id, data, datetime.now().strftime(
                            '%m%d%y-%H%M%S'))
                    self._set_nginx_headers(zip_fn)
    self.finish()
def get_info(self, portal="QIITA"):
    """Return study info rows for the given portal, with a 'portals' field.

    Each returned dict is the Study.get_info row extended with a
    comma-separated, sorted list of the portals the study belongs to.
    """
    study_ids = [s.id for s in Portal(portal).get_studies()]
    if not study_ids:
        return []

    info = []
    for row in Study.get_info(study_ids, info_cols=self.study_cols):
        # Copy so the DB row object is not mutated
        entry = dict(row)
        entry['portals'] = ', '.join(
            sorted(Study(row['study_id'])._portals))
        info.append(entry)
    return info
def test_remove_filepath_errors(self):
    # remove_filepath must raise for non-removable or foreign filepaths
    fp = join(self.db_test_raw_dir, '1_s_G1_L001_sequences.fastq.gz')
    with self.assertRaises(QiitaDBError):
        RawData(1).remove_filepath(fp)
    # filepath doesn't belong to that raw data
    with self.assertRaises(ValueError):
        RawData(2).remove_filepath(fp)
    # the raw data has been linked to more than 1 study so it can't be
    # unlinked
    Study(2).add_raw_data([RawData(2)])
    with self.assertRaises(QiitaDBError):
        RawData(2).remove_filepath(fp)
def prep_template_graph_get_req(prep_id, user_id):
    """Returns graph of all artifacts created from the prep base artifact

    Parameters
    ----------
    prep_id : int
        Prep template ID to get graph for
    user_id : str
        User making the request

    Returns
    -------
    dict of lists of tuples
        A dictionary containing the edge list representation of the graph,
        and the node labels. Formatted as:
        {'status': status,
         'message': message,
         'edge_list': [(0, 1), (0, 2)...],
         'node_labels': [(0, 'label0'), (1, 'label1'), ...]}

    Notes
    -----
    Nodes are identified by the corresponding Artifact ID.
    """
    exists = _check_prep_template_exists(int(prep_id))
    if exists['status'] != 'success':
        return exists
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error

    # We should filter for only the public artifacts if the user
    # doesn't have full access to the study
    full_access = Study(prep.study_id).can_edit(User(user_id))

    # Fix: guard against prep templates with no artifact attached; the old
    # code raised AttributeError on `None.descendants`
    artifact = prep.artifact
    if artifact is None:
        return {'status': 'success', 'message': '',
                'edge_list': [], 'node_labels': []}

    G = artifact.descendants
    node_labels = [(n.id, ' - '.join([n.name, n.artifact_type]))
                   for n in G.nodes()
                   if full_access or n.visibility == 'public']
    node_ids = [id_ for id_, label in node_labels]
    # Keep only edges whose endpoints both survived the visibility filter
    edge_list = [(n.id, m.id) for n, m in G.edges()
                 if n.id in node_ids and m.id in node_ids]
    return {'status': 'success',
            'message': '',
            'edge_list': edge_list,
            'node_labels': node_labels}
def remove_add_study_template(self, raw_data, study_id, fp_rsp):
    """Replace prep templates, raw data, and sample template with a new one
    """
    # NOTE(review): `raw_data` is invoked here, so it is presumably a
    # callable yielding raw-data ids rather than a list -- confirm
    # against callers.
    for rd in raw_data():
        rd = RawData(rd)
        # Drop every prep template hanging off this raw data
        for pt in rd.prep_templates:
            if PrepTemplate.exists(pt):
                PrepTemplate.delete(pt)
    # Replace the study's sample template with the uploaded file, then
    # remove the uploaded file itself
    if SampleTemplate.exists(study_id):
        SampleTemplate.delete(study_id)
    SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                          Study(study_id))
    remove(fp_rsp)
def post(self):
    # Attach previously-uploaded files to an existing raw data object,
    # then show a wait page while the job runs.
    # vars to add files to raw data
    study_id = self.get_argument('study_id')
    raw_data_id = self.get_argument('raw_data_id')
    barcodes_str = self.get_argument('barcodes')
    forward_reads_str = self.get_argument('forward')
    sff_str = self.get_argument('sff')
    fasta_str = self.get_argument('fasta')
    qual_str = self.get_argument('qual')
    reverse_reads_str = self.get_argument('reverse')
    study_id = int(study_id)
    try:
        study = Study(study_id)
    except QiitaDBUnknownIDError:
        # Study not in database so fail nicely
        raise HTTPError(404, "Study %d does not exist" % study_id)
    else:
        check_access(self.current_user, study, raise_error=True)

    def _split(x):
        # Comma-separated argument string -> list ([] for empty/None)
        return x.split(',') if x else []

    filepaths, fps = [], []
    fps.append((_split(barcodes_str), 'raw_barcodes'))
    fps.append((_split(fasta_str), 'raw_fasta'))
    fps.append((_split(qual_str), 'raw_qual'))
    fps.append((_split(forward_reads_str), 'raw_forward_seqs'))
    fps.append((_split(reverse_reads_str), 'raw_reverse_seqs'))
    fps.append((_split(sff_str), 'raw_sff'))
    # Resolve each named file against every uploads mountpoint for the
    # study; only files that actually exist are attached
    for _, f in get_mountpoint("uploads", retrieve_all=True):
        f = join(f, str(study_id))
        for fp_set, filetype in fps:
            for t in fp_set:
                ft = join(f, t)
                if exists(ft):
                    filepaths.append((ft, filetype))
    # Run the attachment asynchronously
    job_id = submit(self.current_user.id, add_files_to_raw_data,
                    raw_data_id, filepaths)
    self.render(
        'compute_wait.html', job_id=job_id,
        title='Adding files to your raw data',
        completion_redirect=(
            '/study/description/%s?top_tab=raw_data_tab&sub_tab=%s'
            % (study_id, raw_data_id)))
def test_get_selected(self):
    # Sharing a study with an additional user via the sharing endpoint
    # returns the updated user/link lists and persists the share.
    s = Study(1)
    u = User('*****@*****.**')
    args = {'selected': u.id, 'study_id': s.id}
    response = self.get('/study/sharing/', args)
    self.assertEqual(response.code, 200)
    exp = {
        'users': ['*****@*****.**', u.id],
        'links':
            ('<a target="_blank" href="mailto:[email protected]">Shared</a>, '
             '<a target="_blank" href="mailto:[email protected]">Admin</a>')
    }
    self.assertEqual(loads(response.body), exp)
    self.assertEqual(s.shared_with, [User('*****@*****.**'), u])
def display_template(self, study_id, msg):
    """Render the upload page for a study (non-public access required)."""
    sid = int(study_id)
    study = Study(sid)
    viewer = self.current_user
    check_access(viewer, study, no_public=True, raise_error=True)

    # getting the ontologies
    render_args = dict(
        study_title=study.title,
        study_info=study.info,
        study_id=sid,
        is_admin=viewer.level == 'admin',
        extensions=','.join(qiita_config.valid_upload_extension),
        max_upload_size=qiita_config.max_upload_size,
        files=get_files_from_uploads_folders(str(sid)))
    self.render('upload.html', **render_args)
def test_artifact_post_req(self):
    # Create new prep template to attach artifact to
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create,
                          pd.DataFrame({'new_col': {
                              '1.SKD6.640190': 1
                          }}), Study(1), '16S')
    self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])
    filepaths = {
        'raw_forward_seqs': 'uploaded_file.txt',
        'raw_barcodes': 'update.txt'
    }
    obs = artifact_post_req('*****@*****.**', filepaths, 'FASTQ',
                            'New Test Artifact', pt.id)
    exp = {'status': 'success', 'message': ''}
    self.assertEqual(obs, exp)
    # Artifact creation runs asynchronously; block until it's done
    wait_for_prep_information_job(pt.id)
    # Test importing an artifact
    # Create new prep template to attach artifact to
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create,
                          pd.DataFrame({'new_col': {
                              '1.SKD6.640190': 1
                          }}), Study(1), '16S')
    self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])
    new_artifact_id = get_count('qiita.artifact') + 1
    obs = artifact_post_req('*****@*****.**', {}, 'Demultiplexed',
                            'New Test Artifact 2', pt.id, 3)
    exp = {'status': 'success', 'message': ''}
    self.assertEqual(obs, exp)
    wait_for_prep_information_job(pt.id)
    # Instantiate the artifact to make sure it was made and
    # to clean the environment
    a = Artifact(new_artifact_id)
    self._files_to_remove.extend([fp for _, fp, _ in a.filepaths])
def _get_template_variables(self, preprocessed_data_id, callback):
    """Generates all the variables needed to render the template

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data identifier
    callback : function
        The callback function to call with the results once the processing
        is done

    Raises
    ------
    HTTPError
        If the preprocessed data does not have a log file
    """
    # Get the objects and check user privileges
    ppd = PreprocessedData(preprocessed_data_id)
    study = Study(ppd.study)
    check_access(self.current_user, study, raise_error=True)

    # Get the return address
    back_button_path = self.get_argument(
        'back_button_path',
        '/study/description/%d?top_tab=preprocessed_data_tab&sub_tab=%s'
        % (study.id, preprocessed_data_id))

    # Get all the filepaths attached to the preprocessed data
    files_tuples = ppd.get_filepaths()

    # Group the files by filepath type
    files = defaultdict(list)
    for _, fp, fpt in files_tuples:
        files[fpt].append(fp)

    try:
        log_path = files['log'][0]
    except KeyError:
        raise HTTPError(500, "Log file not found in preprocessed data %s"
                        % preprocessed_data_id)

    # Fix: mode 'U' was deprecated since Python 3.4 and removed in 3.11;
    # default text mode already applies universal-newline translation.
    with open(log_path) as f:
        contents = f.read()
        contents = contents.replace('\n', '<br/>')
        contents = contents.replace('\t', ' ')

    title = 'Preprocessed Data: %d' % preprocessed_data_id
    callback((title, contents, back_button_path))
def display_template(self, study_id, msg):
    """Simple function to avoid duplication of code"""
    study_id = int(study_id)
    study = Study(study_id)
    user = self.current_user
    level = 'info'
    message = ''
    remote_url = ''
    remote_files = []
    check_access(user, study, no_public=True, raise_error=True)

    # If a remote-files job is registered for this study in redis, surface
    # its state (running / failed / finished) on the upload page
    job_info = r_client.get(UPLOAD_STUDY_FORMAT % study_id)
    if job_info:
        # defaultdict so missing keys render as empty strings
        job_info = defaultdict(lambda: '', loads(job_info))
        job_id = job_info['job_id']
        job = ProcessingJob(job_id)
        job_status = job.status
        processing = job_status not in ('success', 'error')
        url = job.parameters.values['url']
        if processing:
            if job.command.name == 'list_remote_files':
                message = 'Retrieving remote files: listing %s' % url
            else:
                message = 'Retrieving remote files: download %s' % url
        elif job_status == 'error':
            level = 'danger'
            message = job.log.msg.replace('\n', '</br>')
            # making errors nicer for users
            if 'No such file' in message:
                message = 'URL not valid: <i>%s</i>, please review.' % url
        else:
            # Job finished: show the remote listing stored by the job
            remote_url = job_info['url']
            remote_files = job_info['files']
            level = job_info['alert_type']
            message = job_info['alert_msg'].replace('\n', '</br>')

    # getting the ontologies
    self.render('upload.html',
                study_title=study.title,
                study_info=study.info,
                study_id=study_id,
                is_admin=user.level == 'admin',
                extensions=','.join(qiita_config.valid_upload_extension),
                max_upload_size=qiita_config.max_upload_size,
                level=level,
                message=message,
                remote_url=remote_url,
                remote_files=remote_files,
                files=get_files_from_uploads_folders(str(study_id)))
def preprocessor(study_id, prep_template_id, param_id, param_constructor):
    """Dispatch for preprocessor work

    Parameters
    ----------
    study_id : int
        The study to preprocess
    prep_template_id : int
        The prep template to preprocess
    param_id : int
        The id passed to `param_constructor`
    param_constructor : callable
        Builds the parameters object for the preprocessor

    Returns
    -------
    The preprocessor output, or None if preprocessing failed (the failure
    message is recorded on the prep template's preprocessing_status).
    """
    study = Study(study_id)
    prep_template = PrepTemplate(prep_template_id)
    params = param_constructor(param_id)

    sp = StudyPreprocessor()
    try:
        preprocess_out = sp(study, prep_template, params)
    except Exception as e:
        # Fix: traceback.format_exception_only expects (etype, value);
        # the old call passed (e, exc_info()) which produces garbage.
        error_msg = ''.join(format_exception_only(type(e), e))
        prep_template.preprocessing_status = "failed: %s" % error_msg
        preprocess_out = None
    return preprocess_out
def setUp(self):
    """Create empty seqs/barcodes FASTQ fixtures and test metadata."""
    fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)

    self.filetype = 2
    self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
    self.studies = [Study(1)]
    self.db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')

    # Seed both fixture files with a single newline
    for fixture in (self.seqs_fp, self.barcodes_fp):
        with open(fixture, "w") as f:
            f.write("\n")

    self._clean_up_files = []
def get(self, study_id):
    """Render the study description page with uploaded files and the
    raw filetypes available for selection."""
    upload_dir = get_study_fp(study_id)
    files = [f for f in listdir(upload_dir)] if exists(upload_dir) else []

    # "raw_forward_seqs" -> "forward seqs", etc.
    raw_types = [' '.join(k.split('_')[1:])
                 for k in get_filetypes().keys() if k.startswith('raw_')]

    # NOTE(review): 'max_upoad_size' looks like a typo for
    # 'max_upload_size', but both the kwarg and the config attribute use
    # this spelling, so it is preserved -- confirm against the template
    # and config before renaming.
    self.render('study_description.html', user=self.current_user,
                study_info=Study(study_id).info, study_id=study_id,
                files=files, max_upoad_size=qiita_config.max_upoad_size,
                filetypes=raw_types)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = Artifact(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(
            404, "Artifact %d does not exist!" % preprocessed_data_id)
    else:
        # VAMPS submission is restricted to admins
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(
                403, "No permissions of admin, "
                "get/VAMPSSubmitHandler: %s!" % user.id)
    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)
    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.categories()))]
    # Exactly one demux file must be attached to submit to VAMPS
    demux = [
        path for _, path, ftype in preprocessed_data.get_filepaths()
        if ftype == 'preprocessed_demux'
    ]
    demux_length = len(demux)
    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = ("Study appears to have multiple demultiplexed files!")
        msg_level = 'danger'
    elif demux_length == 1:
        demux_file = demux[0]
        demux_file_stats = demux_stats(demux_file)
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'
    self.render('vamps_submission.html',
                study_title=study.title, stats=stats, message=msg,
                study_id=study.id, level=msg_level,
                preprocessed_data_id=preprocessed_data_id)
def prep_template_summary_get_req(prep_id, user_id):
    """Get the summarized prep template data for each metadata column

    Parameters
    ----------
    prep_id : int
        PrepTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': message,
         'num_samples': value,
         'summary': [(col, [(val1, count1), (val2, count2), ...]), ...],
         'editable': bool}
    """
    check = _check_prep_template_exists(int(prep_id))
    if check['status'] != 'success':
        return check

    prep = PrepTemplate(int(prep_id))
    error = check_access(prep.study_id, user_id)
    if error:
        return error

    editable = Study(prep.study_id).can_edit(User(user_id))
    df = prep.to_dataframe()

    # Per-column value counts, values natural-sorted for display
    summary = []
    for column in sorted(df.columns):
        counts = df[column].value_counts(dropna=False)
        summary.append(
            (str(column),
             [(str(key), counts[key]) for key in natsorted(counts.index)]))

    return {'num_samples': df.shape[0],
            'summary': summary,
            'status': 'success',
            'message': '',
            'editable': editable}
def get(self):
    """Share/unshare a study with a user; owner only.

    Writes back the updated user list and share links as JSON.
    """
    study = Study(int(self.get_argument('study_id')))
    _check_owner(self.current_user, study)

    to_share = self.get_argument('selected', None)
    to_unshare = self.get_argument('deselected', None)
    if to_share is not None:
        yield Task(self._share, study, to_share)
    if to_unshare is not None:
        yield Task(self._unshare, study, to_unshare)

    users, links = yield Task(self._get_shared_for_study, study)
    self.write(dumps({'users': users, 'links': links}))
def test_get_deselected(self):
    # Unsharing a study via the sharing endpoint clears the shared list
    # and notifies the removed user.
    s = Study(1)
    u = User('*****@*****.**')
    args = {'deselected': u.id, 'id': s.id}
    self.assertEqual(s.shared_with, [u])
    response = self.get('/study/sharing/', args)
    self.assertEqual(response.code, 200)
    exp = {'users': [], 'links': ''}
    self.assertEqual(loads(response.body), exp)
    self.assertEqual(s.shared_with, [])
    # Make sure unshared message added to the system
    self.assertEqual(
        'Study \'Identification of the Microbiomes for '
        'Cannabis Soils\' has been unshared from you.',
        u.messages()[0][1])
def test_prep_template_jobs_get_req(self):
    # Create a new template:
    metadata = pd.DataFrame.from_dict(
        {
            'SKD6.640190': {
                'center_name': 'ANL',
                'target_subfragment': 'V4',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 1',
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': 'GTCCGCAAGTTA',
                'run_prefix': "s_G1_L001_sequences",
                'platform': 'Illumina',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'AAAA',
                'experiment_design_description': 'BBBB'
            }
        },
        orient='index', dtype=str)
    pt = PrepTemplate.create(metadata, Study(1), '16S')
    # Check that it returns an empty dictionary when there are no jobs
    # attached to the prep template
    self.assertEqual(prep_template_jobs_get_req(pt.id, '*****@*****.**'), {})
    # Create a job on the template
    prep_template_patch_req('*****@*****.**', 'remove',
                            '/%s/10/columns/target_subfragment/' % pt.id)
    # To ensure a deterministic result, wait until the job is completed
    self._wait_for_parallel_job('prep_template_%s' % pt.id)
    obs = prep_template_jobs_get_req(pt.id, '*****@*****.**')
    self.assertEqual(len(obs), 1)
    self.assertCountEqual(obs.values(), [{
        'error': '',
        'status': 'success',
        'step': None
    }])
    # A user without access to the study gets an error instead of jobs
    obs = prep_template_jobs_get_req(pt.id, '*****@*****.**')
    exp = {
        'status': 'error',
        'message': 'User does not have access to study'
    }
    self.assertEqual(obs, exp)
def get(self):
    """Share/unshare a study with a user (requires non-public access).

    Writes back the updated user list and share links as JSON.
    """
    study = Study(int(self.get_argument('id')))
    check_access(self.current_user, study, no_public=True,
                 raise_error=True)

    to_share = self.get_argument('selected', None)
    to_unshare = self.get_argument('deselected', None)
    if to_share is not None:
        yield Task(self._share, study, to_share)
    if to_unshare is not None:
        yield Task(self._unshare, study, to_unshare)

    users, links = yield Task(self._get_shared_for_study, study)
    self.write(dumps({'users': users, 'links': links}))
def post(self, study_id):
    # Queue a remote-file listing or download job for a study's uploads
    # folder, authenticated by an uploaded SSH private key.
    method = self.get_argument('remote-request-type')
    url = self.get_argument('inputURL')
    ssh_key = self.request.files['ssh-key'][0]['body']
    status = 'success'
    message = ''
    try:
        study = Study(int(study_id))
    except QiitaDBUnknownIDError:
        raise HTTPError(404, reason="Study %s does not exist" % study_id)
    check_access(
        self.current_user, study, no_public=True, raise_error=True)
    _, upload_folder = get_mountpoint("uploads")[0]
    upload_folder = join(upload_folder, study_id)
    # Persist the SSH key in the study's upload folder for the worker
    ssh_key_fp = join(upload_folder, '.key.txt')
    if not isdir(upload_folder):
        makedirs(upload_folder)
    with open(ssh_key_fp, 'w') as f:
        f.write(ssh_key)
    qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
    if method == 'list':
        cmd = qiita_plugin.get_command('list_remote_files')
        params = Parameters.load(cmd, values_dict={
            'url': url, 'private_key': ssh_key_fp, 'study_id': study_id})
    elif method == 'transfer':
        cmd = qiita_plugin.get_command('download_remote_files')
        params = Parameters.load(cmd, values_dict={
            'url': url, 'private_key': ssh_key_fp,
            'destination': upload_folder})
    else:
        status = 'error'
        message = 'Not a valid method'
    if status == 'success':
        job = ProcessingJob.create(self.current_user, params, True)
        job.submit()
        # Register the job in redis so the upload page can track it
        r_client.set(
            UPLOAD_STUDY_FORMAT % study_id, dumps({'job_id': job.id}))
    self.write({'status': status, 'message': message})
def get(self, arguments): study_id = int(self.get_argument('study_id')) # Get the arguments prep_template = self.get_argument('prep_template', None) sample_template = self.get_argument('sample_template', None) if prep_template and sample_template: raise HTTPError( 500, "You should provide either a sample template " "or a prep template, but not both") elif prep_template: # The prep template has been provided template = self._get_template(PrepTemplate, prep_template) back_button_path = ( "/study/description/%s?top_tab=raw_data_tab&sub_tab=%s" "&prep_tab=%s" % (study_id, template.raw_data, template.id)) elif sample_template: # The sample template has been provided template = self._get_template(SampleTemplate, sample_template) back_button_path = ("/study/description/%s" % study_id) else: # Neither a sample template or a prep template has been provided # Fail nicely raise HTTPError( 500, "You should provide either a sample template " "or a prep template") study = Study(template.study_id) # check whether or not the user has access to the requested information if not study.has_access(self.current_user): raise HTTPError( 403, "You do not have access to access this " "information.") df = dataframe_from_template(template) num_samples = df.shape[0] stats = stats_from_df(df) self.render('metadata_summary.html', study_title=study.title, stats=stats, num_samples=num_samples, back_button_path=back_button_path)