def get(self):
    """Stream a public study's raw data or BIOM artifacts as a zip.

    Query arguments:
      data      -- 'raw' or 'biom' (required)
      study_id  -- id of the study to download (required)
      data_type -- optional filter; must be one of get_data_types()

    Writes an nginx mod_zip file list and sets the download headers;
    raises HTTPError(422) on any invalid input or access problem.
    """
    data = self.get_argument("data", None)
    study_id = self.get_argument("study_id", None)
    data_type = self.get_argument("data_type", None)
    dtypes = get_data_types().keys()
    if data is None or study_id is None or data not in ('raw', 'biom'):
        raise HTTPError(422, reason='You need to specify both data (the '
                        'data type you want to download - raw/biom) and '
                        'study_id')
    elif data_type is not None and data_type not in dtypes:
        raise HTTPError(422, reason='Not a valid data_type. Valid types '
                        'are: %s' % ', '.join(dtypes))
    else:
        study_id = int(study_id)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(422, reason='Study does not exist')
        else:
            public_raw_download = study.public_raw_download
            if study.status != 'public':
                raise HTTPError(422, reason='Study is not public. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            elif data == 'raw' and not public_raw_download:
                # raw downloads additionally require the study to opt in
                # via the public_raw_download flag
                raise HTTPError(422, reason='No raw data access. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            else:
                to_download = []
                # only artifacts that are themselves public are served;
                # for 'biom' restrict to BIOM-typed artifacts
                for a in study.artifacts(
                        dtype=data_type,
                        artifact_type='BIOM' if data == 'biom' else None):
                    if a.visibility != 'public':
                        continue
                    to_download.extend(self._list_artifact_files_nginx(a))
                if not to_download:
                    raise HTTPError(422, reason='Nothing to download. If '
                                    'this is a mistake contact: '
                                    '*****@*****.**')
                else:
                    self._write_nginx_file_list(to_download)
                    # timestamped name so repeated downloads don't collide
                    zip_fn = 'study_%d_%s_%s.zip' % (
                        study_id, data,
                        datetime.now().strftime('%m%d%y-%H%M%S'))
                    self._set_nginx_headers(zip_fn)
    self.finish()
def test_download_raw_data(self):
    """Exercise raw-data download and the public biom download gating."""
    # it's possible that one of the tests is deleting the raw data
    # so we will make sure that the files exists so this test passes
    study = Study(1)
    all_files = [x['fp'] for a in study.artifacts() for x in a.filepaths]
    for fp in all_files:
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('')
    response = self.get('/download_raw_data/1')
    self.assertEqual(response.code, 200)
    # expected nginx mod_zip listing; '-' rows use regex for the
    # timestamped template filenames
    exp = (
        '2125826711 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
        'raw_data/1_s_G1_L001_sequences.fastq.gz\n'
        '2125826711 58 /protected/raw_data/'
        '1_s_G1_L001_sequences_barcodes.fastq.gz '
        'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n'
        '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt '
        'mapping_files/1_mapping_file.txt\n'
        '1756512010 1093210 /protected/BIOM/7/biom_table.biom '
        'BIOM/7/biom_table.biom\n'
        '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
        'mapping_files/7_mapping_file.txt\n')
    self.assertRegex(response.body.decode('ascii'), exp)

    # unknown study id -> method not allowed for this endpoint
    response = self.get('/download_study_bioms/200')
    self.assertEqual(response.code, 405)

    # changing user so we can test the failures
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)

    # now, let's make sure that when artifacts are public AND the
    # public_raw_download any user can download the files
    study.public_raw_download = True
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    # still 405 here because no artifact is public yet
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)

    # 7 is an uploaded biom, which should now be available but as it's a
    # biom, only the prep info file will be retrieved
    Artifact(7).visibility = 'public'
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = (
        '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
        'mapping_files/7_mapping_file.txt\n')
    self.assertRegex(response.body.decode('ascii'), exp)
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to do a
    guess on how those files should be added to the artifact of the given
    type. Uses information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int}
        where 'status' is a string specifying whether the query is successfull,
        'message' is a human-readable description of the error (optional),
        'remaining' is the list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type
        and 'num_prefixes' is the number of different run prefix values in
        the given prep template.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []

    uploaded = get_files_from_uploads_folders(study_id)
    pt = PrepTemplate(prep_template_id).to_dataframe()
    # if the artifact accepts raw files and the prep has run_prefix, use the
    # prefixes to pre-select the uploaded files
    if (any(ft.startswith('raw_') for ft, _ in supp_file_types)
            and 'run_prefix' in pt.columns):
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        for _, filename in uploaded:
            # str.startswith accepts a tuple of prefixes
            if filename.startswith(prep_prefixes):
                selected.append(filename)
            else:
                remaining.append(filename)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # At this point we can't do anything smart about selecting by default
    # the files for each type. The only thing that we can do is assume that
    # the first in the supp_file_types list is the default one where files
    # should be added in case of 'run_prefix' being present
    file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
    first = supp_file_types[0]
    # Note that this works even if `run_prefix` is not in the prep template
    # because selected is initialized to the empty list
    file_types.insert(0, (first[0], first[1], selected))

    # Create a list of artifacts that the user has access to, in case that
    # he wants to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    return {
        'status': 'success',
        'message': '',
        'remaining': remaining,
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
def get(self):
    """Stream public study data (raw/biom) or information files as a zip.

    Query arguments:
      data      -- one of 'raw', 'biom', 'sample_information',
                   'prep_information' (required)
      study_id  -- required for 'raw'/'biom'/'sample_information'
      prep_id   -- required for 'prep_information'
      data_type -- optional artifact filter; invalid for template downloads

    Writes an nginx mod_zip file list and the download headers; raises
    HTTPError(422) (or 404 for a non-public study) on invalid requests.
    """
    data = self.get_argument("data", None)
    study_id = self.get_argument("study_id", None)
    prep_id = self.get_argument("prep_id", None)
    data_type = self.get_argument("data_type", None)
    dtypes = get_data_types().keys()

    templates = ['sample_information', 'prep_information']
    valid_data = ['raw', 'biom'] + templates

    to_download = []
    if data is None or (study_id is None and prep_id is None) or \
            data not in valid_data:
        raise HTTPError(422, reason='You need to specify both data (the '
                        'data type you want to download - %s) and '
                        'study_id or prep_id' % '/'.join(valid_data))
    elif data_type is not None and data_type not in dtypes:
        raise HTTPError(422, reason='Not a valid data_type. Valid types '
                        'are: %s' % ', '.join(dtypes))
    elif data in templates and prep_id is None and study_id is None:
        raise HTTPError(422, reason='If downloading a sample or '
                        'preparation file you need to define study_id or'
                        ' prep_id')
    elif data in templates:
        # information-file branch: resolve the template object then emit
        # its newest filepath as a single-entry zip
        if data_type is not None:
            raise HTTPError(422, reason='If requesting an information '
                            'file you cannot specify the data_type')
        elif prep_id is not None and data == 'prep_information':
            fname = 'preparation_information_%s' % prep_id
            prep_id = int(prep_id)
            try:
                infofile = PrepTemplate(prep_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(
                    422, reason='Preparation information does not exist')
        elif study_id is not None and data == 'sample_information':
            fname = 'sample_information_%s' % study_id
            study_id = int(study_id)
            try:
                infofile = SampleTemplate(study_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(
                    422, reason='Sample information does not exist')
        else:
            raise HTTPError(422, reason='Review your parameters, not a '
                            'valid combination')
        # sort='descending' puts the most recent filepath first
        x = retrieve_filepaths(
            infofile._filepath_table, infofile._id_column, infofile.id,
            sort='descending')[0]
        # strip the base dir (plus trailing separator) for the nginx path
        basedir_len = len(get_db_files_base_dir()) + 1
        fp = x['fp'][basedir_len:]
        to_download.append((fp, fp, str(x['checksum']), str(x['fp_size'])))
        self._write_nginx_file_list(to_download)
        zip_fn = '%s_%s.zip' % (
            fname, datetime.now().strftime('%m%d%y-%H%M%S'))
        self._set_nginx_headers(zip_fn)
    else:
        study_id = int(study_id)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(422, reason='Study does not exist')
        else:
            public_raw_download = study.public_raw_download
            if study.status != 'public':
                raise HTTPError(404, reason='Study is not public. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            elif data == 'raw' and not public_raw_download:
                raise HTTPError(422, reason='No raw data access. If this '
                                'is a mistake contact: '
                                '*****@*****.**')
            else:
                # raw data: root artifacts only (no parents)
                artifacts = [a for a in study.artifacts(dtype=data_type)
                             if not a.parents]
                # bioms
                if data == 'biom':
                    artifacts = study.artifacts(
                        dtype=data_type, artifact_type='BIOM')
                for a in artifacts:
                    if a.visibility != 'public':
                        continue
                    to_download.extend(self._list_artifact_files_nginx(a))
                if not to_download:
                    raise HTTPError(422, reason='Nothing to download. If '
                                    'this is a mistake contact: '
                                    '*****@*****.**')
                else:
                    self._write_nginx_file_list(to_download)
                    zip_fn = 'study_%d_%s_%s.zip' % (
                        study_id, data, datetime.now().strftime(
                            '%m%d%y-%H%M%S'))
                    self._set_nginx_headers(zip_fn)
    self.finish()
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Builds list of dicts for studies table, with all HTML formatted

    Parameters
    ----------
    user : User object
        logged in user
    study_proc : dict of lists, optional
        Dictionary keyed on study_id that lists all processed data associated
        with that study. Required if proc_samples given.
    proc_samples : dict of lists, optional
        Dictionary keyed on proc_data_id that lists all samples associated
        with that processed data. Required if study_proc given.

    Returns
    -------
    infolist: list of dict of lists and dicts
        study and processed data info for JSON serialiation for datatables
        Each dict in the list is a single study, and contains the text

    Notes
    -----
    Both study_proc and proc_samples must be passed, or neither passed.
    """
    build_samples = False
    # Logic check to make sure both needed parts passed
    if study_proc is not None and proc_samples is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    elif proc_samples is not None and study_proc is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    elif study_proc is None:
        build_samples = True

    # get list of studies for table: user's own, shared and public
    study_set = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if study_proc is not None:
        study_set = study_set.intersection(study_proc)
    if not study_set:
        # No studies left so no need to continue
        return []

    cols = ['study_id', 'email', 'principal_investigator_id',
            'publication_doi', 'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']
    study_info = Study.get_info([s.id for s in study_set], cols)

    # get info for the studies
    infolist = []
    for info in study_info:
        # Convert DictCursor to proper dict
        info = dict(info)
        study = Study(info['study_id'])
        # Build the processed data info for the study if none passed
        if build_samples:
            proc_data_list = [ar for ar in study.artifacts()
                              if ar.artifact_type == 'BIOM']
            proc_samples = {}
            study_proc = {study.id: defaultdict(list)}
            for proc_data in proc_data_list:
                study_proc[study.id][proc_data.data_type].append(proc_data.id)
                # there is only one prep template for each processed data
                proc_samples[proc_data.id] = proc_data.prep_templates[0].keys()
        # BUG FIX: the per-study result used to be assigned back to
        # `study_info`, clobbering the very list being iterated; it only
        # worked by accident of Python's iterator semantics. Use a
        # distinct name for the single-study dict.
        single_info = _build_single_study_info(study, info, study_proc,
                                               proc_samples)
        infolist.append(single_info)
    return infolist
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Categorize the study's uploaded files for the given artifact type.

    Looks at the files in the study's upload folders and guesses how they
    should be attached to an artifact of `artifact_type`, using the prep
    template's `run_prefix` column (when present) to improve the guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        {'status': str, 'message': str, 'remaining': list of str,
         'file_types': list of (str, bool, list of str),
         'num_prefixes': int, 'artifacts': list of (int, str)}
        'remaining' holds the files that could not be categorized,
        'file_types' the supported filepath types with their required flag
        and pre-selected files, and 'num_prefixes' the number of distinct
        run_prefix values in the prep template.
    """
    supp_file_types = supported_filepath_types(artifact_type)
    uploads = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    matched = []
    leftovers = []
    # raw_sff is excluded when deciding whether run_prefix matching applies
    has_raw_type = any(ft.startswith('raw_')
                       for ft, _ in supp_file_types if ft != 'raw_sff')
    if has_raw_type and 'run_prefix' in prep_df.columns:
        prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prefixes)
        for _, fname in uploads:
            # str.startswith takes the whole tuple of prefixes at once
            (matched if fname.startswith(prefixes)
             else leftovers).append(fname)
    else:
        num_prefixes = 0
        leftovers = [fname for _, fname in uploads]

    # The first supported filepath type is the default bucket for the files
    # matched via run_prefix (matched is [] when no matching happened);
    # every other type starts out with no files.
    head, tail = supp_file_types[0], supp_file_types[1:]
    file_types = [(head[0], head[1], matched)]
    file_types.extend((fp_type, req, []) for fp_type, req in tail)

    # Build the list of artifacts the user may import files from, making
    # sure this study's own artifacts are included.
    requester = User(user_id)
    user_artifacts = requester.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    artifact_options = []
    for st, artifacts in viewitems(user_artifacts):
        label = "%s (%d)" % (st.title, st.id)
        artifact_options.extend(
            (a.id, "%s - %s (%d)" % (label, a.name, a.id))
            for a in artifacts)

    return {'status': 'success',
            'message': '',
            'remaining': sorted(leftovers),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
def study_get_req(study_id, user_id):
    """Returns information available for the given study

    Parameters
    ----------
    study_id : int
        Study id to get prep template info for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        Data types information in the form
        {'status': status,
         'message': message,
         'info': dict of objects
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info contains study information seperated by data type, in the form
        {col_name: value, ...} with value being a string, int, or list of
        strings or ints
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error
    # Can only pass ids over API, so need to instantiate object
    study = Study(study_id)
    study_info = study.info
    # Add needed info that is not part of the initial info pull
    study_info['publication_doi'] = []
    study_info['publication_pid'] = []
    for pub, is_doi in study.publications:
        if is_doi:
            study_info['publication_doi'].append(pub)
        else:
            study_info['publication_pid'].append(pub)
    study_info['study_id'] = study.id
    study_info['study_title'] = study.title
    study_info['shared_with'] = [s.id for s in study.shared_with]
    study_info['status'] = study.status
    study_info['ebi_study_accession'] = study.ebi_study_accession
    study_info['ebi_submission_status'] = study.ebi_submission_status
    study_info['public_raw_download'] = study.public_raw_download
    study_info['notes'] = study.notes

    # Clean up StudyPerson objects to string for display
    pi = study_info['principal_investigator']
    study_info['principal_investigator'] = {
        'name': pi.name,
        'email': pi.email,
        'affiliation': pi.affiliation
    }

    lab_person = study_info['lab_person']
    if lab_person:
        study_info['lab_person'] = {
            'name': lab_person.name,
            'email': lab_person.email,
            'affiliation': lab_person.affiliation
        }

    samples = study.sample_template
    study_info['num_samples'] = 0 if samples is None else len(list(samples))
    study_info['owner'] = study.owner.id
    # Study.has_access no_public=True, will return True only if the user_id is
    # the owner of the study or if the study is shared with the user_id; this
    # with study.public_raw_download will define has_access_to_raw_data
    study_info['has_access_to_raw_data'] = study.has_access(
        User(user_id), True) or study.public_raw_download
    study_info['show_biom_download_button'] = 'BIOM' in [
        a.artifact_type for a in study.artifacts()
    ]
    study_info['show_raw_download_button'] = any(
        [True for pt in study.prep_templates() if pt.artifact is not None])

    # getting study processing status from redis
    processing = False
    study_info['level'] = ''
    study_info['message'] = ''
    job_info = r_client.get(STUDY_KEY_FORMAT % study_id)
    if job_info:
        # defaultdict so missing keys (alert_type/alert_msg) come back as ''
        job_info = defaultdict(lambda: '', loads(job_info))
        job_id = job_info['job_id']
        job = ProcessingJob(job_id)
        job_status = job.status
        processing = job_status not in ('success', 'error')
        if processing:
            study_info['level'] = 'info'
            study_info['message'] = 'This study is currently being processed'
        elif job_status == 'error':
            study_info['level'] = 'danger'
            study_info['message'] = job.log.msg.replace('\n', '</br>')
        else:
            study_info['level'] = job_info['alert_type']
            study_info['message'] = job_info['alert_msg'].replace(
                '\n', '</br>')

    return {
        'status': 'success',
        'message': '',
        'study_info': study_info,
        'editable': study.can_edit(User(user_id))
    }
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to guess on
    how those files should be added to the artifact of the given type. Uses
    information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int}
        where 'status' is a string specifying whether the query is successfull,
        'message' is a human-readable description of the error (optional),
        'remaining' is the list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type
        and 'num_prefixes' is the number of different run prefix values in
        the given prep template.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template doesn't belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (pt.study_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            to_remove = []
            # BUG FIX: `uploaded` holds 3-tuples here, but to_remove used
            # to collect 2-tuples (fid, f), so the filter below never
            # removed anything and a file matching several prefixes was
            # grouped under every prefix. Collect the full entries so each
            # file is claimed by its longest matching prefix only.
            for entry in uploaded:
                f = entry[1]
                if f.startswith(p):
                    sfiles[p].append(f)
                    to_remove.append(entry)
            uploaded = [x for x in uploaded if x not in to_remove]
        inuse = [y for x in sfiles.values() for y in x]
        remaining.extend([f for _, f, _ in uploaded if f not in inuse])
        supp_file_types_len = len(supp_file_types)

        for k, v in sfiles.items():
            len_files = len(v)
            # if the number of files in the k group is larger than the
            # available columns add to the remaining group, if not put them in
            # the selected group
            if len_files > supp_file_types_len:
                remaining.extend(v)
                message.append("'%s' has %d matches." % (k, len_files))
            else:
                v.sort()
                selected.append(v)
    else:
        num_prefixes = 0
        remaining = [f for _, f, _ in uploaded]

    # get file_types, format: filetype, required, list of files
    file_types = [(t, req, [x[i] for x in selected if i < len(x)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # he wants to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in user_artifacts.items():
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message
               else '\n'.join(['Check these run_prefix:'] + message))

    return {
        'status': 'success',
        'message': message,
        'remaining': sorted(remaining),
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
def study_get_req(study_id, user_id):
    """Returns information available for the given study

    Parameters
    ----------
    study_id : int
        Study id to get prep template info for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        Data types information in the form
        {'status': status,
         'message': message,
         'info': dict of objects
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info contains study information seperated by data type, in the form
        {col_name: value, ...} with value being a string, int, or list of
        strings or ints
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error
    # Can only pass ids over API, so need to instantiate object
    study = Study(study_id)
    study_info = study.info
    # Add needed info that is not part of the initial info pull
    study_info['publication_doi'] = []
    study_info['publication_pid'] = []
    for pub, is_doi in study.publications:
        if is_doi:
            study_info['publication_doi'].append(pub)
        else:
            study_info['publication_pid'].append(pub)
    study_info['study_id'] = study.id
    study_info['study_title'] = study.title
    study_info['shared_with'] = [s.id for s in study.shared_with]
    study_info['status'] = study.status
    study_info['ebi_study_accession'] = study.ebi_study_accession
    study_info['ebi_submission_status'] = study.ebi_submission_status

    # Clean up StudyPerson objects to string for display
    pi = study_info['principal_investigator']
    study_info['principal_investigator'] = {
        'name': pi.name, 'email': pi.email, 'affiliation': pi.affiliation}

    lab_person = study_info['lab_person']
    if lab_person:
        study_info['lab_person'] = {
            'name': lab_person.name, 'email': lab_person.email,
            'affiliation': lab_person.affiliation}

    samples = study.sample_template
    study_info['num_samples'] = 0 if samples is None else len(list(samples))
    study_info['owner'] = study.owner.id
    # Study.has_access no_public=True, will return True only if the user_id is
    # the owner of the study or if the study is shared with the user_id
    study_info['has_access_to_raw_data'] = study.has_access(
        User(user_id), True)
    study_info['show_biom_download_button'] = 'BIOM' in [
        a.artifact_type for a in study.artifacts()]
    study_info['show_raw_download_button'] = any([
        True for pt in study.prep_templates() if pt.artifact is not None])

    # getting study processing status from redis
    processing = False
    study_info['level'] = ''
    study_info['message'] = ''
    job_info = r_client.get(STUDY_KEY_FORMAT % study_id)
    if job_info:
        # defaultdict so missing keys (alert_type/alert_msg) come back as ''
        job_info = defaultdict(lambda: '', loads(job_info))
        job_id = job_info['job_id']
        job = ProcessingJob(job_id)
        job_status = job.status
        processing = job_status not in ('success', 'error')
        if processing:
            study_info['level'] = 'info'
            study_info['message'] = 'This study is currently being processed'
        elif job_status == 'error':
            study_info['level'] = 'danger'
            study_info['message'] = job.log.msg.replace('\n', '</br>')
        else:
            study_info['level'] = job_info['alert_type']
            study_info['message'] = job_info['alert_msg'].replace(
                '\n', '</br>')

    return {'status': 'success',
            'message': '',
            'study_info': study_info,
            'editable': study.can_edit(User(user_id))}
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to guess on
    how those files should be added to the artifact of the given type. Uses
    information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int}
        where 'status' is a string specifying whether the query is successfull,
        'message' is a human-readable description of the error (optional),
        'remaining' is the list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type
        and 'num_prefixes' is the number of different run prefix values in
        the given prep template.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template doesn't belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (pt.study_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix; each file is claimed by the longest
        # prefix that matches and removed from the pool
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            to_remove = []
            for fid, f in uploaded:
                if f.startswith(p):
                    sfiles[p].append(f)
                    to_remove.append((fid, f))
            uploaded = [x for x in uploaded if x not in to_remove]
        inuse = [y for x in sfiles.values() for y in x]
        remaining.extend([f for _, f in uploaded if f not in inuse])
        supp_file_types_len = len(supp_file_types)

        for k, v in viewitems(sfiles):
            len_files = len(v)
            # if the number of files in the k group is larger than the
            # available columns add to the remaining group, if not put them in
            # the selected group
            if len_files > supp_file_types_len:
                remaining.extend(v)
                message.append("'%s' has %d matches." % (k, len_files))
            else:
                v.sort()
                selected.append(v)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # get file_types, format: filetype, required, list of files
    file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # he wants to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message
               else '\n'.join(['Check these run_prefix:'] + message))

    return {'status': 'success',
            'message': message,
            'remaining': sorted(remaining),
            'num_prefixes': num_prefixes,
            'file_types': file_types,
            'artifacts': artifact_options}
<<<<<<< HEAD cols = ['study_id', 'email', 'principal_investigator_id', 'publication_doi', 'study_title', 'metadata_complete', 'number_samples_collected', 'study_abstract'] study_info = Study.get_info([s.id for s in study_set], cols) # get info for the studies infolist = [] for info in study_info: # Convert DictCursor to proper dict info = dict(info) study = Study(info['study_id']) # Build the processed data info for the study if none passed if build_samples: proc_data_list = [ar for ar in study.artifacts() if ar.artifact_type == 'BIOM'] proc_samples = {} study_proc = {study.id: defaultdict(list)} for proc_data in proc_data_list: study_proc[study.id][proc_data.data_type].append(proc_data.id) # there is only one prep template for each processed data proc_samples[proc_data.id] = proc_data.prep_templates[0].keys() study_info = _build_single_study_info(study, info, study_proc, proc_samples) infolist.append(study_info) return infolist ======= return generate_study_list([s.id for s in study_set],
def get(self):
    """Render the public view of a study (or of an artifact's study).

    Query arguments:
      study_id    -- id of the study to show, or
      artifact_id -- id of a public artifact whose study is shown

    Raises HTTPError(422) when neither id is given, the target does not
    exist, or the study/artifact is not public.
    """
    study_id = self.get_argument("study_id", None)
    artifact_id = self.get_argument("artifact_id", None)

    # NOTE: the original code had an unreachable `self.finish()` after
    # every `raise HTTPError(...)`; those dead statements were removed.
    if study_id is None and artifact_id is None:
        raise HTTPError(
            422, reason='You need to specify study_id or artifact_id')
    elif study_id is not None:
        try:
            study = Study(int(study_id))
        except QiitaDBUnknownIDError:
            raise HTTPError(422, reason="Study %s doesn't exist" % study_id)
        artifact_ids = [a.id for a in study.artifacts()
                        if a.visibility == 'public']
    else:
        try:
            artifact = Artifact(int(artifact_id))
        except QiitaDBUnknownIDError:
            raise HTTPError(422, reason="Artifact %s doesn't exist" %
                            artifact_id)
        if artifact.visibility != 'public':
            raise HTTPError(422, reason="Artifact %s is not public" %
                            artifact_id)
        study = artifact.study
        if study is None:
            raise HTTPError(422, reason="Artifact %s doesn't belong to "
                            "a study" % artifact_id)
        artifact_ids = [artifact.id]

    if study.status != 'public':
        raise HTTPError(422, reason='Not a public study')

    study_info = study.info
    study_info['study_id'] = study.id
    study_info['study_title'] = study.title
    study_info['shared_with'] = [s.id for s in study.shared_with]
    study_info['status'] = study.status
    study_info['ebi_study_accession'] = study.ebi_study_accession
    study_info['ebi_submission_status'] = study.ebi_submission_status

    # Clean up StudyPerson objects to string for display
    email = '<a href="mailto:{email}">{name} ({affiliation})</a>'
    pi = study.info['principal_investigator']
    study_info['principal_investigator'] = email.format(**{
        'name': pi.name,
        'email': pi.email,
        'affiliation': pi.affiliation})

    study_info['owner'] = study.owner.id
    # Add needed info that is not part of the initial info pull
    study_info['publications'] = []
    for pub, is_doi in study.publications:
        # BUG FIX: the linkifiers were swapped — DOIs went through
        # pubmed_linkifier and PubMed ids through doi_linkifier. Each
        # publication id now gets the formatter matching its type.
        if is_doi:
            study_info['publications'].append(doi_linkifier([pub]))
        else:
            study_info['publications'].append(pubmed_linkifier([pub]))
    study_info['publications'] = ', '.join(study_info['publications'])

    if study_info['ebi_study_accession']:
        links = ''.join([
            EBI_LINKIFIER.format(a)
            for a in study_info['ebi_study_accession'].split(',')])
        study_info['ebi_study_accession'] = '%s (%s)' % (
            links, study_info['ebi_submission_status'])

    self.render("public.html",
                study_info=study_info,
                artifacts_info=get_artifacts_information(
                    artifact_ids, False))
def study_get_req(study_id, user_id):
    """Returns information available for the given study

    Parameters
    ----------
    study_id : int
        Study id to get prep template info for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        Data types information in the form
        {'status': status,
         'message': message,
         'info': dict of objects
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info contains study information seperated by data type, in the form
        {col_name: value, ...} with value being a string, int, or list of
        strings or ints
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    # Can only pass ids over API, so need to instantiate object
    study = Study(study_id)
    info = study.info

    # Split publications into DOIs and PubMed ids for the template
    dois, pids = [], []
    for pub, is_doi in study.publications:
        (dois if is_doi else pids).append(pub)
    info['publication_doi'] = dois
    info['publication_pid'] = pids

    info['study_id'] = study.id
    info['study_title'] = study.title
    info['shared_with'] = [person.id for person in study.shared_with]
    info['status'] = study.status
    info['ebi_study_accession'] = study.ebi_study_accession
    info['ebi_submission_status'] = study.ebi_submission_status

    def _person_to_dict(person):
        # Flatten a StudyPerson object to plain strings for display
        return {'name': person.name,
                'email': person.email,
                'affiliation': person.affiliation}

    info['principal_investigator'] = _person_to_dict(
        info['principal_investigator'])
    if info['lab_person']:
        info['lab_person'] = _person_to_dict(info['lab_person'])

    template = study.sample_template
    info['num_samples'] = 0 if template is None else len(list(template))
    info['owner'] = study.owner.id

    requester = User(user_id)
    # Study.has_access no_public=True, will return True only if the
    # user_id is the owner of the study or if the study is shared with
    # the user_id
    info['has_access_to_raw_data'] = study.has_access(requester, True)
    info['show_biom_download_button'] = any(
        a.artifact_type == 'BIOM' for a in study.artifacts())
    info['show_raw_download_button'] = any(
        pt.artifact is not None for pt in study.prep_templates())

    return {'status': 'success',
            'message': '',
            'study_info': info,
            'editable': study.can_edit(requester)}
def get(self, study_id):
    """Stream a zip of all public BIOM artifacts (and their prep-template
    mapping files) for ``study_id``.

    The response body is a file-list manifest (one "<crc> <size> <uri>
    <name-in-zip>" line per file) which, together with the
    ``X-Archive-Files: zip`` header, is presumably consumed by an nginx
    mod_zip front end that performs the actual zipping — TODO confirm
    deployment assumption.

    Raises
    ------
    HTTPError
        405 when ``study_get_req`` reports the current user may not
        access the study.
    """
    study_id = int(study_id)
    # Check access to study
    study_info = study_get_req(study_id, self.current_user.id)
    if study_info['status'] != 'success':
        raise HTTPError(
            405, "%s: %s, %s" % (study_info['message'],
                                 self.current_user.email,
                                 str(study_id)))
    study = Study(study_id)
    # basedir_len includes the trailing path separator so slicing with it
    # yields a path relative to the protected files base directory
    basedir = get_db_files_base_dir()
    basedir_len = len(basedir) + 1
    # loop over artifacts and retrieve those that we have access to
    to_download = []  # list of (full path, nginx path, name inside zip)
    for a in study.artifacts():
        if a.artifact_type == 'BIOM':
            # a.filepaths yields (id, path, filepath-type) triples —
            # assumed from the unpacking below; verify against Artifact
            for i, (fid, path, data_type) in enumerate(a.filepaths):
                # ignore if tgz as they could create problems and the
                # raw data is in the folder
                if data_type == 'tgz':
                    continue
                if data_type == 'directory':
                    # If we have a directory, we actually need to list
                    # all the files from the directory so NGINX can
                    # actually download all of them
                    for dp, _, fps in walk(path):
                        for fname in fps:
                            fullpath = join(dp, fname)
                            spath = fullpath
                            if fullpath.startswith(basedir):
                                spath = fullpath[basedir_len:]
                            to_download.append((fullpath, spath, spath))
                elif path.startswith(basedir):
                    spath = path[basedir_len:]
                    to_download.append((path, spath, spath))
                else:
                    # We are not aware of any case that can trigger this
                    # situation, but we wanted to be overly cautious
                    # There is no test for this line cause we don't know
                    # how to trigger it
                    to_download.append((path, path, path))
            # also ship each prep template's QIIME mapping file, renamed
            # by artifact id inside the zip
            for pt in a.prep_templates:
                qmf = pt.qiime_map_fp
                if qmf is not None:
                    sqmf = qmf
                    if qmf.startswith(basedir):
                        sqmf = qmf[basedir_len:]
                    to_download.append(
                        (qmf, sqmf,
                         'mapping_files/%s_mapping_file.txt' % a.id))
    # If we don't have nginx, write a file that indicates this
    # Manifest line format: "-" (CRC placeholder) size /protected/<path>
    # <name-in-zip> — NOTE(review): matches nginx mod_zip's file-list
    # syntax; confirm against the deployed nginx config
    all_files = '\n'.join(["- %s /protected/%s %s"
                           % (getsize(fp), sfp, n)
                           for fp, sfp, n in to_download])
    self.write("%s\n" % all_files)
    # timestamped filename so repeated downloads don't collide
    zip_fn = 'study_%d_%s.zip' % (
        study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
    self.set_header('Content-Description', 'File Transfer')
    self.set_header('Expires', '0')
    self.set_header('Cache-Control', 'no-cache')
    # X-Archive-Files triggers the upstream zip-on-the-fly behavior
    self.set_header('X-Archive-Files', 'zip')
    self.set_header('Content-Disposition',
                    'attachment; filename=%s' % zip_fn)
    self.finish()