def test_move_upload_files_to_trash(self):
    """Check that an uploaded file can be moved to the trash folder

    A scratch file is created in the uploads folder of study 1, moved to
    the trash, and then several invalid requests are checked to raise
    QiitaDBError.
    """
    test_filename = 'this_is_a_test_file.txt'

    # create file to move to trash
    fid, folder = get_mountpoint("uploads")[0]
    # A context manager guarantees the handle is flushed and closed before
    # the file is listed and moved (the bare open().write() leaked it)
    with open(join(folder, '1', test_filename), 'w') as f:
        f.write('test')

    exp = [(fid, 'this_is_a_test_file.txt'), (fid, 'uploaded_file.txt')]
    obs = get_files_from_uploads_folders("1")
    self.assertItemsEqual(obs, exp)

    # move file
    move_upload_files_to_trash(1, [(fid, test_filename)])
    exp = [(fid, 'uploaded_file.txt')]
    obs = get_files_from_uploads_folders("1")
    self.assertItemsEqual(obs, exp)

    # testing errors
    # different study id (2) for the same file
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(2, [(fid, test_filename)])
    # different mountpoint id (10) - presumably one that does not exist
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(1, [(10, test_filename)])
    # same request as the successful one above, repeated after the move
    with self.assertRaises(QiitaDBError):
        move_upload_files_to_trash(1, [(fid, test_filename)])

    # removing trash folder
    rmtree(join(folder, '1', 'trash'))
def test_move_upload_files_to_trash(self):
    """Exercise move_upload_files_to_trash on the uploads of study 1"""
    test_filename = "this_is_a_test_file.txt"
    upload_id, upload_dir = get_mountpoint("uploads")[0]

    # Put a scratch file in the uploads folder so there is something to
    # move, and register it for removal with the other test files
    scratch_fp = join(upload_dir, "1", test_filename)
    with open(scratch_fp, "w") as handle:
        handle.write("test")
    self.files_to_remove.append(scratch_fp)

    # Both the scratch file and the pre-existing upload are listed
    self.assertItemsEqual(
        get_files_from_uploads_folders("1"),
        [(upload_id, "this_is_a_test_file.txt"),
         (upload_id, "uploaded_file.txt")])

    # After the move only the pre-existing upload is listed
    move_upload_files_to_trash(1, [(upload_id, test_filename)])
    self.assertItemsEqual(
        get_files_from_uploads_folders("1"),
        [(upload_id, "uploaded_file.txt")])

    # Each of these requests must be rejected with QiitaDBError; the last
    # one repeats the move that was already performed above
    failing_requests = (
        (2, [(upload_id, test_filename)]),
        (1, [(10, test_filename)]),
        (1, [(upload_id, test_filename)]),
    )
    for study_id, entries in failing_requests:
        with self.assertRaises(QiitaDBError):
            move_upload_files_to_trash(study_id, entries)

    # removing trash folder
    rmtree(join(upload_dir, "1", "trash"))
def test_get_files_from_uploads_folders(self):
    """Check the uploads listing for a study with and without uploads"""
    # something has been uploaded for study 1
    self.assertEqual(get_files_from_uploads_folders("1"),
                     ['uploaded_file.txt'])

    # nothing has been uploaded for study 2
    self.assertEqual(get_files_from_uploads_folders("2"), [])
def test_get_files_from_uploads_folders(self):
    """Check the uploads listing for a study with and without uploads"""
    # Study 1: only the uploaded file is reported, as a (7, filename) pair.
    # Hidden files/folders and plain folders are expected to be ignored.
    self.assertEqual(get_files_from_uploads_folders("1"),
                     [(7, 'uploaded_file.txt')])

    # Study 2: nothing has been uploaded
    self.assertEqual(get_files_from_uploads_folders("2"), [])
def new_prep_template_get_req(study_id):
    """Returns the information needed to populate the new prep info template

    Parameters
    ----------
    study_id : int
        The study id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, always 'success'
        - prep_files: list of str, the files in the upload dir of the given
          study whose name ends in '.txt' or '.tsv'
        - data_types: list, the sorted result of Study.all_data_types()
        - ontology: the investigation type ontology information, as returned
          by _get_ENA_ontology()
    """
    prep_files = []
    for _, filename in get_files_from_uploads_folders(study_id):
        if filename.endswith(('.txt', '.tsv')):
            prep_files.append(filename)

    response = {'status': 'success', 'prep_files': prep_files}
    response['data_types'] = sorted(Study.all_data_types())
    # Get all the ENA terms for the investigation type
    response['ontology'] = _get_ENA_ontology()
    return response
def new_prep_template_get_req(study_id):
    """Returns the information needed to populate the new prep info template

    Parameters
    ----------
    study_id : int
        The study id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, always 'success'
        - prep_files: list of str, the files in the upload dir of the given
          study whose name ends in '.txt' or '.tsv'
        - data_types: list, the sorted result of Study.all_data_types()
        - ontology: the investigation type ontology information, as returned
          by _get_ENA_ontology()
    """
    valid_suffixes = ('.txt', '.tsv')
    # Each upload entry is a triple; only the middle element (the file
    # name) is of interest here
    uploaded_names = (name for _, name, _ in
                      get_files_from_uploads_folders(study_id))
    prep_files = [name for name in uploaded_names
                  if name.endswith(valid_suffixes)]

    data_types = sorted(Study.all_data_types())

    # Get all the ENA terms for the investigation type
    ontology_info = _get_ENA_ontology()

    return {'status': 'success',
            'prep_files': prep_files,
            'data_types': data_types,
            'ontology': ontology_info}
def get(self):
    """Send formatted summary page of sample template

    Raises
    ------
    HTTPError
        404 if the summary request fails with a 'does not exist' message
        403 if it fails with a 'User does not have access to study' message
    """
    study_id = self.get_argument('study_id')

    # Uploaded files that can be used as a sample template
    files = []
    for _, filename in get_files_from_uploads_folders(study_id):
        if filename.endswith(('txt', 'tsv')):
            files.append(filename)
    data_types = sorted(data_types_get_req()['data_types'])

    user_id = self.current_user.id

    # Get the most recent version for download and build the link
    download = sample_template_filepaths_get_req(study_id, user_id)
    if download['status'] == 'success':
        download_id = download['filepaths'][0][0]
    else:
        download_id = None

    stats = sample_template_summary_get_req(study_id, user_id)
    if stats['status'] != 'success':
        error_msg = stats['message']
        if 'does not exist' in error_msg:
            raise HTTPError(404, error_msg)
        if 'User does not have access to study' in error_msg:
            raise HTTPError(403, error_msg)
        # NOTE(review): any other failure falls through and is rendered

    stats.update({'download_id': download_id,
                  'files': files,
                  'study_id': study_id,
                  'data_types': data_types})
    # URL encode in case message has javascript-breaking characters in it
    stats['alert_message'] = url_escape(stats['alert_message'])
    self.render('study_ajax/sample_summary.html', **stats)
def get(self):
    """Send formatted summary page of sample template

    Raises
    ------
    HTTPError
        404 if the summary request fails with a 'does not exist' message
        403 if it fails with a 'User does not have access to study' message
    """
    study_id = self.get_argument('study_id')

    uploaded = get_files_from_uploads_folders(study_id)
    files = [name for _, name in uploaded if name.endswith(('txt', 'tsv'))]
    data_types = sorted(data_types_get_req()['data_types'])

    # Get the most recent version for download and build the link
    download = sample_template_filepaths_get_req(
        study_id, self.current_user.id)
    download_id = None
    if download['status'] == 'success':
        download_id = download['filepaths'][0][0]

    stats = sample_template_summary_get_req(study_id, self.current_user.id)
    if stats['status'] != 'success':
        # Map known failure messages to an HTTP status, checked in order
        known_failures = (
            ('does not exist', 404),
            ('User does not have access to study', 403),
        )
        for needle, http_code in known_failures:
            if needle in stats['message']:
                raise HTTPError(http_code, stats['message'])

    stats['download_id'] = download_id
    stats['files'] = files
    stats['study_id'] = study_id
    stats['data_types'] = data_types
    # URL encode in case message has javascript-breaking characters in it
    stats['alert_message'] = url_escape(stats['alert_message'])

    self.render('study_ajax/sample_summary.html', **stats)
def render(self, study, full_access):
    """Render the prep template tab of the study description page

    Parameters
    ----------
    study : Study
        The study being displayed
    full_access : bool
        Forwarded to _template_generator and to the template
    """
    # Uploaded files that can be used as a prep template
    files = []
    for _, filename in get_files_from_uploads_folders(str(study.id)):
        if filename.endswith(('txt', 'tsv')):
            files.append(filename)

    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))
    prep_templates_info = list(_template_generator(study, full_access))

    # Get all the ENA terms for the investigation type
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    # make "Other" show at the bottom of the drop down menu
    ena_terms = ['<option value="%s">%s</option>' % (term, term)
                 for term in sorted(ontology.terms) if term != 'Other']
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    template_args = {
        'files': files,
        'data_types': data_types,
        'available_prep_templates': prep_templates_info,
        'ena_terms': ena_terms,
        'user_defined_terms': user_defined_terms,
        'study': study,
        'full_access': full_access,
    }
    return self.render_string(
        "study_description_templates/prep_template_tab.html",
        **template_args)
def render(self, study, full_access):
    """Render the prep template tab of the study description page

    Parameters
    ----------
    study : Study
        The study being displayed
    full_access : bool
        Forwarded to _template_generator and to the template
    """
    suffixes = ('txt', 'tsv')
    uploaded = get_files_from_uploads_folders(str(study.id))
    files = [name for _, name in uploaded if name.endswith(suffixes)]

    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))
    prep_templates_info = list(_template_generator(study, full_access))

    # Get all the ENA terms for the investigation type
    ontology = Ontology(convert_to_id('ENA', 'ontology'))

    # Build the drop down options, keeping "Other" as the last entry
    option = '<option value="%s">%s</option>'
    ena_terms = []
    for term in sorted(ontology.terms):
        if term == 'Other':
            continue
        ena_terms.append(option % (term, term))
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    return self.render_string(
        "study_description_templates/prep_template_tab.html",
        files=files,
        data_types=data_types,
        available_prep_templates=prep_templates_info,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        study=study,
        full_access=full_access)
def sample_template_overview_handler_get_request(study_id, user):
    """Gather the information shown in the sample template overview

    Parameters
    ----------
    study_id : int
        The study id
    user : User
        The user performing the request

    Returns
    -------
    dict of {str: object}
        A dictionary with the keys exists, uploaded_files, data_types,
        user_can_edit, job, download_id, old_files, num_samples and
        num_columns
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    # Check if the sample template exists
    exists = SampleTemplate.exists(study_id)

    # The following information should always be provided:
    # The files that have been uploaded to the system and can be a
    # sample template file
    files = []
    for _, filename in get_files_from_uploads_folders(study_id):
        if filename.endswith(('txt', 'tsv')):
            files.append(filename)

    # If there is a job associated with the sample information, the job id
    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    job = loads(job_info)['job_id'] if job_info else None

    # Specific information depending on whether the template exists
    data_types = []
    st_fp_id = None
    old_files = []
    num_samples = 0
    num_cols = 0
    if not exists:
        # We need to provide the data_types in case the user uploads a
        # QIIME mapping file
        data_types = sorted(data_types_get_req()['data_types'])
    else:
        st = SampleTemplate(study_id)
        remaining_files = st.get_filepaths()
        # The current sample template file is the first one in the list;
        # only its id is needed so the user can download it
        current_file = remaining_files.pop(0)
        st_fp_id = current_file[0]
        # For the old filepaths we are only interested in their basename
        old_files = [basename(fp) for _, fp in remaining_files]
        # Count the samples without materializing the list of keys
        num_samples = sum(1 for _ in st.keys())
        # The number of columns
        num_cols = len(st.categories())

    user_can_edit = Study(study_id).can_edit(user)
    return {'exists': exists,
            'uploaded_files': files,
            'data_types': data_types,
            'user_can_edit': user_can_edit,
            'job': job,
            'download_id': st_fp_id,
            'old_files': old_files,
            'num_samples': num_samples,
            'num_columns': num_cols}
def render(self, study):
    """Render the study information tab of the study description page

    Parameters
    ----------
    study : Study
        The study being displayed
    """
    study_info = study.info
    # Kept in a local that does not shadow the builtin `id`
    study_ident = study.id

    pmid_links = [pubmed_linkifier([pmid]) for pmid in study.pmids]
    pmids = ", ".join(pmid_links)
    princ_inv = StudyPerson(study_info['principal_investigator_id'])
    pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))
    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

    # Retrieve the files from the uploads folder, so the user can choose
    # the sample template of the study. Only the ones that end with 'txt'
    # or 'tsv' are kept.
    files = []
    for _, filename in get_files_from_uploads_folders(str(study.id)):
        if filename.endswith(('txt', 'tsv')):
            files.append(filename)

    # All the filepaths of the sample template, or an empty list if the
    # sample template does not exist
    sample_templates = []
    if SampleTemplate.exists(study.id):
        sample_templates = SampleTemplate(study.id).get_filepaths()

    # Check if the request came from a local source
    is_local_request = is_localhost(self.request.headers['host'])

    # The user can choose the sample template only if the study is
    # sandboxed or the current user is an admin
    show_select_sample = (
        study.status == 'sandbox' or self.current_user.level == 'admin')

    # Ebi information
    ebi_status = study.ebi_submission_status
    ebi_accession = study.ebi_study_accession
    if ebi_accession:
        ebi_accession = (EBI_LINKIFIER.format(ebi_accession))

    template_args = {
        'abstract': study_info['study_abstract'],
        'description': study_info['study_description'],
        'id': study_ident,
        'pmids': pmids,
        'principal_investigator': pi_link,
        'number_samples_promised': study_info['number_samples_promised'],
        'number_samples_collected': study_info['number_samples_collected'],
        'metadata_complete': study_info['metadata_complete'],
        'show_select_sample': show_select_sample,
        'files': files,
        'study_id': study.id,
        'sample_templates': sample_templates,
        'is_local_request': is_local_request,
        'data_types': data_types,
        'ebi_status': ebi_status,
        'ebi_accession': ebi_accession,
    }
    return self.render_string(
        "study_description_templates/study_information_tab.html",
        **template_args)
def tearDown(self):
    """Remove the files registered for clean up and any new upload

    Upload entries that are not in self.uploaded_files are treated as
    created by the test and are removed along with the files already
    listed in self._clean_up_files.
    """
    current_uploads = get_files_from_uploads_folders(str(self.study.id))
    created = set(current_uploads).difference(self.uploaded_files)

    uploads_dir = get_mountpoint("uploads")[0][1]
    for _, filename in created:
        self._clean_up_files.append(join(uploads_dir, '1', filename))

    # Only files that are still on disk need to be removed
    for fp in self._clean_up_files:
        if not exists(fp):
            continue
        remove(fp)
def sample_template_overview_handler_get_request(study_id, user):
    """Gather the information shown in the sample template overview

    Parameters
    ----------
    study_id : int
        The study id
    user : User
        The user performing the request

    Returns
    -------
    dict of {str: object}
        A dictionary with the keys exists, uploaded_files, data_types,
        user_can_edit, job, download_id, old_files, num_samples and
        num_columns
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    # Check if the sample template exists
    exists = SampleTemplate.exists(study_id)

    # The following information should always be provided:
    # The files that have been uploaded to the system and can be a
    # sample template file
    candidate_suffixes = ('txt', 'tsv', 'xlsx')
    files = []
    for _, filename, _ in get_files_from_uploads_folders(study_id):
        if filename.endswith(candidate_suffixes):
            files.append(filename)

    # If there is a job associated with the sample information, the job id
    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    job = loads(job_info)['job_id'] if job_info else None

    # Values used for whichever case does not fill them in below
    data_types, old_files = [], []
    st_fp_id = None
    num_samples = num_cols = 0

    if not exists:
        # We need to provide the data_types in case the user uploads a
        # QIIME mapping file
        data_types = sorted(data_types_get_req()['data_types'])
    else:
        st = SampleTemplate(study_id)
        remaining_files = st.get_filepaths()
        # The current sample template file is the first one in the list;
        # only its id is needed so the user can download it
        current_file = remaining_files.pop(0)
        st_fp_id = current_file[0]
        # For the old filepaths we are only interested in their basename
        old_files = [basename(fp) for _, fp in remaining_files]
        # Count the samples without materializing the list of keys
        num_samples = sum(1 for _ in st.keys())
        # The number of columns
        num_cols = len(st.categories())

    user_can_edit = Study(study_id).can_edit(user)
    return {'exists': exists,
            'uploaded_files': files,
            'data_types': data_types,
            'user_can_edit': user_can_edit,
            'job': job,
            'download_id': st_fp_id,
            'old_files': old_files,
            'num_samples': num_samples,
            'num_columns': num_cols}
def display_template(self, study_id, msg):
    """Render the upload page for the given study

    Parameters
    ----------
    study_id : str or int
        The study id; it is cast to int before use
    msg : object
        Not used by this implementation
    """
    study_id = int(study_id)
    study = Study(study_id)
    user = self.current_user
    # raise_error=True: the access check is expected to raise rather than
    # return when the user cannot access the study
    check_access(user, study, no_public=True, raise_error=True)

    template_args = {
        'study_title': study.title,
        'study_info': study.info,
        'study_id': study_id,
        'is_admin': user.level == 'admin',
        'extensions': ','.join(qiita_config.valid_upload_extension),
        'max_upload_size': qiita_config.max_upload_size,
        'files': get_files_from_uploads_folders(str(study_id)),
    }
    self.render('upload.html', **template_args)
def display_template(self, study_id, msg):
    """Render the upload page of a study, including remote upload status

    Parameters
    ----------
    study_id : str or int
        The study id; it is cast to int before use
    msg : object
        Not used by this implementation

    Notes
    -----
    If redis holds an entry under UPLOAD_STUDY_FORMAT for the study, the
    alert level and message (and, for jobs that are neither running nor
    failed, the remote url and file list) are derived from it and from
    the ProcessingJob it references.
    """
    study_id = int(study_id)
    study = Study(study_id)
    user = self.current_user

    # Values shown when there is no job information for this study
    level, message = 'info', ''
    remote_url, remote_files = '', []

    check_access(user, study, no_public=True, raise_error=True)

    raw_job_info = r_client.get(UPLOAD_STUDY_FORMAT % study_id)
    if raw_job_info:
        # Missing keys resolve to '' instead of raising KeyError
        job_info = defaultdict(lambda: '', loads(raw_job_info))
        job = ProcessingJob(job_info['job_id'])
        status = job.status
        url = job.parameters.values['url']

        if status == 'error':
            level = 'danger'
            message = job.log.msg.replace('\n', '</br>')
            # making errors nicer for users
            if 'No such file' in message:
                message = 'URL not valid: <i>%s</i>, please review.' % url
        elif status == 'success':
            remote_url = job_info['url']
            remote_files = job_info['files']
            level = job_info['alert_type']
            message = job_info['alert_msg'].replace('\n', '</br>')
        else:
            # Any other status means the job is still being processed
            if job.command.name == 'list_remote_files':
                progress = 'Retrieving remote files: listing %s'
            else:
                progress = 'Retrieving remote files: download %s'
            message = progress % url

    template_args = {
        'study_title': study.title,
        'study_info': study.info,
        'study_id': study_id,
        'is_admin': user.level == 'admin',
        'extensions': ','.join(qiita_config.valid_upload_extension),
        'max_upload_size': qiita_config.max_upload_size,
        'level': level,
        'message': message,
        'remote_url': remote_url,
        'remote_files': remote_files,
        'files': get_files_from_uploads_folders(str(study_id)),
    }
    self.render('upload.html', **template_args)
def render(self, study):
    """Render the study information tab of the study description page

    Parameters
    ----------
    study : Study
        The study being displayed
    """
    study_info = study.info

    pmid_links = [pubmed_linkifier([pmid]) for pmid in study.pmids]
    pmids = ", ".join(pmid_links)
    princ_inv = StudyPerson(study_info['principal_investigator_id'])
    pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))

    # Retrieve the files from the uploads folder, so the user can choose
    # the sample template of the study
    files = []
    for _, filename in get_files_from_uploads_folders(str(study.id)):
        files.append(filename)

    # All the filepaths of the sample template, or an empty list if the
    # sample template does not exist
    sample_templates = []
    if SampleTemplate.exists(study.id):
        sample_templates = SampleTemplate(study.id).get_filepaths()

    # Check if the request came from a local source
    is_local_request = self._is_local()

    # The user can choose the sample template only if the study is
    # sandboxed or the current user is an admin
    show_select_sample = (
        study.status == 'sandbox' or self.current_user.level == 'admin')

    template_args = {
        'abstract': study_info['study_abstract'],
        'description': study_info['study_description'],
        'pmids': pmids,
        'principal_investigator': pi_link,
        'number_samples_promised': study_info['number_samples_promised'],
        'number_samples_collected': study_info['number_samples_collected'],
        'metadata_complete': study_info['metadata_complete'],
        'show_select_sample': show_select_sample,
        'files': files,
        'study_id': study.id,
        'sample_templates': sample_templates,
        'is_local_request': is_local_request,
    }
    return self.render_string(
        "study_description_templates/study_information_tab.html",
        **template_args)
def setUp(self):
    """Create the temporary files and the studies used by the tests"""
    # Two temporary fastq files; only the path is needed, so the
    # descriptor returned by mkstemp is closed right away
    temp_fps = []
    for suffix in ('_seqs.fastq', '_barcodes.fastq'):
        fd, fp = mkstemp(suffix=suffix)
        close(fd)
        temp_fps.append(fp)
    self.filepaths = temp_fps

    # Each file contains its own path; checksums follow sorted path order
    self.checksums = []
    for fp in sorted(self.filepaths):
        with open(fp, 'w') as handle:
            handle.write("%s\n" % fp)
        self.checksums.append(compute_checksum(fp))
    self.filepaths_types = ["raw_forward_seqs", "raw_barcodes"]
    self._clean_up_files = list(temp_fps)

    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    owner = User("*****@*****.**")
    self.new_study = Study.create(owner, "Update raw data test",
                                  efo=[1], info=info)
    self.study = Study(1)

    # The files for the RawData object attached to study 1 do not exist.
    # Create them so we can actually perform the tests
    for _, raw_fp, _ in RawData(1).get_filepaths():
        with open(raw_fp, 'w') as handle:
            handle.write('\n')
        self._clean_up_files.append(raw_fp)

    # Snapshot of the uploads present before the test runs
    self.uploaded_files = get_files_from_uploads_folders(
        str(self.study.id))
def get(self):
    """Report which uploaded files match the run prefixes of a prep file

    Reads the `study_id`, `prep_file` and `type` request arguments, loads
    the prep file from the study uploads folder and writes a dictionary
    with the following keys:
    - per_prefix: bool, whether the prep file has a 'run_prefix' column
    - file_types: the result of supported_filepath_types for `type`
    - selected: list of str, uploaded files whose name starts with one of
      the run prefixes (always empty when per_prefix is False)
    - remaining: list of str, every other uploaded file
    """
    study_id = self.get_argument('study_id')
    prep_file = self.get_argument('prep_file')
    prep_type = self.get_argument('type')

    # TODO: Get file types for the artifact type
    # FILE TYPE IN POSITION 0 MUST BE DEFAULT FOR SELECTED
    file_types = supported_filepath_types(prep_type)

    _, base = get_mountpoint("uploads")[0]
    uploaded = get_files_from_uploads_folders(study_id)
    # The file name is the second element of each entry. Index rather
    # than unpack so the same code works whether the entries are pairs or
    # triples; unpacking them differently per branch cannot work for both.
    filenames = [entry[1] for entry in uploaded]
    prep = pd.read_table(join(base, study_id, prep_file), sep='\t')

    selected = []
    not_selected = []
    per_prefix = 'run_prefix' in prep.columns
    if per_prefix:
        # Use run_prefix column of prep template to auto-select
        # per-prefix uploaded files if available.
        # str.startswith accepts a tuple, so each file is tested against
        # all prefixes at once and lands in exactly one list. Testing one
        # prefix at a time listed a file as remaining once for every
        # prefix it did not match.
        prep_prefixes = tuple(set(prep['run_prefix']))
        for filename in filenames:
            if filename.startswith(prep_prefixes):
                selected.append(filename)
            else:
                not_selected.append(filename)
    else:
        not_selected = filenames

    # Write out if this prep template supports per-prefix files, as well
    # as the pre-selected and remaining files
    self.write({
        'per_prefix': per_prefix,
        'file_types': file_types,
        'selected': selected,
        'remaining': not_selected})
def get(self):
    """Report which uploaded files match the run prefixes of a prep file

    Reads the `study_id`, `prep_file` and `type` request arguments, loads
    the prep file from the study uploads folder and writes a dictionary
    with the following keys:
    - per_prefix: bool, whether the prep file has a 'run_prefix' column
    - file_types: the result of supported_filepath_types for `type`
    - selected: list of str, uploaded files whose name starts with one of
      the run prefixes (always empty when per_prefix is False)
    - remaining: list of str, every other uploaded file
    """
    study_id = self.get_argument('study_id')
    prep_file = self.get_argument('prep_file')
    prep_type = self.get_argument('type')

    # TODO: Get file types for the artifact type
    # FILE TYPE IN POSITION 0 MUST BE DEFAULT FOR SELECTED
    file_types = supported_filepath_types(prep_type)

    selected = []
    not_selected = []
    _, base = get_mountpoint("uploads")[0]
    uploaded = get_files_from_uploads_folders(study_id)
    prep = pd.read_table(join(base, study_id, prep_file), sep='\t')

    per_prefix = 'run_prefix' in prep.columns
    if per_prefix:
        # Use run_prefix column of prep template to auto-select
        # per-prefix uploaded files if available.
        # str.startswith accepts a tuple, so each file is tested against
        # all prefixes at once and lands in exactly one list. Testing one
        # prefix at a time listed a file as remaining once for every
        # prefix it did not match.
        prep_prefixes = tuple(set(prep['run_prefix']))
        for _, filename in uploaded:
            if filename.startswith(prep_prefixes):
                selected.append(filename)
            else:
                not_selected.append(filename)
    else:
        not_selected = [f for _, f in uploaded]

    # Write out if this prep template supports per-prefix files, as well
    # as the pre-selected and remaining files
    self.write({
        'per_prefix': per_prefix,
        'file_types': file_types,
        'selected': selected,
        'remaining': not_selected})
def display_template(self, study, msg, msg_level, tab_to_display=""): """Simple function to avoid duplication of code""" # Check if the request came from a local source is_local_request = ('localhost' in self.request.headers['host'] or '127.0.0.1' in self.request.headers['host']) # getting raw filepath_ types fts = [k.split('_', 1)[1].replace('_', ' ') for k in get_filepath_types() if k.startswith('raw_')] fts = ['<option value="%s">%s</option>' % (f, f) for f in fts] user = User(self.current_user) # getting the RawData and its prep templates available_raw_data = yield Task(self.get_raw_data, study.raw_data()) available_prep_templates = yield Task(self.get_prep_templates, available_raw_data) # set variable holding if we have files attached to all raw data or not raw_files = True if available_raw_data else False for r in available_raw_data: if not r.get_filepaths(): raw_files = False # set variable holding if we have all prep templates or not prep_templates = True if available_prep_templates else False for key, val in viewitems(available_prep_templates): if not val: prep_templates = False # other general vars, note that we create the select options here # so we do not have to loop several times over them in the template data_types = sorted(viewitems(get_data_types()), key=itemgetter(1)) data_types = ['<option value="%s">%s</option>' % (v, k) for k, v in data_types] filetypes = sorted(viewitems(get_filetypes()), key=itemgetter(1)) filetypes = ['<option value="%s">%s</option>' % (v, k) for k, v in filetypes] other_studies_rd = yield Task(self.get_raw_data_from_other_studies, user, study) other_studies_rd = ['<option value="%s">%s</option>' % (k, "id: %d, study: %s" % (k, v)) for k, v in viewitems(other_studies_rd)] ontology = Ontology(convert_to_id('ENA', 'ontology')) # make "Other" show at the bottom of the drop down menu ena_terms = [] for v in sorted(ontology.terms): if v != 'Other': ena_terms.append('<option value="%s">%s</option>' % (v, v)) 
ena_terms.append('<option value="Other">Other</option>') # New Type is for users to add a new user-defined investigation type user_defined_terms = ontology.user_defined_terms + ['New Type'] princ_inv = StudyPerson(study.info['principal_investigator_id']) pi_link = study_person_linkifier((princ_inv.email, princ_inv.name)) if SampleTemplate.exists(study.id): sample_templates = SampleTemplate(study.id).get_filepaths() else: sample_templates = [] self.render('study_description.html', user=self.current_user, study_title=study.title, study_info=study.info, study_id=study.id, filetypes=''.join(filetypes), user_level=user.level, data_types=''.join(data_types), available_raw_data=available_raw_data, available_prep_templates=available_prep_templates, ste=SampleTemplate.exists(study.id), study_status=study.status, filepath_types=''.join(fts), ena_terms=''.join(ena_terms), tab_to_display=tab_to_display, level=msg_level, message=msg, prep_templates=prep_templates, raw_files=raw_files, can_upload=check_access(user, study, no_public=True), other_studies_rd=''.join(other_studies_rd), user_defined_terms=user_defined_terms, files=get_files_from_uploads_folders(str(study.id)), is_public=study.status == 'public', pmids=", ".join([pubmed_linkifier([pmid]) for pmid in study.pmids]), principal_investigator=pi_link, is_local_request=is_local_request, sample_templates=sample_templates)
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, whether the request is successful or not
        - message: str, if the request is unsuccessful, a human readable
          error
        - name: str, the name of the prep template
        - files: list of str, the files available to update the prep
          template
        - download_prep: int, the filepath_id of the prep file
        - download_qiime, int, the filepath_id of the qiime mapping file
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: str, the investigation type of the template
        - ontology: str, dict of {str, list of str} containing the
          information of the ENA ontology
        - artifact_attached: bool, whether the template has an artifact
          attached
        - study_id: int, the study id of the template
        - editable: bool, whether the user can edit the study and the
          template is not currently being updated
        - data_type: the value returned by the template's data_type()
        - alert_type: str, the type of the alert to show, if any
        - alert_message: str, the message of the alert to show, if any
    """
    # Currently there is no name attribute, but it will be soon
    name = "Prep information %d" % prep_id
    pt = PrepTemplate(prep_id)

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    files = []
    for _, filename in get_files_from_uploads_folders(study_id):
        if filename.endswith(('.txt', '.tsv')):
            files.append(filename)

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    # Retrieve the information to download the prep template and QIIME
    # mapping file. See issue https://github.com/biocore/qiita/issues/1675
    prep_fp_ids = []
    qiime_fp_ids = []
    for fp_id, fp in pt.get_filepaths():
        # QIIME mapping files are told apart by their file name
        bucket = qiime_fp_ids if 'qiime' in basename(fp) else prep_fp_ids
        bucket.append(fp_id)
    download_prep = prep_fp_ids[0]
    download_qiime = qiime_fp_ids[0]

    ontology = _get_ENA_ontology()

    # Values used when there is no job registered for this prep template
    processing = False
    alert_type = ''
    alert_msg = ''
    job_id = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if job_id:
        redis_info = loads(r_client.get(job_id))
        processing = redis_info['status_msg'] == 'Running'
        if processing:
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'
        else:
            job_result = redis_info['return']
            alert_type = job_result['status']
            alert_msg = job_result['message'].replace('\n', '</br>')

    editable = Study(study_id).can_edit(User(user_id)) and not processing

    return {'status': 'success',
            'message': '',
            'name': name,
            'files': files,
            'download_prep': download_prep,
            'download_qiime': download_qiime,
            'num_samples': num_samples,
            'num_columns': num_columns,
            'investigation_type': investigation_type,
            'ontology': ontology,
            'artifact_attached': artifact_attached,
            'study_id': study_id,
            'editable': editable,
            'data_type': pt.data_type(),
            'alert_type': alert_type,
            'alert_message': alert_msg}
def render(self, study, raw_data):
    """Render the raw data editor tab of the study description page

    Parameters
    ----------
    study : Study
        The study being displayed
    raw_data : RawData
        The raw data being edited
    """
    user = self.current_user
    study_status = study.status
    user_level = user.level
    raw_data_id = raw_data.id
    files = []
    for _, filename in get_files_from_uploads_folders(str(study.id)):
        files.append(filename)

    # Get the available prep template data types
    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

    # Get all the ENA terms for the investigation type
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    # make "Other" show at the bottom of the drop down menu
    ena_terms = ['<option value="%s">%s</option>' % (term, term)
                 for term in sorted(ontology.terms) if term != 'Other']
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    # Get all the information about the prep templates
    available_prep_templates = []
    for prep_id in sorted(raw_data.prep_templates):
        if not PrepTemplate.exists(prep_id):
            continue
        pt = PrepTemplate(prep_id)
        # if the prep template doesn't belong to this study, skip
        if study.id == pt.study_id:
            available_prep_templates.append(pt)

    # getting filepath_types: fixed lists for the known filetypes, every
    # raw filepath type otherwise
    known_filetypes = {
        'SFF': ['sff'],
        'FASTA': ['fasta', 'qual'],
        'FASTQ': ['barcodes', 'forward seqs', 'reverse seqs'],
    }
    fts = known_filetypes.get(raw_data.filetype)
    if fts is None:
        fts = [k.split('_', 1)[1].replace('_', ' ')
               for k in get_filepath_types() if k.startswith('raw_')]

    # The raw data can be edited (i.e. adding prep templates and files)
    # only if the study is sandboxed or the current user is an admin
    is_editable = study_status == 'sandbox' or user_level == 'admin'

    # Get the files linked with the raw_data
    raw_data_files = raw_data.get_filepaths()

    # Get the status of the data linking
    raw_data_link_status = raw_data.link_filepaths_status

    in_progress_msgs = {'linking': "Linking files...",
                        'unlinking': "Unlinking files..."}
    if raw_data_link_status in in_progress_msgs:
        # While files are being (un)linked the unlink button is hidden and
        # the link button is disabled
        show_unlink_btn = False
        disable_link_btn = True
        link_msg = in_progress_msgs[raw_data_link_status]
    else:
        disable_link_btn = False
        # The unlink button is only shown if the study is editable and
        # there are files attached to the raw data
        show_unlink_btn = is_editable and raw_data_files
        if raw_data_link_status.startswith('failed'):
            link_msg = "Error (un)linking files: %s" % raw_data_link_status
        else:
            link_msg = ""

    # Get the raw_data filetype
    raw_data_filetype = raw_data.filetype

    return self.render_string(
        "study_description_templates/raw_data_editor_tab.html",
        study_id=study.id,
        study_status=study_status,
        user_level=user_level,
        raw_data_id=raw_data_id,
        files=files,
        data_types=data_types,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        available_prep_templates=available_prep_templates,
        filepath_types=fts,
        is_editable=is_editable,
        show_unlink_btn=show_unlink_btn,
        link_msg=link_msg,
        raw_data_files=raw_data_files,
        raw_data_filetype=raw_data_filetype,
        disable_link_btn=disable_link_btn)
def render(self, study, raw_data, full_access):
    """Render the raw data editor tab, honoring the user's access level

    Parameters
    ----------
    study : object
        The study being shown; its ``id`` and ``status`` are read
    raw_data : object
        The raw data being edited; its ``id``, ``filetype``,
        ``prep_templates``, ``link_filepaths_status`` and
        ``get_filepaths()`` are read
    full_access : bool
        When falsy, only prep templates whose status is 'public' are listed

    Returns
    -------
    object
        Whatever ``self.render_string`` returns for the
        ``raw_data_editor_tab.html`` template
    """
    study_status = study.status
    user_level = self.current_user.level
    raw_data_id = raw_data.id
    uploads = get_files_from_uploads_folders(str(study.id))
    files = [fname for _, fname in uploads]

    # Available prep template data types, ordered by the second item of
    # each (key, value) pair
    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

    # ENA terms for the investigation type; "Other" is always placed last
    # so it shows at the bottom of the drop down menu
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    ena_terms = ['<option value="%s">%s</option>' % (term, term)
                 for term in sorted(ontology.terms) if term != 'Other']
    ena_terms.append('<option value="Other">Other</option>')

    # 'New Type' lets users add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    # Keep the existing prep templates that belong to this study and that
    # the user may see (everything with full access, otherwise public only)
    available_prep_templates = []
    for pt_id in sorted(raw_data.prep_templates):
        if not PrepTemplate.exists(pt_id):
            continue
        prep = PrepTemplate(pt_id)
        if study.id != prep.study_id:
            continue
        if full_access or prep.status == 'public':
            available_prep_templates.append(prep)

    # Filepath types offered for each known raw data filetype; any other
    # filetype gets every 'raw_*' filepath type with the prefix stripped
    known_fp_types = {
        'SFF': ['sff'],
        'FASTA': ['fasta', 'qual'],
        'FASTQ': ['barcodes', 'forward seqs', 'reverse seqs'],
    }
    filetype = raw_data.filetype
    if filetype in known_fp_types:
        fp_types = known_fp_types[filetype]
    else:
        fp_types = [name.split('_', 1)[1].replace('_', ' ')
                    for name in get_filepath_types()
                    if name.startswith('raw_')]

    # The raw data can be edited (i.e. adding prep templates and files)
    # only if the study is sandboxed or the current user is an admin
    is_editable = study_status == 'sandbox' or user_level == 'admin'

    raw_data_files = raw_data.get_filepaths()
    link_status = raw_data.link_filepaths_status

    # While a (un)linking operation is running the link button is disabled
    # and the unlink button is hidden
    busy_msgs = {'linking': "Linking files...",
                 'unlinking': "Unlinking files..."}
    disable_link_btn = link_status in busy_msgs
    if disable_link_btn:
        show_unlink_btn = False
        link_msg = busy_msgs[link_status]
    else:
        # Not (un)linking: the unlink button is shown when the raw data is
        # editable and has files attached
        show_unlink_btn = is_editable and raw_data_files
        if link_status.startswith('failed'):
            link_msg = "Error (un)linking files: %s" % link_status
        else:
            link_msg = ""

    return self.render_string(
        "study_description_templates/raw_data_editor_tab.html",
        study_id=study.id,
        study_status=study_status,
        user_level=user_level,
        raw_data_id=raw_data_id,
        files=files,
        data_types=data_types,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        available_prep_templates=available_prep_templates,
        filepath_types=fp_types,
        is_editable=is_editable,
        show_unlink_btn=show_unlink_btn,
        link_msg=link_msg,
        raw_data_files=raw_data_files,
        raw_data_filetype=filetype,
        disable_link_btn=disable_link_btn)
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files of a study, pre-sorted for an artifact type

    Files whose name starts with one of the prep template's ``run_prefix``
    values are proposed for the first supported filepath type; all the
    other files are reported as remaining.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict with the keys:
        - status: str, always 'success'
        - message: str, always empty
        - remaining: sorted list of str, the files not pre-selected
        - file_types: list of (str, bool, list of str), the supported
          filepath types, whether each is required and its proposed files
        - num_prefixes: int, the number of distinct run_prefix values used
          (0 when run_prefix matching was not applied)
        - artifacts: list of (int, str), the id and label of the artifacts
          the user can import files from
    """
    supp_file_types = supported_filepath_types(artifact_type)
    uploaded = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    selected = []
    remaining = []

    # run_prefix matching only applies when a 'raw_*' type other than
    # 'raw_sff' is supported and the prep template has the column
    has_raw_type = any(name.startswith('raw_')
                       for name, _ in supp_file_types if name != 'raw_sff')
    if has_raw_type and 'run_prefix' in prep_df.columns:
        prep_prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prep_prefixes)
        for _, filename in uploaded:
            if filename.startswith(prep_prefixes):
                bucket = selected
            else:
                bucket = remaining
            bucket.append(filename)
    else:
        num_prefixes = 0
        remaining = [filename for _, filename in uploaded]

    # Nothing smarter can be done about assigning files to types, so the
    # first supported type receives every pre-selected file (an empty list
    # when run_prefix matching was skipped) and the rest start empty
    rest = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
    first = supp_file_types[0]
    file_types = [(first[0], first[1], selected)] + rest

    # Artifacts the user has access to, in case the files should be
    # imported from another artifact; the current study is always included
    user_artifacts = User(user_id).user_artifacts(
        artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)

    artifact_options = []
    for owner, artifacts in viewitems(user_artifacts):
        label = "%s (%d)" % (owner.title, owner.id)
        artifact_options.extend(
            (a.id, "%s - %s (%d)" % (label, a.name, a.id))
            for a in artifacts)

    return {'status': 'success',
            'message': '',
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, always 'success'
        - message: str, always empty
        - name: str, the name of the prep template
        - files: list of str, the uploaded '.txt'/'.tsv' files of the study
        - download_prep_id: the id of the first non-qiime filepath, or None
        - download_qiime_id: the id of the first qiime filepath, or None
        - other_filepaths: list of str, the basenames of the non-qiime
          filepaths after the first one
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: the investigation type of the template
        - ontology: the value returned by ``_get_ENA_ontology``
        - artifact_attached: bool, whether the template has an artifact
        - study_id: the study id of the template
        - editable: whether the user can edit the study and no update job
          is currently running
        - data_type: the data type of the template
        - alert_type: str, the kind of alert to show ('' if none)
        - is_submitted_to_ebi: the template's ``is_submitted_to_ebi`` value
        - alert_message: str, the alert text ('' if none)
    """
    pt = PrepTemplate(prep_id)
    name = pt.name

    # Defaults used when no update job is registered for this template
    processing = False
    alert_type = ''
    alert_msg = ''

    raw_job_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if raw_job_info:
        # Missing keys resolve to the empty string
        job_info = defaultdict(str, loads(raw_job_info))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        if job_status == 'error':
            alert_type = 'danger'
            alert_msg = job.log.msg.replace('\n', '</br>')
        elif job_status == 'success':
            alert_type = job_info['alert_type']
            alert_msg = job_info['alert_msg'].replace('\n', '</br>')
        else:
            # Any other status means the job has not finished yet
            processing = True
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    uploads = get_files_from_uploads_folders(study_id)
    files = [fname for _, fname, _ in uploads
             if fname.endswith(('.txt', '.tsv'))]

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    # The first qiime mapping file and the first prep file are offered for
    # download; any further prep file is listed by basename
    download_prep_id = None
    download_qiime_id = None
    other_filepaths = []
    for fp_id, fp in pt.get_filepaths():
        fname = basename(fp)
        is_qiime = 'qiime' in fname
        if is_qiime and download_qiime_id is None:
            download_qiime_id = fp_id
        elif not is_qiime and download_prep_id is None:
            download_prep_id = fp_id
        elif not is_qiime:
            other_filepaths.append(fname)

    ontology = _get_ENA_ontology()

    can_edit = Study(study_id).can_edit(User(user_id))
    editable = can_edit and not processing

    return {'status': 'success',
            'message': '',
            'name': name,
            'files': files,
            'download_prep_id': download_prep_id,
            'download_qiime_id': download_qiime_id,
            'other_filepaths': other_filepaths,
            'num_samples': num_samples,
            'num_columns': num_columns,
            'investigation_type': investigation_type,
            'ontology': ontology,
            'artifact_attached': artifact_attached,
            'study_id': study_id,
            'editable': editable,
            'data_type': pt.data_type(),
            'alert_type': alert_type,
            'is_submitted_to_ebi': pt.is_submitted_to_ebi,
            'alert_message': alert_msg}
def render(self, study, prep_template, full_access, ena_terms,
           user_defined_terms):
    """Render the prep template information tab

    Parameters
    ----------
    study : object
        The study that owns the prep template
    prep_template : object
        The prep template being shown
    full_access : bool
        Not used by this method
    ena_terms : object
        Forwarded untouched to the template
    user_defined_terms : object
        Forwarded untouched to the template

    Returns
    -------
    object
        Whatever ``self.render_string`` returns for the
        ``prep_template_info_tab.html`` template

    Raises
    ------
    NotImplementedError
        If the raw data filetype is not one of 'SFF', 'FASTA', 'FASTQ' or
        'per_sample_FASTQ'
    """
    user = self.current_user
    is_local_request = is_localhost(self.request.headers['host'])

    # Both the prep template and the qiime mapping files have the same
    # type; they are told apart by the substring 'qiime' in the basename
    template_fps = []
    qiime_fps = []
    for fp_id, fp in prep_template.get_filepaths():
        if 'qiime' in basename(fp):
            bucket, label = qiime_fps, 'Qiime mapping'
        else:
            bucket, label = template_fps, 'Prep template'
        bucket.append(
            download_link_or_path(is_local_request, fp, fp_id, label))

    # get_filepaths returns the paths sorted from newest to oldest, so the
    # first entry of each list is the current one
    current_template_fp = template_fps[0]
    current_qiime_fp = qiime_fps[0]

    show_old_templates = len(template_fps) > 1
    old_templates = template_fps[1:] if show_old_templates else None

    show_old_qiime_fps = len(qiime_fps) > 1
    old_qiime_fps = qiime_fps[1:] if show_old_qiime_fps else None

    filetypes = [(ft, ft_id, fp_type_by_ft[ft])
                 for ft, ft_id in viewitems(get_filetypes())]
    filetypes.sort(key=itemgetter(1))

    uploads = get_files_from_uploads_folders(str(study.id))
    files = [fname for _, fname in uploads]

    other_studies_rd = sorted(viewitems(_get_accessible_raw_data(user)))

    # A prep template can be modified if its status is sandbox
    is_editable = prep_template.status == 'sandbox'

    raw_data_id = prep_template.raw_data
    preprocess_options = []
    preprocessed_data = None
    show_preprocess_btn = True
    no_preprocess_msg = None
    if raw_data_id:
        rd = RawData(raw_data_id)
        rd_ft = rd.filetype

        # With raw data attached the template can be preprocessed; pick
        # the parameter sets that apply to the raw data filetype
        if rd_ft in ('SFF', 'FASTA'):
            param_iter = Preprocessed454Params.iter()
        elif rd_ft in ('FASTQ', 'per_sample_FASTQ'):
            # per-sample FASTQ uses only the 'not-barcoded' sets, plain
            # FASTQ uses every other set
            want_not_barcoded = rd_ft == 'per_sample_FASTQ'
            param_iter = [
                pip for pip in PreprocessedIlluminaParams.iter()
                if ((pip.values['barcode_type'] == 'not-barcoded') ==
                    want_not_barcoded)]
        else:
            raise NotImplementedError(
                "Pre-processing of %s files currently not supported."
                % rd_ft)

        preprocess_options = []
        for param in param_iter:
            details = '<br>'.join(
                "<b>%s:</b> %s" % (k, v)
                for k, v in viewitems(param.values))
            preprocess_options.append((param.id, param.name, details))

        preprocessed_data = prep_template.preprocessed_data

        # Preprocessing needs linked files and, for target gene data, the
        # columns required for demultiplexing
        raw_data_files = rd.get_filepaths()
        if len(raw_data_files) == 0:
            show_preprocess_btn = False
            no_preprocess_msg = (
                "Preprocessing disabled because there are no files "
                "linked with the Raw Data")
        elif prep_template.data_type() in TARGET_GENE_DATA_TYPES:
            raw_forward_fps = [fp for _, fp, ftype in raw_data_files
                               if ftype == 'raw_forward_seqs']
            if len(raw_forward_fps) > 1:
                key = 'demultiplex_multiple'
            else:
                key = 'demultiplex'
            missing_cols = prep_template.check_restrictions(
                [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])

            # per-sample FASTQ only requires the run_prefix column
            if rd_ft == 'per_sample_FASTQ':
                show_preprocess_btn = 'run_prefix' not in missing_cols
            else:
                show_preprocess_btn = len(missing_cols) == 0

            no_preprocess_msg = None
            if not show_preprocess_btn:
                no_preprocess_msg = (
                    "Preprocessing disabled due to missing columns in "
                    "the prep template: %s" % ', '.join(missing_cols))

    preprocessing_status = prep_template.preprocessing_status

    return self.render_string(
        "study_description_templates/prep_template_info_tab.html",
        pt_id=prep_template.id,
        study_id=study.id,
        raw_data=raw_data_id,
        current_template_fp=current_template_fp,
        current_qiime_fp=current_qiime_fp,
        show_old_templates=show_old_templates,
        old_templates=old_templates,
        show_old_qiime_fps=show_old_qiime_fps,
        old_qiime_fps=old_qiime_fps,
        filetypes=filetypes,
        files=files,
        other_studies_rd=other_studies_rd,
        prep_template=prep_template,
        study=study,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        investigation_type=prep_template.investigation_type,
        is_editable=is_editable,
        preprocess_options=preprocess_options,
        preprocessed_data=preprocessed_data,
        preprocessing_status=preprocessing_status,
        show_preprocess_btn=show_preprocess_btn,
        no_preprocess_msg=no_preprocess_msg)
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to guess
    how those files should be added to the artifact of the given type. Uses
    the ``run_prefix`` column of the prep template, when available, to group
    the files.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict with the keys:
        - status: str, always 'success'
        - message: str, empty or the list of run_prefix values that matched
          more files than there are supported filepath types
        - remaining: sorted list of str, the files that could not be
          categorized
        - file_types: list of (str, bool, list of str), the supported
          filepath types, whether each is required and its proposed files
        - num_prefixes: int, the number of distinct run_prefix values in
          the prep template (0 when run_prefix matching was not applied)
        - artifacts: list of (int, str), the id and label of the artifacts
          the user can import files from

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        # Report the prep id that was requested, not the study that owns it
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (prep_template_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()

    # run_prefix matching only applies when a 'raw_*' type other than
    # 'raw_sff' is supported and the prep template has the column
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)

        # Group files by prefix. Each file is claimed by the first (i.e.
        # longest) prefix it matches and dropped from the pool, so a
        # shorter prefix can not claim it again. The whole upload entry is
        # carried along so the pool keeps its original tuple shape.
        sfiles = defaultdict(list)
        for prefix in prep_prefixes:
            unmatched = []
            for entry in uploaded:
                fname = entry[1]
                if fname.startswith(prefix):
                    sfiles[prefix].append(fname)
                else:
                    unmatched.append(entry)
            uploaded = unmatched

        # Whatever is left in the pool matched no prefix at all
        remaining.extend(entry[1] for entry in uploaded)

        supp_file_types_len = len(supp_file_types)
        for prefix, fnames in sfiles.items():
            len_files = len(fnames)
            # a group with more files than available filepath types can
            # not be assigned automatically: report it and leave its files
            # in the remaining group
            if len_files > supp_file_types_len:
                remaining.extend(fnames)
                message.append("'%s' has %d matches." % (prefix, len_files))
            else:
                fnames.sort()
                selected.append(fnames)
    else:
        num_prefixes = 0
        remaining = [f for _, f, _ in uploaded]

    # get file_types, format: filetype, required, list of files. The i-th
    # filepath type receives the i-th file of every selected group
    file_types = [(t, req, [group[i] for group in selected if i < len(group)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case the
    # files should be imported from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for owner, artifacts in user_artifacts.items():
        study_label = "%s (%d)" % (owner.title, owner.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message
               else '\n'.join(['Check these run_prefix:'] + message))

    return {
        'status': 'success',
        'message': message,
        'remaining': sorted(remaining),
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
def render(self, study, prep_template, full_access, ena_terms,
           user_defined_terms):
    """Render the prep template information tab

    Parameters
    ----------
    study : object
        The study that owns the prep template
    prep_template : object
        The prep template being shown
    full_access : bool
        Not used by this method
    ena_terms : object
        Forwarded untouched to the template
    user_defined_terms : object
        Forwarded untouched to the template

    Returns
    -------
    object
        Whatever ``self.render_string`` returns for the
        ``prep_template_info_tab.html`` template

    Raises
    ------
    NotImplementedError
        If the artifact type of the attached raw data is not one of 'SFF',
        'FASTA', 'FASTQ' or 'per_sample_FASTQ'
    """
    user = self.current_user
    is_local_request = is_localhost(self.request.headers['host'])

    # Both the prep template and the qiime mapping files have the same
    # type; they are told apart by the substring 'qiime' in the basename
    template_fps = []
    qiime_fps = []
    for fp_id, fp in prep_template.get_filepaths():
        if 'qiime' in basename(fp):
            bucket, label = qiime_fps, 'Qiime mapping'
        else:
            bucket, label = template_fps, 'Prep template'
        bucket.append(
            download_link_or_path(is_local_request, fp, fp_id, label))

    # get_filepaths returns the paths sorted from newest to oldest, so the
    # first entry of each list is the current one
    current_template_fp = template_fps[0]
    current_qiime_fp = qiime_fps[0]

    show_old_templates = len(template_fps) > 1
    old_templates = template_fps[1:] if show_old_templates else None

    show_old_qiime_fps = len(qiime_fps) > 1
    old_qiime_fps = qiime_fps[1:] if show_old_qiime_fps else None

    filetypes = [(ft, ft_id, fp_type_by_ft[ft])
                 for ft, ft_id in viewitems(get_artifact_types())]
    filetypes.sort(key=itemgetter(1))

    uploads = get_files_from_uploads_folders(str(study.id))
    files = [fname for _, fname in uploads]

    other_studies_rd = sorted(viewitems(_get_accessible_raw_data(user)))

    # A prep template can be modified if its status is sandbox
    is_editable = prep_template.status == 'sandbox'

    raw_data = prep_template.artifact
    preprocess_options = []
    preprocessed_data = None
    show_preprocess_btn = True
    no_preprocess_msg = None
    preprocessing_status = 'Not processed'
    preprocessing_status_msg = ""
    if raw_data:
        raw_data_ft = raw_data.artifact_type

        # With raw data attached the template can be preprocessed; pick
        # the parameter sets that apply to the raw data type. The command
        # ids are hardcoded until the interface is refactored
        if raw_data_ft in ('SFF', 'FASTA'):
            param_iter = Command(2).default_parameter_sets
        elif raw_data_ft in ('FASTQ', 'per_sample_FASTQ'):
            # per-sample FASTQ uses only the 'not-barcoded' sets, plain
            # FASTQ uses every other set
            want_not_barcoded = raw_data_ft == 'per_sample_FASTQ'
            param_iter = [
                p for p in Command(1).default_parameter_sets
                if ((p.values['barcode_type'] == 'not-barcoded') ==
                    want_not_barcoded)]
        else:
            raise NotImplementedError(
                "Pre-processing of %s files currently not supported."
                % raw_data_ft)

        preprocess_options = []
        for param in param_iter:
            details = '<br>'.join(
                "<b>%s:</b> %s" % (k, v)
                for k, v in viewitems(param.values))
            preprocess_options.append((param.id, param.name, details))

        preprocessed_data = raw_data.children

        # Preprocessing needs linked files and, for target gene data, the
        # columns required for demultiplexing
        raw_data_files = raw_data.filepaths
        if len(raw_data_files) == 0:
            show_preprocess_btn = False
            no_preprocess_msg = (
                "Preprocessing disabled because there are no files "
                "linked with the Raw Data")
        elif prep_template.data_type() in TARGET_GENE_DATA_TYPES:
            raw_forward_fps = [fp for _, fp, ftype in raw_data_files
                               if ftype == 'raw_forward_seqs']
            if len(raw_forward_fps) > 1:
                key = 'demultiplex_multiple'
            else:
                key = 'demultiplex'
            missing_cols = prep_template.check_restrictions(
                [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])

            # per-sample FASTQ only requires the run_prefix column
            if raw_data_ft == 'per_sample_FASTQ':
                show_preprocess_btn = 'run_prefix' not in missing_cols
            else:
                show_preprocess_btn = len(missing_cols) == 0

            no_preprocess_msg = None
            if not show_preprocess_btn:
                no_preprocess_msg = (
                    "Preprocessing disabled due to missing columns in "
                    "the prep template: %s" % ', '.join(missing_cols))

        # Check the processing status
        status_info = get_artifact_processing_status(raw_data)
        preprocessing_status, preprocessing_status_msg = status_info

    ebi_link = None
    if prep_template.is_submitted_to_ebi:
        ebi_link = EBI_LINKIFIER.format(study.ebi_study_accession)

    return self.render_string(
        "study_description_templates/prep_template_info_tab.html",
        raw_data=raw_data,
        current_template_fp=current_template_fp,
        current_qiime_fp=current_qiime_fp,
        show_old_templates=show_old_templates,
        old_templates=old_templates,
        show_old_qiime_fps=show_old_qiime_fps,
        old_qiime_fps=old_qiime_fps,
        filetypes=filetypes,
        files=files,
        other_studies_rd=other_studies_rd,
        prep_template=prep_template,
        study=study,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        investigation_type=prep_template.investigation_type,
        is_editable=is_editable,
        preprocess_options=preprocess_options,
        preprocessed_data=preprocessed_data,
        preprocessing_status=preprocessing_status,
        preprocessing_status_message=preprocessing_status_msg,
        show_preprocess_btn=show_preprocess_btn,
        no_preprocess_msg=no_preprocess_msg,
        ebi_link=ebi_link)
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to guess
    how those files should be added to the artifact of the given type. Uses
    the ``run_prefix`` column of the prep template, when available, to group
    the files.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict with the keys:
        - status: str, always 'success'
        - message: str, empty or the list of run_prefix values that matched
          more files than there are supported filepath types
        - remaining: sorted list of str, the files that could not be
          categorized
        - file_types: list of (str, bool, list of str), the supported
          filepath types, whether each is required and its proposed files
        - num_prefixes: int, the number of distinct run_prefix values in
          the prep template (0 when run_prefix matching was not applied)
        - artifacts: list of (int, str), the id and label of the artifacts
          the user can import files from

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        # Report the prep id that was requested, not the study that owns it
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (prep_template_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    pt = pt.to_dataframe()

    # run_prefix matching only applies when a 'raw_*' type other than
    # 'raw_sff' is supported and the prep template has the column
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)

        # Group files by prefix. Each file is claimed by the first (i.e.
        # longest) prefix it matches and dropped from the pool in the same
        # pass, so a shorter prefix can not claim it again.
        sfiles = defaultdict(list)
        for prefix in prep_prefixes:
            unmatched = []
            for entry in uploaded:
                _, fname = entry
                if fname.startswith(prefix):
                    sfiles[prefix].append(fname)
                else:
                    unmatched.append(entry)
            uploaded = unmatched

        # Whatever is left in the pool matched no prefix at all
        remaining.extend(fname for _, fname in uploaded)

        supp_file_types_len = len(supp_file_types)
        for prefix, fnames in viewitems(sfiles):
            len_files = len(fnames)
            # a group with more files than available filepath types can
            # not be assigned automatically: report it and leave its files
            # in the remaining group
            if len_files > supp_file_types_len:
                remaining.extend(fnames)
                message.append("'%s' has %d matches." % (prefix, len_files))
            else:
                fnames.sort()
                selected.append(fnames)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # get file_types, format: filetype, required, list of files. The i-th
    # filepath type receives the i-th file of every selected group
    file_types = [(t, req, [group[i] for group in selected if i < len(group)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case the
    # files should be imported from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for owner, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (owner.title, owner.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))

    message = ('' if not message
               else '\n'.join(['Check these run_prefix:'] + message))

    return {'status': 'success',
            'message': message,
            'remaining': sorted(remaining),
            'file_types': file_types,
            'num_prefixes': num_prefixes,
            'artifacts': artifact_options}
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, always 'success'
        - message: str, always empty
        - name: str, the name of the prep template
        - files: list of str, the files available to update the prep
          template
        - download_prep: the filepath_id of the first prep file, or None if
          the template has none
        - download_qiime: the filepath_id of the first qiime mapping file,
          or None if the template has none
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: the investigation type of the template
        - ontology: the value returned by ``_get_ENA_ontology``
        - artifact_attached: bool, whether the template has an artifact
          attached
        - study_id: the study id of the template
        - editable: whether the user can edit the study and no update job
          is currently running
        - data_type: the data type of the template
        - alert_type: str, the kind of alert to show ('' if none)
        - is_submitted_to_ebi: the template's ``is_submitted_to_ebi`` value
        - alert_message: str, the alert text ('' if none)
    """
    # Currently there is no name attribute, but it will be soon
    name = "Prep information %d" % prep_id
    pt = PrepTemplate(prep_id)

    # Defaults used when no update job is registered for this template
    processing = False
    alert_type = ''
    alert_msg = ''

    redis_key = PREP_TEMPLATE_KEY_FORMAT % prep_id
    job_info = r_client.get(redis_key)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if job_id:
            redis_info = loads(r_client.get(job_id))
            status_msg = redis_info['status_msg']
            processing = status_msg == 'Running'
            if processing:
                alert_type = 'info'
                alert_msg = 'This prep template is currently being updated'
            else:
                job_result = redis_info['return']
                alert_type = job_result['status']
                alert_msg = job_result['message'].replace('\n', '</br>')
                if status_msg == 'Success':
                    # Store the outcome under the template key with no job
                    # id, so the next request reads it from there instead
                    # of looking up the job again
                    payload = {'job_id': None,
                               'status': alert_type,
                               'message': alert_msg}
                    r_client.set(redis_key, dumps(payload))
        else:
            # No job attached: the stored entry already holds the outcome
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    files = [f for _, f in get_files_from_uploads_folders(study_id)
             if f.endswith(('.txt', '.tsv'))]

    # The call to list is needed because keys is an iterator
    num_samples = len(list(pt.keys()))
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    # Retrieve the information to download the prep template and QIIME
    # mapping file. See issue https://github.com/biocore/qiita/issues/1675
    # Only the first filepath of each kind is offered; a kind with no
    # filepath is reported as None instead of failing the whole request
    download_prep = None
    download_qiime = None
    for fp_id, fp in pt.get_filepaths():
        if 'qiime' in basename(fp):
            if download_qiime is None:
                download_qiime = fp_id
        elif download_prep is None:
            download_prep = fp_id

    ontology = _get_ENA_ontology()

    editable = Study(study_id).can_edit(User(user_id)) and not processing

    return {'status': 'success',
            'message': '',
            'name': name,
            'files': files,
            'download_prep': download_prep,
            'download_qiime': download_qiime,
            'num_samples': num_samples,
            'num_columns': num_columns,
            'investigation_type': investigation_type,
            'ontology': ontology,
            'artifact_attached': artifact_attached,
            'study_id': study_id,
            'editable': editable,
            'data_type': pt.data_type(),
            'alert_type': alert_type,
            'is_submitted_to_ebi': pt.is_submitted_to_ebi,
            'alert_message': alert_msg}
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files of a study, pre-sorted for an artifact type

    Files whose name starts with one of the prep template's ``run_prefix``
    values are proposed for the first supported filepath type; all the
    other files are reported as remaining.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict with the keys:
        - status: str, always 'success'
        - message: str, always empty
        - remaining: list of str, the files not pre-selected
        - file_types: list of (str, bool, list of str), the supported
          filepath types, whether each is required and its proposed files
        - num_prefixes: int, the number of distinct run_prefix values used
          (0 when run_prefix matching was not applied)
        - artifacts: list of (int, str), the id and label of the artifacts
          the user can import files from
    """
    supp_file_types = supported_filepath_types(artifact_type)
    uploaded = get_files_from_uploads_folders(study_id)
    prep_df = PrepTemplate(prep_template_id).to_dataframe()

    selected = []
    remaining = []

    # run_prefix matching only applies when a 'raw_*' type is supported
    # and the prep template has the column
    has_raw_type = any(name.startswith('raw_')
                       for name, _ in supp_file_types)
    if has_raw_type and 'run_prefix' in prep_df.columns:
        prep_prefixes = tuple(set(prep_df['run_prefix']))
        num_prefixes = len(prep_prefixes)
        for _, filename in uploaded:
            if filename.startswith(prep_prefixes):
                bucket = selected
            else:
                bucket = remaining
            bucket.append(filename)
    else:
        num_prefixes = 0
        remaining = [filename for _, filename in uploaded]

    # Nothing smarter can be done about assigning files to types, so the
    # first supported type receives every pre-selected file (an empty list
    # when run_prefix matching was skipped) and the rest start empty
    rest = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
    first = supp_file_types[0]
    file_types = [(first[0], first[1], selected)] + rest

    # Artifacts the user has access to, in case the files should be
    # imported from another artifact; the current study is always included
    user_artifacts = User(user_id).user_artifacts(
        artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)

    artifact_options = []
    for owner, artifacts in viewitems(user_artifacts):
        label = "%s (%d)" % (owner.title, owner.id)
        artifact_options.extend(
            (a.id, "%s - %s (%d)" % (label, a.name, a.id))
            for a in artifacts)

    return {
        'status': 'success',
        'message': '',
        'remaining': remaining,
        'file_types': file_types,
        'num_prefixes': num_prefixes,
        'artifacts': artifact_options
    }
def render(self, study):
    """Renders the study information tab for the given study

    Parameters
    ----------
    study : object
        The study to show (presumably a qiita Study - confirm against the
        caller). Only its `info`, `id`, `publications`, `status`,
        `ebi_submission_status` and `ebi_study_accession` attributes are
        read here.

    Returns
    -------
    object
        Whatever `self.render_string` returns for the
        "study_description_templates/study_information_tab.html" template
    """
    info = study.info
    st_id = study.id

    # The template variables are collected here and handed over to
    # render_string as keyword arguments at the end
    context = {
        'abstract': info['study_abstract'],
        'description': info['study_description'],
        'id': st_id,
        'study_id': st_id,
    }

    # Each publication can contribute a DOI link, a PubMed link or both
    links = []
    for doi, pmid in study.publications:
        for value, linkify in ((doi, doi_linkifier),
                               (pmid, pubmed_linkifier)):
            if value is not None:
                links.append(linkify([value]))
    context['publications'] = ", ".join(links)

    pi = StudyPerson(info['principal_investigator_id'])
    context['principal_investigator'] = study_person_linkifier(
        (pi.email, pi.name))

    # These values are passed to the template under the same name they
    # have in the study info
    for key in ('number_samples_promised', 'number_samples_collected',
                'metadata_complete'):
        context[key] = info[key]

    # Data types ordered by the second element of each (key, value) pair
    context['data_types'] = sorted(viewitems(get_data_types()),
                                   key=itemgetter(1))

    # Files from the uploads folder that the user can pick as the sample
    # template of the study: only names ending with 'txt' or 'tsv'
    uploads = get_files_from_uploads_folders(str(st_id))
    context['files'] = [fname for _, fname in uploads
                        if fname.endswith(('txt', 'tsv'))]

    # All the filepaths of the sample template, or an empty list if the
    # study does not have a sample template yet
    context['sample_templates'] = (
        SampleTemplate(st_id).get_filepaths()
        if SampleTemplate.exists(st_id) else [])

    # Whether the request came from a local source
    context['is_local_request'] = is_localhost(self.request.headers['host'])

    # The sample template can be chosen only if the study is sandboxed or
    # the current user is an admin
    context['show_select_sample'] = (
        study.status == 'sandbox' or self.current_user.level == 'admin')

    # EBI information; the accession is turned into a link only when set
    context['ebi_status'] = study.ebi_submission_status
    accession = study.ebi_study_accession
    context['ebi_accession'] = (
        EBI_LINKIFIER.format(accession) if accession else accession)

    return self.render_string(
        "study_description_templates/study_information_tab.html",
        **context)