def get_release_info(study_status='public'):
    """Fetch the release details stored in redis for a study status

    Parameters
    ----------
    study_status : str, optional
        The study status whose release is looked up. It is expected to be
        'public' in regular use; 'private' and 'sandbox' are also accepted,
        which helps with testing.

    Returns
    -------
    tuple
        The release MD5, filepath and timestamp, in that order. Any value
        that is not present in redis is replaced by an empty string.
    """
    key_prefix = '%s:release:%s' % (qiita_config.portal, study_status)

    values = []
    for field in ('md5sum', 'filepath', 'time'):
        stored = r_client.get('%s:%s' % (key_prefix, field))
        values.append('' if stored is None else stored)

    md5sum, filepath, timestamp = values
    return md5sum, filepath, timestamp
def test_sample_template_handler_post_request(self):
    """Exercise the error paths and both upload modes of the POST handler"""
    def _assert_job_submitted(response):
        # The handler only reports the job, and the job information must
        # have been stored in redis under the study key
        self.assertCountEqual(response.keys(), ['job'])
        stored = r_client.get('sample_template_1')
        self.assertIsNotNone(stored)
        # Wait until the job is done
        wait_for_processing_job(loads(stored)['job_id'])

    # The user doesn't have access to the study
    with self.assertRaisesRegex(HTTPError,
                                'User does not have access to study'):
        sample_template_handler_post_request(
            1, User('*****@*****.**'), 'ignored')

    # The study doesn't exist
    user = User('*****@*****.**')
    with self.assertRaisesRegex(HTTPError, 'Study does not exist'):
        sample_template_handler_post_request(1000000, user, 'ignored')

    # The file doesn't exist
    with self.assertRaisesRegex(HTTPError, 'Filepath not found'):
        sample_template_handler_post_request(1, user, 'DoesNotExist.txt')

    # The file looks like a mapping file but no data_type is provided
    study_uploads = join(get_mountpoint('uploads')[0][1], '1')
    handle, mapping_fp = mkstemp(suffix='.txt', dir=study_uploads)
    self._clean_up_files.append(mapping_fp)
    close(handle)
    with open(mapping_fp, 'w') as out:
        out.write('#SampleID\tCol1\nSample1\tVal1')

    with self.assertRaisesRegex(
            HTTPError,
            'Please, choose a data type if uploading a QIIME '
            'mapping file'):
        sample_template_handler_post_request(1, user, mapping_fp)

    # Success
    _assert_job_submitted(
        sample_template_handler_post_request(1, user, 'uploaded_file.txt'))

    # Direct upload
    _assert_job_submitted(
        sample_template_handler_post_request(
            1, user, mapping_fp, data_type='16S', direct_upload=True))
def test_create_sample_template(self):
    """Check the error and the success-with-warning paths of the task"""
    def _job_params(study_id):
        return {'fp': self.fp, 'study_id': study_id,
                'is_mapping_file': False, 'data_type': None}

    # Error: study 1 already has a sample template
    failing_job = self._create_job('create_sample_template', _job_params(1))
    private_task(failing_job.id)
    self.assertEqual(failing_job.status, 'error')
    self.assertIn("The 'SampleTemplate' object with attributes (id: 1) "
                  "already exists.", failing_job.log.msg)

    # Success with a warning, using a freshly created study
    study_info = {
        "timeseries_type_id": '1',
        "metadata_complete": 'true',
        "mixs_compliant": 'true',
        "study_alias": "TDST",
        "study_description": "Test create sample template",
        "study_abstract": "Test create sample template",
        "principal_investigator_id": StudyPerson(1)}
    new_study = Study.create(User('*****@*****.**'),
                             "Create Sample Template test", study_info)
    job = self._create_job('create_sample_template',
                           _job_params(new_study.id))
    private_task(job.id)
    self.assertEqual(job.status, 'success')

    # The task must have left the job and alert information in redis
    stored = r_client.get("sample_template_%d" % new_study.id)
    self.assertIsNotNone(stored)
    payload = loads(stored)
    self.assertCountEqual(payload, ['job_id', 'alert_type', 'alert_msg'])
    self.assertEqual(payload['job_id'], job.id)
    self.assertEqual(payload['alert_type'], 'warning')
    self.assertIn(
        'Some functionality will be disabled due to missing columns:',
        payload['alert_msg'])
def prep_template_jobs_get_req(prep_id, user_id):
    """Returns the information of the job attached to the prep template

    Parameters
    ----------
    prep_id : int
        Prep template ID to get the job information for
    user_id : str
        User making the request

    Returns
    -------
    dict
        If `check_access` reports an error, that value is returned as is.
        Otherwise a dictionary keyed by job id, with the 'status', 'step'
        and 'error' of the job stored in redis for this prep template. The
        dictionary is empty if redis holds no job for the prep template.
    """
    prep = PrepTemplate(int(prep_id))
    access_error = check_access(prep.study_id, user_id)
    if access_error:
        return access_error

    stored = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if not stored:
        return {}

    # Missing keys in the stored payload default to an empty string
    job_info = defaultdict(lambda: '', loads(stored))
    job = ProcessingJob(job_info['job_id'])
    details = {'status': job.status,
               'step': job.step,
               'error': job.log.msg if job.log else ""}
    return {job.id: details}
def wait_for_prep_information_job(prep_id, raise_if_none=True):
    """Blocks until the job attached to a prep information is completed

    Parameters
    ----------
    prep_id : int
        Prep template id
    raise_if_none : bool, optional
        If True, raise an AssertionError when the corresponding redis key
        is not set. If False, a missing key is silently ignored.
        Default: True

    Raises
    ------
    AssertionError
        If `raise_if_none` is True and the corresponding redis key is not
        set
    """
    stored = r_client.get('prep_template_%d' % prep_id)
    if stored is None:
        if raise_if_none:
            raise AssertionError("unexpectedly None")
        # Nothing to wait for
        return

    wait_for_processing_job(loads(stored)['job_id'])
def test_sample_template_handler_delete_request(self):
    """Exercise the error paths and the success path of the DELETE handler"""
    # Test user doesn't have access
    # NOTE: assertRaisesRegex replaces the deprecated assertRaisesRegexp
    # alias, which is no longer available in recent Python versions
    with self.assertRaisesRegex(HTTPError,
                                'User does not have access to study'):
        sample_template_handler_delete_request(
            1, User('*****@*****.**'))

    # Test study doesn't exist
    user = User('*****@*****.**')
    with self.assertRaisesRegex(HTTPError, 'Study does not exist'):
        sample_template_handler_delete_request(1000000, user)

    # Test sample information doesn't exist
    new_study = self._create_study('Study for deleting test')
    with self.assertRaisesRegex(HTTPError, "Study %s doesn't have sample "
                                "information" % new_study.id):
        sample_template_handler_delete_request(new_study.id, user)

    # Test success
    user = User('*****@*****.**')
    obs = sample_template_handler_delete_request(1, user)
    # A dict keys view never compares equal to a list in Python 3, so the
    # keys are compared ignoring the container type
    self.assertCountEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
def analysis_description_handler_get_request(analysis_id, user):
    """Returns the analysis information

    Parameters
    ----------
    analysis_id : int
        The analysis id
    user : qiita_db.user.User
        The user performing the request

    Returns
    -------
    dict
        The analysis name, id, public flag, description and mapping file
        id, the artifacts used in the analysis keyed by artifact id as
        (study id, study title, merging scheme, samples), and the alert
        type and message to show, under the keys 'analysis_name',
        'analysis_id', 'analysis_is_public', 'analysis_description',
        'analysis_mapping_id', 'artifacts', 'alert_type' and 'alert_msg'.
    """
    analysis = Analysis(analysis_id)
    check_analysis_access(user, analysis)

    job_info = r_client.get("analysis_%s" % analysis.id)
    alert_type = 'info'
    alert_msg = ''
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if job_id:
            r_payload = r_client.get(job_id)
            if r_payload:
                # Decode the payload that was just fetched instead of
                # querying redis again: a second read could return None
                # if the key disappeared in between
                redis_info = loads(r_payload)
                if redis_info['status_msg'] == 'running':
                    alert_msg = ('An artifact is being deleted from this '
                                 'analysis')
                elif redis_info['return'] is not None:
                    alert_type = redis_info['return']['status']
                    alert_msg = redis_info['return']['message'].replace(
                        '\n', '</br>')

    artifacts = {}
    for aid, samples in analysis.samples.items():
        artifact = Artifact(aid)
        study = artifact.study
        artifacts[aid] = (
            study.id, study.title, artifact.merging_scheme, samples)

    return {'analysis_name': analysis.name,
            'analysis_id': analysis.id,
            'analysis_is_public': analysis.is_public,
            'analysis_description': analysis.description,
            'analysis_mapping_id': analysis.mapping_file,
            'alert_type': alert_type,
            'artifacts': artifacts,
            'alert_msg': alert_msg}
def sample_template_overview_handler_get_request(study_id, user):
    """Gathers the overview information of the study's sample template

    Parameters
    ----------
    study_id : int
        The study id
    user : qiita_db.user.User
        The user performing the request

    Returns
    -------
    dict
        Whether the sample template exists, the uploaded files that can be
        used as a sample template, the available data types (only filled
        when the template doesn't exist), whether the user can edit the
        study, the id of the job stored in redis (if any), the id of the
        current template file, the basenames of the older template files
        and the number of samples and columns.
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    # Check if the sample template exists
    exists = SampleTemplate.exists(study_id)

    # Information that is always provided: the uploaded files that can be
    # a sample template file ...
    candidate_exts = ('txt', 'tsv', 'xlsx')
    files = []
    for _, fname, _ in get_files_from_uploads_folders(study_id):
        if fname.endswith(candidate_exts):
            files.append(fname)

    # ... and, if there is a job associated with the sample information,
    # the job id
    stored_job = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    job = loads(stored_job)['job_id'] if stored_job else None

    # Values used when the corresponding information is not available
    data_types = []
    st_fp_id = None
    old_files = []
    num_samples = 0
    num_cols = 0

    if not exists:
        # The data types are needed in case the user uploads a QIIME
        # mapping file
        data_types = sorted(data_types_get_req()['data_types'])
    else:
        st = SampleTemplate(study_id)
        all_st_files = st.get_filepaths()
        # The first file of the list is the current sample template file;
        # only its id is needed so the user can download it
        current_file = all_st_files.pop(0)
        st_fp_id = current_file[0]
        # The remaining ones are old files; only their basename is needed
        old_files = [basename(path) for _, path in all_st_files]
        # Count the samples without building a throwaway list of keys
        for _ in st.keys():
            num_samples += 1
        # The number of columns
        num_cols = len(st.categories())

    return {'exists': exists,
            'uploaded_files': files,
            'data_types': data_types,
            'user_can_edit': Study(study_id).can_edit(user),
            'job': job,
            'download_id': st_fp_id,
            'old_files': old_files,
            'num_samples': num_samples,
            'num_columns': num_cols}
def test_artifact_post_request(self):
    """Check access control and that the artifact deletion job is run"""
    # No access
    with self.assertRaises(QiitaHTTPError):
        artifact_post_req(User('*****@*****.**'), 1)

    obs = artifact_post_req(User('*****@*****.**'), 2)
    # A dict keys view never compares equal to a list in Python 3, so the
    # keys are compared ignoring the container type
    self.assertCountEqual(obs.keys(), ['job'])
    # Wait until the job is completed
    wait_for_prep_information_job(1)
    # Check that the delete function has been actually called
    job = ProcessingJob(loads(r_client.get('prep_template_1'))['job_id'])
    self.assertEqual(job.status, 'error')
    self.assertIn('Cannot delete artifact 2', job.log.msg)
def wrapper(handler, *args, **kwargs):
    """Check the OAuth2 bearer token of the request before calling `f`

    The 'Authorization' header must have the form 'Bearer <token>' and the
    token must exist in redis. Tokens obtained with the 'password' grant
    type are additionally subject to a per client/user request counter
    stored in redis. When any check fails an OAuth2 error is reported
    through `_oauth_error` and `f` is not called.

    Parameters
    ----------
    handler : object
        The request handler; its `request.headers` are inspected
    *args, **kwargs
        Passed through untouched to `f`

    Returns
    -------
    object or None
        Whatever `f` returns, or None if the request was rejected
    """
    header = handler.request.headers.get('Authorization', None)
    if header is None:
        _oauth_error(handler, 'Oauth2 error: invalid access token',
                     'invalid_request')
        return

    token_info = header.split()
    # Based on RFC6750 if reply is not 2 elements in the format of:
    # ['Bearer', token] we assume a wrong reply
    if len(token_info) != 2 or token_info[0] != 'Bearer':
        _oauth_error(handler, 'Oauth2 error: invalid access token',
                     'invalid_grant')
        return

    token = token_info[1]
    db_token = r_client.hgetall(token)
    if not db_token:
        # token has timed out or never existed
        _oauth_error(handler, 'Oauth2 error: token has timed out',
                     'invalid_grant')
        return

    # Check daily rate limit for key if password style key
    if db_token[b'grant_type'] == b'password':
        limit_key = '%s_%s_daily_limit' % (
            db_token[b'client_id'].decode('ascii'),
            db_token[b'user'].decode('ascii'))
        if r_client.get(limit_key) is None:
            # Set limit to 5,000 requests per day
            r_client.setex(limit_key, 86400, 5000)
        else:
            # DECR returns the decremented value, so use it directly
            # rather than reading the key again: the key could expire
            # between the two calls and the extra read would return None
            remaining = int(r_client.decr(limit_key))
            if remaining <= 0:
                _oauth_error(
                    handler, 'Oauth2 error: daily request limit reached',
                    'invalid_grant')
                return

    return f(handler, *args, **kwargs)
def test_authenticate_header_username(self):
    """Check a user token is accepted and that rate limiting is enforced"""
    bearer = 'Bearer ' + self.user_token

    response = self.get('/qiita_db/artifacts/1/',
                        headers={'Authorization': bearer})
    self.assertEqual(response.code, 200)

    # Check rate limiting works: the request above must have left the
    # counter at 1
    remaining = int(r_client.get(self.user_rate_key))
    self.assertEqual(remaining, 1)

    # Exhaust the counter and check the next request is rejected
    # NOTE(review): the key literal below looks like it was altered by
    # e-mail obfuscation; presumably it should match self.user_rate_key -
    # confirm
    r_client.setex('[email protected]_daily_limit', 1, 0)
    response = self.get('/qiita_db/artifacts/100/',
                        headers={'Authorization': bearer})
    expected = {
        'error': 'invalid_grant',
        'error_description': 'Oauth2 error: daily request limit reached'}
    self.assertEqual(loads(response.body), expected)
def set_token(self, client_id, grant_type, user=None, timeout=3600):
    """Store a new access token in redis and reply with its JSON description

    Parameters
    ----------
    client_id : str
        Client that requested the token
    grant_type : str
        Type of key being requested
    user : str, optional
        If password grant type requested, the user requesting the key.
    timeout : int, optional
        The timeout, in seconds, for the token. Default 3600

    Returns
    -------
    Nothing is returned; the token information is written as JSON in the
    form expected by RFC6750:
    {'access_token': token,
     'token_type': 'Bearer',
     'expires_in': timeout}

    access_token: the actual token to use
    token_type: 'Bearer', which is the expected token type for Oauth2
    expires_in: time to token expiration, in seconds.
    """
    token = self.generate_access_token()
    created_on = datetime.datetime.now().strftime('%m-%d-%y %H:%M:%S')

    token_info = {'timestamp': created_on,
                  'client_id': client_id,
                  'grant_type': grant_type}
    if user:
        token_info['user'] = user

    # The token is stored as a redis hash that expires after `timeout`
    r_client.hmset(token, token_info)
    r_client.expire(token, timeout)

    if grant_type == 'password':
        # Make sure the client/user pair has a request counter
        limit_key = '%s_%s_daily_limit' % (client_id, user)
        if r_client.get(limit_key) is None:
            # Set limit to 5,000 requests per day
            r_client.setex(limit_key, 86400, 5000)

    reply = {'access_token': token,
             'token_type': 'Bearer',
             'expires_in': timeout}
    self.write(reply)
    self.finish()
def display_template(self, study_id, msg):
    """Render the upload page of a study

    The alert level and message shown in the page, as well as the remote
    URL and remote files, are taken from the job stored in redis for the
    study uploads, if there is one.

    Parameters
    ----------
    study_id : int or str
        The study id; it is converted to int
    msg : str
        Not used by this method
    """
    study_id = int(study_id)
    study = Study(study_id)
    user = self.current_user

    # Values used when there is no job associated with the uploads
    level = 'info'
    message = ''
    remote_url = ''
    remote_files = []

    check_access(user, study, no_public=True, raise_error=True)

    stored = r_client.get(UPLOAD_STUDY_FORMAT % study_id)
    if stored:
        # Missing keys in the stored payload default to an empty string
        job_info = defaultdict(lambda: '', loads(stored))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        url = job.parameters.values['url']

        if job_status not in ('success', 'error'):
            # The job is still being processed
            if job.command.name == 'list_remote_files':
                message = 'Retrieving remote files: listing %s' % url
            else:
                message = 'Retrieving remote files: download %s' % url
        elif job_status == 'error':
            level = 'danger'
            message = job.log.msg.replace('\n', '</br>')
            # making errors nicer for users
            if 'No such file' in message:
                message = 'URL not valid: <i>%s</i>, please review.' % url
        else:
            remote_url = job_info['url']
            remote_files = job_info['files']
            level = job_info['alert_type']
            message = job_info['alert_msg'].replace('\n', '</br>')

    context = dict(
        study_title=study.title,
        study_info=study.info,
        study_id=study_id,
        is_admin=user.level == 'admin',
        extensions=','.join(qiita_config.valid_upload_extension),
        max_upload_size=qiita_config.max_upload_size,
        level=level,
        message=message,
        remote_url=remote_url,
        remote_files=remote_files,
        files=get_files_from_uploads_folders(str(study_id)))
    self.render('upload.html', **context)
def test_update_prep_template(self):
    """Check the task updates the prep template and stores the warning"""
    fd, fp = mkstemp(suffix=".txt")
    close(fd)
    with open(fp, 'w') as f:
        f.write("sample_name\tnew_col\n1.SKD6.640190\tnew_value")

    job = self._create_job('update_prep_template',
                           {'prep_template': 1, 'template_fp': fp})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertEqual(PrepTemplate(1)['1.SKD6.640190']['new_col'],
                     'new_value')

    # The task must have left the job and alert information in redis
    obs = r_client.get("prep_template_1")
    self.assertIsNotNone(obs)
    obs = loads(obs)
    # assertCountEqual is the Python 3 name of assertItemsEqual, which is
    # only available in Python 2
    self.assertCountEqual(obs, ['job_id', 'alert_type', 'alert_msg'])
    self.assertEqual(obs['job_id'], job.id)
    self.assertEqual(obs['alert_type'], 'warning')
    self.assertIn('The following columns have been added to the existing '
                  'template: new_col', obs['alert_msg'])
def get(self):
    """Render the list of the user's analyses and of the public analyses

    For each analysis of the user, the job stored in redis under
    'analysis_delete_<id>' (if any) is inspected to build the messages
    shown in the page, grouped by alert type.
    """
    user = self.current_user
    is_local_request = is_localhost(self.request.headers['host'])

    uanalyses = user.shared_analyses | user.private_analyses
    user_analysis_ids = {a.id for a in uanalyses}

    # Public analyses that are already listed for the user are not
    # repeated in the public list
    panalyses = Analysis.get_by_status('public')
    public_analysis_ids = {a.id for a in panalyses} - user_analysis_ids

    user_analyses = generate_analysis_list(user_analysis_ids)
    public_analyses = generate_analysis_list(public_analysis_ids, True)

    dlop = partial(download_link_or_path, is_local_request)

    # Every value is a string of '<br/>'-terminated messages
    messages = {'info': '', 'danger': ''}
    for analysis_id in user_analysis_ids:
        job_info = r_client.get('analysis_delete_%d' % analysis_id)
        if not job_info:
            continue

        # Missing keys in the stored payload default to an empty string
        job_info = defaultdict(lambda: '', loads(job_info))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        processing = job_status not in ('success', 'error')
        if processing:
            messages['info'] += (
                'Analysis %s is being deleted<br/>' % analysis_id)
        elif job_status == 'error':
            messages['danger'] += (
                job.log.msg.replace('\n', '<br/>') + '<br/>')
        else:
            alert_type = job_info['alert_type']
            # New alert types start as an empty string, like 'info' and
            # 'danger'; starting from a list would make += append the
            # message one character at a time
            if alert_type not in messages:
                messages[alert_type] = ''
            messages[alert_type] += (
                job.log.msg.replace('\n', '<br/>') + '<br/>')

    self.render("list_analyses.html", user_analyses=user_analyses,
                public_analyses=public_analyses, messages=messages,
                dlop=dlop)
def get_release_info(study_status='public'):
    """Fetch the details of the studies release and of the archive release

    Parameters
    ----------
    study_status : str, optional
        The study status whose release is looked up. It is expected to be
        'public' in regular use; 'private' and 'sandbox' are also accepted,
        which helps with testing.

    Returns
    -------
    tuple of two tuples
        The first element describes the studies release of the current
        portal for `study_status` and the second one the archive release.
        Each of them holds the release MD5, filepath and timestamp, in that
        order.
    """
    def _read_release(key_prefix):
        # Values missing from redis are replaced by empty strings so the
        # text is displayed nicely in the GUI
        stored = [r_client.get('%s:%s' % (key_prefix, field))
                  for field in ('md5sum', 'filepath', 'time')]
        return tuple('' if value is None else value for value in stored)

    biom_metadata_release = _read_release(
        '%s:release:%s' % (qiita_config.portal, study_status))
    archive_release = _read_release('release-archive')

    return (biom_metadata_release, archive_release)
def sample_template_overview_handler_get_request(study_id, user):
    """Gathers the overview information of the study's sample template

    Parameters
    ----------
    study_id : int
        The study id
    user : qiita_db.user.User
        The user performing the request

    Returns
    -------
    dict
        Whether the sample template exists, the uploaded files that can be
        used as a sample template, the available data types (only filled
        when the template doesn't exist), whether the user can edit the
        study, the id of the job stored in redis (if any), the id of the
        current template file, the basenames of the older template files,
        the number of samples and columns, the columns themselves, the
        sample restrictions message and the specimen id column.
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    # Check if the sample template exists
    exists = SampleTemplate.exists(study_id)

    # Information that is always provided: the uploaded files that can be
    # a sample template file ...
    uploaded = get_files_from_uploads_folders(study_id)
    files = [fname for _, fname, _ in uploaded
             if fname.endswith(('txt', 'tsv', 'xlsx'))]

    # ... and, if there is a job associated with the sample information,
    # the job id
    job = None
    stored_job = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    if stored_job:
        job = loads(stored_job)['job_id']

    # Values used when the corresponding information is not available
    data_types = []
    st_fp_id = None
    old_files = []
    num_samples = 0
    num_cols = 0
    columns = []
    specimen_id_column = None
    sample_restrictions = ''

    if not exists:
        # The data types are needed in case the user uploads a QIIME
        # mapping file
        data_types = sorted(data_types_get_req()['data_types'])
    else:
        st = SampleTemplate(study_id)
        all_st_files = st.get_filepaths()
        # The first file of the list is the current sample template file;
        # only its id is needed so the user can download it
        current_file = all_st_files.pop(0)
        st_fp_id = current_file[0]
        # The remaining ones are old files; only their basename is needed
        old_files = [basename(path) for _, path in all_st_files]
        # Count the samples without building a throwaway list of keys
        for _ in st.keys():
            num_samples += 1
        columns = st.categories
        # The number of columns
        num_cols = len(columns)
        specimen_id_column = Study(study_id).specimen_id_column
        restrictions = st.validate_restrictions()
        _, sample_restrictions = restrictions

    return {
        'exists': exists,
        'uploaded_files': files,
        'data_types': data_types,
        'user_can_edit': Study(study_id).can_edit(user),
        'job': job,
        'download_id': st_fp_id,
        'old_files': old_files,
        'num_samples': num_samples,
        'num_columns': num_cols,
        'columns': columns,
        'sample_restrictions': sample_restrictions,
        'specimen_id_column': specimen_id_column
    }
def correct_redis_data(key, cmd, values_dict, user):
    """Rewrites the value of a redis key using the job based structure

    Depending on the stored value the key is either deleted or replaced by
    a payload with the keys 'job_id' and, when applicable, 'alert_type' and
    'alert_msg'.

    Parameters
    ----------
    key: str
        The redis key to fix
    cmd : qiita_db.software.Command
        Command to use to create the processing job
    values_dict : dict
        Dictionary used to instantiate the parameters of the command
    user : qiita_db.user.User
        The user that will own the job
    """
    stored = r_client.get(key)
    if not stored:
        # The key doesn't contain any information. Delete the key
        r_client.delete(key)
        return

    info = loads(stored)

    if info['job_id'] is not None:
        if not info.get('is_qiita_job'):
            # These jobs don't contain any information on the live
            # dump. We can safely delete the key
            r_client.delete(key)
            return
        try:
            # Instantiating the job checks that it is still known
            ProcessingJob(info['job_id'])
            payload = {
                'job_id': info['job_id'],
                'alert_type': info['status'],
                'alert_msg': info['alert_msg']
            }
            r_client.set(key, dumps(payload))
        except (QiitaDBUnknownIDError, KeyError):
            # We somehow lost the information of this job
            # Simply delete the key
            r_client.delete(key)
        return

    # Job is null, we have the information here
    status = info['status']
    if status == 'success':
        # In the success case no information is stored. We can
        # safely delete the key
        r_client.delete(key)
        return

    # Both the warning and the error cases need a new job to hold the
    # message stored in the key
    params = Parameters.load(cmd, values_dict=values_dict)
    job = ProcessingJob.create(user, params)
    if status == 'warning':
        # The job is marked as successful and the warning message is
        # stored as expected by the new structure
        job._set_status('success')
        payload = {
            'job_id': job.id,
            'alert_type': 'warning',
            'alert_msg': info['message']
        }
    else:
        # The status is error: the job is marked as failed with the
        # given error message
        job._set_error(info['message'])
        payload = {'job_id': job.id}
    r_client.set(key, dumps(payload))
def study_get_req(study_id, user_id):
    """Returns information available for the given study

    Parameters
    ----------
    study_id : int
        Study id to get the information for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        If `check_access` reports an error, that value is returned as is.
        Otherwise a dictionary of the form
        {'status': 'success',
         'message': '',
         'study_info': dict,
         'editable': bool}
        where study_info contains the study information in the form
        {col_name: value, ...} and editable tells whether the user can
        edit the study.
    """
    def _person_summary(person):
        # StudyPerson objects are reduced to plain values for display
        return {
            'name': person.name,
            'email': person.email,
            'affiliation': person.affiliation
        }

    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    # Can only pass ids over API, so need to instantiate object
    study = Study(study_id)
    study_info = study.info

    # Add needed info that is not part of the initial info pull
    dois, pids = [], []
    study_info['publication_doi'] = dois
    study_info['publication_pid'] = pids
    for pub, is_doi in study.publications:
        (dois if is_doi else pids).append(pub)

    study_info['study_id'] = study.id
    study_info['study_title'] = study.title
    study_info['shared_with'] = [s.id for s in study.shared_with]
    study_info['status'] = study.status
    study_info['ebi_study_accession'] = study.ebi_study_accession
    study_info['ebi_submission_status'] = study.ebi_submission_status

    study_info['principal_investigator'] = _person_summary(
        study_info['principal_investigator'])
    lab_person = study_info['lab_person']
    if lab_person:
        study_info['lab_person'] = _person_summary(lab_person)

    samples = study.sample_template
    if samples is None:
        study_info['num_samples'] = 0
    else:
        study_info['num_samples'] = len(list(samples))
    study_info['owner'] = study.owner.id

    # Study.has_access no_public=True, will return True only if the user_id
    # is the owner of the study or if the study is shared with the user_id
    study_info['has_access_to_raw_data'] = study.has_access(
        User(user_id), True)

    artifact_types = [a.artifact_type for a in study.artifacts()]
    study_info['show_biom_download_button'] = 'BIOM' in artifact_types
    study_info['show_raw_download_button'] = any(
        [pt.artifact is not None for pt in study.prep_templates()])

    # getting study processing status from redis
    study_info['level'] = ''
    study_info['message'] = ''
    stored = r_client.get(STUDY_KEY_FORMAT % study_id)
    if stored:
        # Missing keys in the stored payload default to an empty string
        job_info = defaultdict(lambda: '', loads(stored))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        if job_status not in ('success', 'error'):
            study_info['level'] = 'info'
            study_info['message'] = 'This study is currently being processed'
        elif job_status == 'error':
            study_info['level'] = 'danger'
            study_info['message'] = job.log.msg.replace('\n', '</br>')
        else:
            study_info['level'] = job_info['alert_type']
            study_info['message'] = job_info['alert_msg'].replace(
                '\n', '</br>')

    return {
        'status': 'success',
        'message': '',
        'study_info': study_info,
        'editable': study.can_edit(User(user_id))
    }
def correct_redis_data(key, cmd, values_dict, user):
    """Rewrites the value of a redis key using the job based structure

    Depending on the stored value the key is either deleted or replaced by
    a payload with the keys 'job_id' and, when applicable, 'alert_type' and
    'alert_msg'.

    Parameters
    ----------
    key: str
        The redis key to fix
    cmd : qiita_db.software.Command
        Command to use to create the processing job
    values_dict : dict
        Dictionary used to instantiate the parameters of the command
    user : qiita_db.user.User
        The user that will own the job
    """
    def _new_job():
        # Creates the job that will hold the message stored in the key
        params = Parameters.load(cmd, values_dict=values_dict)
        return ProcessingJob.create(user, params)

    stored = r_client.get(key)
    if not stored:
        # The key doesn't contain any information. Delete the key
        r_client.delete(key)
        return

    info = loads(stored)
    has_job = info['job_id'] is not None

    if has_job and not info.get('is_qiita_job'):
        # These jobs don't contain any information on the live
        # dump. We can safely delete the key
        r_client.delete(key)
    elif has_job:
        try:
            # Instantiating the job checks that it is still known
            ProcessingJob(info['job_id'])
            payload = {'job_id': info['job_id'],
                       'alert_type': info['status'],
                       'alert_msg': info['alert_msg']}
            r_client.set(key, dumps(payload))
        except (QiitaDBUnknownIDError, KeyError):
            # We somehow lost the information of this job
            # Simply delete the key
            r_client.delete(key)
    elif info['status'] == 'success':
        # Job is null and, in the success case, no information is
        # stored. We can safely delete the key
        r_client.delete(key)
    elif info['status'] == 'warning':
        # In case of warning the key message stores the warning
        # message. A new job is created and marked as successful, and
        # the message is stored as expected by the new structure
        job = _new_job()
        job._set_status('success')
        payload = {'job_id': job.id,
                   'alert_type': 'warning',
                   'alert_msg': info['message']}
        r_client.set(key, dumps(payload))
    else:
        # The status is error. The key message stores the error
        # message. A new job is created and marked as failed with the
        # given error message
        job = _new_job()
        job._set_error(info['message'])
        payload = {'job_id': job.id}
        r_client.set(key, dumps(payload))
def test_prep_template_patch_req(self):
    """Exercise the supported patch operations and all the error paths"""
    sample_values = {
        'center_name': 'ANL',
        'target_subfragment': 'V4',
        'center_project_name': 'Test Project',
        'ebi_submission_accession': None,
        'EMP_status': 'EMP',
        'str_column': 'Value for sample 1',
        'primer': 'GTGCCAGCMGCCGCGGTAA',
        'barcode': 'GTCCGCAAGTTA',
        'run_prefix': "s_G1_L001_sequences",
        'platform': 'ILLUMINA',
        'instrument_model': 'Illumina MiSeq',
        'library_construction_protocol': 'AAAA',
        'experiment_design_description': 'BBBB'}
    metadata = pd.DataFrame.from_dict(
        {'SKD6.640190': sample_values}, orient='index', dtype=str)
    prep = PrepTemplate.create(metadata, Study(1), '16S')
    redis_key = 'prep_template_%s' % prep.id

    # Update investigation type
    observed = prep_template_patch_req(
        '*****@*****.**', 'replace',
        '/%s/investigation_type' % prep.id, 'Cancer Genomics')
    expected = {'status': 'success', 'message': ''}
    self.assertEqual(observed, expected)
    self.assertEqual(prep.investigation_type, 'Cancer Genomics')

    # Update prep template data
    observed = prep_template_patch_req(
        '*****@*****.**', 'replace', '/%s/data' % prep.id, 'update.txt')
    self.assertEqual(observed, expected)
    stored = r_client.get('prep_template_%s' % prep.id)
    self.assertIsNotNone(stored)

    self._wait_for_parallel_job(redis_key)

    # Delete a prep template column
    observed = prep_template_patch_req(
        '*****@*****.**', 'remove',
        '/%s/10/columns/target_subfragment/' % prep.id)
    expected = {'status': 'success', 'message': '', 'row_id': '10'}
    self.assertEqual(observed, expected)
    self._wait_for_parallel_job(redis_key)
    self.assertNotIn('target_subfragment', prep.categories())

    # Change the name of the prep template
    observed = prep_template_patch_req(
        '*****@*****.**', 'replace', '/%s/name' % prep.id, ' My New Name ')
    expected = {'status': 'success', 'message': ''}
    self.assertEqual(observed, expected)
    self.assertEqual(prep.name, 'My New Name')

    # Test all the errors
    # Operation not supported
    observed = prep_template_patch_req(
        '*****@*****.**', 'add', '/1/investigation_type',
        'Cancer Genomics')
    expected = {
        'status': 'error',
        'message': 'Operation "add" not supported. '
                   'Current supported operations: replace, remove',
        'row_id': '0'}
    self.assertEqual(observed, expected)

    # Incorrect path parameter
    observed = prep_template_patch_req(
        '*****@*****.**', 'replace', '/investigation_type',
        'Cancer Genomics')
    expected = {'status': 'error',
                'message': 'Incorrect path parameter'}
    self.assertEqual(observed, expected)

    # Incorrect attribute
    observed = prep_template_patch_req(
        '*****@*****.**', 'replace', '/1/other_attribute',
        'Cancer Genomics')
    expected = {
        'status': 'error',
        'message': 'Attribute "other_attribute" not found. '
                   'Please, check the path parameter'}
    self.assertEqual(observed, expected)

    # User doesn't have access
    observed = prep_template_patch_req(
        '*****@*****.**', 'replace',
        '/%s/investigation_type' % prep.id, 'Cancer Genomics')
    expected = {'status': 'error',
                'message': 'User does not have access to study'}
    self.assertEqual(observed, expected)

    # File does not exists
    observed = prep_template_patch_req(
        '*****@*****.**', 'replace', '/1/data', 'unknown_file.txt')
    expected = {'status': 'error',
                'message': 'file does not exist',
                'file': 'unknown_file.txt'}
    self.assertEqual(observed, expected)
def _wait_for_parallel_job(self, key):
    """Wait for the job whose id is stored in the given redis key

    Parameters
    ----------
    key : str
        The redis key holding a JSON payload with a 'job_id' entry
    """
    # This is needed so the clean up works - this is a distributed system
    # so we need to make sure that all processes are done before we reset
    # the test database
    obs = r_client.get(key)
    # Fail with a readable message rather than with the TypeError that
    # decoding a missing value would raise
    self.assertIsNotNone(obs, "No job information found in redis key "
                              "'%s'" % key)
    wait_for_processing_job(loads(obs)['job_id'])
def prep_template_ajax_get_req(user_id, prep_id):
    """Collects the prep template information used by the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, always 'success'
        - message: str, always the empty string
        - name: str, the name of the prep template
        - files: list of str, the files of the study uploads folders whose
          name ends in '.txt' or '.tsv'
        - download_prep_id: the id of the first template filepath whose
          basename does not contain 'qiime', None if there is none
        - download_qiime_id: the id of the first template filepath whose
          basename contains 'qiime', None if there is none
        - other_filepaths: list of str, basenames of the remaining
          non-qiime filepaths
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: the investigation type of the template
        - ontology: the ENA ontology information
        - artifact_attached: bool, whether the template has an artifact
          attached
        - study_id: the study id of the template
        - editable: whether the user can edit the study and the template
          is not being updated
        - data_type: the data type of the template
        - alert_type: str, the level of the alert to show, if any
        - is_submitted_to_ebi: the template's is_submitted_to_ebi value
        - alert_message: str, the text of the alert to show, if any
    """
    pt = PrepTemplate(prep_id)
    name = pt.name

    # Values reported when there is no job registered for this template
    processing = False
    alert_type = ''
    alert_msg = ''
    raw_job_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if raw_job_info:
        # Missing keys in the stored payload read as the empty string
        job_info = defaultdict(lambda: '', loads(raw_job_info))
        job = ProcessingJob(job_info['job_id'])
        job_status = job.status
        if job_status == 'success':
            alert_type = job_info['alert_type']
            alert_msg = job_info['alert_msg'].replace('\n', '</br>')
        elif job_status == 'error':
            alert_type = 'danger'
            alert_msg = job.log.msg.replace('\n', '</br>')
        else:
            # Any other status means that the job has not finished yet
            processing = True
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id

    files = []
    for _, fname, _ in get_files_from_uploads_folders(study_id):
        if fname.endswith(('.txt', '.tsv')):
            files.append(fname)

    # keys is an iterator, so the samples are counted while consuming it
    num_samples = sum(1 for _ in pt.keys())
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    download_prep_id = None
    download_qiime_id = None
    other_filepaths = []
    for fp_id, fp in pt.get_filepaths():
        fname = basename(fp)
        is_qiime = 'qiime' in fname
        if is_qiime and download_qiime_id is None:
            download_qiime_id = fp_id
        elif not is_qiime and download_prep_id is None:
            download_prep_id = fp_id
        elif not is_qiime:
            other_filepaths.append(fname)

    ontology = _get_ENA_ontology()

    editable = Study(study_id).can_edit(User(user_id)) and not processing

    response = {
        'status': 'success',
        'message': '',
        'name': name,
        'files': files,
        'download_prep_id': download_prep_id,
        'download_qiime_id': download_qiime_id,
        'other_filepaths': other_filepaths,
        'num_samples': num_samples,
        'num_columns': num_columns,
        'investigation_type': investigation_type,
        'ontology': ontology,
        'artifact_attached': artifact_attached,
        'study_id': study_id,
        'editable': editable,
        'data_type': pt.data_type(),
        'alert_type': alert_type,
        'is_submitted_to_ebi': pt.is_submitted_to_ebi,
        'alert_message': alert_msg,
    }
    return response
def prep_template_ajax_get_req(user_id, prep_id):
    """Returns the prep template information needed for the AJAX handler

    Parameters
    ----------
    user_id : str
        The user id
    prep_id : int
        The prep template id

    Returns
    -------
    dict of {str: object}
        A dictionary with the following keys:
        - status: str, always 'success'
        - message: str, always the empty string
        - name: str, the name of the prep template
        - files: list of str, the files of the study uploads folders whose
          name ends in '.txt', '.tsv' or '.xlsx'
        - download_prep_id: the id of the first template filepath whose
          basename does not contain 'qiime', None if there is none
        - download_qiime_id: the id of the first template filepath whose
          basename contains 'qiime', None if there is none
        - other_filepaths: list of str, basenames of the remaining
          non-qiime filepaths
        - num_samples: int, the number of samples present in the template
        - num_columns: int, the number of columns present in the template
        - investigation_type: the investigation type of the template
        - ontology: the ENA ontology information
        - artifact_attached: bool, whether the template has an artifact
          attached
        - study_id: the study id of the template
        - editable: whether the user can edit the study and the template
          is not being updated
        - data_type: the data type of the template
        - alert_type: str, the level of the alert to show, if any
        - is_submitted_to_ebi: the template's is_submitted_to_ebi value
        - prep_restrictions: the second element returned by the template's
          validate_restrictions()
        - samples: list, the sorted sample ids of the template
        - deprecated: the template's deprecated value
        - alert_message: str, the text of the alert to show, if any
    """
    pt = PrepTemplate(prep_id)
    name = pt.name
    deprecated = pt.deprecated

    # Initialize variables here
    processing = False
    alert_type = ''
    alert_msg = ''
    job_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id)
    if job_info:
        # Missing keys in the stored payload read as the empty string
        job_info = defaultdict(lambda: '', loads(job_info))
        job_id = job_info['job_id']
        job = ProcessingJob(job_id)
        job_status = job.status
        processing = job_status not in ('success', 'error')
        if processing:
            alert_type = 'info'
            alert_msg = 'This prep template is currently being updated'
        elif job_status == 'error':
            alert_type = 'danger'
            alert_msg = job.log.msg.replace('\n', '</br>')
        else:
            alert_type = job_info['alert_type']
            alert_msg = job_info['alert_msg'].replace('\n', '</br>')

    artifact_attached = pt.artifact is not None
    study_id = pt.study_id
    files = [
        f for _, f, _ in get_files_from_uploads_folders(study_id)
        if f.endswith(('.txt', '.tsv', '.xlsx'))
    ]

    # keys is an iterator: consume it a single time and reuse the sorted
    # list both for the count and for the 'samples' entry of the response
    samples = sorted(pt.keys())
    num_samples = len(samples)
    num_columns = len(pt.categories())
    investigation_type = pt.investigation_type

    download_prep_id = None
    download_qiime_id = None
    other_filepaths = []
    for fp_id, fp in pt.get_filepaths():
        fp = basename(fp)
        if 'qiime' in fp:
            # Only the first qiime file is reported, the rest are skipped
            if download_qiime_id is None:
                download_qiime_id = fp_id
        else:
            if download_prep_id is None:
                download_prep_id = fp_id
            else:
                other_filepaths.append(fp)

    ontology = _get_ENA_ontology()

    editable = Study(study_id).can_edit(User(user_id)) and not processing

    # Only the restrictions are reported; the first element is not used
    _, restrictions = pt.validate_restrictions()

    return {
        'status': 'success',
        'message': '',
        'name': name,
        'files': files,
        'download_prep_id': download_prep_id,
        'download_qiime_id': download_qiime_id,
        'other_filepaths': other_filepaths,
        'num_samples': num_samples,
        'num_columns': num_columns,
        'investigation_type': investigation_type,
        'ontology': ontology,
        'artifact_attached': artifact_attached,
        'study_id': study_id,
        'editable': editable,
        'data_type': pt.data_type(),
        'alert_type': alert_type,
        'is_submitted_to_ebi': pt.is_submitted_to_ebi,
        'prep_restrictions': restrictions,
        'samples': samples,
        'deprecated': deprecated,
        'alert_message': alert_msg
    }
def test_sample_template_handler_patch_request(self):
    """Check sample_template_handler_patch_request errors and successes.

    ``assertRaisesRegex`` is used instead of its deprecated alias
    ``assertRaisesRegexp``, and the keys of the response are checked with
    ``assertCountEqual`` because in Python 3 ``dict.keys()`` is a view
    that never compares equal to a list.
    """
    user = User('*****@*****.**')

    # Test user doesn't have access
    with self.assertRaisesRegex(HTTPError,
                                'User does not have access to study'):
        sample_template_handler_patch_request(
            User('*****@*****.**'), "remove",
            "/1/columns/season_environment/")

    # Test study doesn't exist
    with self.assertRaisesRegex(HTTPError, 'Study does not exist'):
        sample_template_handler_patch_request(
            user, "remove", "/10000/columns/season_environment/")

    # Test sample template doesn't exist
    new_study = self._create_study('Patching test')
    with self.assertRaisesRegex(
            HTTPError,
            "Study %s doesn't have sample information" % new_study.id):
        sample_template_handler_patch_request(
            user, "remove",
            "/%s/columns/season_environment/" % new_study.id)

    # Test wrong operation value
    with self.assertRaisesRegex(
            HTTPError, 'Operation add not supported. Current supported '
                       'operations: remove.'):
        sample_template_handler_patch_request(
            user, 'add', '/1/columns/season_environment')

    # Test wrong path parameter < 2
    with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, 'ignored', '1')

    # TESTS FOR OPERATION: remove
    # Test wrong path parameter
    with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, 'remove',
                                              '/1/season_environment/')

    # Add sample information to the new study so we can delete one column
    # without affecting the other tests
    md = pd.DataFrame.from_dict(
        {'Sample1': {
            'col1': 'val1',
            'col2': 'val2'
        }},
        orient='index',
        dtype=str)
    st = SampleTemplate.create(md, new_study)

    # Test success
    obs = sample_template_handler_patch_request(
        user, "remove", "/%s/columns/col2/" % new_study.id)
    self.assertCountEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_%s' % new_study.id)
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
    self.assertNotIn('col2', st.categories())

    # TESTS FOR OPERATION: replace
    # Test incorrect path parameter with replace
    with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, "replace", "/1/")

    # Test attribute not found
    with self.assertRaisesRegex(HTTPError, 'Attribute name not found'):
        sample_template_handler_patch_request(user, "replace", "/1/name")

    # Test missing value
    with self.assertRaisesRegex(
            HTTPError, 'Value is required when updating sample '
                       'information'):
        sample_template_handler_patch_request(user, "replace", "/1/data")

    # Test file doesn't exist
    with self.assertRaisesRegex(HTTPError, 'Filepath not found'):
        sample_template_handler_patch_request(user, "replace", "/1/data",
                                              req_value='DoesNotExist')

    # Test success
    obs = sample_template_handler_patch_request(
        user, "replace", "/1/data", req_value='uploaded_file.txt')
    self.assertCountEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
def test_prep_template_patch_req(self):
    """Run prep_template_patch_req through its success and error cases."""
    owner = '*****@*****.**'
    sample_values = {
        'center_name': 'ANL',
        'target_subfragment': 'V4',
        'center_project_name': 'Test Project',
        'ebi_submission_accession': None,
        'EMP_status': 'EMP',
        'str_column': 'Value for sample 1',
        'primer': 'GTGCCAGCMGCCGCGGTAA',
        'barcode': 'GTCCGCAAGTTA',
        'run_prefix': "s_G1_L001_sequences",
        'platform': 'Illumina',
        'instrument_model': 'Illumina MiSeq',
        'library_construction_protocol': 'AAAA',
        'experiment_design_description': 'BBBB',
    }
    metadata = pd.DataFrame.from_dict(
        {'SKD6.640190': sample_values}, orient='index', dtype=str)
    pt = PrepTemplate.create(metadata, Study(1), '16S')
    # Redis key under which the update/delete jobs of this prep are stored
    redis_key = 'prep_template_%s' % pt.id

    # Update investigation type
    result = prep_template_patch_req(
        owner, 'replace', '/%s/investigation_type' % pt.id,
        'Cancer Genomics')
    self.assertEqual(result, {'status': 'success', 'message': ''})
    self.assertEqual(pt.investigation_type, 'Cancer Genomics')

    # Update prep template data
    result = prep_template_patch_req(
        owner, 'replace', '/%s/data' % pt.id, 'update.txt')
    self.assertEqual(result, {'status': 'success', 'message': ''})
    self.assertIsNotNone(r_client.get(redis_key))
    self._wait_for_parallel_job(redis_key)

    # Delete a prep template column
    result = prep_template_patch_req(
        owner, 'remove', '/%s/10/columns/target_subfragment/' % pt.id)
    self.assertEqual(
        result, {'status': 'success', 'message': '', 'row_id': '10'})
    self._wait_for_parallel_job(redis_key)
    self.assertNotIn('target_subfragment', pt.categories())

    # Change the name of the prep template; the test expects the
    # surrounding blanks of the value to be gone from the stored name
    result = prep_template_patch_req(
        owner, 'replace', '/%s/name' % pt.id, ' My New Name ')
    self.assertEqual(result, {'status': 'success', 'message': ''})
    self.assertEqual(pt.name, 'My New Name')

    # Test all the errors
    # Operation not supported
    result = prep_template_patch_req(
        owner, 'add', '/1/investigation_type', 'Cancer Genomics')
    self.assertEqual(
        result,
        {'status': 'error',
         'message': 'Operation "add" not supported. '
                    'Current supported operations: replace, remove',
         'row_id': '0'})

    # Incorrect path parameter
    result = prep_template_patch_req(
        owner, 'replace', '/investigation_type', 'Cancer Genomics')
    self.assertEqual(
        result,
        {'status': 'error', 'message': 'Incorrect path parameter'})

    # Incorrect attribute
    result = prep_template_patch_req(
        owner, 'replace', '/1/other_attribute', 'Cancer Genomics')
    self.assertEqual(
        result,
        {'status': 'error',
         'message': 'Attribute "other_attribute" not found. '
                    'Please, check the path parameter'})

    # User doesn't have access
    no_access_user = '*****@*****.**'
    result = prep_template_patch_req(
        no_access_user, 'replace', '/%s/investigation_type' % pt.id,
        'Cancer Genomics')
    self.assertEqual(
        result,
        {'status': 'error',
         'message': 'User does not have access to study'})

    # File does not exists
    result = prep_template_patch_req(
        owner, 'replace', '/1/data', 'unknown_file.txt')
    self.assertEqual(
        result,
        {'status': 'error',
         'message': 'file does not exist',
         'file': 'unknown_file.txt'})
def test_sample_template_handler_patch_request(self):
    """Check sample_template_handler_patch_request errors and successes.

    ``assertRaisesRegex`` is used instead of its deprecated alias
    ``assertRaisesRegexp``, and the keys of the response are checked with
    ``assertCountEqual`` because in Python 3 ``dict.keys()`` is a view
    that never compares equal to a list.
    """
    user = User('*****@*****.**')

    # Test user doesn't have access
    with self.assertRaisesRegex(HTTPError,
                                'User does not have access to study'):
        sample_template_handler_patch_request(
            User('*****@*****.**'), "remove",
            "/1/columns/season_environment/")

    # Test study doesn't exist
    with self.assertRaisesRegex(HTTPError, 'Study does not exist'):
        sample_template_handler_patch_request(
            user, "remove", "/10000/columns/season_environment/")

    # Test sample template doesn't exist
    new_study = self._create_study('Patching test')
    with self.assertRaisesRegex(HTTPError,
                                "Study %s doesn't have sample information"
                                % new_study.id):
        sample_template_handler_patch_request(
            user, "remove",
            "/%s/columns/season_environment/" % new_study.id)

    # Test wrong operation value
    with self.assertRaisesRegex(
            HTTPError, 'Operation add not supported. Current supported '
                       'operations: remove.'):
        sample_template_handler_patch_request(
            user, 'add', '/1/columns/season_environment')

    # Test wrong path parameter < 2
    with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, 'ignored', '1')

    # TESTS FOR OPERATION: remove
    # Test wrong path parameter
    with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(
            user, 'remove', '/1/season_environment/')

    # Add sample information to the new study so we can delete one column
    # without affecting the other tests
    md = pd.DataFrame.from_dict(
        {'Sample1': {'col1': 'val1', 'col2': 'val2'}},
        orient='index', dtype=str)
    st = SampleTemplate.create(md, new_study)

    # Test success
    obs = sample_template_handler_patch_request(
        user, "remove", "/%s/columns/col2/" % new_study.id)
    self.assertCountEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_%s' % new_study.id)
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
    self.assertNotIn('col2', st.categories())

    # TESTS FOR OPERATION: replace
    # Test incorrect path parameter with replace
    with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, "replace", "/1/")

    # Test attribute not found
    with self.assertRaisesRegex(HTTPError, 'Attribute name not found'):
        sample_template_handler_patch_request(user, "replace", "/1/name")

    # Test missing value
    with self.assertRaisesRegex(HTTPError,
                                'Value is required when updating sample '
                                'information'):
        sample_template_handler_patch_request(user, "replace", "/1/data")

    # Test file doesn't exist
    with self.assertRaisesRegex(HTTPError, 'Filepath not found'):
        sample_template_handler_patch_request(user, "replace", "/1/data",
                                              req_value='DoesNotExist')

    # Test success
    obs = sample_template_handler_patch_request(
        user, "replace", "/1/data", req_value='uploaded_file.txt')
    self.assertCountEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
def study_get_req(study_id, user_id):
    """Returns information available for the given study

    Parameters
    ----------
    study_id : int
        Study id to get the information for
    user_id : str
        User requesting the info

    Returns
    -------
    dict
        If the access check reports an error, that error is returned
        unchanged. Otherwise a dictionary of the form
        {'status': 'success',
         'message': '',
         'study_info': dict,
         'editable': result of Study.can_edit for the user}
        study_info holds the study's info plus the entries added here:
        publication_doi, publication_pid, study_id, study_title,
        shared_with, status, ebi_study_accession, ebi_submission_status,
        num_samples, owner, has_access_to_raw_data,
        show_biom_download_button, show_raw_download_button, level and
        message. principal_investigator (and lab_person, when set) are
        replaced by dicts with the keys name, email and affiliation.
    """
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    # Can only pass ids over API, so need to instantiate object
    study = Study(study_id)
    study_info = study.info

    # Add needed info that is not part of the initial info pull
    study_info['publication_doi'] = []
    study_info['publication_pid'] = []
    for pub, is_doi in study.publications:
        if is_doi:
            study_info['publication_doi'].append(pub)
        else:
            study_info['publication_pid'].append(pub)
    study_info['study_id'] = study.id
    study_info['study_title'] = study.title
    study_info['shared_with'] = [s.id for s in study.shared_with]
    study_info['status'] = study.status
    study_info['ebi_study_accession'] = study.ebi_study_accession
    study_info['ebi_submission_status'] = study.ebi_submission_status

    # Clean up StudyPerson objects to string for display
    pi = study_info['principal_investigator']
    study_info['principal_investigator'] = {
        'name': pi.name,
        'email': pi.email,
        'affiliation': pi.affiliation}

    lab_person = study_info['lab_person']
    if lab_person:
        study_info['lab_person'] = {
            'name': lab_person.name,
            'email': lab_person.email,
            'affiliation': lab_person.affiliation}

    samples = study.sample_template
    study_info['num_samples'] = 0 if samples is None else len(list(samples))
    study_info['owner'] = study.owner.id

    # The same user object serves both permission checks below
    user = User(user_id)
    # Study.has_access no_public=True, will return True only if the user_id is
    # the owner of the study or if the study is shared with the user_id
    study_info['has_access_to_raw_data'] = study.has_access(user, True)

    # Generator expressions let any() stop at the first match instead of
    # building a throwaway list first
    study_info['show_biom_download_button'] = any(
        a.artifact_type == 'BIOM' for a in study.artifacts())
    study_info['show_raw_download_button'] = any(
        pt.artifact is not None for pt in study.prep_templates())

    # getting study processing status from redis
    processing = False
    study_info['level'] = ''
    study_info['message'] = ''
    job_info = r_client.get(STUDY_KEY_FORMAT % study_id)
    if job_info:
        # Missing keys in the stored payload read as the empty string
        job_info = defaultdict(lambda: '', loads(job_info))
        job_id = job_info['job_id']
        job = ProcessingJob(job_id)
        job_status = job.status
        processing = job_status not in ('success', 'error')
        if processing:
            study_info['level'] = 'info'
            study_info['message'] = 'This study is currently being processed'
        elif job_status == 'error':
            study_info['level'] = 'danger'
            study_info['message'] = job.log.msg.replace('\n', '</br>')
        else:
            study_info['level'] = job_info['alert_type']
            study_info['message'] = job_info['alert_msg'].replace(
                '\n', '</br>')

    return {'status': 'success',
            'message': '',
            'study_info': study_info,
            'editable': study.can_edit(user)}