def get(self, prep_template_id):
    """Sends the sample information of a prep template's samples as TSV

    Parameters
    ----------
    prep_template_id : int or int castable string
        The id of the prep template whose samples are exported

    Notes
    -----
    Nothing is returned: the file name and the generated text are handed
    to ``self._finish_generate_files``.
    """
    prep_id = int(prep_template_id)
    prep = PrepTemplate(prep_id)
    study_id = prep.study_id

    # Permissions are checked with the study id reported by the prep
    # template, before any sample information is read
    self._check_permissions(study_id)

    sample_info = SampleTemplate(study_id)
    # Only the entries obtained by iterating the prep template are
    # requested (presumably its sample ids - TODO confirm)
    frame = sample_info.to_dataframe(samples=list(prep))
    # With None as the target, to_csv returns the text instead of writing
    tsv = frame.to_csv(None, sep='\t')

    filename = 'sample_information_from_prep_%s.tsv' % prep_id
    self._finish_generate_files(filename, tsv)
def sample_template_summary_get_req(study_id, user):
    """Summarizes the values observed in each sample template column

    Parameters
    ----------
    study_id : int
        The study whose sample information is summarized
    user : qiita_db.user
        The user performing the request

    Returns
    -------
    dict of {str: list of (str, count)}
        Keys are the metadata categories. Each value lists the observed
        values of that category, naturally sorted, paired with the number
        of times each one is seen.

    Raises
    ------
    HTTPError
        404 if the sample template doesn't exist
    """
    # Access and existence validation is delegated to this helper
    sample_template_checks(study_id, user, check_exists=True)

    df = SampleTemplate(study_id).to_dataframe()

    # The study_id column, when present, is left out of the summary
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    def _as_unicode(value):
        # Sort key: decode the value, ignoring undecodable bytes
        return unicode(value, errors='ignore')

    summary = {}
    for column in df.columns:
        counts = df[column].value_counts()
        observed = []
        for value in natsorted(counts.index, key=_as_unicode):
            observed.append((str(value), counts[value]))
        summary[str(column)] = observed

    return summary
def sample_template_summary_get_req(study_id, user):
    """Builds a per-column summary of the sample template metadata

    Parameters
    ----------
    study_id : int
        The study to retrieve the sample information summary
    user : qiita_db.user
        The user performing the request

    Returns
    -------
    dict of {str: list of (str, count)}
        One entry per metadata category; the value is a list of tuples
        holding an observed value of the category and the number of times
        it's seen, in natural sort order of the values.

    Raises
    ------
    HTTPError
        404 if the sample template doesn't exist
    """
    # Verify user access and sample template existence before reading it
    sample_template_checks(study_id, user, check_exists=True)

    template = SampleTemplate(study_id)
    frame = template.to_dataframe()

    # study_id is not reported as a metadata category
    has_study_id = 'study_id' in frame.columns
    if has_study_id:
        frame.drop('study_id', axis=1, inplace=True)

    summary = {}
    for category in frame.columns:
        tally = frame[category].value_counts()
        ordered_values = natsorted(
            tally.index, key=lambda v: unicode(v, errors='ignore'))
        summary[str(category)] = [(str(v), tally[v])
                                  for v in ordered_values]
    return summary
def sample_template_get_req(samp_id, user_id):
    """Gets the full sample template as a dictionary

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        On success:
        {'status': 'success',
         'message': '',
         'template': dict of {str: {str: object, ...}, ...}}
        template is a dictionary where the keys are the metadata samples
        (the dataframe index) and the values are a dictionary of column
        and value. Format {sample: {column: value, ...}, ...}
        If the existence check or one of the access checks fails, the
        result of that check is returned unchanged.
    """
    template_id = int(samp_id)

    existence = _check_sample_template_exists(template_id)
    if existence['status'] != 'success':
        return existence

    # Access is verified twice: first with the template id, then with the
    # study id reported by the template itself
    denied = check_access(template_id, user_id)
    if denied:
        return denied

    sample_template = SampleTemplate(template_id)
    denied = check_access(sample_template.study_id, user_id)
    if denied:
        return denied

    response = {'status': 'success', 'message': ''}
    frame = sample_template.to_dataframe()
    response['template'] = frame.to_dict(orient='index')
    return response
def sample_template_get_req(samp_id, user_id):
    """Returns the json-ready contents of the full sample template

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': msg,
         'template': dict of {str: {str: object, ...}, ...}}
        template is keyed by the metadata samples and each value is a
        dictionary of column and value.
        Format {sample: {column: value, ...}, ...}
        When the sample template existence check or an access check does
        not pass, the dictionary produced by that check is returned
        instead.
    """
    st_id = int(samp_id)

    exists = _check_sample_template_exists(st_id)
    if exists['status'] != 'success':
        return exists

    # First access check uses the sample template id
    error = check_access(st_id, user_id)
    if error:
        return error

    # Second access check uses the study id stored in the template
    st = SampleTemplate(st_id)
    error = check_access(st.study_id, user_id)
    if error:
        return error

    # orient='index' keys the result by the dataframe index
    contents = st.to_dataframe().to_dict(orient='index')
    return {'status': 'success',
            'message': '',
            'template': contents}
def sample_template_summary_get_req(samp_id, user_id):
    """Summarizes the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        If the access check does not pass, its result is returned
        unchanged. Otherwise a dictionary in the form
        {'status': 'success',
         'message': '',
         'num_samples': int,
         'num_columns': int,
         'editable': object,
         'alert_type': object,
         'alert_message': object,
         'stats': dict of {str: list of (str, count)}}
        stats maps each metadata category to a list of tuples; each tuple
        is an observed value in the category and the number of times it
        is seen. When the sample template does not exist the counts are 0
        and stats is empty.
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    status_info = get_sample_template_processing_status(samp_id)
    processing, alert_type, alert_msg = status_info

    exists = _check_sample_template_exists(int(samp_id))

    # Start from the answer given when there is no sample template; the
    # remaining fields are filled in below when the template exists
    summary = {'status': 'success',
               'message': '',
               'num_samples': 0,
               'num_columns': 0,
               'editable': not processing,
               'alert_type': alert_type,
               'alert_message': alert_msg,
               'stats': {}}
    if exists['status'] != 'success':
        return summary

    template = SampleTemplate(int(samp_id))
    df = template.to_dataframe()

    can_edit = Study(template.study_id).can_edit(User(user_id))
    summary['editable'] = can_edit and not processing

    # The counts are taken before the study_id column is removed
    num_samples, num_columns = df.shape
    summary['num_samples'] = num_samples
    summary['num_columns'] = num_columns

    # drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    stats = summary['stats']
    for column in df.columns:
        counts = df[column].value_counts()
        observed = []
        for value in natsorted(counts.index):
            observed.append((str(value), counts[value]))
        stats[str(column)] = observed

    return summary
# 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s AND table_name LIKE 'sample_%%' AND table_name NOT IN ( 'prep_template', 'prep_template_sample') GROUP BY table_name""" # note that we are looking for those columns with duplicated names in # the headers TRN.add(sql, [tuple(set(cols_sample))]) for table, columns in dict(TRN.execute_fetchindex()).items(): # [1] the format is table_# so taking the # st = SampleTemplate(int(table.split('_')[1])) # getting just the columns of interest st_df = st.to_dataframe()[columns] # converting to datetime for col in columns: st_df[col] = st_df[col].apply(transform_date) st.update(st_df) if cols_prep: with TRN: # a few notes: just getting the preps with duplicated values; ignoring # column 'sample_id' and tables 'study_sample', 'prep_template', # 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s AND table_name LIKE 'prep_%%' AND table_name NOT IN (
def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        The result of the access check when it does not pass. Otherwise
        a dictionary with the keys 'status', 'message', 'num_samples',
        'num_columns', 'editable', 'alert_type', 'alert_message' and
        'stats'. stats has the metadata categories as keys and lists of
        tuples as values; each tuple is an observed value in the category
        and the number of times it is seen, with the values in natural
        sort order. Format
        {category: [(val1, count1), (val2, count2), ...], ...}
    """
    denied = check_access(samp_id, user_id)
    if denied:
        return denied

    processing, alert_type, alert_msg = \
        get_sample_template_processing_status(samp_id)

    existence = _check_sample_template_exists(int(samp_id))
    if existence['status'] != 'success':
        # No sample template: report an empty summary, editable unless a
        # job is being processed
        empty = {'status': 'success', 'message': ''}
        empty['num_samples'] = 0
        empty['num_columns'] = 0
        empty['editable'] = not processing
        empty['alert_type'] = alert_type
        empty['alert_message'] = alert_msg
        empty['stats'] = {}
        return empty

    st = SampleTemplate(int(samp_id))
    frame = st.to_dataframe()

    editable = Study(st.study_id).can_edit(User(user_id))
    editable = editable and not processing

    # The shape is recorded before the study_id column is dropped
    result = {'status': 'success',
              'message': '',
              'num_samples': frame.shape[0],
              'num_columns': frame.shape[1],
              'editable': editable,
              'alert_type': alert_type,
              'alert_message': alert_msg,
              'stats': {}}

    # drop the study_id column if it exists
    if 'study_id' in frame.columns:
        frame.drop('study_id', axis=1, inplace=True)

    def _natural_key(value):
        # Decode the value for sorting, ignoring undecodable bytes
        return unicode(value, errors='ignore')

    for category in frame.columns:
        tally = frame[category].value_counts()
        ordered = natsorted(tally.index, key=_natural_key)
        result['stats'][str(category)] = [(str(v), tally[v])
                                          for v in ordered]

    return result
def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        If the access check does not pass, its result is returned
        unchanged. Otherwise a dictionary in the form
        {'status': 'success',
         'message': '',
         'num_samples': int,
         'num_columns': int,
         'editable': object,
         'alert_type': str,
         'alert_message': str,
         'stats': dict of {str: list of (str, count)}}
        stats contains the metadata categories as keys and lists of
        tuples as values. Each tuple is an observed value in the category
        and the number of times it is seen. When the sample template does
        not exist, the counts are 0 and stats is empty.

    Notes
    -----
    The alert information is read from redis. If the sample template key
    points to a job whose record is no longer available in redis, the
    request is treated as "not processing" with an empty alert instead of
    failing while decoding the missing record.
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    # Defaults used when no job is recorded for this sample template, or
    # when the recorded job cannot be found in redis
    processing = False
    alert_type = ''
    alert_msg = ''

    st_key = SAMPLE_TEMPLATE_KEY_FORMAT % samp_id
    job_info = r_client.get(st_key)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if job_id:
            # A missing key yields no value; check before decoding so a
            # vanished job record does not make loads raise
            redis_info = r_client.get(job_id)
            if redis_info:
                redis_info = loads(redis_info)
                job_status = redis_info['status_msg']
                processing = job_status == 'Running'
                if processing:
                    alert_type = 'info'
                    alert_msg = (
                        'This sample template is currently being processed')
                else:
                    job_result = redis_info['return']
                    alert_type = job_result['status']
                    alert_msg = job_result['message'].replace('\n', '</br>')
                    if job_status == 'Success':
                        # Store the final result under the sample template
                        # key without a job id, so later requests read it
                        # directly instead of looking the job up again
                        payload = {'job_id': None,
                                   'status': alert_type,
                                   'message': alert_msg}
                        r_client.set(st_key, dumps(payload))
        else:
            # No running job: the stored entry already holds the result
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')

    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return {'status': 'success',
                'message': '',
                'num_samples': 0,
                'num_columns': 0,
                'editable': not processing,
                'alert_type': alert_type,
                'alert_message': alert_msg,
                'stats': {}}

    template = SampleTemplate(int(samp_id))
    df = template.to_dataframe()

    editable = (Study(template.study_id).can_edit(User(user_id)) and
                not processing)

    # The shape is recorded before the study_id column is dropped
    out = {'status': 'success',
           'message': '',
           'num_samples': df.shape[0],
           'num_columns': df.shape[1],
           'editable': editable,
           'alert_type': alert_type,
           'alert_message': alert_msg,
           'stats': {}}

    # drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    for column in df.columns:
        counts = df[column].value_counts()
        out['stats'][str(column)] = [(str(key), counts[key])
                                     for key in natsorted(counts.index)]

    return out
# 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s AND table_name LIKE 'sample_%%' AND table_name NOT IN ( 'prep_template', 'prep_template_sample') GROUP BY table_name""" # note that we are looking for those columns with duplicated names in # the headers TRN.add(sql, [tuple(set(cols_sample))]) for table, columns in viewitems(dict(TRN.execute_fetchindex())): # [1] the format is table_# so taking the # st = SampleTemplate(int(table.split('_')[1])) # getting just the columns of interest st_df = st.to_dataframe()[columns] # converting to datetime for col in columns: st_df[col] = st_df[col].apply(transform_date) st.update(st_df) if cols_prep: with TRN: # a few notes: just getting the preps with duplicated values; ignoring # column 'sample_id' and tables 'study_sample', 'prep_template', # 'prep_template_sample' sql = """SELECT table_name, array_agg(column_name::text) FROM information_schema.columns WHERE column_name IN %s AND table_name LIKE 'prep_%%' AND table_name NOT IN (
def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        If the access check does not pass, its result is returned
        unchanged. Otherwise a dictionary in the form
        {'status': 'success',
         'message': '',
         'num_samples': int,
         'num_columns': int,
         'editable': object,
         'alert_type': str,
         'alert_message': str,
         'stats': dict of {str: list of (str, count)}}
        stats contains the metadata categories as keys and lists of
        tuples as values. Each tuple is an observed value in the category
        and the number of times it is seen. When the sample template does
        not exist, the counts are 0 and stats is empty.

    Notes
    -----
    The alert information is read from redis. If the sample template key
    points to a job whose record is no longer available in redis, the
    request is treated as "not processing" with an empty alert instead of
    failing while decoding the missing record.
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    # Fallback values: used when no job is recorded for this sample
    # template, or when the recorded job is missing from redis
    processing = False
    alert_type = ''
    alert_msg = ''

    template_key = SAMPLE_TEMPLATE_KEY_FORMAT % samp_id
    job_info = r_client.get(template_key)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if job_id:
            # Guard the lookup: decoding a missing job record would raise
            raw_redis_info = r_client.get(job_id)
            if raw_redis_info:
                redis_info = loads(raw_redis_info)
                status_msg = redis_info['status_msg']
                processing = status_msg == 'Running'
                if processing:
                    alert_type = 'info'
                    alert_msg = (
                        'This sample template is currently being processed')
                else:
                    returned = redis_info['return']
                    alert_type = returned['status']
                    alert_msg = returned['message'].replace('\n', '</br>')
                    if status_msg == 'Success':
                        # Keep the final result under the sample template
                        # key with the job id cleared, so the next request
                        # takes the stored status/message directly
                        payload = {
                            'job_id': None,
                            'status': alert_type,
                            'message': alert_msg
                        }
                        r_client.set(template_key, dumps(payload))
        else:
            # Job id cleared: the entry itself carries the last result
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')

    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return {
            'status': 'success',
            'message': '',
            'num_samples': 0,
            'num_columns': 0,
            'editable': not processing,
            'alert_type': alert_type,
            'alert_message': alert_msg,
            'stats': {}
        }

    template = SampleTemplate(int(samp_id))
    df = template.to_dataframe()

    editable = (Study(template.study_id).can_edit(User(user_id)) and
                not processing)

    # num_columns is taken before the study_id column is dropped
    out = {
        'status': 'success',
        'message': '',
        'num_samples': df.shape[0],
        'num_columns': df.shape[1],
        'editable': editable,
        'alert_type': alert_type,
        'alert_message': alert_msg,
        'stats': {}
    }

    # drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    for column in df.columns:
        counts = df[column].value_counts()
        out['stats'][str(column)] = [(str(key), counts[key])
                                     for key in natsorted(counts.index)]

    return out