def get(self, analysis_id):
    """Render the results page for a single analysis.

    Groups job results by data type and summarizes dropped samples per
    data type as (study title, sample count, comma-joined sample ids).
    """
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)

    # (command, results) pairs grouped by each job's data type
    jobres = defaultdict(list)
    for job_id in analysis.jobs:
        job = Job(job_id)
        jobres[job.datatype].append((job.command[0], job.results))

    # Per data type: one row per processed data with samples dropped
    dropped = defaultdict(list)
    for proc_data_id, samples in viewitems(analysis.dropped_samples):
        if not samples:
            # nothing was dropped for this processed data
            continue
        proc_data = ProcessedData(proc_data_id)
        dropped[proc_data.data_type()].append(
            (Study(proc_data.study).title, len(samples),
             ', '.join(samples)))

    self.render("analysis_results.html", analysis_id=analysis_id,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def get(self, analysis_id):
    """Render job results for an analysis, including dropped samples.

    Dropped samples are keyed by a human-readable label built from the
    processed data's data type and study id.
    """
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)

    jobres = defaultdict(list)
    for job_id in analysis.jobs:
        job = Job(job_id)
        jobres[job.datatype].append((job.command[0], job.results))

    dropped = {}
    dropped_samples = analysis.dropped_samples
    if dropped_samples:
        for proc_data_id, samples in viewitems(dropped_samples):
            proc_data = ProcessedData(proc_data_id)
            label = "Data type %s, Study: %s" % (proc_data.data_type(),
                                                 proc_data.study)
            dropped[label] = samples

    self.render("analysis_results.html", jobres=jobres,
                aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def get(self, analysis_id):
    """Render analysis results, then clear this analysis's cached messages."""
    analysis_id = int(analysis_id)
    check_analysis_access(User(self.current_user), analysis_id)
    analysis = Analysis(analysis_id)

    # (command, results) pairs grouped by each job's data type
    jobres = defaultdict(list)
    for job_id in analysis.jobs:
        job = Job(job_id)
        jobres[job.datatype].append((job.command[0], job.results))

    dropped = {}
    for proc_data_id, samples in viewitems(analysis.dropped_samples):
        proc_data = ProcessedData(proc_data_id)
        label = "Data type %s, Study: %s" % (proc_data.data_type(),
                                             proc_data.study)
        dropped[label] = samples

    self.render("analysis_results.html", user=self.current_user,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())

    # wipe out cached messages for this analysis
    r_server = Redis()
    key = '%s:messages' % self.current_user
    oldmessages = r_server.lrange(key, 0, -1)
    if oldmessages is not None:
        for message in oldmessages:
            if '"analysis": %d' % analysis_id in message:
                # NOTE(review): (key, value, num) arg order matches
                # redis-py < 3.0 — confirm against pinned redis version
                r_server.lrem(key, message, 1)
def get(self, analysis_id):
    """Display job results for an analysis and purge its cached messages."""
    current = self.current_user
    analysis_id = int(analysis_id)
    check_analysis_access(User(current), analysis_id)
    analysis = Analysis(analysis_id)

    # collect (command, results) pairs keyed by each job's data type
    results_by_type = defaultdict(list)
    for jid in analysis.jobs:
        job = Job(jid)
        results_by_type[job.datatype].append((job.command[0], job.results))

    dropped = {}
    for pd_id, samps in viewitems(analysis.dropped_samples):
        pdata = ProcessedData(pd_id)
        dropped["Data type %s, Study: %s" % (pdata.data_type(),
                                             pdata.study)] = samps

    self.render("analysis_results.html", user=current,
                jobres=results_by_type, aname=analysis.name,
                dropped=dropped, basefolder=get_db_files_base_dir())

    # wipe out cached messages for this analysis
    redis_conn = Redis()
    msg_key = '%s:messages' % current
    cached = redis_conn.lrange(msg_key, 0, -1)
    if cached is not None:
        for msg in cached:
            if '"analysis": %d' % analysis_id in msg:
                # NOTE(review): arg order is the redis-py < 3.0 lrem
                # signature (key, value, num) — verify pinned version
                redis_conn.lrem(msg_key, msg, 1)
def filter_by_processed_data(self, datatypes=None):
    """Filters results to what is available in each processed data

    Parameters
    ----------
    datatypes : list of str, optional
        Datatypes to selectively return. Default all datatypes available

    Returns
    -------
    study_proc_ids : dict of dicts of lists
        Processed data ids with samples for each study, in the format
        {study_id: {datatype: [proc_id, proc_id, ...], ...}, ...}
    proc_data_samples : dict of lists
        Samples available in each processed data id, in the format
        {proc_data_id: [samp_id1, samp_id2, ...], ...}
    samples_meta : dict of pandas DataFrames
        metadata for the found samples, keyed by study. Pandas indexed on
        sample_id, column headers are the metadata categories searched
        over
    """
    with TRN:
        if datatypes is not None:
            # set gives O(1) membership tests in the loop below
            datatypes = set(datatypes)

        study_proc_ids = {}
        proc_data_samples = {}
        samples_meta = {}
        headers = dict(enumerate(self.meta_headers))
        for study_id, study_meta in viewitems(self.results):
            # pandas can't ingest cursor objects directly, so build the
            # frame from a plain dict keyed on sample id
            samples_meta[study_id] = pd.DataFrame.from_dict(
                {row[0]: row[1:] for row in study_meta}, orient='index')
            samples_meta[study_id].rename(columns=headers, inplace=True)

            # per-study lookups needed for the filtering below
            study = Study(study_id)
            study_sample_ids = {row[0] for row in study_meta}
            study_proc_ids[study_id] = defaultdict(list)
            for proc_data_id in study.processed_data():
                proc_data = ProcessedData(proc_data_id)
                datatype = proc_data.data_type()
                # skip processed data outside the requested datatypes
                if datatypes is not None and datatype not in datatypes:
                    continue
                shared = proc_data.samples.intersection(study_sample_ids)
                if shared:
                    proc_data_samples[proc_data_id] = sorted(shared)
                    study_proc_ids[study_id][datatype].append(
                        proc_data_id)
        return study_proc_ids, proc_data_samples, samples_meta
def get(self):
    """Show the samples selected in the user's default analysis,
    grouped by study, along with each processed data's processing info.
    """
    sel_data = defaultdict(dict)
    proc_data_info = {}
    selected = Analysis(self.current_user.default_analysis).samples
    for pid, samps in viewitems(selected):
        proc_data = ProcessedData(pid)
        # group the selection by owning study for display
        sel_data[proc_data.study][pid] = samps
        # attach processing info, tagged with its data type
        info = proc_data.processing_info
        info['data_type'] = proc_data.data_type()
        proc_data_info[pid] = info
    self.render("analysis_selected.html", sel_data=sel_data,
                proc_info=proc_data_info)
def get(self, analysis_id):
    """Render the results page for the requested analysis."""
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(User(self.current_user), analysis)

    # job (command, results) pairs bucketed by data type
    per_datatype = defaultdict(list)
    for job_id in analysis.jobs:
        job = Job(job_id)
        per_datatype[job.datatype].append((job.command[0], job.results))

    dropped = {}
    for proc_id, samples in viewitems(analysis.dropped_samples):
        pdata = ProcessedData(proc_id)
        dropped["Data type %s, Study: %s" % (pdata.data_type(),
                                             pdata.study)] = samples

    self.render("analysis_results.html", user=self.current_user,
                jobres=per_datatype, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Builds list of dicts for studies table, with all HTML formatted

    Parameters
    ----------
    user : User object
        logged in user
    study_proc : dict of lists, optional
        Dictionary keyed on study_id that lists all processed data
        associated with that study. Required if proc_samples given.
    proc_samples : dict of lists, optional
        Dictionary keyed on proc_data_id that lists all samples associated
        with that processed data. Required if study_proc given.

    Returns
    -------
    infolist: list of dict of lists and dicts
        study and processed data info for JSON serialiation for datatables
        Each dict in the list is a single study, and contains the text

    Notes
    -----
    Both study_proc and proc_samples must be passed, or neither passed.
    """
    # Logic check to make sure both needed parts passed
    if study_proc is not None and proc_samples is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    elif proc_samples is not None and study_proc is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    # After validation, either both were given or neither was; build the
    # processed-data maps ourselves only in the latter case.
    build_samples = study_proc is None

    # get list of studies for table: own, public, and shared with the user
    study_set = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if study_proc is not None:
        study_set = study_set.intersection(study_proc)
    if not study_set:
        # No studies left so no need to continue
        return []

    # get info for the studies
    cols = ['study_id', 'email', 'principal_investigator_id', 'pmid',
            'study_title', 'metadata_complete', 'number_samples_collected',
            'study_abstract']
    study_info = Study.get_info(study_set, cols)

    infolist = []
    for info in study_info:
        # Convert DictCursor row to a proper dict
        info = dict(info)
        study = Study(info['study_id'])
        # Build the processed data info for the study if none passed
        if build_samples:
            proc_samples = {}
            study_proc = {study.id: defaultdict(list)}
            for pid in study.processed_data():
                proc_data = ProcessedData(pid)
                study_proc[study.id][proc_data.data_type()].append(pid)
                proc_samples[pid] = proc_data.samples
        # FIX: previously this result was bound to `study_info`, shadowing
        # the cursor iterable still being looped over; use a fresh name.
        single_info = _build_single_study_info(study, info, study_proc,
                                               proc_samples)
        infolist.append(single_info)
    return infolist
def test_data_type_id(self):
    """data_type(ret_id=True) yields the numeric data type id."""
    self.assertEqual(ProcessedData(1).data_type(ret_id=True), 2)
def test_data_type(self):
    """data_type() yields the data type name string."""
    self.assertEqual(ProcessedData(1).data_type(), "18S")
def test_data_type_id(self):
    """Correctly returns the data_type id of preprocessed_data"""
    observed = ProcessedData(1).data_type(ret_id=True)
    self.assertEqual(observed, 2)
def test_data_type(self):
    """Correctly returns the data_type name of preprocessed_data"""
    observed = ProcessedData(1).data_type()
    self.assertEqual(observed, "18S")
def test_data_type_id(self):
    """Data type id lookup returns the expected id."""
    proc = ProcessedData(1)
    self.assertEqual(proc.data_type(ret_id=True), 2)
def test_data_type(self):
    """Data type lookup returns the expected name."""
    proc = ProcessedData(1)
    self.assertEqual(proc.data_type(), "18S")
def test_data_type_id(self):
    """data_type(ret_id=True) on processed data 1 returns id 2"""
    proc_data = ProcessedData(1)
    result = proc_data.data_type(ret_id=True)
    self.assertEqual(result, 2)
def test_data_type(self):
    """data_type() on processed data 1 returns the 18S name"""
    proc_data = ProcessedData(1)
    result = proc_data.data_type()
    self.assertEqual(result, "18S")