def get(self, analysis_id):
    user = self.current_user
    analysis_id = int(analysis_id)
    check_analysis_access(User(user), analysis_id)

    analysis = Analysis(analysis_id)
    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))

    dropped = {}
    for proc_data_id, samples in viewitems(analysis.dropped_samples):
        proc_data = ProcessedData(proc_data_id)
        key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                           proc_data.study)
        dropped[key] = samples

    self.render("analysis_results.html", user=self.current_user,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())

    # wipe out cached messages for this analysis
    r_server = Redis()
    key = '%s:messages' % self.current_user
    oldmessages = r_server.lrange(key, 0, -1)
    if oldmessages is not None:
        for message in oldmessages:
            if '"analysis": %d' % analysis_id in message:
                r_server.lrem(key, message, 1)
def test_status_setter_error(self):
    pd = ProcessedData(1)
    pd.status = 'public'
    self.assertEqual(pd.status, 'public')
    with self.assertRaises(QiitaDBStatusError):
        pd.status = 'sandbox'
def post(self, analysis_id):
    command_args = self.get_arguments("commands")
    split = [x.split("#") for x in command_args]
    analysis = Analysis(analysis_id)

    commands = []
    # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
    fp, mapping_file = mkstemp(suffix="_map_file.txt")
    close(fp)
    SampleTemplate(1).to_file(mapping_file)
    study_fps = {}
    for pd in Study(1).processed_data:
        processed = ProcessedData(pd)
        study_fps[processed.data_type] = processed.get_filepaths()[0][0]
    for data_type, command in split:
        opts = {
            "--otu_table_fp": study_fps[data_type],
            "--mapping_fp": mapping_file
        }
        if command == "Beta Diversity" and data_type in {'16S', '18S'}:
            opts["--tree_fp"] = join(get_db_files_base_dir(), "reference",
                                     "gg_97_otus_4feb2011.tre")
        elif command == "Beta Diversity":
            opts["--parameter_fp"] = join(get_db_files_base_dir(),
                                          "reference", "params_qiime.txt")
        Job.create(data_type, command, opts, analysis)
        commands.append("%s: %s" % (data_type, command))

    user = self.get_current_user()
    self.render("analysis_waiting.html", user=user, aid=analysis_id,
                aname=analysis.name, commands=commands)
    # fire off analysis run here
    # currently synch run so redirect done here. Will remove after demo
    run_analysis(user, analysis)
def get(self, analysis_id):
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)

    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))

    dropped_samples = analysis.dropped_samples
    dropped = defaultdict(list)
    for proc_data_id, samples in viewitems(dropped_samples):
        if not samples:
            continue
        proc_data = ProcessedData(proc_data_id)
        data_type = proc_data.data_type()
        study = proc_data.study
        dropped[data_type].append((Study(study).title, len(samples),
                                   ', '.join(samples)))

    self.render("analysis_results.html", analysis_id=analysis_id,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def get(self, analysis_id):
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(self.current_user, analysis)

    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))

    dropped = {}
    dropped_samples = analysis.dropped_samples
    if dropped_samples:
        for proc_data_id, samples in viewitems(dropped_samples):
            proc_data = ProcessedData(proc_data_id)
            key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                               proc_data.study)
            dropped[key] = samples

    self.render("analysis_results.html", jobres=jobres,
                aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def test_create_preprocessed_and_study_error(self):
    with self.assertRaises(IncompetentQiitaDeveloperError):
        ProcessedData.create(self.params_table, self.params_id,
                             self.filepaths,
                             preprocessed_data=self.preprocessed_data,
                             study=Study(1))
def delete_processed_data(self, study, user, callback):
    """Delete the selected processed data

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    pd_id = int(self.get_argument('processed_data_id'))

    try:
        ProcessedData.delete(pd_id)
        msg = ("Processed data %d has been deleted" % pd_id)
        msg_level = "success"
        pd_id = None
    except Exception as e:
        msg = ("Couldn't remove processed data %d: %s" % (pd_id, str(e)))
        msg_level = "danger"

    callback((msg, msg_level, 'processed_data_tab', pd_id, None))
def test_get_filepath(self):
    """Correctly returns the filepaths to the processed files"""
    # check the test data
    pd = ProcessedData(1)
    obs = pd.get_filepaths()
    exp = [(join(self.db_test_pd_dir,
                 '1_study_1001_closed_reference_otu_table.biom'),
            "biom")]
    self.assertEqual(obs, exp)
def test_get_filepath(self):
    """Correctly returns the filepaths to the processed files"""
    # check the test data
    pd = ProcessedData(1)
    obs = pd.get_filepaths()
    exp = [(11, join(self.db_test_pd_dir,
                     '1_study_1001_closed_reference_otu_table.biom'),
            "biom")]
    self.assertEqual(obs, exp)
def test_create_preprocessed_and_data_type_error(self):
    with self.assertRaises(IncompetentQiitaDeveloperError):
        ProcessedData.create(self.params_table, self.params_id,
                             self.filepaths,
                             preprocessed_data=self.preprocessed_data,
                             data_type="Metabolomics")
def test_get_by_status_grouped_by_study(self):
    obs = ProcessedData.get_by_status_grouped_by_study('sandbox')
    self.assertEqual(obs, dict())

    obs = ProcessedData.get_by_status_grouped_by_study('private')
    self.assertEqual(obs, {1: [1]})

    ProcessedData.create(self.params_table, self.params_id,
                         self.filepaths,
                         preprocessed_data=self.preprocessed_data)
    obs = ProcessedData.get_by_status_grouped_by_study('sandbox')
    self.assertEqual(obs, {1: [2]})
def filter_by_processed_data(self, datatypes=None):
    """Filters results to what is available in each processed data

    Parameters
    ----------
    datatypes : list of str, optional
        Datatypes to selectively return. Default all datatypes available

    Returns
    -------
    study_proc_ids : dict of dicts of lists
        Processed data ids with samples for each study, in the format
        {study_id: {datatype: [proc_id, proc_id, ...], ...}, ...}
    proc_data_samples : dict of lists
        Samples available in each processed data id, in the format
        {proc_data_id: [samp_id1, samp_id2, ...], ...}
    samples_meta : dict of pandas DataFrames
        Metadata for the found samples, keyed by study. Pandas indexed on
        sample_id, column headers are the metadata categories searched
        over
    """
    with TRN:
        if datatypes is not None:
            # convert to set for easy lookups
            datatypes = set(datatypes)
        study_proc_ids = {}
        proc_data_samples = {}
        samples_meta = {}
        headers = {c: val for c, val in enumerate(self.meta_headers)}
        for study_id, study_meta in viewitems(self.results):
            # add metadata to dataframe and dict
            # use from_dict because pandas doesn't like cursor objects
            samples_meta[study_id] = pd.DataFrame.from_dict(
                {s[0]: s[1:] for s in study_meta}, orient='index')
            samples_meta[study_id].rename(columns=headers, inplace=True)
            # set up study-based data needed
            study = Study(study_id)
            study_sample_ids = {s[0] for s in study_meta}
            study_proc_ids[study_id] = defaultdict(list)
            for proc_data_id in study.processed_data():
                proc_data = ProcessedData(proc_data_id)
                datatype = proc_data.data_type()
                # skip processed data if it doesn't fit the given
                # datatypes
                if datatypes is not None and datatype not in datatypes:
                    continue
                filter_samps = proc_data.samples.intersection(
                    study_sample_ids)
                if filter_samps:
                    proc_data_samples[proc_data_id] = sorted(filter_samps)
                    study_proc_ids[study_id][datatype].append(
                        proc_data_id)
    return study_proc_ids, proc_data_samples, samples_meta
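# A minimal usage sketch for filter_by_processed_data, assuming `search`
# is an already-executed search object exposing `meta_headers` and
# `results` as the method above expects; the name is illustrative, not
# part of the API shown here.
study_proc_ids, proc_data_samples, samples_meta = \
    search.filter_by_processed_data(datatypes=['16S', '18S'])
for study_id, by_datatype in study_proc_ids.items():
    # by_datatype maps each datatype to the processed data ids that
    # still had matching samples after the intersection step
    print(study_id, dict(by_datatype))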
def get(self):
    # Format sel_data to get study IDs for the processed data
    sel_data = defaultdict(dict)
    proc_data_info = {}
    sel_samps = Analysis(self.current_user.default_analysis).samples
    for pid, samps in viewitems(sel_samps):
        proc_data = ProcessedData(pid)
        sel_data[proc_data.study][pid] = samps
        # Also get processed data info
        proc_data_info[pid] = proc_data.processing_info
        proc_data_info[pid]['data_type'] = proc_data.data_type()
    self.render("analysis_selected.html", sel_data=sel_data,
                proc_info=proc_data_info)
def _insert_processed_data_target_gene(preprocessed_data, params,
                                       pick_otus_out, **kwargs):
    """Inserts the preprocessed data to the database

    Parameters
    ----------
    preprocessed_data : PreprocessedData
        The preprocessed_data to process
    params : ProcessedSortmernaParams
        The parameters to use for the processing
    pick_otus_out : str
        Path to the pick_closed_reference_otus.py output directory
    kwargs : ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the processed output directory does not contain all the
        expected files
    """
    from os.path import exists, join, isdir
    from glob import glob
    from functools import partial
    from qiita_db.data import ProcessedData

    # The filepaths that we are interested in are:
    #   1) otu_table.biom -> the output OTU table
    #   2) sortmerna_picked_otus -> intermediate output of pick_otus.py
    #   3) log_20141217091339.log -> log file
    path_builder = partial(join, pick_otus_out)
    biom_fp = path_builder('otu_table.biom')
    otus_dp = path_builder('sortmerna_picked_otus')
    log_fps = glob(path_builder('log_*.txt'))

    # Check that all the files exist; checking the glob result here
    # (rather than indexing it first) keeps the documented ValueError
    # instead of an IndexError when the log file is missing
    if not (exists(biom_fp) and isdir(otus_dp) and log_fps):
        raise ValueError("The output directory %s does not contain all "
                         "the expected files." % pick_otus_out)
    log_fp = log_fps[0]

    filepaths = [(biom_fp, "biom"), (otus_dp, "directory"),
                 (log_fp, "log")]
    ProcessedData.create(params._table, params.id, filepaths,
                         preprocessed_data=preprocessed_data)

    # Change the preprocessed_data status to processed
    preprocessed_data.processing_status = 'processed'
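# Hedged usage sketch for _insert_processed_data_target_gene: the object
# ids and the output path are illustrative placeholders. A real call
# would follow a finished pick_closed_reference_otus.py run whose output
# directory holds otu_table.biom, sortmerna_picked_otus/ and a log_*.txt.
_insert_processed_data_target_gene(
    PreprocessedData(1), ProcessedSortmernaParams(1),
    '/path/to/pick_closed_reference_otus_output')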
def test_status(self):
    rd = RawData(1)
    s = Study(1)
    self.assertEqual(rd.status(s), 'private')

    # Since the status is inferred from the processed data, change the
    # status of the processed data so we can check how it changes in the
    # raw data
    pd = ProcessedData(1)
    pd.status = 'public'
    self.assertEqual(rd.status(s), 'public')

    # Check that new raw data has sandbox as status since no
    # processed data exists for them
    rd = RawData.create(self.filetype, self.studies, self.filepaths)
    self.assertEqual(rd.status(s), 'sandbox')
def render(self, study):
    avail_pd = [(pd_id, ProcessedData(pd_id))
                for pd_id in study.processed_data()]
    return self.render_string(
        "study_description_templates/processed_data_tab.html",
        available_processed_data=avail_pd,
        study_id=study.id)
def test_link_filepaths_status_setter(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.link_filepaths_status, 'idle')

    pd._set_link_filepaths_status('linking')
    self.assertEqual(pd.link_filepaths_status, 'linking')

    pd._set_link_filepaths_status('unlinking')
    self.assertEqual(pd.link_filepaths_status, 'unlinking')

    pd._set_link_filepaths_status('failed: error')
    self.assertEqual(pd.link_filepaths_status, 'failed: error')
def test_status(self):
    ppd = PreprocessedData(1)
    self.assertEqual(ppd.status, 'private')

    # Since the status is inferred from the processed data, change the
    # status of the processed data so we can check how it changes in the
    # preprocessed data
    pd = ProcessedData(1)
    pd.status = 'public'
    self.assertEqual(ppd.status, 'public')

    # Check that new preprocessed data has sandbox as status since no
    # processed data exists for them
    ppd = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="16S")
    self.assertEqual(ppd.status, 'sandbox')
def test_status(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.status, 'private')

    pd = ProcessedData.create(self.params_table, self.params_id,
                              self.filepaths,
                              preprocessed_data=self.preprocessed_data)
    self.assertEqual(pd.status, 'sandbox')
def _selected_parser(self, analysis):
    """Builds dictionaries of selected samples from analysis object"""
    selsamples = {}
    selproc_data = defaultdict(list)
    for proc_data_id, samps in viewitems(analysis.samples):
        study = ProcessedData(proc_data_id).study
        selproc_data[study].append(proc_data_id)
        selsamples[study] = set(samps)
    return selproc_data, selsamples
def test_create_no_date(self):
    """Correctly adds a processed data with no date on it"""
    # All the other settings have already been tested in test_create;
    # here we only check that the code added a good date
    before = datetime.now()
    ProcessedData.create(self.params_table, self.params_id,
                         self.filepaths,
                         preprocessed_data=self.preprocessed_data)
    after = datetime.now()
    obs = self.conn_handler.execute_fetchone(
        "SELECT processed_date FROM qiita.processed_data WHERE "
        "processed_data_id=2")[0]

    # Make sure that we clean up the environment
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    self.assertTrue(before <= obs <= after)
def test_create(self):
    """Correctly creates all the rows in the DB for the processed data"""
    # Check that the returned object has the correct id
    obs = ProcessedData.create(self.params_table, self.params_id,
                               self.filepaths,
                               preprocessed_data=self.preprocessed_data,
                               processed_date=self.date)
    self.assertEqual(obs.id, 2)

    # Check that the processed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
    # processed_data_id, processed_params_table, processed_params_id,
    # processed_date, data_type_id, link_filepaths_status,
    # processed_data_status_id
    exp = [[2, "processed_params_uclust", 1, self.date, 2, 'idle', 4]]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self.assertTrue(exists(exp_biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    # Check that the filepaths have been correctly added to the DB
    obs_id = self.conn_handler.execute_fetchone(
        "SELECT count(1) FROM qiita.filepath")[0]
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % obs_id)
    exp_biom_fp = "2_%s" % basename(self.biom_fp)
    # filepath_id, path, filepath_type_id
    exp = [[obs_id, exp_biom_fp, 6, '852952723', 1, 4]]
    self.assertEqual(obs, exp)

    # Check that the processed data have been correctly linked
    # with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
    # processed_data_id, filepath_id
    self.assertEqual(obs, [[2, obs_id]])

    # Check that the processed data have been correctly linked with the
    # study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_processed_data WHERE "
        "processed_data_id=2")
    # study_id, processed_data_id
    self.assertEqual(obs, [[1, 2]])

    # Check that the processed data have been correctly linked with the
    # preprocessed data
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_processed_data WHERE "
        "processed_data_id=2")
    # preprocessed_data_id, processed_data_id
    self.assertEqual(obs, [[1, 2]])
def make_public(self, study, user, callback):
    """Makes the current study public

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    pd_id = int(self.get_argument('pd_id'))
    pd = ProcessedData(pd_id)
    pd.status = 'public'
    msg = "Processed data set to public"
    msg_level = "success"
    callback((msg, msg_level, "processed_data_tab", pd_id, None))
def make_sandbox(self, study, user, callback):
    """Reverts the current study to the 'sandbox' status

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    pd_id = int(self.get_argument('pd_id'))
    pd = ProcessedData(pd_id)
    pd.status = 'sandbox'
    msg = "Processed data reverted to sandbox"
    msg_level = "success"
    callback((msg, msg_level, "processed_data_tab", pd_id, None))
def request_approval(self, study, user, callback):
    """Changes the status of the current study to "awaiting_approval"

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    pd_id = int(self.get_argument('pd_id'))
    pd = ProcessedData(pd_id)
    pd.status = 'awaiting_approval'
    msg = "Processed data sent to admin for approval"
    msg_level = "success"
    callback((msg, msg_level, "processed_data_tab", pd_id, None))
def test_create(self):
    """Correctly creates all the rows in the DB for the processed data"""
    # Check that the returned object has the correct id
    obs = ProcessedData.create(self.params_table, self.params_id,
                               self.filepaths,
                               preprocessed_data=self.preprocessed_data,
                               processed_date=self.date)
    self.assertEqual(obs.id, 2)

    # Check that the processed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
    # processed_data_id, processed_params_table, processed_params_id,
    # processed_date, data_type_id, link_filepaths_status
    exp = [[2, "processed_params_uclust", 1, self.date, 2, 'idle']]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self.assertTrue(exists(exp_biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    # Check that the filepaths have been correctly added to the DB
    obs_id = self.conn_handler.execute_fetchone(
        "SELECT count(1) FROM qiita.filepath")[0]
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % obs_id)
    exp_biom_fp = "2_%s" % basename(self.biom_fp)
    # filepath_id, path, filepath_type_id
    exp = [[obs_id, exp_biom_fp, 6, '852952723', 1, 4]]
    self.assertEqual(obs, exp)

    # Check that the processed data have been correctly linked
    # with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
    # processed_data_id, filepath_id
    self.assertEqual(obs, [[2, obs_id]])

    # Check that the processed data have been correctly linked with the
    # study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_processed_data WHERE "
        "processed_data_id=2")
    # study_id, processed_data_id
    self.assertEqual(obs, [[1, 2]])

    # Check that the processed data have been correctly linked with the
    # preprocessed data
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_processed_data WHERE "
        "processed_data_id=2")
    # preprocessed_data_id, processed_data_id
    self.assertEqual(obs, [[1, 2]])
def get(self, analysis_id):
    user = self.current_user
    analysis_id = int(analysis_id.split("/")[0])
    analysis = Analysis(analysis_id)
    check_analysis_access(User(user), analysis)

    jobres = defaultdict(list)
    for job in analysis.jobs:
        jobject = Job(job)
        jobres[jobject.datatype].append((jobject.command[0],
                                         jobject.results))

    dropped = {}
    for proc_data_id, samples in viewitems(analysis.dropped_samples):
        proc_data = ProcessedData(proc_data_id)
        key = "Data type %s, Study: %s" % (proc_data.data_type(),
                                           proc_data.study)
        dropped[key] = samples

    self.render("analysis_results.html", user=self.current_user,
                jobres=jobres, aname=analysis.name, dropped=dropped,
                basefolder=get_db_files_base_dir())
def get(self):
    user = self.current_user
    if user.level != 'admin':
        raise HTTPError(403, 'User %s is not admin' % self.current_user)

    result_generator = viewitems(
        ProcessedData.get_by_status_grouped_by_study('awaiting_approval'))
    study_generator = ((Study(sid), pds) for sid, pds in result_generator)
    parsed_studies = [(s.id, s.title, s.owner, pds)
                      for s, pds in study_generator]

    self.render('admin_approval.html', study_info=parsed_studies)
def approve_study(self, study, user, callback):
    """Approves the current study if and only if the current user is admin

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the
        processing is done
    """
    # Fetch the id up front so it is defined for the callback even when
    # the permission check fails
    pd_id = int(self.get_argument("pd_id"))
    if _approve(user.level):
        pd = ProcessedData(pd_id)
        pd.status = "private"
        msg = "Processed data approved"
        msg_level = "success"
    else:
        msg = ("The current user does not have permission to approve "
               "the processed data")
        msg_level = "danger"
    callback((msg, msg_level, "processed_data_tab", pd_id, None))
def test_create_params_table_error(self):
    """Raises an error if the processed_params_table does not exist"""
    with self.assertRaises(IncompetentQiitaDeveloperError):
        ProcessedData.create("foo", self.params_id, self.filepaths,
                             preprocessed_data=self.preprocessed_data)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        ProcessedData.create("processed_params_foo", self.params_id,
                             self.filepaths,
                             preprocessed_data=self.preprocessed_data)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        ProcessedData.create("processed_params_", self.params_id,
                             self.filepaths,
                             preprocessed_data=self.preprocessed_data)
def test_create_w_study(self):
    """Correctly adds a processed data passing a study"""
    obs = ProcessedData.create(self.params_table, self.params_id,
                               self.filepaths, study=Study(1),
                               processed_date=self.date, data_type="18S")

    # Check that the processed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
    # processed_data_id, processed_params_table, processed_params_id,
    # processed_date, data_type_id
    exp = [[2, "processed_params_uclust", 1, self.date, 2]]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self.assertTrue(exists(exp_biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    # Check that the filepaths have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=15")
    # filepath_id, path, filepath_type_id
    exp = [[15, exp_biom_fp, 6, '852952723', 1]]
    self.assertEqual(obs, exp)

    # Check that the processed data have been correctly linked
    # with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
    # processed_data_id, filepath_id
    self.assertEqual(obs, [[2, 15]])

    # Check that the processed data have been correctly linked with the
    # study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_processed_data WHERE "
        "processed_data_id=2")
    # study_id, processed_data_id
    self.assertEqual(obs, [[1, 2]])
def test_create_w_study(self):
    """Correctly adds a processed data passing a study"""
    obs = ProcessedData.create(self.params_table, self.params_id,
                               self.filepaths, study=Study(1),
                               processed_date=self.date, data_type="18S")

    # Check that the processed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
    # processed_data_id, processed_params_table, processed_params_id,
    # processed_date, data_type_id, link_filepaths_status
    exp = [[2, "processed_params_uclust", 1, self.date, 2, 'idle']]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self.assertTrue(exists(exp_biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    # Check that the filepaths have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=17")
    exp_biom_fp = "2_%s" % basename(self.biom_fp)
    # filepath_id, path, filepath_type_id
    exp = [[17, exp_biom_fp, 6, '852952723', 1, 4]]
    self.assertEqual(obs, exp)

    # Check that the processed data have been correctly linked
    # with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
    # processed_data_id, filepath_id
    self.assertEqual(obs, [[2, 17]])

    # Check that the processed data have been correctly linked with the
    # study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_processed_data WHERE "
        "processed_data_id=2")
    # study_id, processed_data_id
    self.assertEqual(obs, [[1, 2]])
def test_get_by_status(self):
    pds = ProcessedData.get_by_status('sandbox')
    self.assertEqual(pds, set())

    pds = ProcessedData.get_by_status('private')
    self.assertEqual(pds, set([1]))

    ProcessedData.create(self.params_table, self.params_id,
                         self.filepaths,
                         preprocessed_data=self.preprocessed_data)
    pds = ProcessedData.get_by_status('sandbox')
    self.assertEqual(pds, set([2]))

    pds = ProcessedData.get_by_status('private')
    self.assertEqual(pds, set([1]))
def test_get_filepath_ids(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.get_filepath_ids(), [11])
def test_link_filepaths_status(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.link_filepaths_status, 'idle')
def test_data_type_id(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.data_type(ret_id=True), 2)
def test_data_type(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.data_type(), "18S")
def test_get_filepath_ids(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.get_filepath_ids(), [10])
def test_data_type(self):
    """Correctly returns the data_type of the processed data"""
    pd = ProcessedData(1)
    self.assertEqual(pd.data_type(), "18S")
def test_data_type_id(self):
    """Correctly returns the data_type id of the processed data"""
    pd = ProcessedData(1)
    self.assertEqual(pd.data_type(ret_id=True), 2)
def test_preprocessed_data(self):
    """Correctly returns the preprocessed_data"""
    pd = ProcessedData(1)
    self.assertEqual(pd.preprocessed_data, 1)
def test_processed_date(self):
    pd = ProcessedData(1)
    self.assertEqual(pd.processed_date, datetime(2012, 10, 1, 9, 30, 27))
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Builds list of dicts for studies table, with all HTML formatted

    Parameters
    ----------
    user : User object
        logged in user
    study_proc : dict of lists, optional
        Dictionary keyed on study_id that lists all processed data
        associated with that study. Required if proc_samples given.
    proc_samples : dict of lists, optional
        Dictionary keyed on proc_data_id that lists all samples
        associated with that processed data. Required if study_proc
        given.

    Returns
    -------
    infolist : list of dict of lists and dicts
        study and processed data info for JSON serialization for
        datatables. Each dict in the list is a single study.

    Notes
    -----
    Both study_proc and proc_samples must be passed, or neither passed.
    """
    build_samples = False
    # Logic check to make sure both needed parts passed
    if study_proc is not None and proc_samples is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    elif proc_samples is not None and study_proc is None:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    elif study_proc is None:
        build_samples = True

    # get list of studies for table
    study_set = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if study_proc is not None:
        study_set = study_set.intersection(study_proc)
    if not study_set:
        # No studies left so no need to continue
        return []

    # get info for the studies
    cols = ['study_id', 'email', 'principal_investigator_id', 'pmid',
            'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']
    study_info = Study.get_info(study_set, cols)

    infolist = []
    for info in study_info:
        # Convert DictCursor to proper dict
        info = dict(info)
        study = Study(info['study_id'])
        # Build the processed data info for the study if none passed
        if build_samples:
            proc_data_list = study.processed_data()
            proc_samples = {}
            study_proc = {study.id: defaultdict(list)}
            for pid in proc_data_list:
                proc_data = ProcessedData(pid)
                study_proc[study.id][proc_data.data_type()].append(pid)
                proc_samples[pid] = proc_data.samples
        study_info = _build_single_study_info(study, info, study_proc,
                                              proc_samples)
        infolist.append(study_info)
    return infolist
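# Hedged usage sketch for _build_study_info. With neither study_proc nor
# proc_samples supplied, the helper walks every ProcessedData of each
# visible study itself; the email is an illustrative placeholder, not a
# fixture value from the tests above.
infolist = _build_study_info(User('user@example.com'))
# Each entry is one study row ready for the datatables JSON payload.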
def test_create_no_preprocessed_no_study_error(self):
    with self.assertRaises(IncompetentQiitaDeveloperError):
        ProcessedData.create(self.params_table, self.params_id,
                             self.filepaths)
def test_retrieve_dropped_samples(self):
    # Create and populate second study to do test with
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    metadata_dict = {
        'SKB8.640193': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 1',
                        'str_column': 'Value for sample 1',
                        'latitude': 42.42,
                        'longitude': 41.41},
        'SKD8.640184': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 2',
                        'str_column': 'Value for sample 2',
                        'latitude': 4.2,
                        'longitude': 1.1},
        'SKB7.640196': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 3',
                        'str_column': 'Value for sample 3',
                        'latitude': 4.8,
                        'longitude': 4.41},
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    Study.create(User("*****@*****.**"), "Test study 2", [1], info)
    SampleTemplate.create(metadata, Study(2))
    mp = get_mountpoint("processed_data")[0][1]
    study_fp = join(mp, "2_study_1001_closed_reference_otu_table.biom")
    ProcessedData.create("processed_params_uclust", 1, [(study_fp, 6)],
                         study=Study(2), data_type="16S")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES "
        "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
        "(1,2,'2.SKB7.640196')")

    samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
               2: ['2.SKB8.640193', '2.SKD8.640184']}
    self.analysis._build_biom_tables(samples, 10000,
                                     conn_handler=self.conn_handler)
    exp = {1: {'1.SKM4.640180', '1.SKM9.640192'},
           2: {'2.SKB7.640196'}}
    self.assertEqual(self.analysis.dropped_samples, exp)