def test_get_by_status_grouped_by_study(self):
    """Statuses map to {study_id: [processed_data_ids]} before/after create."""
    # No sandboxed data exists yet; study 1 has one private processed data
    self.assertEqual(
        ProcessedData.get_by_status_grouped_by_study('sandbox'), dict())
    self.assertEqual(
        ProcessedData.get_by_status_grouped_by_study('private'), {1: [1]})

    # Newly created processed data starts in sandbox and shows up there
    ProcessedData.create(self.params_table, self.params_id, self.filepaths,
                         preprocessed_data=self.preprocessed_data)
    self.assertEqual(
        ProcessedData.get_by_status_grouped_by_study('sandbox'), {1: [2]})
def _insert_processed_data_target_gene(preprocessed_data, params,
                                       pick_otus_out, **kwargs):
    """Inserts the preprocessed data to the database

    Parameters
    ----------
    preprocessed_data : PreprocessedData
        The preprocessed_data to process
    params : ProcessedSortmernaParams
        The parameters to use for the processing
    pick_otus_out : str
        Path to the pick_closed_reference_otus.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the processed output directory does not contain all the expected
        files
    """
    from os.path import exists, join, isdir
    from glob import glob
    from functools import partial

    from qiita_db.data import ProcessedData

    # The filepaths that we are interested in are:
    # 1) otu_table.biom -> the output OTU table
    # 2) sortmerna_picked_otus -> intermediate output of pick_otus.py
    # 3) log_*.txt -> log file of the OTU picking run
    path_builder = partial(join, pick_otus_out)
    biom_fp = path_builder('otu_table.biom')
    otus_dp = path_builder('sortmerna_picked_otus')
    # BUG FIX: the glob result was indexed unconditionally, so a missing log
    # file escaped as an opaque IndexError instead of the documented
    # ValueError. Inspect the list before taking its first element.
    log_fps = glob(path_builder('log_*.txt'))

    # Check that all the files exist
    if not (log_fps and exists(biom_fp) and isdir(otus_dp)):
        raise ValueError("The output directory %s does not contain all the "
                         "expected files." % pick_otus_out)
    log_fp = log_fps[0]

    filepaths = [(biom_fp, "biom"),
                 (otus_dp, "directory"),
                 (log_fp, "log")]

    ProcessedData.create(params._table, params.id, filepaths,
                         preprocessed_data=preprocessed_data)

    # Change the preprocessed_data status to processed
    preprocessed_data.processing_status = 'processed'
def test_get_by_status(self):
    """get_by_status returns the expected id sets before and after create."""
    self.assertEqual(ProcessedData.get_by_status('sandbox'), set())
    self.assertEqual(ProcessedData.get_by_status('private'), {1})

    # A freshly created processed data lands in sandbox; private unchanged
    ProcessedData.create(self.params_table, self.params_id, self.filepaths,
                         preprocessed_data=self.preprocessed_data)
    self.assertEqual(ProcessedData.get_by_status('sandbox'), {2})
    self.assertEqual(ProcessedData.get_by_status('private'), {1})
def test_status(self):
    """Existing data is private; newly created data starts in sandbox."""
    self.assertEqual(ProcessedData(1).status, 'private')

    new_pd = ProcessedData.create(self.params_table, self.params_id,
                                  self.filepaths,
                                  preprocessed_data=self.preprocessed_data)
    self.assertEqual(new_pd.status, 'sandbox')
def test_create_no_date(self):
    """Correctly adds a processed data with no date on it"""
    # All other settings are exercised by test_create; here we only verify
    # that create() stamps a sensible processed_date when none is given.
    t_start = datetime.now()
    ProcessedData.create(self.params_table, self.params_id, self.filepaths,
                         preprocessed_data=self.preprocessed_data)
    t_end = datetime.now()

    stored_date = self.conn_handler.execute_fetchone(
        "SELECT processed_date FROM qiita.processed_data WHERE "
        "processed_data_id=2")[0]

    # Make sure that we clean up the environment
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    self.assertTrue(t_start <= stored_date <= t_end)
def test_create(self): """Correctly creates all the rows in the DB for the processed data""" # Check that the returned object has the correct id obs = ProcessedData.create(self.params_table, self.params_id, self.filepaths, preprocessed_data=self.preprocessed_data, processed_date=self.date) self.assertEqual(obs.id, 2) # Check that the processed data have been correctly added to the DB obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.processed_data WHERE processed_data_id=2") # processed_data_id, processed_params_table, processed_params_id, # processed_date, data_type_id, link_filepaths_status, # processed_data_status_id exp = [[2, "processed_params_uclust", 1, self.date, 2, 'idle', 4]] self.assertEqual(obs, exp) # Check that the files have been copied to right location exp_biom_fp = join(self.db_test_pd_dir, "2_%s" % basename(self.biom_fp)) self.assertTrue(exists(exp_biom_fp)) self._clean_up_files.append(exp_biom_fp) # Check that the filepaths have been correctly added to the DB obs_id = self.conn_handler.execute_fetchone( "SELECT count(1) from qiita.filepath")[0] obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % obs_id) exp_biom_fp = "2_%s" % basename(self.biom_fp) # Filepath_id, path, filepath_type_id exp = [[obs_id, exp_biom_fp, 6, '852952723', 1, 4]] self.assertEqual(obs, exp) # Check that the processed data have been correctly linked # with the fileapths obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2") # processed_data_id, filepath_id self.assertEqual(obs, [[2, obs_id]]) # Check that the processed data have been correctly linked with the # study obs = self.conn_handler.execute_fetchall( "SELECT * FROM qiita.study_processed_data WHERE " "processed_data_id=2") # study_id, processed_data self.assertEqual(obs, [[1, 2]]) # Check that the processed data have been correctly linked with the # preprocessed data obs = 
self.conn_handler.execute_fetchall( "SELECT * FROM qiita.preprocessed_processed_data WHERE " "processed_data_id=2") # preprocessed_data_id, processed_Data_id self.assertEqual(obs, [[1, 2]])
def test_create(self):
    """Correctly creates all the rows in the DB for the processed data"""
    # The returned object should carry the next available id
    new_pd = ProcessedData.create(self.params_table, self.params_id,
                                  self.filepaths,
                                  preprocessed_data=self.preprocessed_data,
                                  processed_date=self.date)
    self.assertEqual(new_pd.id, 2)

    # Verify the processed_data row itself
    observed = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
    # processed_data_id, processed_params_table, processed_params_id,
    # processed_date, data_type_id, link_filepaths_status
    expected = [[2, "processed_params_uclust", 1, self.date, 2, 'idle']]
    self.assertEqual(observed, expected)

    # The biom file must have been copied to the DB-managed location
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self.assertTrue(exists(exp_biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    # Verify the filepath row: the newest filepath_id equals the row count
    last_fp_id = self.conn_handler.execute_fetchone(
        "SELECT count(1) from qiita.filepath")[0]
    observed = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % last_fp_id)
    exp_biom_fp = "2_%s" % basename(self.biom_fp)
    # Filepath_id, path, filepath_type_id
    expected = [[last_fp_id, exp_biom_fp, 6, '852952723', 1, 4]]
    self.assertEqual(observed, expected)

    # Verify the processed data <-> filepath link
    observed = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
    # processed_data_id, filepath_id
    self.assertEqual(observed, [[2, last_fp_id]])

    # Verify the processed data <-> study link
    observed = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_processed_data WHERE "
        "processed_data_id=2")
    # study_id, processed_data
    self.assertEqual(observed, [[1, 2]])

    # Verify the processed data <-> preprocessed data link
    observed = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_processed_data WHERE "
        "processed_data_id=2")
    # preprocessed_data_id, processed_data_id
    self.assertEqual(observed, [[1, 2]])
def test_create_params_table_error(self):
    """Raises an error if the processed_params_table does not exist"""
    # Each of these fails the "processed_params_*" naming/lookup check
    for bad_table in ("foo", "processed_params_foo", "processed_params_"):
        with self.assertRaises(IncompetentQiitaDeveloperError):
            ProcessedData.create(bad_table, self.params_id, self.filepaths,
                                 preprocessed_data=self.preprocessed_data)
def test_create_w_study(self):
    """Correctly adds a processed data passing a study"""
    obs = ProcessedData.create(self.params_table, self.params_id,
                               self.filepaths, study=Study(1),
                               processed_date=self.date, data_type="18S")

    # Check that the processed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
    # processed_data_id, processed_params_table, processed_params_id,
    # processed_date, data_type_id, link_filepaths_status
    exp = [[2, "processed_params_uclust", 1, self.date, 2, 'idle']]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to right location
    exp_biom_fp = join(self.db_test_pd_dir,
                       "2_%s" % basename(self.biom_fp))
    self.assertTrue(exists(exp_biom_fp))
    self._clean_up_files.append(exp_biom_fp)

    # Check that the filepaths have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=17")
    exp_biom_fp = "2_%s" % basename(self.biom_fp)
    # Filepath_id, path, filepath_type_id
    exp = [[17, exp_biom_fp, 6, '852952723', 1, 4]]
    self.assertEqual(obs, exp)

    # Check that the processed data have been correctly linked
    # with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
    # processed_data_id, filepath_id
    # BUG FIX: this used self.assertTrue(obs, [[2, 10]]), where the second
    # argument is the failure *message*, so any non-empty result passed
    # without any comparison. Compare for real against the filepath id (17)
    # that was verified just above.
    self.assertEqual(obs, [[2, 17]])

    # Check that the processed data have been correctly linked with the
    # study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_processed_data WHERE "
        "processed_data_id=2")
    # study_id, processed_data
    self.assertEqual(obs, [[1, 2]])
def test_delete(self):
    """Correctly deletes a processed data"""
    # A sandboxed processed data can be removed without error
    data = ProcessedData.create(self.params_table, self.params_id,
                                self.filepaths,
                                preprocessed_data=self.preprocessed_data,
                                processed_date=self.date)
    ProcessedData.delete(data.id)

    # Deleting again raises: the id no longer exists
    with self.assertRaises(QiitaDBUnknownIDError):
        ProcessedData.delete(data.id)

    # Cannot remove because the processed data status is not sandbox
    with self.assertRaises(QiitaDBStatusError):
        ProcessedData.delete(1)

    # Cannot remove because the processed data is used by analyses
    ProcessedData(1).status = 'sandbox'
    with self.assertRaises(QiitaDBError):
        ProcessedData.delete(1)
def test_retrieve_dropped_samples(self):
    """dropped_samples reports the samples excluded from the biom tables."""
    # Create and populate second study to do test with
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    # Minimal sample template: three samples for the new study
    metadata_dict = {
        'SKB8.640193': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 1',
                        'str_column': 'Value for sample 1',
                        'latitude': 42.42,
                        'longitude': 41.41},
        'SKD8.640184': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 2',
                        'str_column': 'Value for sample 2',
                        'latitude': 4.2,
                        'longitude': 1.1},
        'SKB7.640196': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status': 'received',
                        'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 3',
                        'str_column': 'Value for sample 3',
                        'latitude': 4.8,
                        'longitude': 4.41},
    }
    # NOTE: within this test, `pd` is pandas (module-level import), not a
    # ProcessedData instance as in sibling tests.
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    Study.create(User("*****@*****.**"), "Test study 2", [1], info)
    SampleTemplate.create(metadata, Study(2))
    # Register a processed biom for the new study (16S, uclust params id 1)
    mp = get_mountpoint("processed_data")[0][1]
    study_fp = join(mp, "2_study_1001_closed_reference_otu_table.biom")
    ProcessedData.create("processed_params_uclust", 1, [(study_fp, 6)],
                         study=Study(2), data_type="16S")
    # Attach three study-2 samples to analysis 1 directly in the DB
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES "
        "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
        "(1,2,'2.SKB7.640196')")
    # Build the biom tables keeping only a subset of the attached samples;
    # the ones left out should be reported as dropped
    samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
               2: ['2.SKB8.640193', '2.SKD8.640184']}
    self.analysis._build_biom_tables(samples, 10000,
                                     conn_handler=self.conn_handler)
    exp = {1: {'1.SKM4.640180', '1.SKM9.640192'},
           2: {'2.SKB7.640196'}}
    self.assertEqual(self.analysis.dropped_samples, exp)
def test_create_no_preprocessed_and_study_error(self):
    """create fails when neither preprocessed_data nor study is provided."""
    self.assertRaises(IncompetentQiitaDeveloperError, ProcessedData.create,
                      self.params_table, self.params_id, self.filepaths)
def test_create_preprocessed_and_data_type_error(self):
    """create fails when both preprocessed_data and data_type are given."""
    self.assertRaises(IncompetentQiitaDeveloperError, ProcessedData.create,
                      self.params_table, self.params_id, self.filepaths,
                      preprocessed_data=self.preprocessed_data,
                      data_type="Metabolomics")