예제 #1
0
파일: test_data.py 프로젝트: zonca/qiita
    def test_get_by_status_grouped_by_study(self):
        """Checks the per-study grouping before and after a new creation"""
        grouped_by_study = ProcessedData.get_by_status_grouped_by_study

        # Expected state before this test creates anything
        self.assertEqual(grouped_by_study('sandbox'), {})
        self.assertEqual(grouped_by_study('private'), {1: [1]})

        # The newly created processed data is expected under 'sandbox'
        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        self.assertEqual(grouped_by_study('sandbox'), {1: [2]})
예제 #2
0
def _insert_processed_data_target_gene(preprocessed_data, params,
                                       pick_otus_out, **kwargs):
    """Inserts the processed data to the database

    The output directory is validated first; only then is the processed data
    created and the preprocessed data flagged as processed.

    Parameters
    ----------
    preprocessed_data : PreprocessedData
        The preprocessed_data to process
    params : ProcessedSortmernaParams
        The parameters to use for the processing
    pick_otus_out : str
        Path to the pick_closed_reference_otus.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the processed output directory does not contain all the expected
        files
    """
    from os.path import exists, join, isdir
    from glob import glob
    from functools import partial

    # The filepaths that we are interested in are:
    #   1) otu_table.biom -> the output OTU table
    #   2) sortmerna_picked_otus -> intermediate output of pick_otus.py
    #   3) log_*.txt -> log file (e.g. log_20141217091339.txt)

    path_builder = partial(join, pick_otus_out)
    biom_fp = path_builder('otu_table.biom')
    otus_dp = path_builder('sortmerna_picked_otus')
    # Keep the whole list: indexing an empty glob result would raise
    # IndexError rather than the documented ValueError
    log_fps = glob(path_builder('log_*.txt'))

    # Check that all the files exist (glob only returns existing paths, so a
    # non-empty result is enough for the log file)
    if not (log_fps and exists(biom_fp) and isdir(otus_dp)):
        raise ValueError("The output directory %s does not contain all the "
                         "expected files." % pick_otus_out)
    log_fp = log_fps[0]

    filepaths = [(biom_fp, "biom"),
                 (otus_dp, "directory"),
                 (log_fp, "log")]

    # Imported here, after validation, so that an incomplete output
    # directory is reported without needing the database layer
    from qiita_db.data import ProcessedData

    ProcessedData.create(params._table, params.id, filepaths,
                         preprocessed_data=preprocessed_data)

    # Change the preprocessed_data status to processed
    preprocessed_data.processing_status = 'processed'
예제 #3
0
파일: test_data.py 프로젝트: zonca/qiita
    def test_get_by_status(self):
        """Checks the ids returned per status before and after a creation"""
        # Expected state before this test creates anything
        self.assertEqual(ProcessedData.get_by_status('sandbox'), set())
        self.assertEqual(ProcessedData.get_by_status('private'), {1})

        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)

        # The new processed data is expected under 'sandbox' only
        sandbox_ids = ProcessedData.get_by_status('sandbox')
        self.assertEqual(sandbox_ids, {2})

        private_ids = ProcessedData.get_by_status('private')
        self.assertEqual(private_ids, {1})
예제 #4
0
파일: test_data.py 프로젝트: zonca/qiita
    def test_status(self):
        """Checks the status of an existing and of a new processed data"""
        existing = ProcessedData(1)
        self.assertEqual(existing.status, 'private')

        created = ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        self.assertEqual(created.status, 'sandbox')
예제 #5
0
    def test_create_no_date(self):
        """Correctly adds a processed data with no date on it"""
        # Only the automatically assigned date is verified here; the other
        # settings are already covered by test_create
        lower_bound = datetime.now()
        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        upper_bound = datetime.now()

        date_sql = ("SELECT processed_date FROM qiita.processed_data WHERE "
                    "processed_data_id=2")
        stored_date = self.conn_handler.execute_fetchone(date_sql)[0]

        # Register the expected biom file so the environment is cleaned up
        biom_name = "2_%s" % basename(self.biom_fp)
        self._clean_up_files.append(join(self.db_test_pd_dir, biom_name))

        # The stored date must fall between the two timestamps taken
        # around the creation
        self.assertLessEqual(lower_bound, stored_date)
        self.assertLessEqual(stored_date, upper_bound)
예제 #6
0
    def test_create_no_date(self):
        """Correctly adds a processed data with no date on it"""
        # test_create covers every other setting, so this test looks only
        # at the date that the code assigned
        started = datetime.now()
        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        finished = datetime.now()

        row = self.conn_handler.execute_fetchone(
            "SELECT processed_date FROM qiita.processed_data "
            "WHERE processed_data_id=2")
        assigned_date = row[0]

        # Schedule the expected biom file for removal during clean up
        new_biom_fp = join(self.db_test_pd_dir,
                           "2_%s" % basename(self.biom_fp))
        self._clean_up_files.append(new_biom_fp)

        date_in_window = started <= assigned_date <= finished
        self.assertTrue(date_in_window)
예제 #7
0
파일: test_data.py 프로젝트: zonca/qiita
    def test_create(self):
        """Correctly creates all the rows in the DB for the processed data"""
        fetchall = self.conn_handler.execute_fetchall

        # The returned object must carry the expected id
        new_pd = ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data,
            processed_date=self.date)
        self.assertEqual(new_pd.id, 2)

        # Row added to qiita.processed_data. Columns: processed_data_id,
        # processed_params_table, processed_params_id, processed_date,
        # data_type_id, link_filepaths_status, processed_data_status_id
        pd_rows = fetchall(
            "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
        self.assertEqual(
            pd_rows,
            [[2, "processed_params_uclust", 1, self.date, 2, 'idle', 4]])

        # The biom file must have been copied to the right location
        biom_name = "2_%s" % basename(self.biom_fp)
        stored_biom_fp = join(self.db_test_pd_dir, biom_name)
        self.assertTrue(exists(stored_biom_fp))
        self._clean_up_files.append(stored_biom_fp)

        # Filepath row: the number of rows in qiita.filepath is used as the
        # id of the filepath that was just added
        fp_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.filepath")[0]
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % fp_id)
        # Leading columns: filepath_id, path, filepath_type_id
        self.assertEqual(fp_rows,
                         [[fp_id, biom_name, 6, '852952723', 1, 4]])

        # Link between the processed data and its filepath
        # (processed_data_id, filepath_id)
        link_rows = fetchall(
            "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
        self.assertEqual(link_rows, [[2, fp_id]])

        # Link between the study and the processed data
        # (study_id, processed_data_id)
        study_rows = fetchall(
            "SELECT * FROM qiita.study_processed_data "
            "WHERE processed_data_id=2")
        self.assertEqual(study_rows, [[1, 2]])

        # Link between the preprocessed and the processed data
        # (preprocessed_data_id, processed_data_id)
        prep_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_processed_data "
            "WHERE processed_data_id=2")
        self.assertEqual(prep_rows, [[1, 2]])
예제 #8
0
    def test_create(self):
        """Correctly creates all the rows in the DB for the processed data"""
        # Creation must hand back an object with the expected id
        created = ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data,
            processed_date=self.date)
        self.assertEqual(created.id, 2)

        # The processed data row itself. Columns: processed_data_id,
        # processed_params_table, processed_params_id, processed_date,
        # data_type_id, link_filepaths_status
        pd_sql = "SELECT * FROM qiita.processed_data WHERE processed_data_id=2"
        found = self.conn_handler.execute_fetchall(pd_sql)
        wanted = [[2, "processed_params_uclust", 1, self.date, 2, 'idle']]
        self.assertEqual(found, wanted)

        # The biom file has to be present at its new location
        new_name = "2_%s" % basename(self.biom_fp)
        new_location = join(self.db_test_pd_dir, new_name)
        self.assertTrue(exists(new_location))
        self._clean_up_files.append(new_location)

        # The filepath row; the row count of qiita.filepath serves as the id
        # of the most recently added filepath
        count_sql = "SELECT count(1) from qiita.filepath"
        last_fp_id = self.conn_handler.execute_fetchone(count_sql)[0]
        fp_sql = "SELECT * FROM qiita.filepath WHERE filepath_id=%d"
        found = self.conn_handler.execute_fetchall(fp_sql % last_fp_id)
        # Leading columns: filepath_id, path, filepath_type_id
        wanted = [[last_fp_id, new_name, 6, '852952723', 1, 4]]
        self.assertEqual(found, wanted)

        # processed data <-> filepath link: processed_data_id, filepath_id
        found = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
        self.assertEqual(found, [[2, last_fp_id]])

        # study <-> processed data link: study_id, processed_data_id
        found = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_processed_data "
            "WHERE processed_data_id=2")
        self.assertEqual(found, [[1, 2]])

        # preprocessed <-> processed data link:
        # preprocessed_data_id, processed_data_id
        found = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_processed_data "
            "WHERE processed_data_id=2")
        self.assertEqual(found, [[1, 2]])
예제 #9
0
 def test_create_params_table_error(self):
     """Raises an error if the processed_params_table does not exist"""
     # Every one of these table names has to be rejected by create
     bad_tables = ("foo", "processed_params_foo", "processed_params_")
     for bad_table in bad_tables:
         with self.assertRaises(IncompetentQiitaDeveloperError):
             ProcessedData.create(
                 bad_table, self.params_id, self.filepaths,
                 preprocessed_data=self.preprocessed_data)
예제 #10
0
 def test_create_params_table_error(self):
     """Raises an error if the processed_params_table does not exist"""
     # create is invoked once per table name and must fail each time
     for table_name in ["foo", "processed_params_foo", "processed_params_"]:
         self.assertRaises(
             IncompetentQiitaDeveloperError, ProcessedData.create,
             table_name, self.params_id, self.filepaths,
             preprocessed_data=self.preprocessed_data)
예제 #11
0
    def test_create_w_study(self):
        """Correctly adds a processed data passing a study"""
        # The returned object is not inspected here; only the DB state is
        ProcessedData.create(self.params_table, self.params_id,
                             self.filepaths, study=Study(1),
                             processed_date=self.date, data_type="18S")

        # Check that the processed data have been correctly added to the DB
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
        # processed_data_id, processed_params_table, processed_params_id,
        # processed_date, data_type_id, link_filepaths_status
        exp = [[2, "processed_params_uclust", 1, self.date, 2, 'idle']]
        self.assertEqual(obs, exp)

        # Check that the files have been copied to right location
        exp_biom_fp = join(self.db_test_pd_dir,
                           "2_%s" % basename(self.biom_fp))
        self.assertTrue(exists(exp_biom_fp))
        self._clean_up_files.append(exp_biom_fp)

        # Check that the filepaths have been correctly added to the DB
        # Id expected for the filepath added by this test
        exp_fp_id = 17
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=17")
        exp_biom_fp = "2_%s" % basename(self.biom_fp)
        # Leading columns: filepath_id, path, filepath_type_id
        exp = [[exp_fp_id, exp_biom_fp, 6, '852952723', 1, 4]]
        self.assertEqual(obs, exp)

        # Check that the processed data have been correctly linked
        # with the filepaths
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
        # processed_data_id, filepath_id
        # assertEqual, not assertTrue: assertTrue would take the expected
        # list as the failure message and pass for any non-empty result
        self.assertEqual(obs, [[2, exp_fp_id]])

        # Check that the processed data have been correctly linked with the
        # study
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_processed_data WHERE "
            "processed_data_id=2")
        # study_id, processed_data
        self.assertEqual(obs, [[1, 2]])
예제 #12
0
파일: test_data.py 프로젝트: zonca/qiita
    def test_delete(self):
        """Correctly deletes a processed data"""
        # Regular delete of a processed data created by this test
        created = ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data,
            processed_date=self.date)
        ProcessedData.delete(created.id)

        # Deleting the same id a second time must fail: it no longer exists
        with self.assertRaises(QiitaDBUnknownIDError):
            ProcessedData.delete(created.id)

        # Processed data 1 is not in sandbox status, so it can't be removed
        self.assertRaises(QiitaDBStatusError, ProcessedData.delete, 1)

        # Once set to sandbox it still can't be removed, as it has analyses
        sandboxed = ProcessedData(1)
        sandboxed.status = 'sandbox'
        self.assertRaises(QiitaDBError, ProcessedData.delete, 1)
예제 #13
0
    def test_retrieve_dropped_samples(self):
        """Checks `dropped_samples` after building tables for two studies"""
        def sample_metadata(description, str_value, latitude, longitude):
            # Every sample shares these keys (in this order) and most of the
            # values; only the four arguments differ between samples
            return {'physical_location': 'location1',
                    'has_physical_specimen': True,
                    'has_extracted_data': True,
                    'sample_type': 'type1',
                    'required_sample_info_status': 'received',
                    'collection_timestamp':
                    datetime(2014, 5, 29, 12, 24, 51),
                    'host_subject_id': 'NotIdentified',
                    'Description': description,
                    'str_column': str_value,
                    'latitude': latitude,
                    'longitude': longitude}

        # Create and populate a second study to run the test against
        study_description = ("Microbiome of people who eat nothing but "
                             "fried chicken")
        study_abstract = ("Exploring how a high fat diet changes the "
                          "gut microbiome")
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": study_description,
            "study_abstract": study_abstract,
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        metadata_dict = {
            'SKB8.640193': sample_metadata(
                'Test Sample 1', 'Value for sample 1', 42.42, 41.41),
            'SKD8.640184': sample_metadata(
                'Test Sample 2', 'Value for sample 2', 4.2, 1.1),
            'SKB7.640196': sample_metadata(
                'Test Sample 3', 'Value for sample 3', 4.8, 4.41),
        }
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

        Study.create(User("*****@*****.**"), "Test study 2", [1], info)
        SampleTemplate.create(metadata, Study(2))

        # Attach a processed data to the new study and link its three
        # samples to analysis 1
        mountpoint = get_mountpoint("processed_data")[0][1]
        biom_fp = join(mountpoint,
                       "2_study_1001_closed_reference_otu_table.biom")
        ProcessedData.create("processed_params_uclust", 1, [(biom_fp, 6)],
                             study=Study(2), data_type="16S")
        insert_sql = (
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
            "(1,2,'2.SKB7.640196')")
        self.conn_handler.execute(insert_sql)

        # Build the tables with '2.SKB7.640196' left out of the selection
        selected = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
                    2: ['2.SKB8.640193', '2.SKD8.640184']}
        self.analysis._build_biom_tables(selected, 10000,
                                         conn_handler=self.conn_handler)

        expected_dropped = {1: {'1.SKM4.640180', '1.SKM9.640192'},
                            2: {'2.SKB7.640196'}}
        self.assertEqual(self.analysis.dropped_samples, expected_dropped)
예제 #14
0
 def test_create_no_preprocessed_and_study_error(self):
     """Raises an error if neither preprocessed data nor study is passed"""
     self.assertRaises(IncompetentQiitaDeveloperError,
                       ProcessedData.create, self.params_table,
                       self.params_id, self.filepaths)
예제 #15
0
 def test_create_preprocessed_and_data_type_error(self):
     """Raises an error if a data type is passed with preprocessed data"""
     self.assertRaises(
         IncompetentQiitaDeveloperError, ProcessedData.create,
         self.params_table, self.params_id, self.filepaths,
         preprocessed_data=self.preprocessed_data,
         data_type="Metabolomics")