Example #1
    def test_load_processed_data_from_cmd(self):
        filepaths = [self.otu_table_fp, self.otu_table_2_fp]
        filepath_types = ['biom', 'biom']

        initial_processed_data_count = get_count('qiita.processed_data')
        initial_processed_fp_count = get_count('qiita.processed_filepath')
        initial_fp_count = get_count('qiita.filepath')

        new = load_processed_data_cmd(filepaths, filepath_types,
                                      'processed_params_uclust', 1, 1, None)
        processed_data_id = new.id
        self.files_to_remove.append(
            join(self.db_test_processed_data_dir,
                 '%d_%s' % (processed_data_id, basename(self.otu_table_fp))))
        self.files_to_remove.append(
            join(self.db_test_processed_data_dir,
                 '%d_%s' % (processed_data_id,
                            basename(self.otu_table_2_fp))))

        self.assertTrue(check_count('qiita.processed_data',
                                    initial_processed_data_count + 1))
        self.assertTrue(check_count('qiita.processed_filepath',
                                    initial_processed_fp_count + 2))
        self.assertTrue(check_count('qiita.filepath',
                                    initial_fp_count + 2))

        # Ensure that the ValueError is raised when a filepath_type is not
        # provided for each and every filepath
        with self.assertRaises(ValueError):
            load_processed_data_cmd(filepaths, filepath_types[:-1],
                                    'processed_params_uclust', 1, 1, None)
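
Every example here leans on the get_count and check_count helpers to compare
row counts before and after an operation. A minimal sketch of what such
helpers could look like, assuming an object with an execute_fetchone method
like the tests' self.conn_handler (the real helpers live in qiita_db.util and
manage their own connection, so the exact signatures may differ):

    def get_count(table, conn_handler):
        # Table names come from trusted test code; identifiers cannot be
        # passed as SQL parameters, so interpolation is acceptable here
        sql = "SELECT COUNT(*) FROM %s" % table
        return conn_handler.execute_fetchone(sql)[0]

    def check_count(table, exp_count, conn_handler):
        # True if `table` currently holds exactly `exp_count` rows
        return get_count(table, conn_handler) == exp_count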
Example #2
    def _common_purge_filpeaths_test(self):
        # Get all the filepaths so we can test if they've been removed or not
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        fps = [join(get_mountpoint_path_by_id(dd_id), fp) for fp, dd_id in
               self.conn_handler.execute_fetchall(sql_fp)]

        # Make sure that the files exist - especially for Travis
        for fp in fps:
            if not exists(fp):
                with open(fp, "w") as f:
                    f.write("\n")
                self.files_to_remove.append(fp)

        _, raw_data_mp = get_mountpoint("raw_data")[0]

        removed_fps = [
            join(raw_data_mp, "2_sequences_barcodes.fastq.gz"),
            join(raw_data_mp, "2_sequences.fastq.gz")]

        for fp in removed_fps:
            with open(fp, "w") as f:
                f.write("\n")

        sql = """INSERT INTO qiita.filepath
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                VALUES ('2_sequences_barcodes.fastq.gz', 3, '852952723', 1, 5),
                       ('2_sequences.fastq.gz', 1, '852952723', 1, 5)
                RETURNING filepath_id"""
        fp_ids = self.conn_handler.execute_fetchall(sql)

        fps = set(fps).difference(removed_fps)

        # Check that the files exist
        for fp in fps:
            self.assertTrue(exists(fp))
        for fp in removed_fps:
            self.assertTrue(exists(fp))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths()

        obs_count = get_count("qiita.filepath")

        # Check that only 2 rows have been removed
        self.assertEqual(obs_count, exp_count)

        # Check that the 2 rows that have been removed are the correct ones
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        obs = self.conn_handler.execute_fetchone(sql, (fp_ids[0][0],))[0]
        self.assertFalse(obs)
        obs = self.conn_handler.execute_fetchone(sql, (fp_ids[1][0],))[0]
        self.assertFalse(obs)

        # Check that the files have been successfully removed
        for fp in removed_fps:
            self.assertFalse(exists(fp))

        # Check that all the other files still exist
        for fp in fps:
            self.assertTrue(exists(fp))
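
The behavior this test exercises - deleting qiita.filepath rows that no
longer have a referencing row in any of the *_filepath linking tables, and
unlinking the corresponding files on disk - could be sketched roughly as
follows. This is assumed logic for illustration, not the actual Qiita
implementation, and the list of linking tables is abbreviated:

    from os import remove
    from os.path import exists, join

    def purge_filepaths(conn_handler):
        # Find filepath rows that nothing references any more
        sql = """SELECT filepath_id, filepath, data_directory_id
                 FROM qiita.filepath
                 WHERE filepath_id NOT IN (
                     SELECT filepath_id FROM qiita.raw_filepath
                     UNION
                     SELECT filepath_id FROM qiita.preprocessed_filepath
                     UNION
                     SELECT filepath_id FROM qiita.analysis_filepath)"""
        for fp_id, fp, dd_id in conn_handler.execute_fetchall(sql):
            conn_handler.execute(
                "DELETE FROM qiita.filepath WHERE filepath_id = %s", (fp_id,))
            fullpath = join(get_mountpoint_path_by_id(dd_id), fp)
            if exists(fullpath):
                remove(fullpath)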
Example #3
    def test_delete(self):
        # successful delete
        total_analyses = get_count("qiita.analysis")
        Analysis.delete(1)
        self.assertEqual(total_analyses - 1, get_count("qiita.analysis"))

        # not possible to delete
        with self.assertRaises(QiitaDBUnknownIDError):
            Analysis.delete(total_analyses + 1)
Example #4
 def test_exists(self):
     qiita_config.portal = 'QIITA'
     self.assertTrue(Analysis.exists(1))
     new_id = get_count("qiita.analysis") + 1
     self.assertFalse(Analysis.exists(new_id))
     qiita_config.portal = 'EMP'
     self.assertFalse(Analysis.exists(1))
     new_id = get_count("qiita.analysis") + 1
     self.assertFalse(Analysis.exists(new_id))
Example #5
    def test_artifact_post_req(self):
        # Create new prep template to attach artifact to
        pt = npt.assert_warns(
            QiitaDBWarning, PrepTemplate.create,
            pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
        self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])

        new_artifact_id = get_count('qiita.artifact') + 1
        filepaths = {'raw_forward_seqs': 'uploaded_file.txt',
                     'raw_barcodes': 'update.txt'}
        obs = artifact_post_req(
            '*****@*****.**', filepaths, 'FASTQ', 'New Test Artifact', pt.id)
        exp = {'status': 'success',
               'message': ''}
        self.assertEqual(obs, exp)

        obs = r_client.get('prep_template_%d' % pt.id)
        self.assertIsNotNone(obs)
        redis_info = loads(r_client.get(loads(obs)['job_id']))
        while redis_info['status_msg'] == 'Running':
            sleep(0.05)
            redis_info = loads(r_client.get(loads(obs)['job_id']))

        # Instantiate the artifact to make sure it was made and
        # to clean the environment
        a = Artifact(new_artifact_id)
        self._files_to_remove.extend([fp for _, fp, _ in a.filepaths])

        # Test importing an artifact
        # Create new prep template to attach artifact to
        pt = npt.assert_warns(
            QiitaDBWarning, PrepTemplate.create,
            pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
        self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])

        new_artifact_id_2 = get_count('qiita.artifact') + 1
        obs = artifact_post_req(
            '*****@*****.**', {}, 'FASTQ', 'New Test Artifact 2', pt.id,
            new_artifact_id)
        exp = {'status': 'success',
               'message': ''}
        self.assertEqual(obs, exp)

        obs = r_client.get('prep_template_%d' % pt.id)
        self.assertIsNotNone(obs)
        redis_info = loads(r_client.get(loads(obs)['job_id']))
        while redis_info['status_msg'] == 'Running':
            sleep(0.05)
            redis_info = loads(r_client.get(loads(obs)['job_id']))
        # Instantiate the artifact to make sure it was made and
        # to clean the environment
        a = Artifact(new_artifact_id_2)
        self._files_to_remove.extend([fp for _, fp, _ in a.filepaths])
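
The two polling loops above busy-wait on redis with no upper bound, so a
stalled job would hang the test suite. A small helper with a timeout, written
against the same r_client/loads usage the test already shows (the helper
itself is hypothetical), keeps the pattern in one place:

    from json import loads
    from time import sleep, time

    def wait_for_prep_job(r_client, prep_id, timeout=60, poll=0.05):
        # Resolve the job id attached to the prep template, then poll its
        # status until it stops reporting 'Running' or the timeout elapses
        job_id = loads(r_client.get('prep_template_%d' % prep_id))['job_id']
        deadline = time() + timeout
        redis_info = loads(r_client.get(job_id))
        while redis_info['status_msg'] == 'Running':
            if time() > deadline:
                raise RuntimeError('job %s timed out' % job_id)
            sleep(poll)
            redis_info = loads(r_client.get(job_id))
        return redis_info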
Example #6
    def _common_purge_filpeaths_test(self):
        # Get all the filepaths so we can test if they've been removed or not
        sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
        fps = [join(get_mountpoint_path_by_id(dd_id), fp) for fp, dd_id in
               self.conn_handler.execute_fetchall(sql_fp)]

        # Make sure that the files exist - especially for Travis
        for fp in fps:
            if not exists(fp):
                with open(fp, 'w') as f:
                    f.write('\n')
                self.files_to_remove.append(fp)

        _, raw_data_mp = get_mountpoint('raw_data')[0]

        removed_fps = [
            join(raw_data_mp, '2_sequences_barcodes.fastq.gz'),
            join(raw_data_mp, '2_sequences.fastq.gz')]

        fps = set(fps).difference(removed_fps)

        # Check that the files exist
        for fp in fps:
            self.assertTrue(exists(fp))
        for fp in removed_fps:
            self.assertTrue(exists(fp))

        exp_count = get_count("qiita.filepath") - 2

        purge_filepaths(self.conn_handler)

        obs_count = get_count("qiita.filepath")

        # Check that only 2 rows have been removed
        self.assertEqual(obs_count, exp_count)

        # Check that the 2 rows that have been removed are the correct ones
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
        obs = self.conn_handler.execute_fetchone(sql, (3,))[0]
        self.assertFalse(obs)
        obs = self.conn_handler.execute_fetchone(sql, (4,))[0]
        self.assertFalse(obs)

        # Check that the files have been successfully removed
        for fp in removed_fps:
            self.assertFalse(exists(fp))

        # Check that all the other files still exist
        for fp in fps:
            self.assertTrue(exists(fp))
Example #7
    def test_build_mapping_file(self):
        new_id = get_count('qiita.filepath') + 1
        samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']}
        self.analysis._build_mapping_file(samples)
        obs = self.analysis.mapping_file
        self.assertEqual(obs, self.map_fp)

        base_dir = get_mountpoint('analysis')[0][1]
        obs = pd.read_csv(obs, sep='\t', infer_datetime_format=True,
                          parse_dates=True, index_col=False, comment='\t')
        exp = pd.read_csv(join(base_dir, '1_analysis_mapping_exp.txt'),
                          sep='\t', infer_datetime_format=True,
                          parse_dates=True, index_col=False, comment='\t')

        assert_frame_equal(obs, exp)

        sql = """SELECT * FROM qiita.filepath
                 WHERE filepath=%s ORDER BY filepath_id"""
        obs = self.conn_handler.execute_fetchall(
            sql, ("%d_analysis_mapping.txt" % self.analysis.id,))

        exp = [[13, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
               [new_id, '1_analysis_mapping.txt', 9, '1606265094', 1, 1]]
        self.assertEqual(obs, exp)

        sql = """SELECT * FROM qiita.analysis_filepath
                 WHERE analysis_id=%s ORDER BY filepath_id"""
        obs = self.conn_handler.execute_fetchall(sql, (self.analysis.id,))
        exp = [[1L, 14L, 2L], [1L, 15L, None], [1L, new_id, None]]
        self.assertEqual(obs, exp)
Example #8
    def test_create_templates_from_qiime_mapping_file_reverse_linker(self):
        new_pt_id = get_count('qiita.prep_template') + 1
        obs_st, obs_pt = create_templates_from_qiime_mapping_file(
            StringIO(QIIME_MAP_WITH_REVERSE_LINKER_PRIMER),
            self.new_study, "16S")

        # Be green: clean the environment
        for template in [obs_st, obs_pt]:
            for _, fp in template.get_filepaths():
                self._clean_up_files.append(fp)

        self.assertEqual(obs_st.id, self.new_study.id)
        self.assertEqual(obs_pt.id, new_pt_id)

        # Check that each template has the correct columns
        exp = {"physical_specimen_location", "physical_specimen_remaining",
               "dna_extracted", "sample_type", "host_subject_id", "latitude",
               "longitude", "taxon_id", "scientific_name",
               "collection_timestamp", "description"}
        self.assertEqual(set(obs_st.categories()), exp)

        exp = {"barcode", "primer", "center_name", "run_prefix", "platform",
               "library_construction_protocol",
               "experiment_design_description", "reverselinkerprimer"}
        self.assertEqual(set(obs_pt.categories()), exp)
Example #9
    def test_post_edit_blank_doi(self):
        study_count_before = get_count('qiita.study')
        study = Study(1)
        study_info = study.info

        post_data = {
            'new_people_names': [],
            'new_people_emails': [],
            'new_people_affiliations': [],
            'new_people_addresses': [],
            'new_people_phones': [],
            'study_title': 'New title - test post edit',
            'study_alias': study_info['study_alias'],
            'publications_doi': '',
            'study_abstract': study_info['study_abstract'],
            'study_description': study_info['study_description'],
            'principal_investigator': study_info['principal_investigator'].id,
            'lab_person': study_info['lab_person'].id}

        self.post('/study/edit/1', post_data)

        # Check that the study was updated
        self.assertTrue(check_count('qiita.study', study_count_before))
        self.assertEqual(study.title, 'New title - test post edit')
        self.assertEqual(study.publications, [])
Example #10
 def test_import_preprocessed_data(self):
     initial_ppd_count = get_count('qiita.preprocessed_data')
     initial_fp_count = get_count('qiita.filepath')
     ppd = load_preprocessed_data_from_cmd(
         1, 'preprocessed_sequence_illumina_params',
         self.tmpdir, 'preprocessed_sequences', 1, False, 1)
     self.files_to_remove.append(
         join(self.db_test_ppd_dir,
              '%d_%s' % (ppd.id, basename(self.file1))))
     self.files_to_remove.append(
         join(self.db_test_ppd_dir,
              '%d_%s' % (ppd.id, basename(self.file2))))
     self.assertEqual(ppd.id, 3)
     self.assertTrue(check_count('qiita.preprocessed_data',
                                 initial_ppd_count + 1))
     self.assertTrue(check_count('qiita.filepath', initial_fp_count+2))
Example #11
    def test_new_person_created(self):
        person_count_before = get_count('qiita.study_person')

        post_data = {'new_people_names': ['Adam', 'Ethan'],
                     'new_people_emails': ['*****@*****.**', '*****@*****.**'],
                     'new_people_affiliations': ['CU Boulder', 'NYU'],
                     'new_people_addresses': ['Some St., Boulder, CO 80305',
                                              ''],
                     'new_people_phones': ['', ''],
                     'study_title': 'dummy title',
                     'study_alias': 'dummy alias',
                     'pubmed_id': 'dummy pmid',
                     'investigation_type': 'eukaryote',
                     'environmental_packages': 'air',
                     'is_timeseries': 'y',
                     'study_abstract': "dummy abstract",
                     'study_description': 'dummy description',
                     'principal_investigator': '-2',
                     'lab_person': '1'}

        self.post('/study/create/', post_data)

        # Check that the new person was created
        expected_id = person_count_before + 1
        self.assertTrue(check_count('qiita.study_person', expected_id))

        new_person = StudyPerson(expected_id)
        self.assertTrue(new_person.name == 'Ethan')
        self.assertTrue(new_person.email == '*****@*****.**')
        self.assertTrue(new_person.affiliation == 'NYU')
        self.assertTrue(new_person.address is None)
        self.assertTrue(new_person.phone is None)
Example #12
    def test_post_edit(self):
        study_count_before = get_count('qiita.study')
        study = Study(1)
        study_info = study.info

        post_data = {
            'new_people_names': [],
            'new_people_emails': [],
            'new_people_affiliations': [],
            'new_people_addresses': [],
            'new_people_phones': [],
            'study_title': 'dummy title',
            'study_alias': study_info['study_alias'],
            'publications_doi': ','.join(
                [doi for doi, _ in study.publications]),
            'study_abstract': study_info['study_abstract'],
            'study_description': study_info['study_description'],
            'principal_investigator': study_info['principal_investigator_id'],
            'lab_person': study_info['lab_person_id']}

        self.post('/study/edit/1', post_data)

        # Check that the study was updated
        self.assertTrue(check_count('qiita.study', study_count_before))
        self.assertEqual(study.title, 'dummy title')
Example #13
    def test_create(self):
        """Correctly creates the rows in the DB for the reference"""
        fp_count = get_count('qiita.filepath')
        # Check that the returned object has the correct id
        obs = Reference.create(self.name, self.version, self.seqs_fp,
                               self.tax_fp, self.tree_fp)
        self.assertEqual(obs.id, 2)

        seqs_id = fp_count + 1
        tax_id = fp_count + 2
        tree_id = fp_count + 3

        # Check that the information on the database is correct
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.reference WHERE reference_id=2")
        exp = [[2, self.name, self.version, seqs_id, tax_id, tree_id]]
        self.assertEqual(obs, exp)

        # Check that the filepaths have been correctly added to the DB
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%s or "
            "filepath_id=%s or filepath_id=%s", (seqs_id, tax_id, tree_id))
        exp_seq = "%s_%s_%s" % (self.name, self.version,
                                basename(self.seqs_fp))
        exp_tax = "%s_%s_%s" % (self.name, self.version,
                                basename(self.tax_fp))
        exp_tree = "%s_%s_%s" % (self.name, self.version,
                                 basename(self.tree_fp))
        exp = [[seqs_id, exp_seq, 10, '0', 1, 6],
               [tax_id, exp_tax, 11, '0', 1, 6],
               [tree_id, exp_tree, 12, '0', 1, 6]]
        self.assertEqual(obs, exp)
Example #14
    def test_load_data_from_cmd(self):
        filepaths = [self.forward_fp, self.reverse_fp, self.barcodes_fp]
        filepath_types = ['raw_forward_seqs', 'raw_reverse_seqs',
                          'raw_barcodes']

        filetype = 'FASTQ'
        metadata_dict = {
            'SKB8.640193': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'GTCCGCAAGTTA',
                            'run_prefix': "s_G1_L001_sequences",
                            'platform': 'ILLUMINA',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'}}
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        pt1 = PrepTemplate.create(metadata, Study(1), "16S")
        prep_templates = [pt1.id]

        initial_raw_count = get_count('qiita.raw_data')
        initial_fp_count = get_count('qiita.filepath')
        initial_raw_fp_count = get_count('qiita.raw_filepath')

        new = load_raw_data_cmd(filepaths, filepath_types, filetype,
                                prep_templates)
        raw_data_id = new.id
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.forward_fp))))
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.reverse_fp))))
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.barcodes_fp))))

        self.assertTrue(check_count('qiita.raw_data', initial_raw_count + 1))
        self.assertTrue(check_count('qiita.filepath',
                                    initial_fp_count + 3))
        self.assertTrue(check_count('qiita.raw_filepath',
                                    initial_raw_fp_count + 3))

        # Ensure that the ValueError is raised when a filepath_type is not
        # provided for each and every filepath
        with self.assertRaises(ValueError):
            load_raw_data_cmd(filepaths, filepath_types[:-1], filetype,
                              prep_templates)
Example #15
 def test_set_step(self):
     new_id = get_count("qiita.analysis") + 1
     new = Analysis.create(User("*****@*****.**"), "newAnalysis",
                           "A New Analysis", Analysis(1))
     new.step = 2
     sql = "SELECT * FROM qiita.analysis_workflow WHERE analysis_id = %s"
     obs = self.conn_handler.execute_fetchall(sql, [new_id])
     self.assertEqual(obs, [[new_id, 2]])
Example #16
 def test_prep_template_delete_req(self):
     template = pd.read_csv(self.update_fp, sep='\t', index_col=0)
     new_id = get_count('qiita.prep_template') + 1
     npt.assert_warns(QiitaDBWarning, PrepTemplate.create,
                      template, Study(1), '16S')
     obs = prep_template_delete_req(new_id, '*****@*****.**')
     exp = {'status': 'success',
            'message': ''}
     self.assertEqual(obs, exp)
Example #17
 def test_post(self):
     new_prep_id = get_count('qiita.prep_template') + 1
     arguments = {'study_id': '1',
                  'data-type': '16S',
                  'prep-file': 'new_template.txt'}
     response = self.post('/prep_template/', arguments)
     self.assertEqual(response.code, 200)
     # Check that the new prep template has been created
     self.assertTrue(PrepTemplate.exists(new_prep_id))
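
A pattern running through these tests is predicting the next primary key as
get_count(table) + 1. That only holds while ids are gapless and nothing else
inserts concurrently; once any row has been deleted, COUNT(*) and the
sequence value drift apart. Where the test performs the INSERT itself, asking
PostgreSQL for the assigned id - as Example #2 does with RETURNING
filepath_id - is sturdier. A sketch (the study_person columns are inferred
from Example #11; the exact schema is an assumption):

    # Fragile: breaks if rows were ever deleted or tests run in parallel
    new_id = get_count('qiita.study_person') + 1

    # Sturdier: let the database report the id it actually assigned
    sql = """INSERT INTO qiita.study_person (name, email, affiliation)
             VALUES (%s, %s, %s) RETURNING study_person_id"""
    new_id = conn_handler.execute_fetchone(
        sql, ('Ethan', 'ethan@example.com', 'NYU'))[0]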
Example #18
    def _get_stats(self, callback):
        # check if the key exists in redis
        redis_lats_key = '%s:stats:sample_lats' % qiita_config.portal
        redis_longs_key = '%s:stats:sample_longs' % qiita_config.portal
        lats = r_client.lrange(redis_lats_key, 0, -1)
        longs = r_client.lrange(redis_longs_key, 0, -1)
        if not (lats and longs):
            # if we don't have them, then fetch from disk and add to the
            # redis server with a 24-hour expiration
            lat_longs = get_lat_longs()
            lats = [float(x[0]) for x in lat_longs]
            longs = [float(x[1]) for x in lat_longs]
            with r_client.pipeline() as pipe:
                for latitude, longitude in lat_longs:
                    # storing as a simple data structure, hopefully this
                    # doesn't burn us later
                    pipe.rpush(redis_lats_key, latitude)
                    pipe.rpush(redis_longs_key, longitude)

                # queue the expirations after the pushes so the keys expire
                # in 24 hours, limiting how often we have to go back to the
                # database
                pipe.expire(redis_lats_key, 86400)
                pipe.expire(redis_longs_key, 86400)

                pipe.execute()
        else:
            # If we do have them, put the redis results into the same structure
            # that would come back from the database
            longs = [float(x) for x in longs]
            lats = [float(x) for x in lats]
            lat_longs = zip(lats, longs)

        # Get the number of studies
        num_studies = get_count('qiita.study')

        # Get the number of samples
        num_samples = len(lats)

        # Get the number of users
        num_users = get_count('qiita.qiita_user')

        callback([num_studies, num_samples, num_users, lat_longs])
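
_get_stats is a read-through cache: serve the coordinate lists from redis
when present, otherwise rebuild them from the database and store them with a
24-hour TTL. The same shape, reduced to a reusable sketch (the helper and its
names are illustrative, not part of Qiita):

    ONE_DAY = 86400

    def cached_list(r_client, key, fetch, ttl=ONE_DAY):
        # Return the cached list at `key`; on a miss, rebuild it via
        # `fetch()` (a callable returning a list) and store it with a TTL
        values = r_client.lrange(key, 0, -1)
        if values:
            return values
        values = fetch()
        with r_client.pipeline() as pipe:
            for v in values:
                pipe.rpush(key, v)
            # queue the TTL after the pushes so it applies to the new key
            pipe.expire(key, ttl)
            pipe.execute()
        return values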
Example #19
    def test_add_results(self):
        fp_count = get_count('qiita.filepath')
        self.job.add_results([(join(self._job_folder, "1_job_result.txt"),
                             "plain_text")])

        # make sure files attached to job properly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")

        self.assertEqual(obs, [[1, 10], [1, fp_count + 1]])
Example #20
    def test_update_preprocessed_data_from_cmd(self):
        exp_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
        exp_fps = exp_ppd.get_filepaths()

        # The original paths must exist, but they're not included in the test
        # setup, so create them here
        for _, fp, _ in exp_fps:
            with open(fp, 'w') as f:
                f.write("")

        next_fp_id = get_count('qiita.filepath') + 1
        exp_fps.append(
            (next_fp_id,
             join(self.db_ppd_dir, "%s_split_library_log.txt" % exp_ppd.id),
             'log'))

        ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

        # Check that the modified preprocessed data is the correct one
        self.assertEqual(ppd.id, exp_ppd.id)

        # Check that the filepaths returned are correct
        # We need to sort the list returned from the db because the ordering
        # on that list is based on db modification time, rather than id
        obs_fps = sorted(ppd.get_filepaths())
        self.assertEqual(obs_fps, sorted(exp_fps))

        # Check that the checksums have been updated
        sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

        # Checksum of the fasta file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[0][0],))[0]
        self.assertEqual(obs_checksum, '3532748626')

        # Checksum of the fastq file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[1][0],))[0]
        self.assertEqual(obs_checksum, '2958832064')

        # Checksum of the demux file
        # The demux file is regenerated, so its checksum changes between runs.
        # Instead of a fixed value, check that it is no longer the checksum
        # of the previous (empty) file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[2][0],))[0]
        self.assertTrue(isinstance(obs_checksum, str))
        self.assertNotEqual(obs_checksum, '852952723')
        self.assertTrue(len(obs_checksum) > 0)

        # Checksum of the log file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[3][0],))[0]
        self.assertEqual(obs_checksum, '626839734')
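
The checksum literals asserted here ('3532748626', '2958832064', ...) are
unsigned 32-bit values stored as text, consistent with a CRC32. A sketch of
how such a checksum could be computed, assuming CRC32 is indeed the algorithm
behind checksum_algorithm_id = 1 (an assumption; the tests only compare the
stored strings):

    from zlib import crc32

    def compute_checksum(filepath):
        # CRC32 of the file contents as an unsigned decimal string; the
        # mask keeps the result positive on both Python 2 and 3
        with open(filepath, 'rb') as f:
            return str(crc32(f.read()) & 0xffffffff)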
Example #21
 def test_post(self):
     new_aid = get_count('qiita.analysis') + 1
     post_args = {
         'name': 'post-test',
         'description': "test of posting"}
     response = self.post('/analysis/3', post_args)
     # Make sure the page response loaded successfully
     self.assertEqual(response.code, 200)
     # make sure analysis created
     analysis = Analysis(new_aid)
     self.assertEqual(analysis.name, 'post-test')
Example #22
    def test_create_nonqiita_portal(self):
        new_id = get_count("qiita.analysis") + 1
        qiita_config.portal = "EMP"
        Analysis.create(User("*****@*****.**"), "newAnalysis",
                        "A New Analysis")

        # make sure portal is associated
        obs = self.conn_handler.execute_fetchall(
            "SELECT * from qiita.analysis_portal WHERE analysis_id = %s",
            [new_id])
        self.assertEqual(obs, [[new_id, 2], [new_id, 1]])
Example #23
    def test_add_message(self):
        count = get_count('qiita.message') + 1
        users = [User('*****@*****.**'), User('*****@*****.**')]
        add_message("TEST MESSAGE", users)

        obs = [[x[0], x[1]] for x in User('*****@*****.**').messages()]
        exp = [[count, 'TEST MESSAGE'], [1, 'message 1']]
        self.assertEqual(obs, exp)
        obs = [[x[0], x[1]] for x in User('*****@*****.**').messages()]
        exp = [[count, 'TEST MESSAGE']]
        self.assertEqual(obs, exp)
Example #24
    def test_update_preprocessed_data_from_cmd_ppd(self):
        exp_ppd = PreprocessedData(2)

        next_fp_id = get_count('qiita.filepath') + 1
        exp_fps = []
        path_builder = partial(join, self.db_ppd_dir)
        suffix_types = [("seqs.fna", "preprocessed_fasta"),
                        ("seqs.fastq", "preprocessed_fastq"),
                        ("seqs.demux", "preprocessed_demux"),
                        ("split_library_log.txt", "log")]
        for id_, vals in enumerate(suffix_types, start=next_fp_id):
            suffix, fp_type = vals
            exp_fps.append(
                (id_, path_builder("%s_%s" % (exp_ppd.id, suffix)), fp_type))

        ppd = update_preprocessed_data_from_cmd(self.test_slo, 1, 2)

        # Check that the modified preprocessed data is the correct one
        self.assertEqual(ppd.id, exp_ppd.id)

        # Check that the filepaths returned are correct
        # We need to sort the list returned from the db because the ordering
        # on that list is based on db modification time, rather than id
        obs_fps = sorted(ppd.get_filepaths())
        self.assertEqual(obs_fps, exp_fps)

        # Check that the checksums have been updated
        sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

        # Checksum of the fasta file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[0][0],))[0]
        self.assertEqual(obs_checksum, '3532748626')

        # Checksum of the fastq file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[1][0],))[0]
        self.assertEqual(obs_checksum, '2958832064')

        # Checksum of the demux file
        # The demux file is regenerated, so its checksum changes between runs.
        # Instead of a fixed value, check that it is no longer the checksum
        # of the previous (empty) file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[2][0],))[0]
        self.assertTrue(isinstance(obs_checksum, str))
        self.assertNotEqual(obs_checksum, '852952723')
        self.assertTrue(len(obs_checksum) > 0)

        # Checksum of the log file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[3][0],))[0]
        self.assertEqual(obs_checksum, '626839734')
Example #25
    def test_add_results_dir(self):
        fp_count = get_count('qiita.filepath')
        # Create a test directory
        test_dir = join(self._job_folder, "2_test_folder")

        # add folder to job
        self.job.add_results([(test_dir, "directory")])

        # make sure files attached to job properly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
        self.assertEqual(obs, [[1, 10], [1, fp_count + 1]])
Example #26
    def test_create(self):
        """Correctly creates all the rows in the DB for the raw data"""
        # Check that the returned object has the correct id
        exp_id = get_count("qiita.raw_data") + 1
        obs = RawData.create(self.filetype, self.prep_templates,
                             self.filepaths)
        self.assertEqual(obs.id, exp_id)

        # Check that the raw data have been correctly added to the DB
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % exp_id)
        # raw_data_id, filetype, link_filepaths_status
        self.assertEqual(obs, [[exp_id, 2, 'idle']])

        # Check that the raw data has been correctly linked with the prep
        # templates
        sql = """SELECT prep_template_id
                 FROM qiita.prep_template
                 WHERE raw_data_id = %s
                 ORDER BY prep_template_id"""
        obs = self.conn_handler.execute_fetchall(sql, (exp_id,))
        self.assertEqual(obs, [[self.pt1.id], [self.pt2.id]])

        # Check that the files have been copied to right location
        exp_seqs_fp = join(self.db_test_raw_dir,
                           "%d_%s" % (exp_id, basename(self.seqs_fp)))
        self.assertTrue(exists(exp_seqs_fp))
        self._clean_up_files.append(exp_seqs_fp)

        exp_bc_fp = join(self.db_test_raw_dir,
                         "%d_%s" % (exp_id, basename(self.barcodes_fp)))
        self.assertTrue(exists(exp_bc_fp))
        self._clean_up_files.append(exp_bc_fp)

        # Check that the filepaths have been correctly added to the DB
        top_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) FROM qiita.filepath")[0]
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
            "filepath_id=%d" % (top_id - 1, top_id))
        exp_seqs_fp = "%d_%s" % (exp_id, basename(self.seqs_fp))
        exp_bc_fp = "%d_%s" % (exp_id, basename(self.barcodes_fp))
        # filepath_id, path, filepath_type_id
        exp = [[top_id - 1, exp_seqs_fp, 1, '852952723', 1, 5],
               [top_id, exp_bc_fp, 2, '852952723', 1, 5]]
        self.assertEqual(obs, exp)

        # Check that the raw data have been correctly linked with the filepaths
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % exp_id)
        # raw_data_id, filepath_id
        self.assertEqual(obs, [[exp_id, top_id - 1], [exp_id, top_id]])
Example #27
    def test_load_data_from_cmd(self):
        filepaths = [self.forward_fp, self.reverse_fp, self.barcodes_fp]
        filepath_types = ['raw_forward_seqs', 'raw_reverse_seqs',
                          'raw_barcodes']

        filetype = 'FASTQ'
        study_ids = [1]

        initial_raw_count = get_count('qiita.raw_data')
        initial_fp_count = get_count('qiita.filepath')
        initial_raw_fp_count = get_count('qiita.raw_filepath')

        new = load_raw_data_cmd(filepaths, filepath_types, filetype,
                                study_ids)
        raw_data_id = new.id
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.forward_fp))))
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.reverse_fp))))
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.barcodes_fp))))

        self.assertTrue(check_count('qiita.raw_data', initial_raw_count + 1))
        self.assertTrue(check_count('qiita.filepath',
                                    initial_fp_count + 3))
        self.assertTrue(check_count('qiita.raw_filepath',
                                    initial_raw_fp_count + 3))
        self.assertTrue(check_count('qiita.study_raw_data',
                                    initial_raw_count + 1))

        # Ensure that the ValueError is raised when a filepath_type is not
        # provided for each and every filepath
        with self.assertRaises(ValueError):
            load_raw_data_cmd(filepaths, filepath_types[:-1], filetype,
                              study_ids)
Example #28
 def test_create(self):
     sql = "SELECT EXTRACT(EPOCH FROM NOW())"
     time1 = float(self.conn_handler.execute_fetchall(sql)[0][0])
     new_id = get_count("qiita.analysis") + 1
     new = Analysis.create(User("*****@*****.**"), "newAnalysis",
                           "A New Analysis")
     self.assertEqual(new.id, new_id)
     sql = ("SELECT analysis_id, email, name, description, "
            "analysis_status_id, pmid, EXTRACT(EPOCH FROM timestamp) "
            "FROM qiita.analysis WHERE analysis_id = %s")
     obs = self.conn_handler.execute_fetchall(sql, [new_id])
     self.assertEqual(obs[0][:-1], [new_id, '*****@*****.**', 'newAnalysis',
                                    'A New Analysis', 1, None])
     self.assertTrue(time1 < float(obs[0][-1]))
Example #29
 def test_create_exists_return_existing(self):
     """Makes sure creation doesn't duplicate a job by returning existing"""
     new_id = get_count("qiita.analysis") + 1
     Analysis.create(User("*****@*****.**"), "new", "desc")
     self.conn_handler.execute(
         "INSERT INTO qiita.analysis_sample "
         "(analysis_id, processed_data_id, sample_id) VALUES "
         "({0}, 1, '1.SKB8.640193'), ({0}, 1, '1.SKD8.640184'), "
         "({0}, 1, '1.SKB7.640196'), ({0}, 1, '1.SKM9.640192'), "
         "({0}, 1, '1.SKM4.640180')".format(new_id))
     new = Job.create("18S", "Beta Diversity",
                      {"--otu_table_fp": 1, "--mapping_fp": 1},
                      Analysis(new_id), return_existing=True)
     self.assertEqual(new.id, 2)
Example #30
 def test_required_sample_info(self):
     self.assertEqual(get_count("qiita.study_sample"), 27)
Example #31
 def test_filepath_type(self):
     self.assertEqual(get_count("qiita.filepath_type"), 21)
Example #32
 def test_study_prep_template(self):
     self.assertEqual(get_count("qiita.study_prep_template"), 2)
Example #33
 def test_investigation_study(self):
     self.assertEqual(get_count("qiita.investigation_study"), 1)
Example #34
 def test_study_experimental_factor(self):
     self.assertEqual(get_count("qiita.study_experimental_factor"), 1)
Example #35
    def test_study(self):
        self.assertEqual(get_count("qiita.study"), 1)

    def test_study_users(self):
        self.assertEqual(get_count("qiita.study_users"), 1)

    def test_investigation(self):
        self.assertEqual(get_count("qiita.investigation"), 1)

    def test_investigation_study(self):
        self.assertEqual(get_count("qiita.investigation_study"), 1)

    def test_filepath(self):
        self.assertEqual(get_count("qiita.filepath"), 25)

    def test_filepath_type(self):
        self.assertEqual(get_count("qiita.filepath_type"), 23)

    def test_study_prep_template(self):
        self.assertEqual(get_count("qiita.study_prep_template"), 1)

    def test_required_sample_info(self):
        self.assertEqual(get_count("qiita.study_sample"), 27)
Example #36
 def test_study_users(self):
     self.assertEqual(get_count("qiita.study_users"), 1)
Example #37
 def test_preprocessed_sequence_illumina_params(self):
     self.assertEqual(
         get_count("qiita.preprocessed_sequence_illumina_params"), 6)
Example #38
 def test_study_preprocessed_data(self):
     self.assertEqual(get_count("qiita.study_preprocessed_data"), 2)
Example #39
 def test_job_results_filepath(self):
     self.assertEqual(get_count("qiita.job_results_filepath"), 2)
Example #40
 def test_filepath(self):
     self.assertEqual(get_count("qiita.filepath"), 19)
Example #41
    def test_complete_job(self):
        # Complete success
        pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create,
                              pd.DataFrame({'new_col': {
                                  '1.SKD6.640190': 1
                              }}), Study(1), '16S')
        c_job = ProcessingJob.create(
            User('*****@*****.**'),
            Parameters.load(Command.get_validator('BIOM'),
                            values_dict={
                                'template': pt.id,
                                'files': dumps({'BIOM': ['file']}),
                                'artifact_type': 'BIOM'
                            }), True)
        c_job._set_status('running')
        fd, fp = mkstemp(suffix='_table.biom')
        close(fd)
        with open(fp, 'w') as f:
            f.write('\n')
        self._clean_up_files.append(fp)
        exp_artifact_count = get_count('qiita.artifact') + 1

        # the main job (c_job) hasn't started completing yet, so its step
        # hasn't been updated since creation and is still None
        self.assertIsNone(c_job.step)

        payload = dumps({
            'success': True,
            'error': '',
            'artifacts': {
                'OTU table': {
                    'filepaths': [(fp, 'biom')],
                    'artifact_type': 'BIOM'
                }
            }
        })
        job = self._create_job('complete_job', {
            'job_id': c_job.id,
            'payload': payload
        })
        private_task(job.id)

        # the complete_job task has started, so c_job's step should now
        # report the completing job's information
        self.assertEqual(c_job.step,
                         f"Completing via {job.id} [Not Available]")

        self.assertEqual(job.status, 'success')
        self.assertEqual(c_job.status, 'success')
        self.assertEqual(get_count('qiita.artifact'), exp_artifact_count)

        # Complete job error
        payload = dumps({'success': False, 'error': 'Job failure'})
        job = self._create_job('complete_job', {
            'job_id': 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d',
            'payload': payload
        })
        private_task(job.id)
        self.assertEqual(job.status, 'success')
        c_job = ProcessingJob('bcc7ebcd-39c1-43e4-af2d-822e3589f14d')
        self.assertEqual(c_job.status, 'error')
        self.assertEqual(c_job.log, LogEntry.newest_records(numrecords=1)[0])
        self.assertEqual(c_job.log.msg, 'Job failure')

        # Complete internal error
        pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create,
                              pd.DataFrame({'new_col': {
                                  '1.SKD6.640190': 1
                              }}), Study(1), '16S')
        c_job = ProcessingJob.create(
            User('*****@*****.**'),
            Parameters.load(Command.get_validator('BIOM'),
                            values_dict={
                                'template': pt.id,
                                'files': dumps({'BIOM': ['file']}),
                                'artifact_type': 'BIOM'
                            }), True)
        c_job._set_status('running')
        fp = '/surprised/if/this/path/exists.biom'
        payload = dumps({
            'success': True,
            'error': '',
            'artifacts': {
                'OTU table': {
                    'filepaths': [(fp, 'biom')],
                    'artifact_type': 'BIOM'
                }
            }
        })
        job = self._create_job('complete_job', {
            'job_id': c_job.id,
            'payload': payload
        })
        private_task(job.id)
        self.assertEqual(job.status, 'success')
        self.assertEqual(c_job.status, 'error')
        self.assertIn('No such file or directory', c_job.log.msg)
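
The payload contract exercised above: top-level 'success' and 'error' keys,
plus an 'artifacts' mapping from output name to its filepaths and artifact
type on success. A small builder keeps the dumps calls consistent (the helper
is illustrative, not part of Qiita):

    from json import dumps

    def build_complete_payload(success, error='', artifacts=None):
        # Matches the shape posted to the complete_job task in the test
        # above; 'artifacts' is omitted entirely for failure payloads
        payload = {'success': success, 'error': error}
        if artifacts is not None:
            payload['artifacts'] = artifacts
        return dumps(payload)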
Example #42
 def test_reference(self):
     self.assertEqual(get_count("qiita.reference"), 1)
Example #43
 def test_prep_1(self):
     self.assertEqual(get_count("qiita.prep_1"), 27)
Example #44
 def test_sample_1(self):
     self.assertEqual(get_count("qiita.sample_1"), 27)
Example #45
 def test_study_sample_columns(self):
     self.assertEqual(get_count("qiita.study_sample_columns"), 31)
Example #46
 def test_analysis_filepath(self):
     self.assertEqual(get_count("qiita.analysis_filepath"), 1)
Example #47
 def test_prep_template_sample(self):
     self.assertEqual(get_count("qiita.prep_template_sample"), 27)
Example #48
 def test_analysis_sample(self):
     self.assertEqual(get_count("qiita.analysis_sample"), 13)
Example #49
 def test_analysis_users(self):
     self.assertEqual(get_count("qiita.analysis_users"), 1)
Example #50
 def test_analysis_workflow(self):
     self.assertEqual(get_count("qiita.analysis_workflow"), 2)
Example #51
 def test_common_prep_info(self):
     self.assertEqual(get_count("qiita.common_prep_info"), 27)
Example #52
 def test_preprocessed_processed_data(self):
     self.assertEqual(get_count("qiita.preprocessed_processed_data"), 1)
Example #53
 def test_qiita_user(self):
     self.assertEqual(get_count("qiita.qiita_user"), 4)
Example #54
 def test_preprocessed_filepath(self):
     self.assertEqual(get_count("qiita.preprocessed_filepath"), 3)
Example #55
 def test_analysis_job(self):
     self.assertEqual(get_count("qiita.analysis_job"), 3)
Example #56
 def test_study_person(self):
     self.assertEqual(get_count("qiita.study_person"), 3)
Example #57
 def test_command_data_type(self):
     self.assertEqual(get_count("qiita.command_data_type"), 14)
Example #58
 def test_prep_columns(self):
     self.assertEqual(get_count("qiita.prep_columns"), 23)
Example #59
 def test_job(self):
     self.assertEqual(get_count("qiita.job"), 3)
Example #60
    def test_build_biom_tables(self):
        new_id = get_count('qiita.filepath') + 1
        samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']}
        self.analysis._build_biom_tables(samples, 100)
        obs = self.analysis.biom_tables

        self.assertEqual(obs, {'18S': self.biom_fp})

        table = load_table(self.biom_fp)
        obs = set(table.ids(axis='sample'))
        exp = {'1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'}
        self.assertEqual(obs, exp)

        obs = table.metadata('1.SKB8.640193')
        exp = {'Study':
               'Identification of the Microbiomes for Cannabis Soils',
               'Processed_id': 1}
        self.assertEqual(obs, exp)

        sql = """SELECT EXISTS(SELECT * FROM qiita.filepath
                 WHERE filepath_id=%s)"""
        obs = self.conn_handler.execute_fetchone(sql, (new_id,))[0]

        self.assertTrue(obs)

        sql = """SELECT * FROM qiita.analysis_filepath
                 WHERE analysis_id=%s ORDER BY filepath_id"""
        obs = self.conn_handler.execute_fetchall(sql, (self.analysis.id,))
        exp = [[1L, 14L, 2L], [1L, 15L, None], [1L, new_id, None]]
        self.assertEqual(obs, exp)