コード例 #1
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
        """Expect ValueError for a per-sample FASTQ raw data that holds a
        forward reads file plus a barcodes file.
        """
        sample_values = {'run_prefix': "sample1_failure", 'primer': 'A',
                         'barcode': 'A', 'center_name': 'ANL',
                         'platform': 'ILLUMINA',
                         'library_construction_protocol': 'A',
                         'experiment_design_description': 'A'}
        md_template = pd.DataFrame.from_dict({'SKB8.640193': sample_values},
                                             orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # Create both one-line input files and schedule them for removal
        created_fps = []
        for fname in ('sample1_failure.fastq',
                      'sample1_failure.barcodes.fastq.gz'):
            new_fp = self.path_builder(fname)
            with open(new_fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(new_fp)
            created_fps.append(new_fp)
        forward_fp, barcodes_fp = created_fps

        forward_type = convert_to_id('raw_forward_seqs', 'filepath_type')
        barcode_type = convert_to_id('raw_barcodes', 'filepath_type')
        fps = [(forward_fp, forward_type), (barcodes_fp, barcode_type)]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)
        matching = [p for p in list(PreprocessedIlluminaParams.iter())
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]

        # Building the command for this raw data is expected to fail
        self.assertRaises(ValueError, _get_preprocess_fastq_cmd,
                          raw_data, prep_template, params)
コード例 #2
0
ファイル: test_util.py プロジェクト: jwdebelius/qiita
    def test_move_filepaths_to_upload_folder(self):
        """Moved files disappear from their old path and show up in the
        study's uploads folder.
        """
        # The fixture is built here because no other test needs these files
        study_id = 1
        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)

        raw_data = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
        filepaths = raw_data.get_filepaths()

        # Drop the raw_filepath references so that
        # move_filepaths_to_upload_folder can be called directly
        delete_sql = "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s"
        for filepath_id, _, _ in filepaths:
            self.conn_handler.execute(delete_sql, (filepath_id,))

        move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

        # Each file must be gone from its old path and present in the
        # uploads folder, named by what follows the first underscore
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        for _, old_fp, _ in filepaths:
            self.assertFalse(exists(old_fp))
            moved_name = basename(old_fp).split('_', 1)[1]
            moved_fp = join(uploads_dir, moved_name)
            self.assertTrue(exists(moved_fp))

            self.files_to_remove.append(moved_fp)
コード例 #3
0
    def test_move_filepaths_to_upload_folder(self):
        """Moved files disappear from their old path and show up in the
        study's uploads folder.
        """
        # The fixture is built here because no other test needs these files
        study_id = 1
        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)

        raw_data = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
        filepaths = raw_data.get_filepaths()

        # Drop the raw_filepath references so that
        # move_filepaths_to_upload_folder can be called directly
        delete_sql = "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s"
        for filepath_id, _, _ in filepaths:
            self.conn_handler.execute(delete_sql, (filepath_id, ))

        move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

        # Each file must be gone from its old path and present in the
        # uploads folder, named by what follows the first underscore
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        for _, old_fp, _ in filepaths:
            self.assertFalse(exists(old_fp))
            moved_name = basename(old_fp).split('_', 1)[1]
            moved_fp = join(uploads_dir, moved_name)
            self.assertTrue(exists(moved_fp))

            self.files_to_remove.append(moved_fp)
コード例 #4
0
ファイル: test_data.py プロジェクト: BrindhaBioinfo/qiita
 def test_clear_filepaths(self):
     """No raw_filepath row is left for the raw data after clearing."""
     raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
     has_links_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                      "WHERE raw_data_id=%s)")

     # The freshly created raw data must start with linked filepaths
     linked = self.conn_handler.execute_fetchone(
         has_links_sql, (raw_data.id,))[0]
     self.assertTrue(linked)

     raw_data.clear_filepaths()

     linked = self.conn_handler.execute_fetchone(
         has_links_sql, (raw_data.id,))[0]
     self.assertFalse(linked)
コード例 #5
0
ファイル: test_data.py プロジェクト: BrindhaBioinfo/qiita
 def test_remove_filepath(self):
     """Check the raw_filepath rows after removing the sequences file."""
     raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
     seqs_name = "3_%s" % basename(self.seqs_fp)
     raw_data.remove_filepath(join(self.db_test_raw_dir, seqs_name))

     # NOTE(review): ids 17 and 18 are presumably the two filepaths linked
     # by the create call above - confirm against the test database
     removed_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                    "WHERE filepath_id=17)")
     kept_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                 "WHERE filepath_id=18)")
     self.assertFalse(self.conn_handler.execute_fetchone(removed_sql)[0])
     self.assertTrue(self.conn_handler.execute_fetchone(kept_sql)[0])
コード例 #6
0
    def setUp(self):
        """Build the metadata frame, a new raw data and the expected ids."""
        # Three samples sharing every value except the free-text column
        metadata_dict = {}
        for position, sample_id in enumerate(
                ('SKB8.640193', 'SKD8.640184', 'SKB7.640196'), 1):
            metadata_dict[sample_id] = {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': 'Value for sample %d' % position}
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.test_raw_data = RawData(1)

        # Two temporary FASTQ files holding a single newline each
        tmp_fps = []
        for suffix in ('_seqs.fastq', '_barcodes.fastq'):
            fd, tmp_fp = mkstemp(suffix=suffix)
            close(fd)
            tmp_fps.append(tmp_fp)
        seqs_fp, barcodes_fp = tmp_fps
        filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
        for tmp_fp in tmp_fps:
            with open(tmp_fp, "w") as fh:
                fh.write("\n")
        self.new_raw_data = RawData.create(2, filepaths, [Study(1)])

        # Paths registered for clean up; the "3_" prefix presumably matches
        # the id of the raw data created above - TODO confirm
        raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self._clean_up_files = [join(raw_dir, "3_%s" % basename(tmp_fp))
                                for tmp_fp in (seqs_fp, barcodes_fp)]

        self.tester = PrepTemplate(1)
        self.exp_sample_ids = {'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
                               'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
                               'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
                               'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
                               'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
                               'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
                               'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
                               'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
                               'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
コード例 #7
0
ファイル: test_data.py プロジェクト: MarkBruns/qiita
    def test_create(self):
        """RawData.create adds the expected DB rows and copies the files"""
        conn = self.conn_handler

        # The new object should receive the next raw data id
        exp_id = get_count("qiita.raw_data") + 1
        created = RawData.create(self.filetype, self.prep_templates,
                                 self.filepaths)
        self.assertEqual(created.id, exp_id)

        # Row in qiita.raw_data
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % exp_id)
        # raw_data_id, filetype, link_filepaths_status
        self.assertEqual(rows, [[exp_id, 2, 'idle']])

        # Both prep templates must point to the new raw data
        qry = """SELECT prep_template_id
                 FROM qiita.prep_template
                 WHERE raw_data_id = %s
                 ORDER BY prep_template_id"""
        rows = conn.execute_fetchall(qry, (exp_id,))
        self.assertEqual(rows, [[self.pt1.id], [self.pt2.id]])

        # The files must exist in the raw data folder, prefixed with the id
        db_names = []
        for src_fp in (self.seqs_fp, self.barcodes_fp):
            db_name = "%d_%s" % (exp_id, basename(src_fp))
            copied_fp = join(self.db_test_raw_dir, db_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            db_names.append(db_name)
        exp_seqs_fp, exp_bc_fp = db_names

        # The two rows with the highest ids in qiita.filepath
        top_id = conn.execute_fetchone(
            "SELECT count(1) FROM qiita.filepath")[0]
        filepath_sql = ("SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
                        "filepath_id=%d")
        rows = conn.execute_fetchall(filepath_sql % (top_id - 1, top_id))
        # leading columns: filepath_id, path, filepath_type_id
        exp = [[top_id - 1, exp_seqs_fp, 1, '852952723', 1, 5],
               [top_id, exp_bc_fp, 2, '852952723', 1, 5]]
        self.assertEqual(rows, exp)

        # Links between the raw data and its filepaths
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % exp_id)
        # raw_data_id, filepath_id
        self.assertEqual(rows, [[exp_id, top_id - 1], [exp_id, top_id]])
コード例 #8
0
ファイル: description_handlers.py プロジェクト: RNAer/qiita
    def create_raw_data(self, study, user, callback):
        """Adds a (new) raw data to the study

        Either creates a new raw data object from the ``filetype`` request
        argument, or attaches the existing raw data listed (comma-separated)
        in the ``previous_raw_data`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object (not used by this method)
        callback : function
            The callback function to call with the results once the processing
            is done. It receives a single tuple
            ``(msg, msg_level, 'raw_data_tab', rd_id, None)``; `rd_id` is
            None whenever no raw data was created or attached.
        """
        msg = "Raw data successfully added"
        msg_level = "success"
        # Default for every path that ends before a raw data id is known
        # (conflicting arguments, failed creation, missing arguments), so the
        # callback below never reads an unbound local
        rd_id = None

        # Get the arguments needed to create a raw data object
        filetype = self.get_argument('filetype', None)
        previous_raw_data = self.get_argument('previous_raw_data', None)

        if filetype and previous_raw_data:
            # The user selected a filetype and an existing raw data
            msg = ("You can not specify both a new raw data and a previously "
                   "used one")
            msg_level = "danger"
        elif filetype:
            # We are creating a new raw data object
            try:
                rd_id = RawData.create(filetype, [study]).id
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError) as e:
                # Report the failure through the callback instead of raising
                msg = html_error_message % (
                    "creating a new raw data object for study:",
                    str(study.id), str(e))
                msg_level = "danger"
        elif previous_raw_data:
            # Attach the listed raw data; the first one is reported back
            previous_raw_data = previous_raw_data.split(',')
            raw_data = [RawData(rd) for rd in previous_raw_data]
            study.add_raw_data(raw_data)
            rd_id = raw_data[0].id
        else:
            # The user did not provide a filetype neither an existing raw data
            # If using the interface, we should never reach this if, but
            # better be safe than sorry
            msg = ("You should choose a filetype for a new raw data or "
                   "choose a raw data previously used")
            msg_level = "danger"

        callback((msg, msg_level, 'raw_data_tab', rd_id, None))
コード例 #9
0
    def create_raw_data(self, study, user, callback):
        """Adds a (new) raw data to the study

        Either creates a new raw data object from the ``filetype`` request
        argument, or attaches the existing raw data listed (comma-separated)
        in the ``previous_raw_data`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object (not used by this method)
        callback : function
            The callback function to call with the results once the processing
            is done. It receives a single tuple
            ``(msg, msg_level, 'raw_data_tab', rd_id, None)``; `rd_id` is
            None whenever no raw data was created or attached.
        """
        msg = "Raw data successfully added"
        msg_level = "success"
        # Default for every path that ends before a raw data id is known
        # (conflicting arguments, failed creation, missing arguments), so the
        # callback below never reads an unbound local
        rd_id = None

        # Get the arguments needed to create a raw data object
        filetype = self.get_argument('filetype', None)
        previous_raw_data = self.get_argument('previous_raw_data', None)

        if filetype and previous_raw_data:
            # The user selected a filetype and an existing raw data
            msg = ("You can not specify both a new raw data and a previously "
                   "used one")
            msg_level = "danger"
        elif filetype:
            # We are creating a new raw data object
            try:
                rd_id = RawData.create(filetype, [study]).id
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError) as e:
                # Report the failure through the callback instead of raising
                msg = html_error_message % (
                    "creating a new raw data object for study:",
                    str(study.id), str(e))
                msg_level = "danger"
        elif previous_raw_data:
            # Attach the listed raw data; the first one is reported back
            previous_raw_data = previous_raw_data.split(',')
            raw_data = [RawData(rd) for rd in previous_raw_data]
            study.add_raw_data(raw_data)
            rd_id = raw_data[0].id
        else:
            # The user did not provide a filetype neither an existing raw data
            # If using the interface, we should never reach this if, but
            # better be safe than sorry
            msg = ("You should choose a filetype for a new raw data or "
                   "choose a raw data previously used")
            msg_level = "danger"

        callback((msg, msg_level, 'raw_data_tab', rd_id, None))
コード例 #10
0
    def setUp(self):
        """Build the metadata frame, a new raw data and the expected ids."""
        # Values shared by the three samples; only 'str_column' differs
        shared_values = {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status_id': 1,
            'data_type_id': 2,
        }
        metadata_dict = {
            'SKB8.640193': dict(shared_values,
                                str_column='Value for sample 1'),
            'SKD8.640184': dict(shared_values,
                                str_column='Value for sample 2'),
            'SKB7.640196': dict(shared_values,
                                str_column='Value for sample 3'),
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.test_raw_data = RawData(1)

        # Two temporary FASTQ files holding a single newline each
        seqs_fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(seqs_fd)
        barcodes_fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(barcodes_fd)
        filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
        for tmp_fp, _ in filepaths:
            with open(tmp_fp, "w") as fh:
                fh.write("\n")
        self.new_raw_data = RawData.create(2, filepaths, [Study(1)])

        # Paths registered for clean up; the "3_" prefix presumably matches
        # the id of the raw data created above - TODO confirm
        raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self._clean_up_files = [
            join(raw_dir, "3_%s" % basename(seqs_fp)),
            join(raw_dir, "3_%s" % basename(barcodes_fp)),
        ]

        self.tester = PrepTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
        }
コード例 #11
0
ファイル: test_data.py プロジェクト: jwdebelius/qiita
    def test_create(self):
        """RawData.create adds the expected DB rows and copies the files"""
        conn = self.conn_handler

        # The new object should receive the next raw data id
        current_count = conn.execute_fetchone(
            "SELECT count(1) from qiita.raw_data")[0]
        exp_id = 1 + current_count
        created = RawData.create(self.filetype, self.studies, self.filepaths)
        self.assertEqual(created.id, exp_id)

        # Row in qiita.raw_data
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % exp_id)
        # raw_data_id, filetype, link_filepaths_status
        self.assertEqual(rows, [[exp_id, 2, 'idle']])

        # Link between the study and the raw data
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d" % exp_id)
        # study_id , raw_data_id
        self.assertEqual(rows, [[1, exp_id]])

        # The files must exist in the raw data folder, prefixed with the id
        db_names = []
        for src_fp in (self.seqs_fp, self.barcodes_fp):
            db_name = "%d_%s" % (exp_id, basename(src_fp))
            copied_fp = join(self.db_test_raw_dir, db_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            db_names.append(db_name)
        exp_seqs_fp, exp_bc_fp = db_names

        # The two rows with the highest ids in qiita.filepath
        top_id = conn.execute_fetchone(
            "SELECT count(1) FROM qiita.filepath")[0]
        filepath_sql = ("SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
                        "filepath_id=%d")
        rows = conn.execute_fetchall(filepath_sql % (top_id - 1, top_id))
        # leading columns: filepath_id, path, filepath_type_id
        exp = [[top_id - 1, exp_seqs_fp, 1, '852952723', 1, 5],
               [top_id, exp_bc_fp, 2, '852952723', 1, 5]]
        self.assertEqual(rows, exp)

        # Links between the raw data and its filepaths
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % exp_id)
        # raw_data_id, filepath_id
        self.assertEqual(rows, [[exp_id, top_id - 1], [exp_id, top_id]])
コード例 #12
0
ファイル: test_data.py プロジェクト: jwdebelius/qiita
 def test_remove_filepath(self):
     """Check the raw_filepath rows after removing the sequences file."""
     conn = self.conn_handler
     # Row counts taken before the new raw data is created
     top_id = conn.execute_fetchone(
         "SELECT count(1) FROM qiita.raw_filepath")[0]
     raw_id = conn.execute_fetchone(
         "SELECT count(1) FROM qiita.raw_data")[0]

     raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
     # The stored file name is prefixed with the expected new raw data id
     seqs_name = "%d_%s" % (raw_id + 1, basename(self.seqs_fp))
     raw_data.remove_filepath(join(self.db_test_raw_dir, seqs_name))

     link_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                 "WHERE filepath_id=%d)")
     gone = conn.execute_fetchone(link_sql % (top_id - 1))[0]
     self.assertFalse(gone)
     kept = conn.execute_fetchone(link_sql % (top_id - 2))[0]
     self.assertTrue(kept)
コード例 #13
0
ファイル: test_data.py プロジェクト: zonca/qiita
 def test_remove_filepath(self):
     """Check the raw_filepath rows after removing the sequences file."""
     conn = self.conn_handler
     # Row counts taken before the new raw data is created
     top_id = conn.execute_fetchone(
         "SELECT count(1) FROM qiita.raw_filepath")[0]
     raw_id = conn.execute_fetchone(
         "SELECT count(1) FROM qiita.raw_data")[0]

     raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
     # The stored file name is prefixed with the expected new raw data id
     seqs_name = "%d_%s" % (raw_id + 1, basename(self.seqs_fp))
     raw_data.remove_filepath(join(self.db_test_raw_dir, seqs_name))

     link_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                 "WHERE filepath_id=%d)")
     gone = conn.execute_fetchone(link_sql % (top_id - 1))[0]
     self.assertFalse(gone)
     kept = conn.execute_fetchone(link_sql % (top_id - 2))[0]
     self.assertTrue(kept)
コード例 #14
0
ファイル: test_data.py プロジェクト: zonca/qiita
    def test_status(self):
        """The raw data status follows its processed data, or is sandbox."""
        study = Study(1)
        raw_data = RawData(1)
        self.assertEqual(raw_data.status(study), 'private')

        # The status is inferred from the processed data, so making the
        # processed data public must be reflected by the raw data
        processed = ProcessedData(1)
        processed.status = 'public'
        self.assertEqual(raw_data.status(study), 'public')

        # A new raw data has no processed data, hence it reports sandbox
        raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
        self.assertEqual(raw_data.status(study), 'sandbox')
コード例 #15
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
        """The per-sample FASTQ command lists both files and sample ids."""
        # Two samples that differ only in their run prefix
        metadata_dict = {}
        for sample_id, run_prefix in (('SKB8.640193', "sample1"),
                                      ('SKD8.640184', "sample2")):
            metadata_dict[sample_id] = {
                'run_prefix': run_prefix, 'primer': 'A',
                'barcode': 'A', 'center_name': 'ANL',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'A',
                'experiment_design_description': 'A'}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # Create one one-line file per sample and schedule it for removal
        created_fps = []
        for fname in ('sample1.fastq', 'sample2.fastq.gz'):
            new_fp = self.path_builder(fname)
            with open(new_fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(new_fp)
            created_fps.append(new_fp)
        filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
        fps = [(new_fp, filepath_id) for new_fp in created_fps]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)
        matching = [p for p in list(PreprocessedIlluminaParams.iter())
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]

        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        # The input files are listed in the sorted order of the filepaths
        sorted_entries = sorted(raw_data.get_filepaths())
        raw_fps = ','.join([entry_fp for _, entry_fp, _ in sorted_entries])
        cmd_template = (
            "split_libraries_fastq.py --store_demultiplexed_fastq -i "
            "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
            "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
            "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
            "--sequence_max_n 0")
        exp_cmd = cmd_template.format(raw_fps, obs_output_dir)
        self.assertEqual(obs_cmd, exp_cmd)
コード例 #16
0
ファイル: test_data.py プロジェクト: MarkBruns/qiita
    def test_delete(self):
        """RawData.delete unlinks prep templates and removes the raw data
        once no files are attached to it.
        """
        conn = self.conn_handler
        rd = RawData.create(self.filetype, self.prep_templates,
                            self.filepaths)

        pt_sql = """SELECT prep_template_id
                    FROM qiita.prep_template
                    WHERE raw_data_id = %s
                    ORDER BY prep_template_id"""
        linked = conn.execute_fetchall(pt_sql, (rd.id,))
        self.assertEqual(linked, [[self.pt1.id], [self.pt2.id]])

        # With both prep templates linked, this call only unlinks pt2
        RawData.delete(rd.id, self.pt2.id)

        linked = conn.execute_fetchall(pt_sql, (rd.id,))
        self.assertEqual(linked, [[self.pt1.id]])
        self.assertEqual(self.pt2.raw_data, None)

        # Removing the raw data itself must fail while files are attached
        with self.assertRaises(QiitaDBError):
            RawData.delete(rd.id, self.pt1.id)

        # Register the expected upload-folder paths for clean up, then
        # clear the files so the raw data can actually be removed
        study_id = rd.studies[0]
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        uploaded = [join(uploads_dir, basename(db_fp).split('_', 1)[1])
                    for _, db_fp, _ in rd.get_filepaths()]
        self._clean_up_files.extend(uploaded)
        rd.clear_filepaths()

        RawData.delete(rd.id, self.pt1.id)
        linked = conn.execute_fetchall(pt_sql, (rd.id,))
        self.assertEqual(linked, [])

        # Neither filepath links nor the raw data row may remain
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.raw_filepath
                    WHERE raw_data_id = %s)"""
        has_links = conn.execute_fetchone(sql, (rd.id,))[0]
        self.assertFalse(has_links)

        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.raw_data
                    WHERE raw_data_id=%s)"""
        still_there = conn.execute_fetchone(sql, (rd.id,))[0]
        self.assertFalse(still_there)
コード例 #17
0
ファイル: test_data.py プロジェクト: BrindhaBioinfo/qiita
    def test_create(self):
        """RawData.create adds the expected DB rows and copies the files"""
        conn = self.conn_handler

        # The returned object is expected to carry id 3
        created = RawData.create(self.filetype, self.studies, self.filepaths)
        self.assertEqual(created.id, 3)

        # Row in qiita.raw_data
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=3")
        # raw_data_id, filetype, link_filepaths_status
        self.assertEqual(rows, [[3, 2, 'idle']])

        # Link between the study and the raw data
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3")
        # study_id , raw_data_id
        self.assertEqual(rows, [[1, 3]])

        # The files must exist in the raw data folder, prefixed with the id
        db_names = []
        for src_fp in (self.seqs_fp, self.barcodes_fp):
            db_name = "3_%s" % basename(src_fp)
            copied_fp = join(self.db_test_raw_dir, db_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            db_names.append(db_name)
        exp_seqs_fp, exp_bc_fp = db_names

        # Rows in qiita.filepath for the two new files
        filepath_sql = ("SELECT * FROM qiita.filepath WHERE filepath_id=17 or "
                        "filepath_id=18")
        rows = conn.execute_fetchall(filepath_sql)
        # leading columns: filepath_id, path, filepath_type_id
        exp = [[17, exp_seqs_fp, 1, '852952723', 1, 5],
               [18, exp_bc_fp, 2, '852952723', 1, 5]]
        self.assertEqual(rows, exp)

        # Links between the raw data and its filepaths
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3")
        # raw_data_id, filepath_id
        self.assertEqual(rows, [[3, 17], [3, 18]])
コード例 #18
0
ファイル: test_data.py プロジェクト: zonca/qiita
    def test_clear_filepaths(self):
        """No raw_filepath row is left for the raw data after clearing."""
        rd = RawData.create(self.filetype, self.studies, self.filepaths)
        has_links_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                         "WHERE raw_data_id=%s)")
        linked = self.conn_handler.execute_fetchone(
            has_links_sql, (rd.id,))[0]
        self.assertTrue(linked)

        # Register the expected upload-folder paths for clean up before the
        # filepaths are cleared
        study_id = rd.studies[0]
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        to_clean = []
        for _, db_fp, _ in rd.get_filepaths():
            # the name in the uploads folder drops the leading "<prefix>_"
            to_clean.append(join(uploads_dir,
                                 basename(db_fp).split('_', 1)[1]))
        self._clean_up_files = to_clean

        rd.clear_filepaths()
        linked = self.conn_handler.execute_fetchone(
            has_links_sql, (rd.id,))[0]
        self.assertFalse(linked)
コード例 #19
0
ファイル: test_data.py プロジェクト: jwdebelius/qiita
    def test_clear_filepaths(self):
        """No raw_filepath row is left for the raw data after clearing."""
        rd = RawData.create(self.filetype, self.studies, self.filepaths)
        has_links_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                         "WHERE raw_data_id=%s)")
        linked = self.conn_handler.execute_fetchone(
            has_links_sql, (rd.id,))[0]
        self.assertTrue(linked)

        # Register the expected upload-folder paths for clean up before the
        # filepaths are cleared
        study_id = rd.studies[0]
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        to_clean = []
        for _, db_fp, _ in rd.get_filepaths():
            # the name in the uploads folder drops the leading "<prefix>_"
            to_clean.append(join(uploads_dir,
                                 basename(db_fp).split('_', 1)[1]))
        self._clean_up_files = to_clean

        rd.clear_filepaths()
        linked = self.conn_handler.execute_fetchone(
            has_links_sql, (rd.id,))[0]
        self.assertFalse(linked)
コード例 #20
0
    def test_create(self):
        """RawData.create adds the expected DB rows and copies the files"""
        conn = self.conn_handler

        # The returned object is expected to carry id 3
        created = RawData.create(self.filetype, self.filepaths, self.studies)
        self.assertEqual(created.id, 3)

        # Row in qiita.raw_data; only two values are expected here
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=3")
        self.assertEqual(rows, [[3, 2]])

        # Link between the study and the raw data
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3")
        # study_id , raw_data_id
        self.assertEqual(rows, [[1, 3]])

        # The files must exist in the raw data folder, prefixed with the id
        copied_fps = []
        for src_fp in (self.seqs_fp, self.barcodes_fp):
            copied_fp = join(self.db_test_raw_dir, "3_%s" % basename(src_fp))
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            copied_fps.append(copied_fp)
        exp_seqs_fp, exp_bc_fp = copied_fps

        # Rows in qiita.filepath; the full joined paths are the expected
        # path values in this version of the test
        filepath_sql = ("SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
                        "filepath_id=16")
        rows = conn.execute_fetchall(filepath_sql)
        # leading columns: filepath_id, path, filepath_type_id
        exp = [[15, exp_seqs_fp, 1, '852952723', 1],
               [16, exp_bc_fp, 2, '852952723', 1]]
        self.assertEqual(rows, exp)

        # Links between the raw data and its filepaths
        rows = conn.execute_fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3")
        # raw_data_id, filepath_id
        self.assertEqual(rows, [[3, 15], [3, 16]])
コード例 #21
0
    def setUp(self):
        """Build a RawData object backed by two temporary FASTQ files.

        Sets ``self.pt_contents``, ``self.raw_data`` and
        ``self.files_to_remove`` (the expected locations of the two files
        under the ``raw_data`` directory of the DB files base dir).
        """
        # Reserve two temporary files; only their paths are needed, so the
        # descriptors returned by mkstemp are closed right away
        seqs_fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(seqs_fd)
        bc_fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(bc_fd)

        # Write a single newline to each file
        for tmp_fp in (seqs_fp, barcodes_fp):
            with open(tmp_fp, "w") as handle:
                handle.write("\n")

        self.pt_contents = PREP_TEMPLATE

        # Second element of each pair is presumably the filepath type id
        # -- confirm against RawData.create
        fps_with_type = [(seqs_fp, 1), (barcodes_fp, 2)]
        self.raw_data = RawData.create(2, fps_with_type, [Study(1)])

        # The stored files are expected to be named "<raw data id>_<basename>"
        raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self.files_to_remove = [
            join(raw_dir, "%s_%s" % (self.raw_data.id, basename(seqs_fp))),
            join(raw_dir, "%s_%s" % (self.raw_data.id,
                                     basename(barcodes_fp)))]
コード例 #22
0
ファイル: test_data.py プロジェクト: BrindhaBioinfo/qiita
    def test_create_no_filepaths(self):
        """Check that a raw data object can be created without filepaths"""
        # The new object is expected to receive id 3
        created = RawData.create(self.filetype, self.studies)
        self.assertEqual(created.id, 3)

        # Each entry pairs a query with the exact rows it must return
        db_checks = [
            # raw data row: raw_data_id, filetype, link_filepaths_status
            ("SELECT * FROM qiita.raw_data WHERE raw_data_id=3",
             [[3, 2, 'idle']]),
            # link to the study: study_id, raw_data_id
            ("SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3",
             [[1, 3]]),
            # no filepath may be linked to the raw data
            ("SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3",
             []),
        ]
        for sql, expected_rows in db_checks:
            rows = self.conn_handler.execute_fetchall(sql)
            self.assertEqual(rows, expected_rows)
コード例 #23
0
ファイル: test_util.py プロジェクト: DarcyMyers/qiita
    def test_move_filepaths_to_upload_folder(self):
        """Check that raw data files are moved into the study upload folder"""
        # The fixture is built here because this is the only test that uses
        # these files
        tmp_fd, seqs_fp = mkstemp(suffix="_seqs.fastq")
        close(tmp_fd)
        st = Study(1)

        # Minimal single-sample prep template
        sample_values = {
            "center_name": "ANL",
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": "GTCCGCAAGTTA",
            "run_prefix": "s_G1_L001_sequences",
            "platform": "ILLUMINA",
            "library_construction_protocol": "AAAA",
            "experiment_design_description": "BBBB",
        }
        metadata = pd.DataFrame.from_dict({"SKB8.640193": sample_values},
                                          orient="index")
        pt = PrepTemplate.create(metadata, Study(1), "16S")

        rd = RawData.create(2, [pt], [(seqs_fp, 1)])
        filepaths = rd.get_filepaths()

        # Remove the raw data <-> filepath links so that
        # move_filepaths_to_upload_folder can be called directly
        unlink_sql = "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s"
        for filepath_id, _, _ in filepaths:
            self.conn_handler.execute(unlink_sql, (filepath_id,))

        # Move the files
        move_filepaths_to_upload_folder(st.id, filepaths)

        # Every file must be gone from its old location and present in the
        # study's upload folder, named without the prefix that precedes the
        # first underscore of its basename
        upload_dir = join(get_mountpoint("uploads")[0][1], str(st.id))
        for _, old_fp, _ in filepaths:
            self.assertFalse(exists(old_fp))
            moved_fp = join(upload_dir, basename(old_fp).split("_", 1)[1])
            self.assertTrue(exists(moved_fp))

            # Register the moved file for removal after the test
            self.files_to_remove.append(moved_fp)
コード例 #24
0
ファイル: test_data.py プロジェクト: jwdebelius/qiita
    def test_create_no_filepaths(self):
        """Check that a raw data object can be created without filepaths"""
        # The expected id is one more than the current number of rows in
        # the raw_data table
        current_count = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.raw_data")[0]
        exp_id = current_count + 1
        created = RawData.create(self.filetype, self.studies)
        self.assertEqual(created.id, exp_id)

        # Each entry pairs a query template with the exact rows it must
        # return once the expected id is interpolated
        db_checks = [
            # raw data row: raw_data_id, filetype, link_filepaths_status
            ("SELECT * FROM qiita.raw_data WHERE raw_data_id=%d",
             [[exp_id, 2, 'idle']]),
            # link to the study: study_id, raw_data_id
            ("SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d",
             [[1, exp_id]]),
            # no filepath may be linked to the raw data
            ("SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d",
             []),
        ]
        for sql_template, expected_rows in db_checks:
            rows = self.conn_handler.execute_fetchall(sql_template % exp_id)
            self.assertEqual(rows, expected_rows)
コード例 #25
0
ファイル: study_handlers.py プロジェクト: gustabf/qiita
    def post(self, study_id):
        """Process a form submission for a study and re-render its page.

        Exactly one action is performed per request, chosen by the first
        matching set of request arguments, in this order: add a sample
        template, request approval, make public, revert to sandbox, approve,
        add raw data (new or previously used), add a prep template, update
        the investigation type of a prep template. If none matches, an error
        message is displayed.

        This method yields ``Task`` objects, so it is presumably wrapped by
        a coroutine decorator that is outside this view -- confirm.

        Parameters
        ----------
        study_id : str or int
            Identifier of the study; converted with ``int``.

        Raises
        ------
        HTTPError
            404 if the study does not exist; 400 if the uploaded sample or
            prep template file cannot be found on disk.
        """
        study_id = int(study_id)
        user = User(self.current_user)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            # Study not in database so fail nicely
            raise HTTPError(404, "Study %d does not exist" % study_id)
        else:
            check_access(user, study, raise_error=True)

        # vars to add sample template
        msg = ''
        # every branch reports success unless it explicitly says otherwise
        msg_level = 'success'
        tab_to_display = ''
        sample_template = self.get_argument('sample_template', None)
        # vars to add raw data
        filetype = self.get_argument('filetype', None)
        previous_raw_data = self.get_argument('previous_raw_data', None)
        # vars to add prep template
        add_prep_template = self.get_argument('add_prep_template', None)
        raw_data_id = self.get_argument('raw_data_id', None)
        data_type_id = self.get_argument('data_type_id', None)
        make_public = self.get_argument('make_public', False)
        make_sandbox = self.get_argument('make_sandbox', False)
        approve_study = self.get_argument('approve_study', False)
        request_approval = self.get_argument('request_approval', False)
        investigation_type = self.get_argument('investigation-type', None)
        user_defined_investigation_type = self.get_argument(
            'user-defined-investigation-type', None)
        new_investigation_type = self.get_argument('new-investigation-type',
                                                   None)

        # None Selected is the equivalent to the user not specifying the info
        # thus we should make the investigation_type None
        if investigation_type == "" or investigation_type == "None Selected":
            investigation_type = None

        # to update investigation type
        update_investigation_type = self.get_argument(
            'update_investigation_type', None)
        edit_investigation_type = self.get_argument('edit-investigation-type',
                                                    None)
        edit_user_defined_investigation_type = self.get_argument(
            'edit-user-defined-investigation-type', None)
        edit_new_investigation_type = self.get_argument(
            'edit-new-investigation-type', None)

        # None Selected is the equivalent to the user not specifying the info
        # thus we should make the investigation_type None
        if edit_investigation_type == "" or \
                edit_investigation_type == "None Selected":
            edit_investigation_type = None

        if sample_template:
            # processing sample templates

            _, base_fp = get_mountpoint("uploads")[0]
            fp_rsp = join(base_fp, str(study_id), sample_template)
            if not exists(fp_rsp):
                raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)

            try:
                # deleting previous uploads and inserting new one
                yield Task(self.remove_add_study_template,
                           study.raw_data,
                           study_id, fp_rsp)
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError, QiitaDBDuplicateHeaderError) as e:
                msg = html_error_message % ('parsing the sample template:',
                                            basename(fp_rsp), str(e))
                self.display_template(study, msg, "danger")
                return

            msg = ("The sample template '%s' has been added" %
                   sample_template)
            tab_to_display = ""

        elif request_approval:
            study.status = 'awaiting_approval'
            msg = "Study sent to admin for approval"
            tab_to_display = ""

        elif make_public:
            study.status = 'public'
            msg = "Study set to public"
            tab_to_display = ""

        elif make_sandbox:
            study.status = 'sandbox'
            msg = "Study reverted to sandbox"
            tab_to_display = ""

        elif approve_study:
            # make sure user is admin, then make full private study
            # NOTE(review): when the user is not an admin and approval is
            # required, nothing happens and an empty "success" message is
            # displayed -- confirm whether an error should be reported
            if user.level == 'admin' or not \
                    qiita_config.require_approval:
                study.status = 'private'
                msg = "Study approved"
                tab_to_display = ""

        elif filetype or previous_raw_data:
            # adding blank raw data
            if filetype and previous_raw_data:
                msg = ("You can not specify both a new raw data and a "
                       "previouly used one")
                # this is a user error, so do not report it as a success
                msg_level = 'danger'
            elif filetype:
                try:
                    RawData.create(filetype, [study])
                except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                        QiitaDBDuplicateError, IOError, ValueError, KeyError,
                        CParserError) as e:
                    msg = html_error_message % ("creating a new raw data "
                                                "object for study:",
                                                str(study.id), str(e))
                    self.display_template(study, msg, "danger")
                    return
                msg = ""
            else:
                # NOTE(review): previous_raw_data comes from get_argument,
                # which returns a single string, so this iterates over its
                # characters -- confirm that ids are always one character
                # or switch to get_arguments
                raw_data = [RawData(rd) for rd in previous_raw_data]
                study.add_raw_data(raw_data)
                msg = ""
            tab_to_display = ""

        elif add_prep_template and raw_data_id and data_type_id:
            # adding prep templates

            if investigation_type == 'Other' and \
                    user_defined_investigation_type == 'New Type':
                investigation_type = new_investigation_type

                # this is a new user defined investigation type so store it
                ontology = Ontology(convert_to_id('ENA', 'ontology'))
                ontology.add_user_defined_term(investigation_type)
            elif investigation_type == 'Other' and \
                    user_defined_investigation_type != 'New Type':
                investigation_type = user_defined_investigation_type

            raw_data_id = int(raw_data_id)
            _, base_path = get_mountpoint("uploads")[0]
            fp_rpt = join(base_path, str(study_id), add_prep_template)
            if not exists(fp_rpt):
                raise HTTPError(400, "This file doesn't exist: %s" % fp_rpt)

            try:
                # inserting prep templates
                yield Task(self.remove_add_prep_template, fp_rpt, raw_data_id,
                           study, data_type_id, investigation_type)
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError,
                    CParserError) as e:
                msg = html_error_message % ("parsing the prep template: ",
                                            basename(fp_rpt), str(e))
                self.display_template(study, msg, "danger",
                                      str(raw_data_id))
                return

            msg = "Your prep template was added"
            tab_to_display = str(raw_data_id)

        elif update_investigation_type:
            # updating the prep template investigation type

            pt = PrepTemplate(update_investigation_type)
            investigation_type = edit_investigation_type

            # figure out whether to add it as a user defined term or not
            if edit_investigation_type == 'Other' and \
                    edit_user_defined_investigation_type == 'New Type':
                investigation_type = edit_new_investigation_type

                # this is a new user defined investigation type so store it
                ontology = Ontology(convert_to_id('ENA', 'ontology'))
                ontology.add_user_defined_term(investigation_type)

            # the decision must be based on the fields of the *edit* form;
            # the add-prep-template field is unrelated to this action
            elif investigation_type == 'Other' and \
                    edit_user_defined_investigation_type != 'New Type':
                investigation_type = edit_user_defined_investigation_type

            try:
                pt.investigation_type = investigation_type
            except QiitaDBColumnError as e:
                msg = html_error_message % (", invalid investigation type: ",
                                            investigation_type, str(e))
                self.display_template(study, msg, "danger",
                                      str(pt.raw_data))
                return

            msg = "The prep template has been updated!"
            tab_to_display = str(pt.raw_data)

        else:
            msg = ("Error, did you select a valid uploaded file or are "
                   "passing the correct parameters?")
            msg_level = 'danger'
            tab_to_display = ""

        self.display_template(study, msg, msg_level, tab_to_display)
コード例 #26
0
ファイル: dispatchable.py プロジェクト: MarkBruns/qiita
def create_raw_data(filetype, prep_template, filepaths):
    """Create a new raw data object attached to a single prep template.

    This needs to be dispatchable because it moves large files.

    The arguments are forwarded to ``RawData.create``, with
    ``prep_template`` wrapped in a one-element list. The created object is
    not returned.
    """
    prep_templates = [prep_template]
    RawData.create(filetype, prep_templates, filepaths)
コード例 #27
0
    def setUp(self):
        """Create three SFF raw data fixtures and register their files.

        The fixtures are: a plain SFF dataset with two files
        (``self.raw_data``), a gzipped SFF dataset with one file
        (``self.raw_data_gz``) and an SFF dataset whose prep template uses
        more than one run prefix (``self.raw_data_rp``). Each one gets its
        own prep template attached to study 1.

        Every file written here, as well as every filepath reported by the
        three prep templates, is recorded in ``self.files_to_remove``; the
        temporary directory is recorded in ``self.dirs_to_remove``.
        """
        self.db_dir = get_db_files_base_dir()

        # Create a SFF dataset: add prep template and a RawData
        study = Study(1)
        md_dict = {
            'SKB8.640193': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'GTCCGCAAGTTA',
                            'run_prefix': "preprocess_test",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'},
            'SKD8.640184': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'CGTAGAGCTCTC',
                            'run_prefix': "preprocess_test",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'},
            'SKB7.640196': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'CCTCTGAGAGCT',
                            'run_prefix': "preprocess_test",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'}
        }
        md = pd.DataFrame.from_dict(md_dict, orient='index')
        self.sff_prep_template = PrepTemplate.create(md, study, "16S")

        tmp_dir = mkdtemp()
        self.path_builder = partial(join, tmp_dir)
        fp1 = self.path_builder('preprocess_test1.sff')
        with open(fp1, 'w') as f:
            f.write('\n')
        fp2 = self.path_builder('preprocess_test2.sff')
        with open(fp2, 'w') as f:
            f.write('\n')
        self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
        fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]

        # Magic number 1: is the filetype id
        self.raw_data = RawData.create(1, [self.sff_prep_template], fps)

        # Create a gzipped SFF dataset: add prep template and a RawData
        md = pd.DataFrame.from_dict(md_dict, orient='index')
        self.sff_prep_template_gz = PrepTemplate.create(md, study, "16S")
        fp1_gz = self.path_builder('preprocess_test1.sff.gz')
        with gzip.open(fp1_gz, 'w') as f:
            f.write('\n')
        fps = [(fp1_gz, self.raw_sff_id)]
        self.raw_data_gz = RawData.create(1, [self.sff_prep_template_gz], fps)

        # Create a SFF dataset with multiple run prefix:
        # add prep template and a RawData
        md_dict['SKD8.640184']['run_prefix'] = "new"
        md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
        self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")

        # These paths are the same as fp1/fp2, so the files are re-created
        rp_fp1 = self.path_builder('preprocess_test1.sff')
        with open(rp_fp1, 'w') as f:
            f.write('\n')
        rp_fp2 = self.path_builder('preprocess_test2.sff')
        with open(rp_fp2, 'w') as f:
            f.write('\n')
        fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]

        # Magic number 1: is the filetype id
        self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp], fps)

        # Make sure that we clean up all created files, including the
        # gzipped one
        self.files_to_remove = [fp1, fp2, fp1_gz, rp_fp1, rp_fp2]
        self.dirs_to_remove = [tmp_dir]

        # Every prep template created above reports its own files; the
        # gzipped fixture must be included or its files are left behind
        for pt in [self.sff_prep_template, self.sff_prep_template_gz,
                   self.sff_prep_template_rp]:
            for _, fp in pt.get_filepaths():
                self.files_to_remove.append(fp)
コード例 #28
0
ファイル: test_data.py プロジェクト: MarkBruns/qiita
 def test_create_error(self):
     """RawData.create must raise QiitaDBError for prep template 1"""
     # presumably prep template 1 cannot receive new raw data because it
     # already has some attached -- confirm against RawData.create
     with self.assertRaises(QiitaDBError):
         prep_templates = [PrepTemplate(1)]
         RawData.create(self.filetype, prep_templates, self.filepaths)
コード例 #29
0
ファイル: test_data.py プロジェクト: MarkBruns/qiita
 def test_is_preprocessed(self):
     """_is_preprocessed is True for raw data 1, False for new raw data"""
     # Raw data 1 already exists and is expected to be preprocessed
     existing_rd = RawData(1)
     self.assertTrue(existing_rd._is_preprocessed())

     # A freshly created raw data object must not be preprocessed yet
     new_rd = RawData.create(
         self.filetype, self.prep_templates, self.filepaths)
     self.assertFalse(new_rd._is_preprocessed())