Example #1
0
    def delete_raw_data(self, study, user, callback):
        """Remove the raw data selected in the request from the study

        The raw data id is read from the ``raw_data_id`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object (not used by this method)
        callback : function
            Called with a single 5-element tuple
            ``(msg, msg_level, tab, tab_id, None)`` describing the outcome
        """
        rd_id = int(self.get_argument('raw_data_id'))

        try:
            RawData.delete(rd_id, study.id)
            # The success message is built inside the try block so that a
            # failure while reading the study title is reported as well
            outcome = ("Raw data %d has been deleted from study: "
                       "<b><i>%s</i></b>" % (rd_id, study.title),
                       "success", 'study_information_tab', None, None)
        except Exception as e:
            # On failure, point back to the tab of the raw data itself
            outcome = ("Couldn't remove %d raw data: %s" % (rd_id, str(e)),
                       "danger", 'raw_data_tab', rd_id, None)

        callback(outcome)
Example #2
0
    def delete_raw_data(self, study, user, callback):
        """Remove the raw data selected in the request from a prep template

        Both ids are read from the ``raw_data_id`` and ``prep_template_id``
        request arguments.

        Parameters
        ----------
        study : Study
            The current study object (not used by this method)
        user : User
            The current user object (not used by this method)
        callback : function
            Called with a single 5-element tuple
            ``(msg, msg_level, "prep_template_tab", prep_template_id, None)``
        """
        rd_id = int(self.get_argument('raw_data_id'))
        pt_id = int(self.get_argument('prep_template_id'))

        try:
            RawData.delete(rd_id, pt_id)
        except Exception as e:
            level = "danger"
            text = "Couldn't remove raw data %d: %s" % (rd_id, str(e))
        else:
            level = "success"
            text = ("Raw data %d has been deleted from prep_template %d"
                    % (rd_id, pt_id))

        # The prep template tab is shown whatever the outcome was
        callback((text, level, "prep_template_tab", pt_id, None))
Example #3
0
    def delete_raw_data(self, study, user, callback):
        """Remove the raw data selected in the request from the study

        The raw data id is read from the ``raw_data_id`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object (not used by this method)
        callback : function
            Called with a single 5-element tuple
            ``(msg, msg_level, tab, tab_id, None)`` describing the outcome
        """
        rd_id = int(self.get_argument('raw_data_id'))

        try:
            RawData.delete(rd_id, study.id)
            # The success message is built inside the try block so that a
            # failure while reading the study title is reported as well
            outcome = ("Raw data %d has been deleted from study: "
                       "<b><i>%s</i></b>" % (rd_id, study.title),
                       "success", 'study_information_tab', None, None)
        except Exception as e:
            # On failure, point back to the tab of the raw data itself
            outcome = ("Couldn't remove %d raw data: %s" % (rd_id, str(e)),
                       "danger", 'raw_data_tab', rd_id, None)

        callback(outcome)
Example #4
0
    def test_status_error(self):
        """status raises an error for a study with no access to the raw data"""
        # A brand new study is created so that the error is triggered: the
        # new study does not have access to the raw data
        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        owner = User('*****@*****.**')
        new_study = Study.create(owner, "Fried chicken microbiome", [1],
                                 study_info)
        raw_data = RawData(1)

        with self.assertRaises(QiitaDBStatusError):
            raw_data.status(new_study)
Example #5
0
    def render(self, raw_data_id, prep_template, study, files):
        """Render the raw data information template

        Parameters
        ----------
        raw_data_id : int
            Id used to instantiate the RawData object to display
        prep_template : PrepTemplate
            Only its ``id`` is forwarded to the template
        study : Study
            Study used to compute the status of the raw data
        files : object
            Forwarded untouched to the template as ``files``

        Returns
        -------
        The result of ``self.render_string`` for
        "study_description_templates/raw_data_info.html"
        """
        raw_data = RawData(raw_data_id)
        raw_data_files = []
        for _, path, path_type in raw_data.get_filepaths():
            # Only the file name is shown, and the first 4 characters of the
            # filepath type are dropped
            raw_data_files.append((basename(path), path_type[4:]))
        filetype = raw_data.filetype
        fp_types = fp_type_by_ft[filetype]
        link_status = raw_data.link_filepaths_status

        # Messages for the states in which a (un)link job is still running
        in_progress = {'linking': "Linking files...",
                       'unlinking': "Unlinking files..."}

        link_msg = ""
        # Buttons are only offered for sandboxed raw data ...
        show_buttons = raw_data.status(study) == 'sandbox'
        if show_buttons:
            if link_status in in_progress:
                # ... and never while files are being linked or unlinked
                link_msg = in_progress[link_status]
                show_buttons = False
            elif link_status.startswith('failed'):
                link_msg = "Error (un)linking files: %s" % link_status

        link_msg = convert_text_html(link_msg)
        template_args = dict(
            rd_id=raw_data_id,
            rd_filetype=raw_data.filetype,
            raw_data_files=raw_data_files,
            prep_template_id=prep_template.id,
            files=files,
            filepath_types=fp_types,
            filetype=filetype,
            link_msg=link_msg,
            show_buttons=show_buttons)
        return self.render_string(
            "study_description_templates/raw_data_info.html",
            **template_args)
Example #6
0
def add_files_to_raw_data(raw_data_id, filepaths):
    """Attach the given filepaths to an existing raw data object

    Needs to be dispatchable because it moves large files

    Parameters
    ----------
    raw_data_id : int
        Id used to instantiate the RawData object
    filepaths : object
        Passed as-is to ``RawData.add_filepaths``
    """
    RawData(raw_data_id).add_filepaths(filepaths)
Example #7
0
def unlink_all_files(raw_data_id):
    """Detach every file from the given raw data

    Needs to be dispatchable because it does I/O and a lot of DB calls

    Parameters
    ----------
    raw_data_id : int
        Id used to instantiate the RawData object
    """
    RawData(raw_data_id).clear_filepaths()
Example #8
0
 def test_add_raw_data(self):
     """Raw data added to a new study shows up in qiita.study_raw_data"""
     self._make_sandbox()
     owner = User('*****@*****.**')
     title = ('NOT Identification of the '
              'Microbiomes for Cannabis Soils')
     study = Study.create(owner, title, [1], self.info)
     study.add_raw_data([RawData(1), RawData(2)])

     sql = "SELECT * FROM qiita.study_raw_data WHERE study_id=%s"
     observed = self.conn_handler.execute_fetchall(sql, (study.id, ))
     # One row per raw data that was attached to the study
     expected = [[study.id, raw_data_id] for raw_data_id in (1, 2)]
     self.assertEqual(observed, expected)
Example #9
0
 def test_get_filepaths(self):
     """get_filepaths returns the raw files with their filepath type ids"""
     observed = RawData(1).get_filepaths()

     # (file name, filepath type) of every file expected for raw data 1
     names_and_types = [
         ('1_s_G1_L001_sequences.fastq.gz', 1),
         ('1_s_G1_L001_sequences_barcodes.fastq.gz', 2)]
     expected = [(join(self.db_test_raw_dir, name), fp_type)
                 for name, fp_type in names_and_types]
     self.assertEqual(observed, expected)
Example #10
0
 def test_get_filepaths(self):
     """get_filepaths returns the raw files with their filepath type names"""
     observed = RawData(1).get_filepaths()

     # (file name, filepath type) of every file expected for raw data 1
     names_and_types = [
         ('1_s_G1_L001_sequences.fastq.gz', "raw_sequences"),
         ('1_s_G1_L001_sequences_barcodes.fastq.gz', "raw_barcodes")]
     expected = [(join(self.db_test_raw_dir, name), fp_type)
                 for name, fp_type in names_and_types]
     self.assertEqual(observed, expected)
Example #11
0
    def setUp(self):
        """Create the metadata, raw data and expected values for the tests"""
        def sample_metadata(str_value):
            # All the samples share the same values except for 'str_column'
            return {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': str_value
            }

        metadata_dict = {
            'SKB8.640193': sample_metadata('Value for sample 1'),
            'SKD8.640184': sample_metadata('Value for sample 2'),
            'SKB7.640196': sample_metadata('Value for sample 3')
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.test_raw_data = RawData(1)

        # Two temporary files, each containing a single newline, are used as
        # the filepaths of the new raw data
        filepaths = []
        for suffix, fp_type in (('_seqs.fastq', 1), ('_barcodes.fastq', 2)):
            fd, fp = mkstemp(suffix=suffix)
            close(fd)
            with open(fp, "w") as f:
                f.write("\n")
            filepaths.append((fp, fp_type))
        seqs_fp, barcodes_fp = filepaths[0][0], filepaths[1][0]

        self.new_raw_data = RawData.create(2, filepaths, [Study(1)])

        # Paths under the DB raw_data folder ("3_" + original file name) that
        # are registered for clean up
        db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self._clean_up_files = [
            join(db_test_raw_dir, "3_%s" % basename(fp))
            for fp in (seqs_fp, barcodes_fp)]

        self.tester = PrepTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
            'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
            'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
            'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
            'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
            'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
            'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
        }
Example #12
0
class QiitaBaseTest(TestCase):
    """Exercises the functionality provided by the Qiita base classes"""

    def setUp(self):
        # A concrete subclass is required to test the equality functions
        self.tester = RawData(1)

    def test_init_base_error(self):
        """Instantiating the base class directly is an error"""
        self.assertRaises(IncompetentQiitaDeveloperError, QiitaObject, 1)

    def test_init_error_inexistent(self):
        """Instantiating an object with an id that does not exist fails"""
        self.assertRaises(QiitaDBUnknownIDError, RawData, 10)

    def test_check_subclass(self):
        """_check_subclass is silent when called from a subclass"""
        self.tester._check_subclass()

    def test_check_subclass_error(self):
        """_check_subclass fails when called from a base class"""
        # The check is exercised through the __init__ call
        for base_class in (QiitaObject, QiitaStatusObject):
            self.assertRaises(IncompetentQiitaDeveloperError, base_class, 1)

    def test_check_id(self):
        """_check_id tells whether an id exists on the database"""
        id_exists = self.tester._check_id
        self.assertTrue(id_exists(1))
        self.assertFalse(id_exists(100))

    def test_equal_self(self):
        """An object is equal to itself"""
        self.assertEqual(self.tester, self.tester)

    def test_equal(self):
        """Two objects of the same type built from the same id are equal"""
        same_id = RawData(1)
        self.assertEqual(self.tester, same_id)

    def test_not_equal(self):
        """Objects of the same type with different ids are not equal"""
        other_id = RawData(2)
        self.assertNotEqual(self.tester, other_id)

    def test_not_equal_type(self):
        """Objects of different types are not equal"""
        other_type = Study(1)
        self.assertNotEqual(self.tester, other_type)
Example #13
0
class QiitaBaseTest(TestCase):
    """Exercises the functionality provided by the Qiita base classes"""

    def setUp(self):
        # A concrete subclass is required to test the equality functions
        self.tester = RawData(1)

    def test_init_base_error(self):
        """Instantiating the base class directly is an error"""
        self.assertRaises(IncompetentQiitaDeveloperError, QiitaObject, 1)

    def test_init_error_inexistent(self):
        """Instantiating an object with an id that does not exist fails"""
        self.assertRaises(QiitaDBUnknownIDError, RawData, 10)

    def test_check_subclass(self):
        """_check_subclass is silent when called from a subclass"""
        self.tester._check_subclass()

    def test_check_subclass_error(self):
        """_check_subclass fails when called from a base class"""
        # The check is exercised through the __init__ call
        for base_class in (QiitaObject, QiitaStatusObject):
            self.assertRaises(IncompetentQiitaDeveloperError, base_class, 1)

    def test_check_id(self):
        """_check_id tells whether an id exists on the database"""
        id_exists = self.tester._check_id
        self.assertTrue(id_exists(1))
        self.assertFalse(id_exists(100))

    def test_equal_self(self):
        """An object is equal to itself"""
        self.assertEqual(self.tester, self.tester)

    def test_equal(self):
        """Two objects of the same type built from the same id are equal"""
        same_id = RawData(1)
        self.assertEqual(self.tester, same_id)

    def test_not_equal(self):
        """Objects of the same type with different ids are not equal"""
        other_id = RawData(2)
        self.assertNotEqual(self.tester, other_id)

    def test_not_equal_type(self):
        """Objects of different types are not equal"""
        other_type = Study(1)
        self.assertNotEqual(self.tester, other_type)
Example #14
0
    def create_raw_data(self, study, user, callback):
        """Adds a (new) raw data to the study

        Either a new raw data object is created from the ``filetype``
        request argument, or the raw data listed (comma separated) in the
        ``previous_raw_data`` request argument are attached to the study.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done. It receives a single 5-element tuple
            ``(msg, msg_level, 'raw_data_tab', rd_id, None)``; ``rd_id`` is
            None whenever no raw data was created or attached.
        """
        msg = "Raw data successfully added"
        msg_level = "success"
        # Default value so that every failure path below can still report
        # its message through the callback instead of hitting an unbound name
        rd_id = None

        # Get the arguments needed to create a raw data object
        filetype = self.get_argument('filetype', None)
        previous_raw_data = self.get_argument('previous_raw_data', None)

        if filetype and previous_raw_data:
            # The user selected a filetype and an existing raw data
            msg = ("You can not specify both a new raw data and a previously "
                   "used one")
            msg_level = "danger"
        elif filetype:
            # We are creating a new raw data object
            try:
                rd_id = RawData.create(filetype, [study]).id
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError) as e:
                msg = html_error_message % (
                    "creating a new raw data object for study:",
                    str(study.id), str(e))
                msg_level = "danger"
        elif previous_raw_data:
            # Attach all the selected raw data; the first one is the one
            # reported back through the callback
            previous_raw_data = previous_raw_data.split(',')
            raw_data = [RawData(rd) for rd in previous_raw_data]
            study.add_raw_data(raw_data)
            rd_id = raw_data[0].id
        else:
            # The user did not provide a filetype neither an existing raw data
            # If using the interface, we should never reach this if, but
            # better be safe than sorry
            msg = ("You should choose a filetype for a new raw data or "
                   "choose a raw data previously used")
            msg_level = "danger"

        callback((msg, msg_level, 'raw_data_tab', rd_id, None))
Example #15
0
    def test_remove_filepath_errors(self):
        """remove_filepath raises the expected error in each failure case"""
        seqs_fp = join(self.db_test_raw_dir,
                       '1_s_G1_L001_sequences.fastq.gz')

        # Removing the file from raw data 1 is expected to be rejected
        with self.assertRaises(QiitaDBError):
            RawData(1).remove_filepath(seqs_fp)

        # The filepath does not belong to raw data 2
        with self.assertRaises(ValueError):
            RawData(2).remove_filepath(seqs_fp)

        # Once the raw data is linked to more than 1 study it can't be
        # unlinked
        Study(2).add_raw_data([RawData(2)])
        with self.assertRaises(QiitaDBError):
            RawData(2).remove_filepath(seqs_fp)
Example #16
0
    def test_move_filepaths_to_upload_folder(self):
        """Files are moved out of their path into the study uploads folder"""
        # The temporary file is created here because this is the only test
        # that needs it
        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        study_id = 1

        raw_data = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
        filepaths = raw_data.get_filepaths()

        # Drop the DB references first so that
        # move_filepaths_to_upload_folder can be called directly
        delete_sql = "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s"
        for fp_id, _, _ in filepaths:
            self.conn_handler.execute(delete_sql, (fp_id,))

        move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

        # Each file must be gone from its old path and present in the
        # uploads folder of the study
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        for _, old_fp, _ in filepaths:
            self.assertFalse(exists(old_fp))
            # The expected name is the old file name without everything up
            # to (and including) its first underscore
            moved_fp = join(uploads_dir, basename(old_fp).split('_', 1)[1])
            self.assertTrue(exists(moved_fp))

            self.files_to_remove.append(moved_fp)
    def test_get_preprocess_fastq_cmd(self):
        """The split_libraries_fastq.py command is built as expected"""
        raw_data = RawData(1)
        params = PreprocessedIlluminaParams(1)
        prep_template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        raw_dir = join(self.db_dir, 'raw_data')
        seqs_fp = join(raw_dir, '1_s_G1_L001_sequences.fastq.gz')
        bc_fp = join(raw_dir, '1_s_G1_L001_sequences_barcodes.fastq.gz')

        # Expected text up to (and including) the "-m " flag
        exp_head = ("split_libraries_fastq.py --store_demultiplexed_fastq -i "
                    "{} -b {} "
                    "-m ".format(seqs_fp, bc_fp))
        # Expected text after the mapping file path
        exp_tail = ("-o {0} --barcode_type golay_12 --max_bad_run_length 3 "
                    "--max_barcode_errors 1.5 "
                    "--min_per_read_length_fraction 0.75 "
                    "--phred_quality_threshold 3 "
                    "--sequence_max_n 0".format(obs_output_dir))

        # The filepath of the mapping file can not be known in advance, so
        # the observed command is cut around it: the head is everything
        # before the mapping file path and the tail is everything after the
        # first space that follows it
        cut = len(exp_head)
        obs_head = obs_cmd[:cut]
        obs_tail = obs_cmd[cut:].split(" ", 1)[1]

        self.assertEqual(obs_head, exp_head)
        self.assertEqual(obs_tail, exp_tail)
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
        """A ValueError is raised for this per sample FASTQ raw data"""
        metadata_dict = {
            'SKB8.640193': {'run_prefix': "sample1_failure", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'}}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # Files attached to the raw data; this combination should fail
        created = []
        for file_name in ('sample1_failure.fastq',
                          'sample1_failure.barcodes.fastq.gz'):
            fp = self.path_builder(file_name)
            with open(fp, 'w') as f:
                f.write('\n')
            self.files_to_remove.append(fp)
            created.append(fp)

        forward_filepath_id = convert_to_id('raw_forward_seqs',
                                            'filepath_type')
        barcode_filepath_id = convert_to_id('raw_barcodes', 'filepath_type')
        fps = list(zip(created, [forward_filepath_id, barcode_filepath_id]))

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)

        # Pick the parameter set by its name
        all_params = list(PreprocessedIlluminaParams.iter())
        matching = [p for p in all_params
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]

        with self.assertRaises(ValueError):
            _get_preprocess_fastq_cmd(raw_data, prep_template, params)
Example #19
0
    def test_move_filepaths_to_upload_folder(self):
        """Files are moved out of their path into the study uploads folder"""
        # The temporary file is created here because this is the only test
        # that needs it
        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        study_id = 1

        raw_data = RawData.create(2, [Study(study_id)], [(seqs_fp, 1)])
        filepaths = raw_data.get_filepaths()

        # Drop the DB references first so that
        # move_filepaths_to_upload_folder can be called directly
        delete_sql = "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s"
        for fp_id, _, _ in filepaths:
            self.conn_handler.execute(delete_sql, (fp_id, ))

        move_filepaths_to_upload_folder(study_id, filepaths, self.conn_handler)

        # Each file must be gone from its old path and present in the
        # uploads folder of the study
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        for _, old_fp, _ in filepaths:
            self.assertFalse(exists(old_fp))
            # The expected name is the old file name without everything up
            # to (and including) its first underscore
            moved_fp = join(uploads_dir, basename(old_fp).split('_', 1)[1])
            self.assertTrue(exists(moved_fp))

            self.files_to_remove.append(moved_fp)
Example #20
0
 def test_add_raw_data_private(self):
     """Adding raw data to a private study raises QiitaDBStatusError"""
     owner = User('*****@*****.**')
     title = ('NOT Identification of the '
              'Microbiomes for Cannabis Soils')
     study = Study.create(owner, title, [1], self.info)
     study.status = 'private'

     with self.assertRaises(QiitaDBStatusError):
         study.add_raw_data([RawData(2)])
Example #21
0
    def test_status(self):
        """The raw data status follows the status of its processed data"""
        raw_data = RawData(1)
        study = Study(1)
        self.assertEqual(raw_data.status(study), 'private')

        # The status is inferred from the processed data, so changing the
        # status of the processed data must be reflected on the raw data
        processed = ProcessedData(1)
        processed.status = 'public'
        self.assertEqual(raw_data.status(study), 'public')

        # A newly created raw data has no processed data, so its status is
        # expected to be sandbox
        new_raw_data = RawData.create(self.filetype, self.studies,
                                      self.filepaths)
        self.assertEqual(new_raw_data.status(study), 'sandbox')
Example #22
0
 def test_link_filepaths_status_setter(self):
     """_set_link_filepaths_status updates link_filepaths_status"""
     raw_data = RawData(1)
     # Value before any change is made
     self.assertEqual(raw_data.link_filepaths_status, 'idle')

     # Every value set must be the one read back afterwards
     for status in ('linking', 'unlinking', 'failed: error'):
         raw_data._set_link_filepaths_status(status)
         self.assertEqual(raw_data.link_filepaths_status, status)
Example #23
0
 def test_clear_filepaths(self):
     """clear_filepaths leaves no raw_filepath row for the raw data"""
     raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
     exists_sql = ("SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
                   "WHERE raw_data_id=%s)")

     # The newly created raw data starts with filepaths attached
     had_files = self.conn_handler.execute_fetchone(
         exists_sql, (raw_data.id,))[0]
     self.assertTrue(had_files)

     raw_data.clear_filepaths()

     has_files = self.conn_handler.execute_fetchone(
         exists_sql, (raw_data.id,))[0]
     self.assertFalse(has_files)
    def test_get_preprocess_fasta_cmd_sff(self):
        """The chain of commands for SFF raw data is built as expected"""
        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw_data, prep_template, params)

        raw_dir = join(self.db_dir, 'raw_data')
        sff_fps = [join(raw_dir, 'preprocess_test1.sff'),
                   join(raw_dir, 'preprocess_test2.sff')]

        # One process_sff.py call is expected per SFF file
        exp_sff_cmds = [
            ' '.join(["process_sff.py",
                      "-i %s" % sff_fp,
                      "-o %s" % obs_output_dir])
            for sff_fp in sff_fps]

        fasta_files = ','.join(
            join(obs_output_dir, name)
            for name in ("preprocess_test1.fna", "preprocess_test2.fna"))
        qual_files = ','.join(
            join(obs_output_dir, name)
            for name in ("preprocess_test1.qual", "preprocess_test2.qual"))

        exp_split_head = ' '.join(["split_libraries.py",
                                   "-f %s" % fasta_files])
        exp_split_tail = ' '.join([
            "-q %s" % qual_files, "-d",
            "-o %s" % obs_output_dir,
            params.to_str()
        ])
        exp_convert_cmd = ' '.join([
            "convert_fastaqual_fastq.py",
            "-f %s/seqs.fna" % obs_output_dir,
            "-q %s/seqs_filtered.qual" % obs_output_dir,
            "-o %s" % obs_output_dir, "-F"
        ])

        obs_cmds = obs_cmd.split('; ')

        # The filepath of the mapping file can not be known in advance, so
        # the split_libraries.py command is cut around it: the head is what
        # precedes ' -m ' and the tail is what follows the first space after
        # the mapping file path
        obs_split_head, after_flag = obs_cmds[2].split(' -m ', 1)
        obs_split_tail = after_flag.split(' ', 1)[1]

        self.assertEqual(obs_cmds[0], exp_sff_cmds[0])
        self.assertEqual(obs_cmds[1], exp_sff_cmds[1])
        self.assertEqual(obs_split_head, exp_split_head)
        self.assertEqual(obs_split_tail, exp_split_tail)
        self.assertEqual(obs_cmds[3], exp_convert_cmd)
Example #25
0
 def remove_add_prep_template(self, fp_rpt, raw_data_id, study,
                              data_type_id, investigation_type):
     """Create a prep template from a file and then remove that file

     The file at `fp_rpt` is loaded through load_template_to_dataframe and
     is only removed once the prep template has been created. Returns the
     id of the new prep template.
     """
     template = load_template_to_dataframe(fp_rpt)
     raw_data = RawData(raw_data_id)
     new_id = PrepTemplate.create(
         template, raw_data, study, _to_int(data_type_id),
         investigation_type=investigation_type).id
     remove(fp_rpt)
     return new_id
Example #26
0
 def test_remove_filepath(self):
     """remove_filepath only drops the requested filepath"""
     raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
     seqs_db_fp = join(self.db_test_raw_dir,
                       "3_%s" % basename(self.seqs_fp))
     raw_data.remove_filepath(seqs_db_fp)

     # NOTE(review): the filepath ids 17 and 18 are hard-coded; presumably
     # they are the ids given to the two new files in the test database
     removed_row = self.conn_handler.execute_fetchone(
         "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
         "WHERE filepath_id=17)")
     self.assertFalse(removed_row[0])

     kept_row = self.conn_handler.execute_fetchone(
         "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
         "WHERE filepath_id=18)")
     self.assertTrue(kept_row[0])
Example #27
0
 def test_link_filepaths_status_setter(self):
     """_set_link_filepaths_status updates link_filepaths_status"""
     raw_data = RawData(1)
     # Value before any change is made
     self.assertEqual(raw_data.link_filepaths_status, 'idle')

     # Every value set must be the one read back afterwards
     for status in ('linking', 'unlinking', 'failed: error'):
         raw_data._set_link_filepaths_status(status)
         self.assertEqual(raw_data.link_filepaths_status, status)
Example #28
0
def get_raw_data_from_other_studies(user, study):
    """Maps the raw data of the user's other studies to a study title

    Parameters
    ----------
    user
        Object whose `user_studies` attribute is iterated to get study ids
    study
        The study to leave out; only its `id` attribute is read

    Returns
    -------
    dict
        Keys are int(raw_data_id); each value is the title of the last
        study listed in the `studies` attribute of that raw data
    """
    titles = {}
    other_ids = (sid for sid in user.user_studies if sid != study.id)
    for other_id in other_ids:
        for raw_data_id in Study(other_id).raw_data():
            last_study_id = RawData(raw_data_id).studies[-1]
            # A raw data seen through several studies simply gets rewritten
            titles[int(raw_data_id)] = Study(last_study_id).title
    return titles
Example #29
0
    def test_create(self):
        """Correctly creates all the rows in the DB for the raw data"""
        fetchall = self.conn_handler.execute_fetchall

        # The new object must take the id following the current row count
        new_id = get_count("qiita.raw_data") + 1
        raw_data = RawData.create(self.filetype, self.prep_templates,
                                  self.filepaths)
        self.assertEqual(raw_data.id, new_id)

        # The row in qiita.raw_data holds:
        # raw_data_id, filetype, link_filepaths_status
        rows = fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % new_id)
        self.assertEqual(rows, [[new_id, 2, 'idle']])

        # Both prep templates must now reference the new raw data
        sql = """SELECT prep_template_id
                 FROM qiita.prep_template
                 WHERE raw_data_id = %s
                 ORDER BY prep_template_id"""
        linked = fetchall(sql, (new_id,))
        self.assertEqual(linked, [[self.pt1.id], [self.pt2.id]])

        # The files must exist in the raw data directory under a name
        # prefixed with the raw data id; they are registered for clean up
        seqs_name = "%d_%s" % (new_id, basename(self.seqs_fp))
        bc_name = "%d_%s" % (new_id, basename(self.barcodes_fp))
        for name in (seqs_name, bc_name):
            stored_fp = join(self.db_test_raw_dir, name)
            self.assertTrue(exists(stored_fp))
            self._clean_up_files.append(stored_fp)

        # The two newest rows of qiita.filepath must describe those files;
        # the leading columns are filepath_id, path, filepath_type_id
        top_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) FROM qiita.filepath")[0]
        rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
            "filepath_id=%d" % (top_id - 1, top_id))
        expected = [[top_id - 1, seqs_name, 1, '852952723', 1, 5],
                    [top_id, bc_name, 2, '852952723', 1, 5]]
        self.assertEqual(rows, expected)

        # qiita.raw_filepath rows are (raw_data_id, filepath_id)
        links = fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % new_id)
        self.assertEqual(links, [[new_id, top_id - 1], [new_id, top_id]])
Example #30
0
    def render(self, prep, study_id, is_editable, ena_terms, study_status,
               user_defined_terms):
        """Renders the prep template panel for a single prep template

        Parameters
        ----------
        prep
            The prep template object; its `id`, `data_type()`, `raw_data`,
            `get_filepaths()`, `investigation_type`, `preprocessed_data` and
            `preprocessing_status` are read
        study_id, is_editable, ena_terms, study_status, user_defined_terms
            Forwarded untouched to the HTML template

        Returns
        -------
        The result of `render_string` on
        "study_description_templates/prep_template_panel.html"

        Raises
        ------
        ValueError
            If the filetype of the raw data is not SFF, FASTA or FASTQ
        """
        # Check if the request came from a local source
        is_local_request = self._is_local()

        prep_id = prep.id
        data_type = prep.data_type()
        raw_data = RawData(prep.raw_data)
        filepaths = prep.get_filepaths()
        investigation_type = prep.investigation_type
        preprocessed_data = prep.preprocessed_data
        preprocessing_status = prep.preprocessing_status

        # The filetype decides which family of preprocessing parameters is
        # offered to the user
        filetype = raw_data.filetype
        if filetype in ('SFF', 'FASTA'):
            param_iter = Preprocessed454Params.iter()
        elif filetype == 'FASTQ':
            param_iter = PreprocessedIlluminaParams.iter()
        else:
            # Name the offending value so the failure can be diagnosed
            raise ValueError("Unknown filetype: %s" % filetype)

        # One (id, name, html description) tuple per parameter set
        preprocess_options = [
            (param.id, param.name,
             '<br>'.join("<b>%s:</b> %s" % (k, v)
                         for k, v in viewitems(param.values)))
            for param in param_iter]

        # Unfortunately, both the prep template and the qiime mapping files
        # have the sample type. The way to differentiate them is if we have
        # the substring 'qiime' in the basename
        filepaths = [
            (id_, fp,
             "Qiime mapping" if 'qiime' in basename(fp) else "Prep template")
            for id_, fp in filepaths]

        return self.render_string(
            "study_description_templates/prep_template_panel.html",
            prep_id=prep_id,
            data_type=data_type,
            filepaths=filepaths,
            investigation_type=investigation_type,
            preprocessed_data=preprocessed_data,
            preprocessing_status=preprocessing_status,
            study_id=study_id,
            is_local_request=is_local_request,
            is_editable=is_editable,
            ena_terms=ena_terms,
            study_status=study_status,
            user_defined_terms=user_defined_terms,
            preprocess_options=preprocess_options)
Example #31
0
    def create_raw_data(self, study, user, callback):
        """Adds a (new) raw data to the study

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done. It receives a single tuple
            (msg, msg_level, 'raw_data_tab', rd_id, None), where rd_id is
            None whenever no raw data could be created or selected
        """
        msg = "Raw data successfully added"
        msg_level = "success"
        # Default for every path that does not produce a raw data id. Without
        # it, the "both arguments given" branch and a failed creation reach
        # the callback with `rd_id` unbound and raise UnboundLocalError
        rd_id = None

        # Get the arguments needed to create a raw data object
        filetype = self.get_argument('filetype', None)
        previous_raw_data = self.get_argument('previous_raw_data', None)

        if filetype and previous_raw_data:
            # The user selected a filetype and an existing raw data
            msg = ("You can not specify both a new raw data and a previously "
                   "used one")
            msg_level = "danger"
        elif filetype:
            # We are creating a new raw data object
            try:
                rd_id = RawData.create(filetype, [study]).id
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError) as e:
                msg = html_error_message % (
                    "creating a new raw data object for study:",
                    str(study.id), str(e))
                msg_level = "danger"
        elif previous_raw_data:
            # Comma separated list of existing raw data ids; the first one is
            # the one reported back to the callback
            previous_raw_data = previous_raw_data.split(',')
            raw_data = [RawData(rd) for rd in previous_raw_data]
            study.add_raw_data(raw_data)
            rd_id = raw_data[0].id
        else:
            # The user did not provide a filetype neither an existing raw data
            # If using the interface, we should never reach this if, but
            # better be safe than sorry
            msg = ("You should choose a filetype for a new raw data or "
                   "choose a raw data previously used")
            msg_level = "danger"

        callback((msg, msg_level, 'raw_data_tab', rd_id, None))
Example #32
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Recreates the sample template of a study from a file

        Parameters
        ----------
        raw_data : callable
            Called without arguments; every value it yields is used to
            instantiate a RawData
        study_id
            Used for the SampleTemplate exists/delete calls and to
            instantiate the Study the new sample template is created for
        fp_rsp
            Passed to load_template_to_dataframe and then to remove
            (presumably a filepath - confirm against the caller)
        """
        # Delete every prep template of those raw data that still exists
        for raw_data_id in raw_data():
            for prep_id in RawData(raw_data_id).prep_templates:
                if PrepTemplate.exists(prep_id):
                    PrepTemplate.delete(prep_id)

        # Delete the current sample template, if any, before recreating it
        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        new_template = load_template_to_dataframe(fp_rsp)
        SampleTemplate.create(new_template, Study(study_id))
        remove(fp_rsp)
Example #33
0
    def setUp(self):
        """Builds the metadata, raw data and prep template fixtures"""
        # Three samples, all sharing the same values except for `str_column`
        metadata_dict = {
            'SKB8.640193': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': 'Value for sample 1'},
            'SKD8.640184': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': 'Value for sample 2'},
            'SKB7.640196': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': 'Value for sample 3'},
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.test_raw_data = RawData(1)

        # Two temporary files, each holding a single newline, back the new
        # raw data
        tmp_fps = []
        for suffix in ('_seqs.fastq', '_barcodes.fastq'):
            fd, tmp_fp = mkstemp(suffix=suffix)
            close(fd)
            tmp_fps.append(tmp_fp)
        seqs_fp, barcodes_fp = tmp_fps
        filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
        for tmp_fp in tmp_fps:
            with open(tmp_fp, "w") as f:
                f.write("\n")
        self.new_raw_data = RawData.create(2, filepaths, [Study(1)])

        # NOTE(review): the "3_" prefix is hard-coded; presumably the new raw
        # data gets id 3 in the test DB - confirm
        db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self._clean_up_files = [
            join(db_test_raw_dir, "3_%s" % basename(seqs_fp)),
            join(db_test_raw_dir, "3_%s" % basename(barcodes_fp))]

        self.tester = PrepTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
Example #34
0
    def test_create(self):
        """Correctly creates all the rows in the DB for the raw data"""
        fetchall = self.conn_handler.execute_fetchall
        fetchone = self.conn_handler.execute_fetchone

        # The new object must take the id following the current row count
        new_id = 1 + fetchone("SELECT count(1) from qiita.raw_data")[0]
        raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
        self.assertEqual(raw_data.id, new_id)

        # The row in qiita.raw_data holds:
        # raw_data_id, filetype, link_filepaths_status
        rows = fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % new_id)
        self.assertEqual(rows, [[new_id, 2, 'idle']])

        # qiita.study_raw_data rows are (study_id, raw_data_id)
        rows = fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d" % new_id)
        self.assertEqual(rows, [[1, new_id]])

        # The files must exist in the raw data directory under a name
        # prefixed with the raw data id; they are registered for clean up
        seqs_name = "%d_%s" % (new_id, basename(self.seqs_fp))
        bc_name = "%d_%s" % (new_id, basename(self.barcodes_fp))
        for name in (seqs_name, bc_name):
            stored_fp = join(self.db_test_raw_dir, name)
            self.assertTrue(exists(stored_fp))
            self._clean_up_files.append(stored_fp)

        # The two newest rows of qiita.filepath must describe those files;
        # the leading columns are filepath_id, path, filepath_type_id
        top_id = fetchone("SELECT count(1) FROM qiita.filepath")[0]
        rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
            "filepath_id=%d" % (top_id - 1, top_id))
        expected = [[top_id - 1, seqs_name, 1, '852952723', 1, 5],
                    [top_id, bc_name, 2, '852952723', 1, 5]]
        self.assertEqual(rows, expected)

        # qiita.raw_filepath rows are (raw_data_id, filepath_id)
        links = fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % new_id)
        self.assertEqual(links, [[new_id, top_id - 1], [new_id, top_id]])
Example #35
0
 def test_remove_filepath(self):
     """Checks qiita.raw_filepath after removing the sequences file"""
     fetchone = self.conn_handler.execute_fetchone

     # Row counts taken before the new raw data is created
     top_id = fetchone("SELECT count(1) FROM qiita.raw_filepath")[0]
     raw_id = fetchone("SELECT count(1) FROM qiita.raw_data")[0]

     raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
     # The stored file name is prefixed with the id the new raw data gets
     stored_name = "%d_%s" % (raw_id + 1, basename(self.seqs_fp))
     raw_data.remove_filepath(join(self.db_test_raw_dir, stored_name))

     gone = fetchone(
         "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
         "WHERE filepath_id=%d)" % (top_id - 1))
     self.assertFalse(gone[0])

     kept = fetchone(
         "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
         "WHERE filepath_id=%d)" % (top_id - 2))
     self.assertTrue(kept[0])
Example #36
0
 def test_remove_filepath(self):
     """Checks qiita.raw_filepath after removing the sequences file"""
     run_one = self.conn_handler.execute_fetchone

     # Both counts are read before the raw data under test exists
     top_id = run_one("SELECT count(1) FROM qiita.raw_filepath")[0]
     raw_id = run_one("SELECT count(1) FROM qiita.raw_data")[0]

     created = RawData.create(self.filetype, self.studies, self.filepaths)
     # Name of the stored file: "<new raw data id>_<original basename>"
     seqs_name = "%d_%s" % (raw_id + 1, basename(self.seqs_fp))
     created.remove_filepath(join(self.db_test_raw_dir, seqs_name))

     missing_row = run_one(
         "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
         "WHERE filepath_id=%d)" % (top_id - 1))
     self.assertFalse(missing_row[0])

     present_row = run_one(
         "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
         "WHERE filepath_id=%d)" % (top_id - 2))
     self.assertTrue(present_row[0])
Example #37
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
        """Checks the command built for a per_sample_FASTQ raw data"""
        # Prep template with two samples whose run_prefix values match the
        # names of the two files created below
        metadata_dict = {
            'SKB8.640193': {
                'run_prefix': "sample1", 'primer': 'A',
                'barcode': 'A', 'center_name': 'ANL',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'A',
                'experiment_design_description': 'A'},
            'SKD8.640184': {
                'run_prefix': "sample2", 'primer': 'A',
                'barcode': 'A', 'center_name': 'ANL',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'A',
                'experiment_design_description': 'A'}}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # One single-newline file per sample, registered for removal
        sample_fps = []
        for name in ('sample1.fastq', 'sample2.fastq.gz'):
            sample_fp = self.path_builder(name)
            with open(sample_fp, 'w') as f:
                f.write('\n')
            self.files_to_remove.append(sample_fp)
            sample_fps.append(sample_fp)

        filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
        fps = [(sample_fp, filepath_id) for sample_fp in sample_fps]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)

        # Pick the parameter set by name among all the Illumina ones
        all_params = list(PreprocessedIlluminaParams.iter())
        params = [p for p in all_params
                  if p.name == 'per sample FASTQ defaults'][0]

        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        # The expected command lists the stored filepaths in sorted order
        raw_fps = ','.join(
            fp for _, fp, _ in sorted(raw_data.get_filepaths()))
        exp_cmd = (
            "split_libraries_fastq.py --store_demultiplexed_fastq -i "
            "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
            "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
            "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
            "--sequence_max_n 0").format(raw_fps, obs_output_dir)
        self.assertEqual(obs_cmd, exp_cmd)
Example #38
0
    def test_create(self):
        """Correctly creates all the rows in the DB for the raw data"""
        fetchall = self.conn_handler.execute_fetchall

        # NOTE(review): raw data id 3 and filepath ids 17/18 are hard-coded;
        # presumably they are the next ids in the test DB - confirm
        raw_data = RawData.create(self.filetype, self.studies, self.filepaths)
        self.assertEqual(raw_data.id, 3)

        # The row in qiita.raw_data holds:
        # raw_data_id, filetype, link_filepaths_status
        rows = fetchall("SELECT * FROM qiita.raw_data WHERE raw_data_id=3")
        self.assertEqual(rows, [[3, 2, 'idle']])

        # qiita.study_raw_data rows are (study_id, raw_data_id)
        rows = fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3")
        self.assertEqual(rows, [[1, 3]])

        # The files must exist in the raw data directory under a "3_"
        # prefixed name; they are registered for clean up
        seqs_name = "3_%s" % basename(self.seqs_fp)
        bc_name = "3_%s" % basename(self.barcodes_fp)
        for name in (seqs_name, bc_name):
            stored_fp = join(self.db_test_raw_dir, name)
            self.assertTrue(exists(stored_fp))
            self._clean_up_files.append(stored_fp)

        # The leading columns of qiita.filepath are
        # filepath_id, path, filepath_type_id
        rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=17 or "
            "filepath_id=18")
        expected = [[17, seqs_name, 1, '852952723', 1, 5],
                    [18, bc_name, 2, '852952723', 1, 5]]
        self.assertEqual(rows, expected)

        # qiita.raw_filepath rows are (raw_data_id, filepath_id)
        links = fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3")
        self.assertEqual(links, [[3, 17], [3, 18]])
Example #39
0
    def test_clear_filepaths(self):
        """clear_filepaths leaves no raw_filepath row for the raw data"""
        fetchone = self.conn_handler.execute_fetchone
        raw_data = RawData.create(self.filetype, self.studies, self.filepaths)

        # Right after creation the raw data has linked filepaths
        linked = fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
            "WHERE raw_data_id=%s)", (raw_data.id,))
        self.assertTrue(linked[0])

        # Register, before clearing, the paths to clean up: the study folder
        # of the "uploads" mountpoint plus each file name without its prefix
        study_id = raw_data.studies[0]
        upload_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        to_clean = []
        for _, stored_fp, _ in raw_data.get_filepaths():
            original_name = basename(stored_fp).split('_', 1)[1]
            to_clean.append(join(upload_dir, original_name))
        self._clean_up_files = to_clean

        raw_data.clear_filepaths()
        linked = fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
            "WHERE raw_data_id=%s)", (raw_data.id,))
        self.assertFalse(linked[0])
Example #40
0
    def test_clear_filepaths(self):
        """The raw data has linked filepaths before clearing and none after"""
        run_one = self.conn_handler.execute_fetchone
        created = RawData.create(self.filetype, self.studies, self.filepaths)

        has_links = run_one(
            "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
            "WHERE raw_data_id=%s)", (created.id,))[0]
        self.assertTrue(has_links)

        # The clean up list is replaced with paths built from the study
        # folder of the "uploads" mountpoint and the unprefixed file names;
        # it has to be computed while the filepaths are still attached
        study_id = created.studies[0]
        target_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        unprefixed = (basename(path).split('_', 1)[1]
                      for _, path, _ in created.get_filepaths())
        self._clean_up_files = [join(target_dir, name) for name in unprefixed]

        created.clear_filepaths()
        has_links = run_one(
            "SELECT EXISTS(SELECT * FROM qiita.raw_filepath "
            "WHERE raw_data_id=%s)", (created.id,))[0]
        self.assertFalse(has_links)
Example #41
0
    def test_delete(self):
        """Deleting unlinks prep templates first and the raw data last"""
        fetchall = self.conn_handler.execute_fetchall
        fetchone = self.conn_handler.execute_fetchone
        raw_data = RawData.create(self.filetype, self.prep_templates,
                                  self.filepaths)

        sql_pt = """SELECT prep_template_id
                    FROM qiita.prep_template
                    WHERE raw_data_id = %s
                    ORDER BY prep_template_id"""
        linked = fetchall(sql_pt, (raw_data.id,))
        self.assertEqual(linked, [[self.pt1.id], [self.pt2.id]])

        # With pt1 still linked, deleting through pt2 only unlinks pt2
        RawData.delete(raw_data.id, self.pt2.id)
        linked = fetchall(sql_pt, (raw_data.id,))
        self.assertEqual(linked, [[self.pt1.id]])
        self.assertEqual(self.pt2.raw_data, None)

        # Deleting through the last prep template must fail while the raw
        # data still has files attached
        self.assertRaises(QiitaDBError, RawData.delete, raw_data.id,
                          self.pt1.id)

        # Register the paths to clean up (study folder of the "uploads"
        # mountpoint plus the unprefixed file names), then detach the files
        study_id = raw_data.studies[0]
        upload_dir = join(get_mountpoint("uploads")[0][1], str(study_id))
        to_clean = [join(upload_dir, basename(fp).split('_', 1)[1])
                    for _, fp, _ in raw_data.get_filepaths()]
        self._clean_up_files.extend(to_clean)
        raw_data.clear_filepaths()

        # Now the delete goes through and no prep template is linked
        RawData.delete(raw_data.id, self.pt1.id)
        self.assertEqual(fetchall(sql_pt, (raw_data.id,)), [])

        # Neither the filepath links nor the raw data row may remain
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.raw_filepath
                    WHERE raw_data_id = %s)"""
        links_left = fetchone(sql, (raw_data.id,))[0]
        self.assertFalse(links_left)

        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.raw_data
                    WHERE raw_data_id=%s)"""
        row_left = fetchone(sql, (raw_data.id,))[0]
        self.assertFalse(row_left)
Example #42
0
    def test_create(self):
        """Correctly creates all the rows in the DB for the raw data"""
        fetchall = self.conn_handler.execute_fetchall

        # NOTE(review): raw data id 3 and filepath ids 15/16 are hard-coded;
        # presumably they are the next ids in the test DB - confirm
        raw_data = RawData.create(self.filetype, self.filepaths, self.studies)
        self.assertEqual(raw_data.id, 3)

        # The row in qiita.raw_data is compared against two values
        rows = fetchall("SELECT * FROM qiita.raw_data WHERE raw_data_id=3")
        self.assertEqual(rows, [[3, 2]])

        # qiita.study_raw_data rows are (study_id, raw_data_id)
        rows = fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3")
        self.assertEqual(rows, [[1, 3]])

        # The files must exist in the raw data directory under a "3_"
        # prefixed name; they are registered for clean up
        seqs_stored = join(self.db_test_raw_dir,
                           "3_%s" % basename(self.seqs_fp))
        self.assertTrue(exists(seqs_stored))
        self._clean_up_files.append(seqs_stored)

        bc_stored = join(self.db_test_raw_dir,
                         "3_%s" % basename(self.barcodes_fp))
        self.assertTrue(exists(bc_stored))
        self._clean_up_files.append(bc_stored)

        # Here the full stored paths are expected in qiita.filepath; the
        # leading columns are filepath_id, path, filepath_type_id
        rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
            "filepath_id=16")
        expected = [[15, seqs_stored, 1, '852952723', 1],
                    [16, bc_stored, 2, '852952723', 1]]
        self.assertEqual(rows, expected)

        # qiita.raw_filepath rows are (raw_data_id, filepath_id)
        links = fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3")
        self.assertEqual(links, [[3, 15], [3, 16]])
Example #43
0
    def setUp(self):
        """Prepares the objects, parameters and temporary files for the tests"""
        self.raw_data = RawData(1)
        self.study = Study(1)
        self.params_table = "preprocessed_sequence_illumina_params"
        self.params_id = 1

        # Temporary fna and qual files; only the paths are kept, the file
        # descriptors are closed right away
        tmp_fps = []
        for suffix in ('_seqs.fna', '_seqs.qual'):
            fd, tmp_fp = mkstemp(suffix=suffix)
            close(fd)
            tmp_fps.append(tmp_fp)
        self.fna_fp, self.qual_fp = tmp_fps

        # (filepath, filepath type id) pairs
        self.filepaths = [(self.fna_fp, 4), (self.qual_fp, 5)]
        self.db_test_ppd_dir = join(get_db_files_base_dir(),
                                    'preprocessed_data')
        self.ebi_submission_accession = "EBI123456-A"
        self.ebi_study_accession = "EBI123456-B"

        # Each temporary file contains a single newline
        for tmp_fp in tmp_fps:
            with open(tmp_fp, "w") as f:
                f.write("\n")
        self._clean_up_files = []
Example #44
0
    def setUp(self):
        """Creates a raw data backed by two temporary fastq files"""
        # Temporary sequences and barcodes files; the descriptors are closed
        # right away and each file gets a single newline as content
        tmp_fps = []
        for suffix in ('_seqs.fastq', '_barcodes.fastq'):
            fd, tmp_fp = mkstemp(suffix=suffix)
            close(fd)
            tmp_fps.append(tmp_fp)
        seqs_fp, barcodes_fp = tmp_fps

        for tmp_fp in tmp_fps:
            with open(tmp_fp, "w") as f:
                f.write("\n")

        self.pt_contents = PREP_TEMPLATE

        self.raw_data = RawData.create(
            2, [(seqs_fp, 1), (barcodes_fp, 2)], [Study(1)])

        # Paths registered for removal: the raw data directory plus
        # "<raw data id>_<basename of the temporary file>"
        raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self.files_to_remove = [
            join(raw_dir, "%s_%s" % (self.raw_data.id, basename(seqs_fp))),
            join(raw_dir, "%s_%s" % (self.raw_data.id, basename(barcodes_fp)))]
Example #45
0
    def test_create_no_filepaths(self):
        """Correctly creates a raw data object with no filepaths attached"""
        fetchall = self.conn_handler.execute_fetchall

        # NOTE(review): id 3 is hard-coded; presumably it is the next raw
        # data id in the test DB - confirm
        raw_data = RawData.create(self.filetype, self.studies)
        self.assertEqual(raw_data.id, 3)

        # The row in qiita.raw_data holds:
        # raw_data_id, filetype, link_filepaths_status
        rows = fetchall("SELECT * FROM qiita.raw_data WHERE raw_data_id=3")
        self.assertEqual(rows, [[3, 2, 'idle']])

        # qiita.study_raw_data rows are (study_id, raw_data_id)
        rows = fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=3")
        self.assertEqual(rows, [[1, 3]])

        # No filepath may be linked to the new raw data
        links = fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=3")
        self.assertEqual(links, [])
Example #46
0
    def test_move_filepaths_to_upload_folder(self):
        """Moved files leave their old path and show up in the upload folder"""
        # The fixtures are built here because no other test uses these files
        tmp_fd, seqs_fp = mkstemp(suffix="_seqs.fastq")
        close(tmp_fd)
        st = Study(1)
        prep_info = {
            "SKB8.640193": {
                "center_name": "ANL",
                "primer": "GTGCCAGCMGCCGCGGTAA",
                "barcode": "GTCCGCAAGTTA",
                "run_prefix": "s_G1_L001_sequences",
                "platform": "ILLUMINA",
                "library_construction_protocol": "AAAA",
                "experiment_design_description": "BBBB",
            }
        }
        metadata = pd.DataFrame.from_dict(prep_info, orient="index")
        pt = PrepTemplate.create(metadata, Study(1), "16S")

        rd = RawData.create(2, [pt], [(seqs_fp, 1)])
        filepaths = rd.get_filepaths()

        # Drop the raw_filepath references by hand so that
        # move_filepaths_to_upload_folder can be called directly
        unlink_sql = "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s"
        for filepath_id, _, _ in filepaths:
            self.conn_handler.execute(unlink_sql, (filepath_id,))

        move_filepaths_to_upload_folder(st.id, filepaths)

        # Every file must be gone from its old path and present, without
        # its prefix, in the study folder of the "uploads" mountpoint
        upload_dir = join(get_mountpoint("uploads")[0][1], str(st.id))
        for _, old_fp, _ in filepaths:
            self.assertFalse(exists(old_fp))
            moved_fp = join(upload_dir, basename(old_fp).split("_", 1)[1])
            self.assertTrue(exists(moved_fp))

            self.files_to_remove.append(moved_fp)
    def post(self):
        """Submits a preprocessing job and renders the waiting page

        Reads the `study_id`, `prep_template_id` and
        `preprocessing_parameters_id` request arguments as integers.

        Raises
        ------
        ValueError
            If the filetype of the raw data attached to the prep template is
            not FASTQ, FASTA or SFF
        """
        study_id = int(self.get_argument('study_id'))
        prep_template_id = int(self.get_argument('prep_template_id'))
        raw_data = RawData(PrepTemplate(prep_template_id).raw_data)
        param_id = int(self.get_argument('preprocessing_parameters_id'))

        # The filetype selects the class of the preprocessing parameters
        filetype = raw_data.filetype
        if filetype == 'FASTQ':
            param_constructor = PreprocessedIlluminaParams
        elif filetype in ('FASTA', 'SFF'):
            param_constructor = Preprocessed454Params
        else:
            raise ValueError('Unknown filetype')

        job_id = submit(self.current_user.id, preprocessor, study_id,
                        prep_template_id, param_id, param_constructor)

        # Once the job completes, go back to the tab of this prep template
        redirect = ('/study/description/%d?top_tab='
                    'raw_data_tab&sub_tab=%s&prep_tab=%s'
                    % (study_id, raw_data.id, prep_template_id))
        self.render('compute_wait.html',
                    job_id=job_id, title='Preprocessing',
                    completion_redirect=redirect)
Example #48
0
    def test_create_no_filepaths(self):
        """Correctly creates a raw data object with no filepaths attached"""
        fetchall = self.conn_handler.execute_fetchall

        # The new object must take the id following the current row count
        current = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.raw_data")[0]
        new_id = 1 + current
        raw_data = RawData.create(self.filetype, self.studies)
        self.assertEqual(raw_data.id, new_id)

        # The row in qiita.raw_data holds:
        # raw_data_id, filetype, link_filepaths_status
        rows = fetchall(
            "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % new_id)
        self.assertEqual(rows, [[new_id, 2, 'idle']])

        # qiita.study_raw_data rows are (study_id, raw_data_id)
        rows = fetchall(
            "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d" % new_id)
        self.assertEqual(rows, [[1, new_id]])

        # No filepath may be linked to the new raw data
        links = fetchall(
            "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % new_id)
        self.assertEqual(links, [])
Example #49
0
 def test_init_error_inexistent(self):
     """Raises an error when instantiating an object that does not exist"""
     # Instantiating RawData with id 10 must raise QiitaDBUnknownIDError
     self.assertRaises(QiitaDBUnknownIDError, RawData, 10)
Example #50
0
         FROM qiita.raw_data
         WHERE raw_data_id NOT IN (
            SELECT DISTINCT raw_data_id FROM qiita.prep_template);"""
rd_ids = [x[0] for x in conn_handler.execute_fetchall(sql)]

# We will delete those RawData. However, if they have files attached, we should
# move them to the uploads folder of the study
sql_detach = """DELETE FROM qiita.study_raw_data
                WHERE raw_data_id = %s AND study_id = %s"""
sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s"
sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
sql_studies = """SELECT study_id FROM qiita.study_raw_data
                 WHERE raw_data_id = %s"""
move_files = []
for rd_id in rd_ids:
    rd = RawData(rd_id)
    filepaths = rd.get_filepaths()
    studies = [s[0] for s in conn_handler.execute_fetchall(sql_studies,
                                                           (rd_id,))]
    if filepaths:
        # We need to move the files to a study. We choose the one with the
        # lowest study id. Currently there is no case in the live database in
        # which a RawData with no prep templates is attached to more than one
        # study, but it is better to normalize this just in case.
        move_files.append((min(studies), filepaths))

    # To delete the RawData we first need to unlink all the files
    conn_handler.add_to_queue(queue, sql_unlink, (rd_id,))

    # Then, remove the raw data from all the studies
    for st_id in studies:
Example #51
0
 def test_not_equal(self):
     """The tester does not compare equal to a RawData built from id 2"""
     other = RawData(2)
     self.assertNotEqual(self.tester, other)
Example #52
0
 def test_equal(self):
     """The tester compares equal to a fresh RawData built from id 1"""
     same = RawData(1)
     self.assertEqual(self.tester, same)
Example #53
0
 def test_link_filepaths_status_setter_error(self):
     """Setting an unrecognized link_filepaths status raises ValueError"""
     raw_data = RawData(1)
     self.assertRaises(ValueError, raw_data._set_link_filepaths_status,
                       'not a valid status')
    def test_get_qiime_minimal_mapping_multiple(self):
        """One minimal mapping file is produced per distinct run_prefix

        Builds a prep template whose three samples use two different
        ``run_prefix`` values, calls ``_get_qiime_minimal_mapping`` on it and
        checks that the returned paths are ``<run_prefix>_MMF.txt`` inside the
        output directory, that each file exists, and that the file contents
        match ``EXP_PREP_1`` and ``EXP_PREP_2`` respectively.
        """
        # Imported locally so the cleanup below does not depend on the
        # module-level imports of this file
        from shutil import rmtree

        # We need to create a prep template in which we have different run
        # prefix values, so we can test this case
        metadata_dict = {
            'SKB8.640193': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 1',
                'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
                'barcodesequence': 'GTCCGCAAGTTA',
                'run_prefix': "s_G1_L001_sequences",
                'platform': 'ILLUMINA',
                'library_construction_protocol': 'AAA',
                'experiment_design_description': 'BBB'
            },
            'SKD8.640184': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 2',
                'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
                'barcodesequence': 'CGTAGAGCTCTC',
                'run_prefix': "s_G1_L001_sequences",
                'platform': 'ILLUMINA',
                'library_construction_protocol': 'AAA',
                'experiment_design_description': 'BBB'
            },
            'SKB7.640196': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 3',
                'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
                'barcodesequence': 'CCTCTGAGAGCT',
                'run_prefix': "s_G1_L002_sequences",
                'platform': 'ILLUMINA',
                'library_construction_protocol': 'AAA',
                'experiment_design_description': 'BBB'
            }
        }
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, RawData(2), Study(1),
                                            '16S')

        out_dir = mkdtemp()
        # Remove the temporary directory and the generated mapping files once
        # the test finishes, whether it passes or fails. ignore_errors keeps
        # the cleanup harmless if something else already removed the directory
        self.addCleanup(rmtree, out_dir, ignore_errors=True)

        obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
        exp_fps = sorted([
            join(out_dir, 's_G1_L001_sequences_MMF.txt'),
            join(out_dir, 's_G1_L002_sequences_MMF.txt')
        ])

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        for fp in exp_fps:
            self.assertTrue(exists(fp))
        # Check the contents of the file
        # NOTE(review): the "U" open mode was removed in Python 3.11; switch
        # to the default text mode if this suite is ever run there
        for fp, contents in zip(exp_fps, [EXP_PREP_1, EXP_PREP_2]):
            with open(fp, "U") as f:
                self.assertEqual(f.read(), contents)
Example #55
0
 def test_data_types_id(self):
     """data_types(ret_id=True) returns the data type ids of raw data 1"""
     observed = RawData(1).data_types(ret_id=True)
     self.assertEqual(observed, [2])
Example #56
0
 def test_data_types(self):
     """data_types() returns the data type names of raw data 1"""
     observed = RawData(1).data_types()
     self.assertEqual(observed, ["18S"])
Example #57
0
    def test_delete(self):
        """RawData.delete rejects invalid requests and removes raw data

        The steps run in a fixed order because each one depends on the
        database state left by the previous ones.
        """
        # the raw data doesn't exist
        self.assertRaises(QiitaDBUnknownIDError, RawData.delete, 1000, 1)

        # (raw_data_id, study_id) pairs that must be refused with QiitaDBError
        refused = [
            # the raw data and the study id are not linked or
            # the study doesn't exist
            (1, 1000),
            # the raw data has prep templates
            (1, 1),
            # the raw data has linked files
            (3, 1),
        ]
        for raw_data_id, study_id in refused:
            with self.assertRaises(QiitaDBError):
                RawData.delete(raw_data_id, study_id)

        # the raw data is linked to a study that has no prep templates
        unprepped_study = Study(2)
        unprepped_study.add_raw_data([RawData(1)])
        RawData.delete(1, 2)

        # delete raw data
        target_id = 2
        self.assertTrue(RawData.exists(target_id))
        RawData.delete(target_id, 1)
        self.assertFalse(RawData.exists(target_id))