Code example #1
File: test_study.py Project: jwdebelius/qiita
 def test_add_raw_data(self):
     self._make_sandbox()
     new = Study.create(
         User('*****@*****.**'), 'NOT Identification of the '
         'Microbiomes for Cannabis Soils', [1], self.info)
     new.add_raw_data([RawData(1), RawData(2)])
     obs = self.conn_handler.execute_fetchall(
         "SELECT * FROM qiita.study_raw_data WHERE study_id=%s", (new.id, ))
     self.assertEqual(obs, [[new.id, 1], [new.id, 2]])
Code example #2
File: test_data.py Project: jwdebelius/qiita
    def test_remove_filepath_errors(self):
        fp = join(self.db_test_raw_dir, '1_s_G1_L001_sequences.fastq.gz')
        with self.assertRaises(QiitaDBError):
            RawData(1).remove_filepath(fp)

        # filepath doesn't belong to that raw data
        with self.assertRaises(ValueError):
            RawData(2).remove_filepath(fp)

        # the raw data has been linked to more than 1 study so it can't be
        # unlinked
        Study(2).add_raw_data([RawData(2)])
        with self.assertRaises(QiitaDBError):
            RawData(2).remove_filepath(fp)
Code example #3
File: test_study.py Project: jwdebelius/qiita
 def test_add_raw_data_private(self):
     new = Study.create(
         User('*****@*****.**'), 'NOT Identification of the '
         'Microbiomes for Cannabis Soils', [1], self.info)
     new.status = 'private'
     with self.assertRaises(QiitaDBStatusError):
         new.add_raw_data([RawData(2)])
Code example #4
    def test_get_preprocess_fastq_cmd(self):
        raw_data = RawData(1)
        params = PreprocessedIlluminaParams(1)
        prep_template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        get_raw_path = partial(join, self.db_dir, 'raw_data')
        seqs_fp = get_raw_path('1_s_G1_L001_sequences.fastq.gz')
        bc_fp = get_raw_path('1_s_G1_L001_sequences_barcodes.fastq.gz')

        exp_cmd_1 = ("split_libraries_fastq.py --store_demultiplexed_fastq -i "
                     "{} -b {} "
                     "-m ".format(seqs_fp, bc_fp))
        exp_cmd_2 = ("-o {0} --barcode_type golay_12 --max_bad_run_length 3 "
                     "--max_barcode_errors 1.5 "
                     "--min_per_read_length_fraction 0.75 "
                     "--phred_quality_threshold 3 "
                     "--sequence_max_n 0".format(obs_output_dir))

        # We are splitting the command into two parts because there is no way
        # that we can know the filepath of the mapping file. We thus split the
        # command on the mapping file path and we check that the two parts
        # of the command are correct
        obs_cmd_1 = obs_cmd[:len(exp_cmd_1)]
        obs_cmd_2 = obs_cmd[len(exp_cmd_1):].split(" ", 1)[1]

        self.assertEqual(obs_cmd_1, exp_cmd_1)
        self.assertEqual(obs_cmd_2, exp_cmd_2)
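As the comment notes, the mapping file path is not known ahead of time, so the test compares the command in two pieces. A hedged alternative sketch (not what the test above does) is to collapse the unknown "-m <path>" token with a regex and compare the rest in a single assertion; it assumes the mapping file path contains no spaces.

        import re

        # Collapse "-m <mapping_fp>" to "-m" so the whole command can be
        # compared at once (sketch only; assumes the path has no spaces).
        obs_cmd_no_mapping = re.sub(r" -m \S+", " -m", obs_cmd)
        self.assertEqual(obs_cmd_no_mapping, exp_cmd_1 + exp_cmd_2)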
Code example #5
File: dispatchable.py Project: jwdebelius/qiita
def add_files_to_raw_data(raw_data_id, filepaths):
    """Add files to raw data

    Needs to be dispatchable because it moves large files
    """
    rd = RawData(raw_data_id)
    rd.add_filepaths(filepaths)
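Because the move can be slow, this helper is meant to be queued as a background job rather than called inline. A minimal sketch of a dispatch wrapper, assuming the submit(user_id, function, *args) convention visible in code example #18; the wrapper name and the passed-in submit callable are illustrative, not part of qiita.

def dispatch_add_files(submit, user, raw_data_id, filepaths):
    """Hypothetical helper: queue the potentially slow file move as a job.

    `submit` is assumed to follow the submit(user_id, function, *args)
    pattern used by the preprocessing handler in code example #18.
    """
    return submit(user.id, add_files_to_raw_data, raw_data_id, filepaths)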
Code example #6
File: test_data.py Project: jwdebelius/qiita
    def test_delete(self):
        # the raw data doesn't exist
        with self.assertRaises(QiitaDBUnknownIDError):
            RawData.delete(1000, 1)

        # the raw data and the study id are not linked or
        # the study doesn't exist
        with self.assertRaises(QiitaDBError):
            RawData.delete(1, 1000)

        # the raw data has prep templates
        with self.assertRaises(QiitaDBError):
            RawData.delete(1, 1)

        # the raw data has linked files
        with self.assertRaises(QiitaDBError):
            RawData.delete(3, 1)

        # the raw data is linked to a study that has no prep templates
        Study(2).add_raw_data([RawData(1)])
        RawData.delete(1, 2)

        # delete raw data
        self.assertTrue(RawData.exists(2))
        RawData.delete(2, 1)
        self.assertFalse(RawData.exists(2))
Code example #7
File: dispatchable.py Project: jwdebelius/qiita
def unlink_all_files(raw_data_id):
    """Removes all files from raw data

    Needs to be dispatchable because it does I/O and a lot of DB calls
    """
    rd = RawData(raw_data_id)
    rd.clear_filepaths()
Code example #8
File: test_data.py Project: jwdebelius/qiita
 def test_link_filepaths_status_setter(self):
     rd = RawData(1)
     self.assertEqual(rd.link_filepaths_status, 'idle')
     rd._set_link_filepaths_status('linking')
     self.assertEqual(rd.link_filepaths_status, 'linking')
     rd._set_link_filepaths_status('unlinking')
     self.assertEqual(rd.link_filepaths_status, 'unlinking')
     rd._set_link_filepaths_status('failed: error')
     self.assertEqual(rd.link_filepaths_status, 'failed: error')
Code example #9
 def remove_add_prep_template(self, fp_rpt, raw_data_id, study,
                              data_type_id, investigation_type):
     """add prep templates"""
     pt_id = PrepTemplate.create(load_template_to_dataframe(fp_rpt),
                                 RawData(raw_data_id), study,
                                 _to_int(data_type_id),
                                 investigation_type=investigation_type).id
     remove(fp_rpt)
     return pt_id
Code example #10
    def test_get_preprocess_fasta_cmd_sff(self):
        raw_data = RawData(3)
        params = Preprocessed454Params(1)
        prep_template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            raw_data, prep_template, params)

        get_raw_path = partial(join, self.db_dir, 'raw_data')
        seqs_fp = [
            get_raw_path('preprocess_test1.sff'),
            get_raw_path('preprocess_test2.sff')
        ]

        exp_cmd_1 = ' '.join(
            ["process_sff.py",
             "-i %s" % seqs_fp[0],
             "-o %s" % obs_output_dir])
        exp_cmd_2 = ' '.join(
            ["process_sff.py",
             "-i %s" % seqs_fp[1],
             "-o %s" % obs_output_dir])

        fasta_files = ','.join([
            join(obs_output_dir, "preprocess_test1.fna"),
            join(obs_output_dir, "preprocess_test2.fna")
        ])
        qual_files = ','.join([
            join(obs_output_dir, "preprocess_test1.qual"),
            join(obs_output_dir, "preprocess_test2.qual")
        ])
        exp_cmd_3a = ' '.join(["split_libraries.py", "-f %s" % fasta_files])

        exp_cmd_3b = ' '.join([
            "-q %s" % qual_files, "-d",
            "-o %s" % obs_output_dir,
            params.to_str()
        ])
        exp_cmd_4 = ' '.join([
            "convert_fastaqual_fastq.py",
            "-f %s/seqs.fna" % obs_output_dir,
            "-q %s/seqs_filtered.qual" % obs_output_dir,
            "-o %s" % obs_output_dir, "-F"
        ])

        obs_cmds = obs_cmd.split('; ')

        # We are splitting the command into two parts because there is no way
        # that we can know the filepath of the mapping file. We thus split the
        # command on the mapping file path and we check that the two parts
        # of the command are correct
        obs_cmd_3a, obs_cmd_3b_temp = obs_cmds[2].split(' -m ', 1)
        obs_cmd_3b = obs_cmd_3b_temp.split(' ', 1)[1]
        self.assertEqual(obs_cmds[0], exp_cmd_1)
        self.assertEqual(obs_cmds[1], exp_cmd_2)
        self.assertEqual(obs_cmd_3a, exp_cmd_3a)
        self.assertEqual(obs_cmd_3b, exp_cmd_3b)
        self.assertEqual(obs_cmds[3], exp_cmd_4)
Code example #11
 def test_get_filepaths(self):
     """Correctly returns the filepaths to the raw files"""
     rd = RawData(1)
     obs = rd.get_filepaths()
     exp = [(join(self.db_test_raw_dir,
                  '1_s_G1_L001_sequences.fastq.gz'), "raw_sequences"),
            (join(self.db_test_raw_dir,
                  '1_s_G1_L001_sequences_barcodes.fastq.gz'),
             "raw_barcodes")]
     self.assertEqual(obs, exp)
Code example #12
    def setUp(self):
        metadata_dict = {
            'SKB8.640193': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': 'Value for sample 1'
            },
            'SKD8.640184': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': 'Value for sample 2'
            },
            'SKB7.640196': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': 'Value for sample 3'
            }
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.test_raw_data = RawData(1)

        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(fd)
        fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(fd)
        filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]
        with open(seqs_fp, "w") as f:
            f.write("\n")
        with open(barcodes_fp, "w") as f:
            f.write("\n")
        self.new_raw_data = RawData.create(2, filepaths, [Study(1)])
        db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
        db_seqs_fp = join(db_test_raw_dir, "3_%s" % basename(seqs_fp))
        db_barcodes_fp = join(db_test_raw_dir, "3_%s" % basename(barcodes_fp))
        self._clean_up_files = [db_seqs_fp, db_barcodes_fp]

        self.tester = PrepTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
        }
Code example #13
def get_raw_data_from_other_studies(user, study):
    """Retrieves a tuple of raw_data_id and the last study title for that
    raw_data
    """
    d = {}
    for sid in user.user_studies:
        if sid == study.id:
            continue
        for rdid in Study(sid).raw_data():
            d[int(rdid)] = Study(RawData(rdid).studies[-1]).title
    return d
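A hypothetical usage sketch of the mapping returned above, e.g. to populate a "reuse existing raw data" selector; current_user and current_study are illustrative names, not part of the original code.

# Each entry pairs a raw data id with the title of the study it was last
# linked to.
options = get_raw_data_from_other_studies(current_user, current_study)
for rd_id, last_study_title in sorted(options.items()):
    print("raw data %d (last used in: %s)" % (rd_id, last_study_title))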
Code example #14
    def create_raw_data(self, study, user, callback):
        """Adds a (new) raw data to the study

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        msg = "Raw data successfully added"
        msg_level = "success"

        # Get the arguments needed to create a raw data object
        filetype = self.get_argument('filetype', None)
        previous_raw_data = self.get_argument('previous_raw_data', None)

        if filetype and previous_raw_data:
            # The user selected a filetype and an existing raw data
            msg = ("You can not specify both a new raw data and a previously "
                   "used one")
            msg_level = "danger"
        elif filetype:
            # We are creating a new raw data object
            try:
                rd_id = RawData.create(filetype, [study]).id
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError) as e:
                msg = html_error_message % (
                    "creating a new raw data object for study:",
                    str(study.id), str(e))
                msg_level = "danger"
        elif previous_raw_data:
            previous_raw_data = previous_raw_data.split(',')
            raw_data = [RawData(rd) for rd in previous_raw_data]
            study.add_raw_data(raw_data)
            rd_id = raw_data[0].id
        else:
            # The user provided neither a filetype nor an existing raw data.
            # If using the interface, we should never reach this branch, but
            # better be safe than sorry
            msg = ("You should choose a filetype for a new raw data or "
                   "choose a raw data previously used")
            msg_level = "danger"
            rd_id = None

        callback((msg, msg_level, 'raw_data_tab', rd_id, None))
Code example #15
    def render(self, prep, study_id, is_editable, ena_terms, study_status,
               user_defined_terms):
        # Check if the request came from a local source
        is_local_request = self._is_local()

        prep_id = prep.id
        data_type = prep.data_type()
        raw_data = RawData(prep.raw_data)
        filepaths = prep.get_filepaths()
        investigation_type = prep.investigation_type
        preprocessed_data = prep.preprocessed_data
        preprocessing_status = prep.preprocessing_status

        if raw_data.filetype in ('SFF', 'FASTA'):
            param_iter = Preprocessed454Params.iter()
        elif raw_data.filetype == 'FASTQ':
            param_iter = PreprocessedIlluminaParams.iter()
        else:
            raise ValueError("Don't know what to do but this exception will "
                             "never actually get shown anywhere because why "
                             "would you want to see tracebacks?")

        preprocess_options = []
        for param in param_iter:
            text = ("<b>%s:</b> %s" % (k, v)
                    for k, v in viewitems(param.values))
            preprocess_options.append(
                (param.id, param.name, '<br>'.join(text)))

        # Unfortunately, both the prep template and the qiime mapping files
        # have the same filepath type. The way to differentiate them is if we have
        # the substring 'qiime' in the basename
        _fp_type = (lambda fp: "Qiime mapping"
                    if 'qiime' in basename(fp) else "Prep template")
        filepaths = [(id_, fp, _fp_type(fp)) for id_, fp in filepaths]

        return self.render_string(
            "study_description_templates/prep_template_panel.html",
            prep_id=prep_id,
            data_type=data_type,
            filepaths=filepaths,
            investigation_type=investigation_type,
            preprocessed_data=preprocessed_data,
            preprocessing_status=preprocessing_status,
            study_id=study_id,
            is_local_request=is_local_request,
            is_editable=is_editable,
            ena_terms=ena_terms,
            study_status=study_status,
            user_defined_terms=user_defined_terms,
            preprocess_options=preprocess_options)
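A quick, illustrative check of the 'qiime' basename heuristic used above; the file paths below are made up for the example.

from os.path import basename

_fp_type = (lambda fp: "Qiime mapping"
            if 'qiime' in basename(fp) else "Prep template")
print(_fp_type('/db/templates/1_prep_1_qiime_20240101-000000.txt'))  # Qiime mapping
print(_fp_type('/db/templates/1_prep_1_20240101-000000.txt'))        # Prep template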
Code example #16
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Replace prep templates, raw data, and sample template with a new one
        """
        for rd in raw_data():
            rd = RawData(rd)
            for pt in rd.prep_templates:
                if PrepTemplate.exists(pt):
                    PrepTemplate.delete(pt)
        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                              Study(study_id))
        remove(fp_rsp)
Code example #17
    def setUp(self):
        self.raw_data = RawData(1)
        self.study = Study(1)
        self.params_table = "preprocessed_sequence_illumina_params"
        self.params_id = 1
        fd, self.fna_fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        fd, self.qual_fp = mkstemp(suffix='_seqs.qual')
        close(fd)
        self.filepaths = [(self.fna_fp, 4), (self.qual_fp, 5)]
        self.db_test_ppd_dir = join(get_db_files_base_dir(),
                                    'preprocessed_data')
        self.ebi_submission_accession = "EBI123456-A"
        self.ebi_study_accession = "EBI123456-B"

        with open(self.fna_fp, "w") as f:
            f.write("\n")
        with open(self.qual_fp, "w") as f:
            f.write("\n")
        self._clean_up_files = []
Code example #18
    def post(self):
        study_id = int(self.get_argument('study_id'))
        prep_template_id = int(self.get_argument('prep_template_id'))
        raw_data = RawData(PrepTemplate(prep_template_id).raw_data)
        param_id = int(self.get_argument('preprocessing_parameters_id'))

        # Get the preprocessing parameters
        if raw_data.filetype == 'FASTQ':
            param_constructor = PreprocessedIlluminaParams
        elif raw_data.filetype in ('FASTA', 'SFF'):
            param_constructor = Preprocessed454Params
        else:
            raise ValueError('Unknown filetype')

        job_id = submit(self.current_user.id, preprocessor, study_id,
                        prep_template_id, param_id, param_constructor)

        self.render('compute_wait.html',
                    job_id=job_id, title='Preprocessing',
                    completion_redirect='/study/description/%d?top_tab='
                                        'raw_data_tab&sub_tab=%s&prep_tab=%s'
                                        % (study_id, raw_data.id,
                                           prep_template_id))
Code example #19
def get_raw_data(rdis):
    """Get all raw data objects from a list of raw_data_ids"""
    return [RawData(rdi) for rdi in rdis]
Code example #20
 def test_studies(self):
     """Correctly returns the study ids"""
     rd = RawData(1)
     self.assertEqual(rd.studies, [1])
Code example #21
    def test_get_qiime_minimal_mapping_multiple(self):
        # We need to create a prep template in which we have different run
        # prefix values, so we can test this case
        metadata_dict = {
            'SKB8.640193': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 1',
                'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
                'barcodesequence': 'GTCCGCAAGTTA',
                'run_prefix': "s_G1_L001_sequences",
                'platform': 'ILLUMINA',
                'library_construction_protocol': 'AAA',
                'experiment_design_description': 'BBB'
            },
            'SKD8.640184': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 2',
                'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
                'barcodesequence': 'CGTAGAGCTCTC',
                'run_prefix': "s_G1_L001_sequences",
                'platform': 'ILLUMINA',
                'library_construction_protocol': 'AAA',
                'experiment_design_description': 'BBB'
            },
            'SKB7.640196': {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': 'Value for sample 3',
                'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
                'barcodesequence': 'CCTCTGAGAGCT',
                'run_prefix': "s_G1_L002_sequences",
                'platform': 'ILLUMINA',
                'library_construction_protocol': 'AAA',
                'experiment_design_description': 'BBB'
            }
        }
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, RawData(2), Study(1),
                                            '16S')

        out_dir = mkdtemp()

        obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
        exp_fps = sorted([
            join(out_dir, 's_G1_L001_sequences_MMF.txt'),
            join(out_dir, 's_G1_L002_sequences_MMF.txt')
        ])

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the files exist
        for fp in exp_fps:
            self.assertTrue(exists(fp))
        # Check the contents of the files
        for fp, contents in zip(exp_fps, [EXP_PREP_1, EXP_PREP_2]):
            with open(fp, "U") as f:
                self.assertEqual(f.read(), contents)
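The two-file result asserted above boils down to grouping samples by run_prefix: samples that share a prefix land in the same minimal mapping file. A rough sketch of that grouping with pandas, under the assumption that it mirrors (but is not) the actual _get_qiime_minimal_mapping implementation:

import pandas as pd

# Group the metadata from this test by run_prefix; each group would back one
# "<run_prefix>_MMF.txt" file (sketch only).
md = pd.DataFrame.from_dict(metadata_dict, orient='index')
for run_prefix, group in md.groupby('run_prefix'):
    print("%s_MMF.txt -> %s" % (run_prefix, sorted(group.index)))
# s_G1_L001_sequences_MMF.txt -> ['SKB8.640193', 'SKD8.640184']
# s_G1_L002_sequences_MMF.txt -> ['SKB7.640196']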
Code example #22
 def get_raw_data(self, rdis, callback):
     """Get all raw data objects from a list of raw_data_ids"""
     callback([RawData(rdi) for rdi in rdis])
Code example #23
 def test_data_type(self):
     """Correctly returns the data_type of raw_data"""
     rd = RawData(1)
     self.assertEqual(rd.data_type(), "18S")
Code example #24
 def test_not_equal(self):
     """Not equals works with object of the same type"""
     new = RawData(2)
     self.assertNotEqual(self.tester, new)
Code example #25
 def test_equal(self):
     """Equality works with two objects pointing to the same instance"""
     new = RawData(1)
     self.assertEqual(self.tester, new)
Code example #26
 def test_init_error_inexistent(self):
     """Raises an error when instantiating an object that does not exists"""
     with self.assertRaises(QiitaDBUnknownIDError):
         RawData(10)
Code example #27
 def setUp(self):
     # We need an actual subclass in order to test the equality functions
     self.tester = RawData(1)
Code example #28
 def test_data_type_id(self):
     """Correctly returns the data_type of raw_data"""
     rd = RawData(1)
     self.assertEqual(rd.data_type(ret_id=True), 2)
Code example #29
File: test_data.py Project: jwdebelius/qiita
 def test_clear_filepaths_error(self):
     with self.assertRaises(QiitaDBError):
         RawData(1).clear_filepaths()
Code example #30
    def _construct_job_graph(self, study, prep_template, params):
        """Constructs the workflow graph to preprocess a study

        The steps performed to preprocess a study are:
        1) Execute split libraries
        2) Add the new preprocessed data to the DB

        Parameters
        ----------
        study : Study
            The study to preprocess
        prep_template : PrepTemplate
            The prep template to use for the preprocessing
        params : BaseParameters
            The parameters to use for preprocessing
        """

        self.prep_template = prep_template
        self._logger = stderr
        raw_data = RawData(prep_template.raw_data)
        # Change the prep_template preprocessing_status to 'preprocessing'
        self.prep_template.preprocessing_status = 'preprocessing'

        # STEP 1: Preprocess the study
        preprocess_node = "PREPROCESS"

        # Check the raw data filetype to know which command generator we
        # should use
        filetype = raw_data.filetype
        if filetype == "FASTQ":
            cmd_generator = _get_preprocess_fastq_cmd
            insert_preprocessed_data = _insert_preprocessed_data
        elif filetype in ('FASTA', 'SFF'):
            cmd_generator = _get_preprocess_fasta_cmd
            insert_preprocessed_data = _insert_preprocessed_data
        else:
            raise NotImplementedError(
                "Raw data %s cannot be preprocessed, filetype %s not supported"
                % (raw_data.id, filetype))

        # Generate the command
        cmd, output_dir = cmd_generator(raw_data, self.prep_template, params)
        self._job_graph.add_node(preprocess_node,
                                 func=system_call,
                                 args=(cmd, ),
                                 job_name="Construct preprocess command",
                                 requires_deps=False)

        # This step is currently only for data types in which we need to store
        # demultiplexed sequences. Since it is the only supported data type at
        # this point, it is ok to leave it here. However, as new data types
        # become available, we will need to think of a better way of doing this.
        demux_node = "GEN_DEMUX_FILE"
        self._job_graph.add_node(demux_node,
                                 func=generate_demux_file,
                                 args=(output_dir, ),
                                 job_name="Generated demux file",
                                 requires_deps=False)
        self._job_graph.add_edge(preprocess_node, demux_node)

        # STEP 2: Add preprocessed data to DB
        insert_preprocessed_node = "INSERT_PREPROCESSED"
        self._job_graph.add_node(insert_preprocessed_node,
                                 func=insert_preprocessed_data,
                                 args=(study, params, self.prep_template,
                                       output_dir),
                                 job_name="Store preprocessed data",
                                 requires_deps=False)
        self._job_graph.add_edge(demux_node, insert_preprocessed_node)

        self._dirpaths_to_remove.append(output_dir)
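The three nodes added above form a strictly linear chain: run split libraries, generate the demux file, then insert the preprocessed data. A minimal sketch of that ordering, assuming the job graph behaves like a networkx DiGraph (the excerpt does not show the actual type of _job_graph):

import networkx as nx

# The edges added in _construct_job_graph imply this execution order.
g = nx.DiGraph()
g.add_edge("PREPROCESS", "GEN_DEMUX_FILE")
g.add_edge("GEN_DEMUX_FILE", "INSERT_PREPROCESSED")
print(list(nx.topological_sort(g)))
# ['PREPROCESS', 'GEN_DEMUX_FILE', 'INSERT_PREPROCESSED']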