コード例 #1
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Swap the study's templates for the one stored in `fp_rsp`

        Existing prep templates attached to the study's raw data and the
        existing sample template are deleted, a new sample template is
        created from `fp_rsp`, and the uploaded file is removed afterwards.
        The raw data objects themselves are not deleted by this method.

        Parameters
        ----------
        raw_data : callable
            Called with no arguments; each item it yields is passed to
            ``RawData`` (presumably raw data ids -- confirm with the caller)
        study_id : int
            Identifier handed to ``SampleTemplate`` and ``Study``
        fp_rsp : str
            Path to the sample template file; removed once the new template
            has been created
        """
        # Drop every existing prep template hanging from the raw data
        for raw_data_id in raw_data():
            prep_ids = RawData(raw_data_id).prep_templates
            for prep_id in prep_ids:
                if not PrepTemplate.exists(prep_id):
                    continue
                PrepTemplate.delete(prep_id)

        # Drop the current sample template, if there is one
        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        # Build the replacement and discard the uploaded file
        template_df = load_template_to_dataframe(fp_rsp)
        SampleTemplate.create(template_df, Study(study_id))
        remove(fp_rsp)
コード例 #2
0
ファイル: description_handlers.py プロジェクト: RNAer/qiita
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Replace the study's prep and sample templates with a new one

        Deletes the prep templates reachable through the study's raw data,
        deletes the current sample template (when present), creates a new
        sample template from the file at `fp_rsp` and finally removes that
        file. Raw data objects are only read here, never deleted.

        Parameters
        ----------
        raw_data : callable
            Zero-argument callable whose items are wrapped in ``RawData``
            (presumably raw data ids -- verify against the caller)
        study_id : int
            Identifier used for ``SampleTemplate`` and ``Study``
        fp_rsp : str
            Path of the uploaded sample template; deleted on success
        """
        for rd_id in raw_data():
            raw = RawData(rd_id)
            for prep in raw.prep_templates:
                # Only delete the prep templates that are still around
                if PrepTemplate.exists(prep):
                    PrepTemplate.delete(prep)

        has_sample_template = SampleTemplate.exists(study_id)
        if has_sample_template:
            SampleTemplate.delete(study_id)

        new_template = load_template_to_dataframe(fp_rsp)
        SampleTemplate.create(new_template, Study(study_id))

        # The upload is no longer needed once the template is stored
        remove(fp_rsp)
コード例 #3
0
 def test_to_file(self):
     """to file writes a tab delimited file with all the metadata

     A sample template is created from ``self.metadata``, dumped to a
     temporary file and the file contents are compared against
     ``EXP_SAMPLE_TEMPLATE``.
     """
     fd, fp = mkstemp()
     close(fd)
     # Register the temporary file before doing anything that can raise, so
     # it is not leaked if `create` or `to_file` fails (the list is
     # presumably emptied by the test case's tearDown -- confirm).
     self._clean_up_files.append(fp)
     st = SampleTemplate.create(self.metadata, self.new_study)
     st.to_file(fp)
     # NOTE(review): the 'U' open mode was removed in Python 3.11; it is
     # kept here because the surrounding code targets Python 2.
     with open(fp, 'U') as f:
         obs = f.read()
     self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
コード例 #4
0
 def test_to_file(self):
     """to file writes a tab delimited file with all the metadata

     Creates a sample template from ``self.metadata``, writes it to a
     temporary file and checks the written text against
     ``EXP_SAMPLE_TEMPLATE``.
     """
     fd, fp = mkstemp()
     close(fd)
     # Track the temporary file immediately: if `create` or `to_file` raises
     # the file would otherwise never be registered for removal (the list is
     # presumably consumed by tearDown -- confirm).
     self._clean_up_files.append(fp)
     st = SampleTemplate.create(self.metadata, self.new_study)
     st.to_file(fp)
     # NOTE(review): 'U' mode no longer exists in Python 3.11+; left as is
     # because the surrounding code targets Python 2.
     with open(fp, 'U') as f:
         obs = f.read()
     self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
コード例 #5
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp,
                                  data_type, is_mapping_file):
        """Replace the study's prep and sample templates with new ones

        Existing prep templates reachable through the raw data and the
        existing sample template are deleted. The replacement is then built
        from `fp_rsp`, either as a QIIME mapping file or as a plain sample
        template, and the file is removed.

        Parameters
        ----------
        raw_data : callable
            Zero-argument callable whose items are wrapped in ``RawData``
            (presumably raw data ids -- confirm with the caller)
        study_id : int
            Identifier handed to ``SampleTemplate`` and ``Study``
        fp_rsp : str
            Path to the uploaded file; removed once processed
        data_type : str
            Data type for the prep template; converted with ``int`` when a
            QIIME mapping file is processed
        is_mapping_file : bool
            Whether `fp_rsp` should be processed as a QIIME mapping file

        Raises
        ------
        ValueError
            If `is_mapping_file` is set and `data_type` is the empty string
        """
        if is_mapping_file and data_type == "":
            raise ValueError(
                "Please, choose a data type if uploading a QIIME mapping file")

        # Remove whatever templates are currently attached to the study
        for raw_data_id in raw_data():
            for prep_id in RawData(raw_data_id).prep_templates:
                if not PrepTemplate.exists(prep_id):
                    continue
                PrepTemplate.delete(prep_id)

        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        # Create the replacement from the uploaded file
        if not is_mapping_file:
            SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                  Study(study_id))
        else:
            create_templates_from_qiime_mapping_file(
                fp_rsp, Study(study_id), int(data_type))

        remove(fp_rsp)
コード例 #6
0
    def test_create(self):
        """Creates a new SampleTemplate

        Checks the id of the returned object and the rows that show up in
        ``qiita.required_sample_info``, ``qiita.study_sample_columns`` and
        the dynamic ``qiita.sample_2`` table.
        """
        template = SampleTemplate.create(self.metadata, self.new_study)
        # The object handed back carries the expected id
        self.assertEqual(template.id, 2)

        # Rows added to required_sample_info. Column order:
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description, followed by two
        # numeric columns (presumably latitude and longitude -- confirm)
        observed = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        collected = datetime(2014, 5, 29, 12, 24, 51)
        per_sample = (
            ("Sample1", "Test Sample 1", 42.42, 41.41),
            ("Sample2", "Test Sample 2", 4.2, 1.1),
            ("Sample3", "Test Sample 3", 4.8, 4.41),
        )
        expected = [
            [2, sample_id, "location1", True, True, "type1", 1, collected,
             "NotIdentified", description, first_num, second_num]
            for sample_id, description, first_num, second_num in per_sample]
        self.assertEqual(observed, expected)

        # Rows added to study_sample_columns:
        # study_id, column_name, column_type
        observed = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        expected = [[2, "str_column", "varchar"]]
        self.assertEqual(observed, expected)

        # The dynamic table has been created
        table_present = exists_table("sample_2", self.conn_handler)
        self.assertTrue(table_present)

        # ...and holds the expected values: sample_id, str_column
        observed = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        expected = [
            ['Sample1', "Value for sample 1"],
            ['Sample2', "Value for sample 2"],
            ['Sample3', "Value for sample 3"],
        ]
        self.assertEqual(observed, expected)
コード例 #7
0
    def test_create(self):
        """Creates a new SampleTemplate

        Checks the id of the returned object and the rows that show up in
        ``qiita.required_sample_info``, ``qiita.study_sample_columns`` and
        the dynamic ``qiita.sample_2`` table.
        """
        template = SampleTemplate.create(self.metadata, self.new_study)
        # The object handed back carries the expected id
        self.assertEqual(template.id, 2)

        # Rows added to required_sample_info. Column order:
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description
        observed = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        collected = datetime(2014, 5, 29, 12, 24, 51)
        per_sample = (
            ("Sample1", "Test Sample 1"),
            ("Sample2", "Test Sample 2"),
            ("Sample3", "Test Sample 3"),
        )
        expected = [
            [2, sample_id, "location1", True, True, "type1", 1, collected,
             "NotIdentified", description]
            for sample_id, description in per_sample]
        self.assertEqual(observed, expected)

        # Rows added to study_sample_columns:
        # study_id, column_name, column_type
        observed = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        expected = [[2, "str_column", "varchar"]]
        self.assertEqual(observed, expected)

        # The dynamic table has been created
        table_present = exists_table("sample_2", self.conn_handler)
        self.assertTrue(table_present)

        # ...and holds the expected values: sample_id, str_column
        observed = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        expected = [
            ['Sample1', "Value for sample 1"],
            ['Sample2', "Value for sample 2"],
            ['Sample3', "Value for sample 3"],
        ]
        self.assertEqual(observed, expected)
コード例 #8
0
ファイル: test_analysis.py プロジェクト: RNAer/qiita
    def test_retrieve_dropped_samples(self):
        # Builds a second study with its own sample template and processed
        # data, links three of its samples to analysis 1, then rebuilds the
        # biom tables with a subset of samples and checks which samples are
        # reported as dropped.

        # Description of the second study
        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }

        # Only these fields differ between the three samples:
        # sample id, description, str_column, latitude, longitude
        per_sample = (
            ('SKB8.640193', 'Test Sample 1', 'Value for sample 1',
             42.42, 41.41),
            ('SKD8.640184', 'Test Sample 2', 'Value for sample 2',
             4.2, 1.1),
            ('SKB7.640196', 'Test Sample 3', 'Value for sample 3',
             4.8, 4.41),
        )
        sample_md = {}
        for sample_id, description, str_value, lat, lon in per_sample:
            sample_md[sample_id] = {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status': 'received',
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': description,
                'str_column': str_value,
                'latitude': lat,
                'longitude': lon,
            }
        sample_df = pd.DataFrame.from_dict(sample_md, orient='index')

        Study.create(User("*****@*****.**"), "Test study 2", [1], study_info)

        SampleTemplate.create(sample_df, Study(2))

        # Register processed data for the new study
        mountpoint = get_mountpoint("processed_data")[0][1]
        biom_fp = join(mountpoint,
                       "2_study_1001_closed_reference_otu_table.biom")
        ProcessedData.create("processed_params_uclust", 1, [(biom_fp, 6)],
                             study=Study(2), data_type="16S")

        # Attach the three new samples to analysis 1
        insert_sql = (
            "INSERT INTO qiita.analysis_sample (analysis_id, "
            "processed_data_id, sample_id) VALUES "
            "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
            "(1,2,'2.SKB7.640196')")
        self.conn_handler.execute(insert_sql)

        kept_samples = {
            1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
            2: ['2.SKB8.640193', '2.SKD8.640184'],
        }
        self.analysis._build_biom_tables(kept_samples, 10000,
                                         conn_handler=self.conn_handler)

        expected_dropped = {
            1: {'1.SKM4.640180', '1.SKM9.640192'},
            2: {'2.SKB7.640196'},
        }
        self.assertEqual(self.analysis.dropped_samples, expected_dropped)
コード例 #9
0
ファイル: description_handlers.py プロジェクト: adamrp/qiita
    def process_sample_template(self, study, user, callback):
        """Process a sample template from the POST method

        The file named by the "sample_template" request argument is looked
        up in the study's uploads folder and turned into a sample template
        (plus a prep template when it looks like a QIIME mapping file). The
        file is removed after a successful creation.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object (not used by this method)
        callback : function
            Called once with the tuple
            ``(msg, msg_level, None, None, None)`` when processing is done

        Raises
        ------
        HTTPError
            With status 404 if the sample template file does not exist
        """
        # Both arguments are mandatory here; tornado raises its own error
        # if either one is missing
        sample_template = self.get_argument('sample_template')
        data_type = self.get_argument('data_type')

        # Locate the file inside the study's uploads folder
        _, uploads_dir = get_mountpoint("uploads")[0]
        fp_rsp = join(uploads_dir, str(study.id), sample_template)

        if not exists(fp_rsp):
            # Nothing to process, fail nicely
            raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

        # Outcome reported when everything goes well
        msg = "The sample template '%s' has been added" % sample_template
        msg_level = "success"
        is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

        try:
            if is_mapping_file and not data_type:
                raise ValueError("Please, choose a data type if uploading a "
                                 "QIIME mapping file")

            with warnings.catch_warnings(record=True) as caught:
                if not is_mapping_file:
                    SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                          study)
                else:
                    create_templates_from_qiime_mapping_file(fp_rsp, study,
                                                             int(data_type))
                remove(fp_rsp)

                # Collapse every recorded warning into a single message.
                # This is discarded if an exception is raised.
                if caught:
                    msg = '; '.join(convert_text_html(str(warn.message))
                                    for warn in caught)
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError,
                QiitaDBError, QiitaWareError) as err:
            # Processing failed: report the error to the user so they can
            # fix the template
            if is_mapping_file:
                failed_step = 'parsing the QIIME mapping file'
            else:
                failed_step = 'parsing the sample template'
            msg = convert_text_html(
                html_error_message % (failed_step, basename(fp_rsp),
                                      str(err)))
            msg_level = "danger"

        callback((msg, msg_level, None, None, None))
コード例 #10
0
ファイル: metadata_pipeline.py プロジェクト: adamrp/qiita
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    The mapping file columns are split in two groups: the ones listed in
    the prep template column restrictions go to the prep template, all the
    remaining ones go to the sample template.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the QIIME columns that get renamed is missing from the
        mapping file
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # QIIME specific columns and the names they get in the templates
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # The reverse primer is only renamed when the file provides it
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s" % ', '.join(missing))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Lower-case only the columns that we control; leave the rest untouched
    fixed_case = []
    for name in qiime_map.columns:
        lowered = name.lower()
        fixed_case.append(lowered if lowered in CONTROLLED_COLS else name)
    qiime_map.columns = fixed_case

    # Column names declared by every restriction of a restriction set
    def _restricted_cols(restriction_set):
        return (col
                for restriction in viewvalues(restriction_set)
                for col in viewkeys(restriction.columns))

    # Work out which columns belong to the prep template
    pt_cols = set(_restricted_cols(PREP_TEMPLATE_COLUMNS))

    if isinstance(data_type, (int, long)):
        data_type_str = convert_from_id(data_type, "data_type")
    else:
        data_type_str = data_type

    # Target gene data types carry extra prep template columns
    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_restricted_cols(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    sample_md = qiime_map.ix[:, st_cols]
    prep_md = qiime_map.ix[:, pt_cols]

    # The sample template is created first, then the prep template
    sample_template = SampleTemplate.create(sample_md, study)
    prep_template = PrepTemplate.create(prep_md, study, data_type)
    return (sample_template, prep_template)
コード例 #11
0
 def test_create_duplicate_header(self):
     """Create raises an error when duplicate headers are present

     An upper-case 'STR_COLUMN' column is added to the metadata before
     creating the template (presumably clashing with an existing
     'str_column' -- confirm against the test fixture).
     """
     blank_values = pd.Series([''] * 3, index=self.metadata.index)
     self.metadata['STR_COLUMN'] = blank_values
     with self.assertRaises(QiitaDBDuplicateHeaderError):
         SampleTemplate.create(self.metadata, self.new_study)
コード例 #12
0
 def test_create_duplicate(self):
     """Create raises an error when creating a duplicated SampleTemplate

     The template is created for ``self.test_study``, which presumably
     already has a sample template -- confirm against the test fixture.
     """
     self.assertRaises(QiitaDBDuplicateError, SampleTemplate.create,
                       self.metadata, self.test_study)
コード例 #13
0
 def test_create_duplicate_header(self):
     """Create raises an error when duplicate headers are present

     The metadata gets an extra 'STR_COLUMN' column of empty strings before
     the template is created (presumably a case-insensitive duplicate of an
     existing 'str_column' -- confirm against the test fixture).
     """
     row_index = self.metadata.index
     self.metadata['STR_COLUMN'] = pd.Series(['', '', ''], index=row_index)
     with self.assertRaises(QiitaDBDuplicateHeaderError):
         SampleTemplate.create(self.metadata, self.new_study)
コード例 #14
0
 def test_create_duplicate(self):
     """Create raises an error when creating a duplicated SampleTemplate

     ``self.test_study`` presumably already owns a sample template, so a
     second creation must fail -- confirm against the test fixture.
     """
     self.assertRaises(QiitaDBDuplicateError, SampleTemplate.create,
                       self.metadata, self.test_study)
コード例 #15
0
ファイル: test_analysis.py プロジェクト: jwdebelius/qiita
    def test_retrieve_dropped_samples(self):
        # Sets up a second study (sample template plus processed data), adds
        # three of its samples to analysis 1, rebuilds the biom tables with
        # only some of the samples and verifies the reported dropped samples.

        # Second study used by the test
        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
            "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
            "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }

        # Fields that vary per sample:
        # sample id, description, str_column, latitude, longitude
        varying_fields = [
            ('SKB8.640193', 'Test Sample 1', 'Value for sample 1',
             42.42, 41.41),
            ('SKD8.640184', 'Test Sample 2', 'Value for sample 2',
             4.2, 1.1),
            ('SKB7.640196', 'Test Sample 3', 'Value for sample 3',
             4.8, 4.41),
        ]
        metadata_by_sample = {}
        for name, description, str_value, lat, lon in varying_fields:
            metadata_by_sample[name] = {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status': 'received',
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': description,
                'str_column': str_value,
                'latitude': lat,
                'longitude': lon
            }
        sample_df = pd.DataFrame.from_dict(metadata_by_sample,
                                           orient='index')

        Study.create(User("*****@*****.**"), "Test study 2", [1], study_info)

        SampleTemplate.create(sample_df, Study(2))

        # Processed data for the new study
        processed_dir = get_mountpoint("processed_data")[0][1]
        table_fp = join(processed_dir,
                        "2_study_1001_closed_reference_otu_table.biom")
        ProcessedData.create("processed_params_uclust", 1, [(table_fp, 6)],
                             study=Study(2), data_type="16S")

        # Link the three new samples to analysis 1
        link_samples_sql = (
            "INSERT INTO qiita.analysis_sample (analysis_id, "
            "processed_data_id, sample_id) VALUES "
            "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
            "(1,2,'2.SKB7.640196')")
        self.conn_handler.execute(link_samples_sql)

        selected = {
            1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
            2: ['2.SKB8.640193', '2.SKD8.640184']
        }
        self.analysis._build_biom_tables(selected, 10000,
                                         conn_handler=self.conn_handler)

        dropped = {
            1: {'1.SKM4.640180', '1.SKM9.640192'},
            2: {'2.SKB7.640196'}
        }
        self.assertEqual(self.analysis.dropped_samples, dropped)
コード例 #16
0
ファイル: test_commands.py プロジェクト: jenwei/qiita
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing

        A study with three samples is created together with its sample
        template and a prep template; a FASTA file is written to
        ``self.temp_dir``, converted with ``to_hdf5`` and registered as
        preprocessed data.

        Returns
        -------
        PreprocessedData
            The object returned by ``PreprocessedData.create``
        """
        # Silence the warnings raised while adding the templates. Note that
        # this changes the process-wide warning filters.
        simplefilter("ignore")

        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        study = Study.create(User('*****@*****.**'), "Test EBI study", [1],
                             study_info)

        # Sample template: samples differ in collection day and description
        sample_md = {}
        for sample_name, day, description in (
                ('Sample1', 1, 'Test Sample 1'),
                ('Sample2', 2, 'Test Sample 2'),
                ('Sample3', 3, 'Test Sample 3')):
            sample_md[sample_name] = {
                'collection_timestamp': datetime(2015, 6, day, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                'scientific_name': 'h**o sapiens',
                'Description': description}
        sample_df = pd.DataFrame.from_dict(sample_md, orient='index')
        SampleTemplate.create(sample_df, study)

        # Prep template: samples differ in barcode and design description
        prep_md = {}
        for sample_name, barcode, design in (
                ('Sample1', 'CGTAGAGCTCTC', "Random value 1"),
                ('Sample2', 'CGTAGAGCTCTA', "Random value 2"),
                ('Sample3', 'CGTAGAGCTCTT', "Random value 3")):
            prep_md[sample_name] = {
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': barcode,
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description': design}
        prep_df = pd.DataFrame.from_dict(prep_md, orient='index')
        prep_template = PrepTemplate.create(prep_df, study, "16S",
                                            'Metagenomics')

        # Write the sequences and convert them to the demultiplexed format
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as fasta_out:
            fasta_out.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, 'w') as demux_out:
            to_hdf5(fna_fp, demux_out)

        return PreprocessedData.create(
            study, "preprocessed_sequence_illumina_params", 1,
            [(demux_fp, 6)], prep_template)