예제 #1
0
    def create(cls, md_template, raw_data, study, data_type,
               investigation_type=None):
        r"""Store a new prep template in the database and return it

        Parameters
        ----------
        md_template : DataFrame
            The metadata template contents, indexed by sample id
        raw_data : RawData
            The raw data that the new prep template is attached to
        study : Study
            The study that the new prep template belongs to
        data_type : str or int
            The data type of the prep template, given as its name or its id
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        An instance of `cls` giving access to the prep template just stored

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        QiitaDBExecutionError
            If running the creation queue fails and md_template contains
            samples that are not registered for the study
        """
        # Validate the investigation type first, when one was supplied
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        # Connection handler plus the queue that collects the creation steps
        handler = SQLConnectionHandler()
        creation_queue = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
        handler.create_queue(creation_queue)

        # data_type may arrive as the id or as the string: resolve both forms
        if not isinstance(data_type, (int, long)):
            data_type_str = data_type
            data_type_id = convert_to_id(data_type, "data_type", handler)
        else:
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type", handler)

        md_template = cls._clean_validate_template(
            md_template, study.id, data_type_str, handler)

        # The prep template id is required by the queued steps, and the queue
        # system cannot hand a result over to later calls, so this INSERT is
        # executed directly instead of being queued
        insert_sql = (
            "INSERT INTO qiita.prep_template "
            "(data_type_id, raw_data_id, investigation_type) "
            "VALUES (%s, %s, %s) RETURNING prep_template_id")
        insert_args = (data_type_id, raw_data.id, investigation_type)
        prep_id = handler.execute_fetchone(insert_sql, insert_args)[0]

        cls._add_common_creation_steps_to_queue(
            md_template, prep_id, handler, creation_queue)

        try:
            handler.execute_queue(creation_queue)
        except Exception:
            # The queue failed: remove the row that was inserted above
            delete_sql = ("DELETE FROM qiita.prep_template where "
                          "{0} = %s").format(cls._id_column)
            handler.execute(delete_sql, (prep_id,))

            # Look for samples of the prep template that the study does not
            # have registered, to report them with a specific error
            select_sql = ("SELECT sample_id from qiita.required_sample_info "
                          "WHERE study_id = %s")
            template_samples = set(md_template.index.values)
            study_samples = [
                row[0]
                for row in handler.execute_fetchall(select_sql, [study.id])]
            unknown_samples = template_samples.difference(study_samples)
            if unknown_samples:
                message = ('Samples found in prep template but not sample '
                           'template: %s' % ', '.join(unknown_samples))
                raise QiitaDBExecutionError(message)

            # Not the failure we know about: propagate the original error
            raise

        prep_template = cls(prep_id)
        prep_template.generate_files()
        return prep_template
예제 #2
0
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    The columns of the mapping file that are known prep template columns go
    to the prep template; every other column goes to the sample template.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If the mapping file lacks any of the QIIME columns that get renamed
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # The reverse primer is only renamed when the file provides it
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # Sorted so the message does not depend on set iteration order
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        # Yield every column name declared by every restriction in the set
        for restriction in viewvalues(restriction_set):
            for cols in viewkeys(restriction.columns):
                yield cols

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, (int, long)) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    # Split the columns with a boolean mask and `.loc`: `.ix` is deprecated
    # (and removed from later pandas releases) and set objects are not
    # reliable indexers. The mask also keeps the column order of the file.
    is_prep_col = qiime_map.columns.isin(pt_cols)
    st_md = qiime_map.loc[:, ~is_prep_col]
    pt_md = qiime_map.loc[:, is_prep_col]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
예제 #3
0
    def create(cls, md_template, study, data_type, investigation_type=None):
        r"""Store a new prep template in the database and return it

        Parameters
        ----------
        md_template : DataFrame
            The metadata template contents, indexed by sample id
        study : Study
            The study that the new prep template gets linked to
        data_type : str or int
            The data type of the prep template, given as its name or its id
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        An instance of `cls` giving access to the prep template just stored

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        QiitaDBExecutionError
            If the common creation steps fail and md_template contains
            samples that are not registered for the study
        """
        with TRN:
            # Validate the investigation type first, when one was supplied
            if investigation_type is not None:
                cls.validate_investigation_type(investigation_type)

            # data_type may arrive as the id or as the string: resolve both
            if not isinstance(data_type, (int, long)):
                data_type_str = data_type
                data_type_id = convert_to_id(data_type, "data_type")
            else:
                data_type_id = data_type
                data_type_str = convert_from_id(data_type, "data_type")

            # Target gene data types are validated against the extended set
            # of restrictions; a copy is updated so the module-level
            # PREP_TEMPLATE_COLUMNS is left untouched
            if data_type_str in TARGET_GENE_DATA_TYPES:
                restrictions = deepcopy(PREP_TEMPLATE_COLUMNS)
                restrictions.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
            else:
                restrictions = PREP_TEMPLATE_COLUMNS

            md_template = cls._clean_validate_template(
                md_template, study.id, restrictions)

            # Create the prep template row and retrieve its new id
            insert_sql = """INSERT INTO qiita.prep_template
                        (data_type_id, investigation_type)
                     VALUES (%s, %s)
                     RETURNING prep_template_id"""
            TRN.add(insert_sql, [data_type_id, investigation_type])
            prep_id = TRN.execute_fetchlast()

            try:
                cls._common_creation_steps(md_template, prep_id)
            except Exception:
                # Look for samples of the prep template that the study does
                # not have registered, to report them with a specific error
                samples_sql = """SELECT sample_id from qiita.study_sample
                         WHERE study_id = %s"""
                TRN.add(samples_sql, [study.id])
                template_samples = set(md_template.index.values)
                unknown_samples = template_samples.difference(
                    TRN.execute_fetchflatten())
                if unknown_samples:
                    message = ('Samples found in prep template but not sample '
                               'template: %s' % ', '.join(unknown_samples))
                    raise QiitaDBExecutionError(message)

                # Not the failure we know about: propagate the original error
                raise

            # Link the prep template with the study
            link_sql = """INSERT INTO qiita.study_prep_template
                        (study_id, prep_template_id)
                     VALUES (%s, %s)"""
            TRN.add(link_sql, [study.id, prep_id])

            TRN.execute()

            prep_template = cls(prep_id)
            prep_template.generate_files()

            return prep_template
예제 #4
0
 def testConvertFromID(self):
     """Check that ontology id 807481739 is converted to the name 'ENA'"""
     ontology_name = convert_from_id(807481739, 'ontology')
     self.assertEqual(ontology_name, 'ENA')
예제 #5
0
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    The columns of the mapping file that are known prep template columns go
    to the prep template; every other column goes to the sample template.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If the mapping file lacks any of the QIIME columns that get renamed
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # The reverse primer is only renamed when the file provides it
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # Sorted so the message does not depend on set iteration order
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [
        c.lower() if c.lower() in CONTROLLED_COLS else c
        for c in qiime_map.columns
    ]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        # Yield every column name declared by every restriction in the set
        for restriction in restriction_set.values():
            for cols in restriction.columns.keys():
                yield cols

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type") if isinstance(
        data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    # Split the columns with a boolean mask instead of indexing `.loc` with
    # set objects: recent pandas releases reject sets as indexers. The mask
    # also keeps the column order of the mapping file.
    is_prep_col = qiime_map.columns.isin(pt_cols)
    st_md = qiime_map.loc[:, ~is_prep_col]
    pt_md = qiime_map.loc[:, is_prep_col]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))