Python SampleTemplate.to_dataframe Examples, qiita_db.metadata_template.sample_template.SampleTemplate.to_dataframe Python Examples

Example #1

0

Show file

File: download.py Project: jlab/qiita

    def get(self, prep_template_id):
        pid = int(prep_template_id)
        pt = PrepTemplate(pid)
        sid = pt.study_id

        self._check_permissions(sid)

        st = SampleTemplate(sid)

        text = st.to_dataframe(samples=list(pt)).to_csv(None, sep='\t')

        self._finish_generate_files(
            'sample_information_from_prep_%s.tsv' % pid, text)

Example #2

0

Show file

def sample_template_summary_get_req(study_id, user):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    study_id: int
        The study to retrieve the sample information summary
    user: qiita_db.user
        The user performing the request

    Returns
    -------
    dict of {str: object}
        Keys are metadata categories and the values are list of tuples. Each
        tuple is an observed value in the category and the number of times
        it's seen.

    Raises
    ------
    HTTPError
        404 If the sample template doesn't exist
    """
    # Check if the current user has access to the study and if the sample
    # template exists
    sample_template_checks(study_id, user, check_exists=True)

    st = SampleTemplate(study_id)
    df = st.to_dataframe()

    # Drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    res = {}
    for column in df.columns:
        counts = df[column].value_counts()
        res[str(column)] = [(str(key), counts[key])
                            for key in natsorted(
                                counts.index,
                                key=lambda x: unicode(x, errors='ignore'))]

    return res

Example #3

0

Show file

File: sample_template.py Project: josenavas/QiiTa

def sample_template_summary_get_req(study_id, user):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    study_id: int
        The study to retrieve the sample information summary
    user: qiita_db.user
        The user performing the request

    Returns
    -------
    dict of {str: object}
        Keys are metadata categories and the values are list of tuples. Each
        tuple is an observed value in the category and the number of times
        it's seen.

    Raises
    ------
    HTTPError
        404 If the sample template doesn't exist
    """
    # Check if the current user has access to the study and if the sample
    # template exists
    sample_template_checks(study_id, user, check_exists=True)

    st = SampleTemplate(study_id)
    df = st.to_dataframe()

    # Drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    res = {}
    for column in df.columns:
        counts = df[column].value_counts()
        res[str(column)] = [(str(key), counts[key])
                            for key in natsorted(
                                counts.index,
                                key=lambda x: unicode(x, errors='ignore'))]

    return res

Example #4

0

Show file

File: sample_template.py Project: semarpetrus/qiita

def sample_template_get_req(samp_id, user_id):
    """Gets the json of the full sample template

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': msg,
         'template': dict of {str: {str: object, ...}, ...}

        template is dictionary where the keys access_error the metadata samples
        and the values are a dictionary of column and value.
        Format {sample: {column: value, ...}, ...}
    """
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return exists
    access_error = check_access(int(samp_id), user_id)
    if access_error:
        return access_error

    template = SampleTemplate(int(samp_id))
    access_error = check_access(template.study_id, user_id)
    if access_error:
        return access_error
    df = template.to_dataframe()
    return {
        'status': 'success',
        'message': '',
        'template': df.to_dict(orient='index')
    }

Example #5

0

Show file

def sample_template_get_req(samp_id, user_id):
    """Gets the json of the full sample template

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': msg,
         'template': dict of {str: {str: object, ...}, ...}

        template is dictionary where the keys access_error the metadata samples
        and the values are a dictionary of column and value.
        Format {sample: {column: value, ...}, ...}
    """
    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return exists
    access_error = check_access(int(samp_id), user_id)
    if access_error:
        return access_error

    template = SampleTemplate(int(samp_id))
    access_error = check_access(template.study_id, user_id)
    if access_error:
        return access_error
    df = template.to_dataframe()
    return {'status': 'success',
            'message': '',
            'template': df.to_dict(orient='index')}

Example #6

0

Show file

def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'info': dict of {str: object}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info dictionary contains the keys as the metadata categories
        and the values are list of tuples. Each tuple is an observed value in
        the category and the number of times its seen.
        Format {num_samples: value,
                category: [(val1, count1), (val2, count2), ...], ...}
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    processing, alert_type, alert_msg = get_sample_template_processing_status(
        samp_id)

    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return {'status': 'success',
                'message': '',
                'num_samples': 0,
                'num_columns': 0,
                'editable': not processing,
                'alert_type': alert_type,
                'alert_message': alert_msg,
                'stats': {}}

    template = SampleTemplate(int(samp_id))

    df = template.to_dataframe()

    editable = (Study(template.study_id).can_edit(User(user_id)) and not
                processing)

    out = {'status': 'success',
           'message': '',
           'num_samples': df.shape[0],
           'num_columns': df.shape[1],
           'editable': editable,
           'alert_type': alert_type,
           'alert_message': alert_msg,
           'stats': {}}

    # drop the samp_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)
    for column in df.columns:
        counts = df[column].value_counts()
        out['stats'][str(column)] = [(str(key), counts[key])
                                     for key in natsorted(counts.index)]

    return out

Example #7

0

Show file

File: 51.py Project: mestaki/qiita

        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'sample_%%'
                        AND table_name NOT IN (
                            'prep_template', 'prep_template_sample')
                    GROUP BY table_name"""
        # note that we are looking for those columns with duplicated names in
        # the headers
        TRN.add(sql, [tuple(set(cols_sample))])
        for table, columns in dict(TRN.execute_fetchindex()).items():
            # [1] the format is table_# so taking the #
            st = SampleTemplate(int(table.split('_')[1]))
            # getting just the columns of interest
            st_df = st.to_dataframe()[columns]
            # converting to datetime
            for col in columns:
                st_df[col] = st_df[col].apply(transform_date)
            st.update(st_df)

if cols_prep:
    with TRN:
        # a few notes: just getting the preps with duplicated values; ignoring
        # column 'sample_id' and tables 'study_sample', 'prep_template',
        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'prep_%%'
                        AND table_name NOT IN (

Example #8

0

Show file

File: sample_template.py Project: semarpetrus/qiita

def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'info': dict of {str: object}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info dictionary contains the keys as the metadata categories
        and the values are list of tuples. Each tuple is an observed value in
        the category and the number of times its seen.
        Format {num_samples: value,
                category: [(val1, count1), (val2, count2), ...], ...}
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    processing, alert_type, alert_msg = get_sample_template_processing_status(
        samp_id)

    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return {
            'status': 'success',
            'message': '',
            'num_samples': 0,
            'num_columns': 0,
            'editable': not processing,
            'alert_type': alert_type,
            'alert_message': alert_msg,
            'stats': {}
        }

    template = SampleTemplate(int(samp_id))

    df = template.to_dataframe()

    editable = (Study(template.study_id).can_edit(User(user_id))
                and not processing)

    out = {
        'status': 'success',
        'message': '',
        'num_samples': df.shape[0],
        'num_columns': df.shape[1],
        'editable': editable,
        'alert_type': alert_type,
        'alert_message': alert_msg,
        'stats': {}
    }

    # drop the samp_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)
    for column in df.columns:
        counts = df[column].value_counts()
        out['stats'][str(column)] = [
            (str(key), counts[key])
            for key in natsorted(counts.index,
                                 key=lambda x: unicode(x, errors='ignore'))
        ]

    return out

Example #9

0

Show file

File: sample_template.py Project: carlyboyd/qiita

def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'info': dict of {str: object}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info dictionary contains the keys as the metadata categories
        and the values are list of tuples. Each tuple is an observed value in
        the category and the number of times its seen.
        Format {num_samples: value,
                category: [(val1, count1), (val2, count2), ...], ...}
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % samp_id)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if job_id:
            redis_info = loads(r_client.get(job_id))
            processing = redis_info['status_msg'] == 'Running'
            if processing:
                alert_type = 'info'
                alert_msg = 'This sample template is currently being processed'
            elif redis_info['status_msg'] == 'Success':
                alert_type = redis_info['return']['status']
                alert_msg = redis_info['return']['message'].replace('\n',
                                                                    '</br>')
                payload = {'job_id': None,
                           'status': alert_type,
                           'message': alert_msg}
                r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % samp_id,
                             dumps(payload))
            else:
                alert_type = redis_info['return']['status']
                alert_msg = redis_info['return']['message'].replace('\n',
                                                                    '</br>')
        else:
            processing = False
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')
    else:
        processing = False
        alert_type = ''
        alert_msg = ''

    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return {'status': 'success',
                'message': '',
                'num_samples': 0,
                'num_columns': 0,
                'editable': not processing,
                'alert_type': alert_type,
                'alert_message': alert_msg,
                'stats': {}}

    template = SampleTemplate(int(samp_id))

    df = template.to_dataframe()

    editable = (Study(template.study_id).can_edit(User(user_id)) and not
                processing)

    out = {'status': 'success',
           'message': '',
           'num_samples': df.shape[0],
           'num_columns': df.shape[1],
           'editable': editable,
           'alert_type': alert_type,
           'alert_message': alert_msg,
           'stats': {}}

    # drop the samp_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)
    for column in df.columns:
        counts = df[column].value_counts()
        out['stats'][str(column)] = [(str(key), counts[key])
                                     for key in natsorted(counts.index)]

    return out

Example #10

0

Show file

File: 51.py Project: ElDeveloper/qiita

        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'sample_%%'
                        AND table_name NOT IN (
                            'prep_template', 'prep_template_sample')
                    GROUP BY table_name"""
        # note that we are looking for those columns with duplicated names in
        # the headers
        TRN.add(sql, [tuple(set(cols_sample))])
        for table, columns in viewitems(dict(TRN.execute_fetchindex())):
            # [1] the format is table_# so taking the #
            st = SampleTemplate(int(table.split('_')[1]))
            # getting just the columns of interest
            st_df = st.to_dataframe()[columns]
            # converting to datetime
            for col in columns:
                st_df[col] = st_df[col].apply(transform_date)
            st.update(st_df)

if cols_prep:
    with TRN:
        # a few notes: just getting the preps with duplicated values; ignoring
        # column 'sample_id' and tables 'study_sample', 'prep_template',
        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'prep_%%'
                        AND table_name NOT IN (

Example #11

0

Show file

def sample_template_summary_get_req(samp_id, user_id):
    """Returns a summary of the sample template metadata columns

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns summary information in the form
        {'status': str,
         'message': str,
         'info': dict of {str: object}
        status can be success, warning, or error depending on result
        message has the warnings or errors
        info dictionary contains the keys as the metadata categories
        and the values are list of tuples. Each tuple is an observed value in
        the category and the number of times its seen.
        Format {num_samples: value,
                category: [(val1, count1), (val2, count2), ...], ...}
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % samp_id)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if job_id:
            redis_info = loads(r_client.get(job_id))
            processing = redis_info['status_msg'] == 'Running'
            if processing:
                alert_type = 'info'
                alert_msg = 'This sample template is currently being processed'
            elif redis_info['status_msg'] == 'Success':
                alert_type = redis_info['return']['status']
                alert_msg = redis_info['return']['message'].replace(
                    '\n', '</br>')
                payload = {
                    'job_id': None,
                    'status': alert_type,
                    'message': alert_msg
                }
                r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % samp_id,
                             dumps(payload))
            else:
                alert_type = redis_info['return']['status']
                alert_msg = redis_info['return']['message'].replace(
                    '\n', '</br>')
        else:
            processing = False
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')
    else:
        processing = False
        alert_type = ''
        alert_msg = ''

    exists = _check_sample_template_exists(int(samp_id))
    if exists['status'] != 'success':
        return {
            'status': 'success',
            'message': '',
            'num_samples': 0,
            'num_columns': 0,
            'editable': not processing,
            'alert_type': alert_type,
            'alert_message': alert_msg,
            'stats': {}
        }

    template = SampleTemplate(int(samp_id))

    df = template.to_dataframe()

    editable = (Study(template.study_id).can_edit(User(user_id))
                and not processing)

    out = {
        'status': 'success',
        'message': '',
        'num_samples': df.shape[0],
        'num_columns': df.shape[1],
        'editable': editable,
        'alert_type': alert_type,
        'alert_message': alert_msg,
        'stats': {}
    }

    # drop the samp_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)
    for column in df.columns:
        counts = df[column].value_counts()
        out['stats'][str(column)] = [(str(key), counts[key])
                                     for key in natsorted(counts.index)]

    return out