def delete_sample_template(self, study, user, callback):
    """Remove the requested sample template and report the outcome

    The id of the sample template is read from the 'sample_template_id'
    request argument.

    Parameters
    ----------
    study : Study
        The current study object; its title is used in the success message
    user : User
        The current user object (not used by this method)
    callback : function
        Called once with the tuple
        ``(msg, msg_level, 'study_information_tab', None, None)``
    """
    st_id = int(self.get_argument('sample_template_id'))

    try:
        SampleTemplate.delete(st_id)
        outcome = (
            "Sample template %d has been deleted from study: "
            "<b><i>%s</i></b>" % (st_id, study.title),
            "success")
    except Exception as err:
        # Any failure is shown to the user instead of being propagated
        outcome = (
            "Couldn't remove %d sample template: %s" % (st_id, str(err)),
            "danger")

    callback(outcome + ('study_information_tab', None, None))
Example #2
0
    def delete_sample_template(self, study, user, callback):
        """Delete the sample template given in the request arguments

        Parameters
        ----------
        study : Study
            The current study object, named in the success message
        user : User
            The current user object (unused here)
        callback : function
            Receives ``(msg, msg_level, 'study_information_tab', None, None)``
            once the deletion has been attempted
        """
        template_id = int(self.get_argument('sample_template_id'))

        level = "success"
        try:
            SampleTemplate.delete(template_id)
            text = ("Sample template %d has been deleted from study: "
                    "<b><i>%s</i></b>" % (template_id, study.title))
        except Exception as error:
            # Report the problem to the user rather than raising
            level = "danger"
            text = "Couldn't remove %d sample template: %s" % (
                template_id, str(error))

        result = (text, level, 'study_information_tab', None, None)
        callback(result)
Example #3
0
def sample_template_columns_get_req(study_id, column, user):
    """Returns the column names, or the values of one column, of the
    sample template

    Parameters
    ----------
    study_id: int
        The study whose sample template is queried
    column: str
        The column whose values are wanted; if None the list of columns of
        the sample template is returned instead
    user: qiita_db.user
        The user performing the request

    Returns
    -------
    list of str
        The result of the search

    Raises
    ------
    HTTPError
        404 If the sample template doesn't exist
    """
    # Fails if the user can't access the study or if the sample template
    # doesn't exist
    sample_template_checks(study_id, user, check_exists=True)

    template = SampleTemplate(study_id)
    if column is not None:
        return list(template.get_category(column).values())
    return template.categories()
    def update_sample_template(self, study, user, callback):
        """Extend and update the sample template from an uploaded file

        The name of the file is read from the 'sample_template' argument of
        the POST request and it is looked for in the study's directory of
        the uploads folder.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object (not used by this method)
        callback : function
            Called with ``(msg, msg_level, None, None, None)`` once the
            processing is done

        Raises
        ------
        HTTPError
            400 if the sample template file does not exist
        """
        # "sample_template" is mandatory: tornado raises if it is missing
        template_name = self.get_argument('sample_template')

        # Path of the sample template inside the uploads folder
        _, uploads_dir = get_mountpoint("uploads")[0]
        template_fp = join(uploads_dir, str(study.id), template_name)
        if not exists(template_fp):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % template_fp)

        # Outcome reported when no warning and no error shows up
        level = "success"
        text = "The sample template '%s' has been updated" % template_name
        try:
            with warnings.catch_warnings(record=True) as caught:
                current = SampleTemplate(study.id)
                new_data = load_template_to_dataframe(template_fp)
                current.extend(new_data)
                current.update(new_data)
                # The uploaded file is only removed once it has been applied
                remove(template_fp)

                # Report the distinct warnings as a single message; this is
                # discarded if an exception is raised
                if caught:
                    text = '\n'.join(set(str(w.message) for w in caught))
                    level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError,
                QiitaDBError) as err:
            # Show the error to the user so they can fix the template
            text = convert_text_html(html_error_message % (
                'updating the sample template:', basename(template_fp),
                str(err)))
            level = "danger"
        callback((text, level, None, None, None))
Example #5
0
def sample_template_overview_handler_get_request(study_id, user):
    """Collects the information shown in the sample template overview

    Parameters
    ----------
    study_id : int
        The study whose sample template overview is requested
    user : User
        The user performing the request

    Returns
    -------
    dict
        A dictionary with the keys 'exists', 'uploaded_files', 'data_types',
        'user_can_edit', 'job', 'download_id', 'old_files', 'num_samples'
        and 'num_columns'
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    st_exists = SampleTemplate.exists(study_id)

    # Always reported: the uploaded files whose name ends like a file that
    # can be a sample template
    uploaded = [fname
                for _, fname, _ in get_files_from_uploads_folders(study_id)
                if fname.endswith(('txt', 'tsv', 'xlsx'))]

    # Always reported: the id of the job attached to the sample information,
    # or None if nothing is stored for this study
    job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    job = loads(job_info)['job_id'] if job_info else None

    # Values that depend on whether the sample template exists
    data_types, old_files = [], []
    download_id = None
    n_samples = n_columns = 0
    if not st_exists:
        # The data types are needed in case the user uploads a QIIME
        # mapping file
        data_types = sorted(data_types_get_req()['data_types'])
    else:
        template = SampleTemplate(study_id)
        filepaths = template.get_filepaths()
        # The first entry is the current sample template file, of which
        # only the id is kept so the user can download it
        download_id = filepaths.pop(0)[0]
        # The remaining entries are the old files; keep their basename
        old_files = [basename(path) for _, path in filepaths]
        # Count the samples without building a list of them
        n_samples = sum(1 for _ in template.keys())
        n_columns = len(template.categories())

    return {'exists': st_exists,
            'uploaded_files': uploaded,
            'data_types': data_types,
            'user_can_edit': Study(study_id).can_edit(user),
            'job': job,
            'download_id': download_id,
            'old_files': old_files,
            'num_samples': n_samples,
            'num_columns': n_columns}
Example #6
0
    def tearDown(self):
        """Remove the files and the study created during the test"""
        for path in self._clean_up_files:
            if not exists(path):
                continue
            remove(path)

        # The prep templates and the sample template are deleted before the
        # study itself
        new_study_id = self.new_study.id
        for prep in self.new_study.prep_templates():
            PrepTemplate.delete(prep.id)
        if SampleTemplate.exists(new_study_id):
            SampleTemplate.delete(new_study_id)
        Study.delete(new_study_id)
Example #7
0
    def tearDown(self):
        """Clean up the temporary files and delete the test study"""
        # Only the files that are still on disk need to be removed
        for tmp_fp in self._clean_up_files:
            still_there = exists(tmp_fp)
            if still_there:
                remove(tmp_fp)

        sid = self.new_study.id
        # Delete what hangs from the study first, then the study
        for prep_template in self.new_study.prep_templates():
            PrepTemplate.delete(prep_template.id)
        has_sample_template = SampleTemplate.exists(sid)
        if has_sample_template:
            SampleTemplate.delete(sid)
        Study.delete(sid)
Example #8
0
def create_sample_template(fp, study, is_mapping_file, data_type=None):
    """Creates a sample template from a file and removes the file

    Parameters
    ----------
    fp : str
        The file path to the template file
    study : qiita_db.study.Study
        The study to add the sample template to
    is_mapping_file : bool
        Whether `fp` contains a mapping file or a sample template
    data_type : str, optional
        If `is_mapping_file` is True, the data type of the prep template to be
        created

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}; the status is
        'success', 'warning' (the message holds the warnings raised) or
        'danger' (the message holds the error)
    """
    # The imports need to be in here because this code is executed in
    # the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.sample_template import SampleTemplate
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_ware.metadata_pipeline import (
        create_templates_from_qiime_mapping_file)

    result = {'status': 'success', 'message': ''}
    try:
        with warnings.catch_warnings(record=True) as caught:
            if not is_mapping_file:
                SampleTemplate.create(load_template_to_dataframe(fp), study)
            else:
                create_templates_from_qiime_mapping_file(
                    fp, study, data_type)
            # Only reached if the creation didn't fail
            remove(fp)

            # Report the distinct warnings as one message. This is
            # overwritten if an exception is raised
            if caught:
                result['message'] = '\n'.join(
                    set(str(w.message) for w in caught))
                result['status'] = 'warning'
    except Exception as err:
        # Show the error to the user so they can fix the template
        result['status'] = 'danger'
        result['message'] = str(err)

    return result
Example #9
0
def create_sample_template(fp, study, is_mapping_file, data_type=None):
    """Creates the sample template of a study from an uploaded file

    Parameters
    ----------
    fp : str
        The file path to the template file; the file is removed once the
        creation succeeds
    study : qiita_db.study.Study
        The study to add the sample template to
    is_mapping_file : bool
        Whether `fp` contains a mapping file or a sample template
    data_type : str, optional
        If `is_mapping_file` is True, the data type of the prep template to be
        created

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}
    """
    # The imports need to be in here because this code is executed in
    # the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.sample_template import SampleTemplate
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_ware.metadata_pipeline import (
        create_templates_from_qiime_mapping_file)

    level, text = 'success', ''
    try:
        with warnings.catch_warnings(record=True) as recorded:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(
                    fp, study, data_type)
            else:
                template_df = load_template_to_dataframe(fp)
                SampleTemplate.create(template_df, study)
            remove(fp)

            # Merge the distinct warning messages into one; they are
            # dropped if an exception is raised
            if recorded:
                unique_msgs = set(str(w.message) for w in recorded)
                text = '\n'.join(unique_msgs)
                level = 'warning'
    except Exception as exc:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        level, text = 'danger', str(exc)

    return {'status': level, 'message': text}
Example #10
0
    def get(self, prep_template_id):
        """Serve the sample information of the samples of a prep template

        Parameters
        ----------
        prep_template_id : str or int
            The id of the prep template; it is cast to int
        """
        prep_id = int(prep_template_id)
        prep = PrepTemplate(prep_id)
        study_id = prep.study_id

        self._check_permissions(study_id)

        # Restrict the sample information to the samples present in the prep
        # template and serialize it as tab-separated text
        sample_info = SampleTemplate(study_id)
        frame = sample_info.to_dataframe(samples=list(prep))
        contents = frame.to_csv(None, sep='\t')

        filename = 'sample_information_from_prep_%s.tsv' % prep_id
        self._finish_generate_files(filename, contents)
Example #11
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Render the VAMPS submission page of a preprocessed artifact

        Parameters
        ----------
        preprocessed_data_id : str or int
            The artifact id; it is cast to int
        msg : str
            The message to show, replaced if the artifact doesn't have
            exactly one demultiplexed file
        msg_level : str
            The level of the message, overwritten in every case

        Raises
        ------
        HTTPError
            404 if the artifact does not exist
            403 if the current user is not an admin
        """
        artifact_id = int(preprocessed_data_id)
        try:
            artifact = Artifact(artifact_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(
                404, "Artifact %d does not exist!" % artifact_id)

        # Only admins get past this point
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(
                403, "No permissions of admin, "
                "get/VAMPSSubmitHandler: %s!" % user.id)

        prep_template = PrepTemplate(artifact.prep_template)
        sample_template = SampleTemplate(artifact.study)
        study = Study(artifact.study)
        stats = [('Number of samples', len(prep_template)),
                 ('Number of metadata headers',
                  len(sample_template.categories()))]

        demux = [fp for _, fp, fp_type in artifact.get_filepaths()
                 if fp_type == 'preprocessed_demux']

        if len(demux) == 1:
            # Exactly one demultiplexed file: report its sequence count
            stats.append(('Number of sequences', demux_stats(demux[0]).n))
            msg_level = 'success'
        elif demux:
            msg = ("Study appears to have multiple demultiplexed files!")
            msg_level = 'danger'
        else:
            msg = ("Study does not appear to have demultiplexed "
                   "sequences associated")
            msg_level = 'danger'

        self.render('vamps_submission.html',
                    study_title=study.title,
                    stats=stats,
                    message=msg,
                    study_id=study.id,
                    level=msg_level,
                    preprocessed_data_id=artifact_id)
Example #12
0
    def display_template(self, study, user, msg, msg_level, full_access,
                         top_tab=None, sub_tab=None, prep_tab=None):
        """Render the study description page

        Parameters
        ----------
        study : Study
            The study to show
        user : User
            The user; admins can always edit the study information
        msg : str
            The message to show; its newlines are turned into "<br/>"
        msg_level : str
            The level of the message
        full_access : object
            Passed as is to the template
        top_tab, sub_tab, prep_tab : object, optional
            Passed as is to the template
        """
        study_status = study.status
        user_level = user.level
        has_sample_template = SampleTemplate.exists(study.id)

        # Approval is only possible if there is a sample template and it has
        # all the columns of the 'qiita_main' restriction
        allow_approval = False
        approval_deny_msg = ""
        if has_sample_template:
            restrictions = [SAMPLE_TEMPLATE_COLUMNS['qiita_main']]
            missing = SampleTemplate(study.id).check_restrictions(
                restrictions)
            allow_approval = len(missing) == 0
            approval_deny_msg = (
                "Processed data approval request is disabled due to missing "
                "columns in the sample template: %s" % ', '.join(missing))

        # The study information is locked only for public studies seen by
        # users that are not admins
        show_edit_btn = not (study_status == 'public' and
                             user_level != 'admin')

        # Make the error message suitable for html
        html_msg = msg.replace('\n', "<br/>")

        self.render('study_description.html',
                    message=html_msg,
                    level=msg_level,
                    study=study,
                    study_title=study.title,
                    study_alias=study.info['study_alias'],
                    show_edit_btn=show_edit_btn,
                    show_data_tabs=has_sample_template,
                    full_access=full_access,
                    allow_approval=allow_approval,
                    approval_deny_msg=approval_deny_msg,
                    top_tab=top_tab,
                    sub_tab=sub_tab,
                    prep_tab=prep_tab)
Example #13
0
def sample_template_checks(study_id, user, check_exists=False):
    """Validates a request against a study and, optionally, its sample
    template

    Parameters
    ----------
    study_id : int
        The study id
    user : qiita_db.user.User
        The user trying to access the study
    check_exists : bool, optional
        If true, check if the sample template exists

    Raises
    ------
    HTTPError
        404 if the study does not exist
        403 if the user does not have access to the study
        404 if check_exists == True and the sample template doesn't exist
    """
    try:
        study = Study(int(study_id))
    except QiitaDBUnknownIDError:
        raise HTTPError(404, reason='Study does not exist')

    if not study.has_access(user):
        raise HTTPError(403, reason='User does not have access to study')

    # The existence of the sample template is only checked on demand
    if not check_exists:
        return
    if not SampleTemplate.exists(study_id):
        raise HTTPError(404, reason="Study %s doesn't have sample information"
                        % study_id)
Example #14
0
def sample_template_checks(study_id, user, check_exists=False):
    """Raises an HTTPError if the request on the study is not valid

    Parameters
    ----------
    study_id : int
        The study id
    user : qiita_db.user.User
        The user trying to access the study
    check_exists : bool, optional
        If true, check if the sample template exists

    Raises
    ------
    HTTPError
        404 if the study does not exist
        403 if the user does not have access to the study
        404 if check_exists == True and the sample template doesn't exist
    """
    try:
        study = Study(int(study_id))
    except QiitaDBUnknownIDError:
        raise HTTPError(404, reason='Study does not exist')

    can_access = study.has_access(user)
    if not can_access:
        raise HTTPError(403, reason='User does not have access to study')

    # Check if the sample template exists, but only when requested
    if check_exists:
        if not SampleTemplate.exists(study_id):
            reason = "Study %s doesn't have sample information" % study_id
            raise HTTPError(404, reason=reason)
Example #15
0
def sample_template_samples_get_req(samp_id, user_id):
    """Returns the sorted list of samples in the sample template

    Parameters
    ----------
    samp_id : int or str typecastable to int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        On success, a dictionary of the form
        {'status': 'success',
         'message': '',
         'samples': list of str}
        where samples is the sorted list of samples in the template.
        If the existence check or the access check fails, the dictionary
        produced by that check is returned instead.
    """
    st_id = int(samp_id)

    exists = _check_sample_template_exists(st_id)
    if exists['status'] != 'success':
        return exists

    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    # Iterating over the sample template yields its samples
    samples = sorted(SampleTemplate(st_id))
    return {'status': 'success', 'message': '', 'samples': samples}
Example #16
0
    def get(self, study_id):
        """Serve the EBI sample accessions of a study

        Parameters
        ----------
        study_id : str or int
            The id of the study; it is cast to int
        """
        sid = int(study_id)
        self._check_permissions(sid)

        accessions = SampleTemplate(sid).ebi_sample_accessions
        filename = 'ebi_sample_accessions_study_%s.tsv' % sid
        self._generate_files('sample_accession', accessions, filename)
Example #17
0
def sample_template_patch_request(user_id,
                                  req_op,
                                  req_path,
                                  req_value=None,
                                  req_from=None):
    """Modifies an attribute of the sample template

    Only the 'remove' operation is supported; it offloads the deletion of a
    sample or a column of the sample template to a job.

    Parameters
    ----------
    user_id : str
        The id of the user performing the patch operation
    req_op : str
        The operation to perform on the sample template
    req_path : str
        The path of the element to patch, of the form
        '/<sample template id>/<attribute>/<attribute id>'
    req_value : str, optional
        The value that needs to be modified (not used by 'remove')
    req_from : str, optional
        The original path of the element (not used by 'remove')

    Returns
    -------
    dict of {str, str}
        A dictionary with the following keys:
        - status: str, whether if the request is successful or not
        - message: str, if the request is unsuccessful, a human readable error
        If the user doesn't have access, the value returned by the access
        check is returned instead.
    """
    if req_op != 'remove':
        return {
            'status': 'error',
            'message': 'Operation "%s" not supported. '
                       'Current supported operations: remove' % req_op
        }

    path_parts = [v for v in req_path.split('/') if v]
    if len(path_parts) != 3:
        return {'status': 'error', 'message': 'Incorrect path parameter'}

    raw_id, attribute, attr_id = path_parts

    # Validate the id up front and use the same integer everywhere: for the
    # lookup, for the job and for the key under which the job is stored
    try:
        st_id = int(raw_id)
    except ValueError:
        return {'status': 'error',
                'message': 'Incorrect path parameter: "%s" is not a valid '
                           'sample template id' % raw_id}

    # Check if the user actually has access to the template
    st = SampleTemplate(st_id)
    access_error = check_access(st.study_id, user_id)
    if access_error:
        return access_error

    # Offload the deletion of the sample or column to the cluster
    job_id = safe_submit(user_id, delete_sample_or_column, SampleTemplate,
                         st_id, attribute, attr_id)
    # Store the job id attaching it to the sample template id
    r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % st_id,
                 dumps({'job_id': job_id}))

    return {'status': 'success', 'message': ''}
Example #18
0
    def test_update_sample_template(self):
        """Updating the sample template adds the new column and stores the
        job information, including the warning, in redis"""
        # Template file adding a new column to one sample
        handle, template_fp = mkstemp(suffix=".txt")
        close(handle)
        with open(template_fp, 'w') as out:
            out.write("sample_name\tnew_col\n1.SKD6.640190\tnew_value")
        self._clean_up_files.append(template_fp)

        params = {'study': 1, 'template_fp': template_fp}
        job = self._create_job('update_sample_template', params)
        private_task(job.id)
        self.assertEqual(job.status, 'success')

        new_value = SampleTemplate(1)['1.SKD6.640190']['new_col']
        self.assertEqual(new_value, 'new_value')

        stored = r_client.get("sample_template_1")
        self.assertIsNotNone(stored)
        job_info = loads(stored)
        self.assertCountEqual(job_info,
                              ['job_id', 'alert_type', 'alert_msg'])
        self.assertEqual(job_info['job_id'], job.id)
        self.assertEqual(job_info['alert_type'], 'warning')
        self.assertIn(
            'The following columns have been added to the existing '
            'template: new_col', job_info['alert_msg'])
        # making sure that the error name is not in the messages
        self.assertNotIn('QiitaDBWarning', job_info['alert_msg'])
Example #19
0
    def test_delete_sample_template(self):
        """The deletion job errors for study 1 and succeeds for a newly
        created study that only has a sample template"""
        # Error case: the job is expected to fail reporting the associated
        # prep templates
        failing_job = self._create_job('delete_sample_template', {'study': 1})
        private_task(failing_job.id)
        self.assertEqual(failing_job.status, 'error')
        self.assertIn(
            "Sample template cannot be erased because there are "
            "prep templates associated", failing_job.log.msg)

        # Success case: new study with a one-sample template
        study_info = {
            "timeseries_type_id": '1',
            "metadata_complete": 'true',
            "mixs_compliant": 'true',
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "study_alias": "TDST",
            "study_description": "Test delete sample template",
            "study_abstract": "Test delete sample template",
            "principal_investigator_id": StudyPerson(1)
        }
        new_study = Study.create(User('*****@*****.**'),
                                 "Delete Sample Template test", study_info)
        sample_md = {
            'physical_specimen_location': 'location1',
            'physical_specimen_remaining': 'true',
            'dna_extracted': 'true',
            'sample_type': 'type1',
            'collection_timestamp': '2014-05-29 12:24:15',
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'latitude': '42.42',
            'longitude': '41.41',
            'taxon_id': '9606',
            'scientific_name': 'h**o sapiens'
        }
        metadata = pd.DataFrame.from_dict(
            {'Sample1': sample_md}, orient='index', dtype=str)
        SampleTemplate.create(metadata, new_study)

        passing_job = self._create_job('delete_sample_template',
                                       {'study': new_study.id})
        private_task(passing_job.id)
        self.assertEqual(passing_job.status, 'success')
        self.assertFalse(SampleTemplate.exists(new_study.id))
Example #20
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Show the VAMPS submission page for the given artifact

        Parameters
        ----------
        preprocessed_data_id : str or int
            The id of the artifact; it is cast to int
        msg : str
            Message to display; replaced when the artifact has no, or more
            than one, demultiplexed file
        msg_level : str
            Level of the message; always overwritten

        Raises
        ------
        HTTPError
            404 if the artifact does not exist
            403 if the current user is not an admin
        """
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            artifact = Artifact(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, "Artifact %d does not exist!" % preprocessed_data_id)

        # Reached only when the artifact exists: the page is admin only
        current = self.current_user
        if current.level != "admin":
            raise HTTPError(403, "No permissions of admin, " "get/VAMPSSubmitHandler: %s!" % current.id)

        prep_template = PrepTemplate(artifact.prep_template)
        sample_template = SampleTemplate(artifact.study)
        study = Study(artifact.study)
        stats = [("Number of samples", len(prep_template))]
        stats.append(("Number of metadata headers", len(sample_template.categories())))

        demux_files = []
        for _, path, ftype in artifact.get_filepaths():
            if ftype == "preprocessed_demux":
                demux_files.append(path)
        n_demux = len(demux_files)

        if n_demux == 0:
            msg = "Study does not appear to have demultiplexed " "sequences associated"
            msg_level = "danger"
        elif n_demux > 1:
            msg = "Study appears to have multiple demultiplexed files!"
            msg_level = "danger"
        else:
            # Exactly one demultiplexed file: add its number of sequences
            file_stats = demux_stats(demux_files[0])
            stats.append(("Number of sequences", file_stats.n))
            msg_level = "success"

        page_args = dict(
            study_title=study.title,
            stats=stats,
            message=msg,
            study_id=study.id,
            level=msg_level,
            preprocessed_data_id=preprocessed_data_id,
        )
        self.render("vamps_submission.html", **page_args)
Example #21
0
 def get(self, message="", msg_level=None):
     """Render the list of studies page

     This is a generator: it yields a Task wrapping ``_get_all_emails`` and
     continues with the value sent back (presumably driven by a tornado
     coroutine/engine decorator that is not visible here - confirm).

     Parameters
     ----------
     message : str, optional
         The message to show in the page
     msg_level : str, optional
         The level of the message
     """
     other_emails = yield Task(self._get_all_emails)
     # Drop the id of the current user from the retrieved emails
     other_emails.remove(self.current_user.id)

     # Sample template metadata headers followed by the "study" table columns
     sample_headers = SampleTemplate.metadata_headers()
     avail_meta = sample_headers + get_table_cols("study")

     self.render('list_studies.html',
                 availmeta=avail_meta,
                 all_emails_except_current=other_emails,
                 message=message,
                 msg_level=msg_level)
 def get(self, message="", msg_level=None):
     """Show the studies list with the metadata available for searching

     Written as a generator that yields a Task for ``_get_all_emails`` and
     resumes with its result (the decorator driving it is not visible in
     this view).

     Parameters
     ----------
     message : str, optional
         Message passed to the template
     msg_level : str, optional
         Level of the message passed to the template
     """
     emails = yield Task(self._get_all_emails)
     # The current user is excluded from the list
     emails.remove(self.current_user.id)

     headers = SampleTemplate.metadata_headers()
     study_cols = get_table_cols("study")
     page_args = dict(availmeta=headers + study_cols,
                      all_emails_except_current=emails,
                      message=message,
                      msg_level=msg_level)
     self.render('list_studies.html', **page_args)
Example #23
0
def sample_template_summary_get_req(study_id, user):
    """Summarizes the values of each metadata column of the sample template

    Parameters
    ----------
    study_id: int
        The study to retrieve the sample information summary
    user: qiita_db.user
        The user performing the request

    Returns
    -------
    dict of {str: object}
        Keys are metadata categories and the values are list of tuples. Each
        tuple is an observed value in the category and the number of times
        it's seen. The tuples are naturally sorted by the observed value.

    Raises
    ------
    HTTPError
        404 If the sample template doesn't exist
    """
    # Fails if the user can't access the study or if the sample template
    # doesn't exist
    sample_template_checks(study_id, user, check_exists=True)

    df = SampleTemplate(study_id).to_dataframe()

    # The study_id column, if present, is not part of the summary
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    def _sort_key(value):
        # NOTE(review): `unicode` is the Python 2 builtin; this needs to be
        # revisited if the module has to run on Python 3
        return unicode(value, errors='ignore')

    summary = {}
    for column in df.columns:
        counts = df[column].value_counts()
        ordered_values = natsorted(counts.index, key=_sort_key)
        summary[str(column)] = [(str(value), counts[value])
                                for value in ordered_values]

    return summary
Example #24
0
def sample_template_summary_get_req(study_id, user):
    """Returns, for each metadata column of the sample template, its
    observed values and how often each one appears

    Parameters
    ----------
    study_id: int
        The study to retrieve the sample information summary
    user: qiita_db.user
        The user performing the request

    Returns
    -------
    dict of {str: object}
        Keys are metadata categories and the values are list of tuples. Each
        tuple is an observed value in the category and the number of times
        it's seen.

    Raises
    ------
    HTTPError
        404 If the sample template doesn't exist
    """
    # Check if the current user has access to the study and if the sample
    # template exists
    sample_template_checks(study_id, user, check_exists=True)

    template = SampleTemplate(study_id)
    frame = template.to_dataframe()

    # Drop the study_id column if it exists
    has_study_id = 'study_id' in frame.columns
    if has_study_id:
        frame.drop('study_id', axis=1, inplace=True)

    result = {}
    for col in frame.columns:
        counts = frame[col].value_counts()
        # Values are listed in natural order. NOTE(review): `unicode` only
        # exists on Python 2
        in_order = natsorted(counts.index,
                             key=lambda x: unicode(x, errors='ignore'))
        pairs = []
        for observed in in_order:
            pairs.append((str(observed), counts[observed]))
        result[str(col)] = pairs

    return result
Example #25
0
def update_sample_template(study_id, fp):
    """Extends and updates a sample template from a template file

    The file is removed once it has been applied successfully.

    Parameters
    ----------
    study_id : int
        Study id whose template is going to be updated
    fp : str
        The file path to the template file

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}. status is
        'success', 'warning' (message holds the warnings raised while
        updating) or 'danger' (message holds the error).
    """
    import warnings
    from os import remove
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_db.metadata_template.sample_template import SampleTemplate

    result = {'status': 'success', 'message': ''}

    try:
        with warnings.catch_warnings(record=True) as caught:
            template = SampleTemplate(study_id)
            new_metadata = load_template_to_dataframe(fp)
            # Add whatever is new first, then overwrite the changed values
            template.extend(new_metadata)
            template.update(new_metadata)
            remove(fp)

            # Report each distinct warning once. This is discarded if an
            # exception is raised
            if caught:
                unique_warnings = set(str(w.message) for w in caught)
                result['message'] = '\n'.join(unique_warnings)
                result['status'] = 'warning'
    except Exception as e:
        result = {'status': 'danger', 'message': str(e)}

    return result
    def display_template(self, study, user, msg, msg_level, full_access,
                         top_tab=None, sub_tab=None, prep_tab=None):
        """Renders the study description page

        Parameters
        ----------
        study : Study
            The study to display
        user : User
            The user requesting the page
        msg : str
            Message to show to the user; newlines are turned into <br/>
        msg_level : str
            Level of the message
        full_access : bool
            Forwarded to the template as-is
        top_tab, sub_tab, prep_tab : optional
            Tab selection forwarded to the template as-is
        """
        study_status = study.status
        user_level = user.level
        has_template = SampleTemplate.exists(study.id)

        # Approval is only possible when a sample template exists and it is
        # not missing any of the 'qiita_main' restricted columns
        allow_approval = False
        approval_deny_msg = ""
        if has_template:
            missing = SampleTemplate(study.id).check_restrictions(
                [SAMPLE_TEMPLATE_COLUMNS['qiita_main']])
            allow_approval = not len(missing)
            approval_deny_msg = (
                "Processed data approval request is disabled due to missing "
                "columns in the sample template: %s" % ', '.join(missing))

        # Only non-admin users looking at a public study are denied the
        # edit button
        show_edit_btn = not (study_status == 'public' and
                             user_level != 'admin')

        template_args = {
            # Make the message suitable for html
            'message': msg.replace('\n', "<br/>"),
            'level': msg_level,
            'study': study,
            'study_title': study.title,
            'study_alias': study.info['study_alias'],
            'show_edit_btn': show_edit_btn,
            'show_data_tabs': has_template,
            'full_access': full_access,
            'allow_approval': allow_approval,
            'approval_deny_msg': approval_deny_msg,
            'top_tab': top_tab,
            'sub_tab': sub_tab,
            'prep_tab': prep_tab,
        }
        self.render('study_description.html', **template_args)
Example #27
0
    def test_get_lat_longs_EMP_portal(self):
        """get_lat_longs only reports the samples of the active portal"""
        # A brand new study that lives in the EMP portal
        study_info = {
            'timeseries_type_id': 1,
            'lab_person_id': None,
            'principal_investigator_id': 3,
            'metadata_complete': False,
            'mixs_compliant': True,
            'study_description': 'desc',
            'study_alias': 'alias',
            'study_abstract': 'abstract'}
        owner = User('*****@*****.**')
        emp_study = Study.create(owner, 'test_study_1', efo=[1],
                                 info=study_info)
        Portal('EMP').add_studies([emp_study.id])

        # A single sample with known coordinates
        sample_values = {
            'physical_specimen_location': 'location1',
            'physical_specimen_remaining': True,
            'dna_extracted': True,
            'sample_type': 'type1',
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 4',
            'str_column': 'Value for sample 4',
            'int_column': 4,
            'latitude': 42.42,
            'longitude': 41.41,
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens'}
        metadata = pd.DataFrame.from_dict({'my.sample': sample_values},
                                          orient='index')
        SampleTemplate.create(metadata, emp_study)

        # Switch to the EMP portal before querying
        qiita_config.portal = 'EMP'

        expected = [[42.42, 41.41]]
        self.assertItemsEqual(get_lat_longs(), expected)
Example #28
0
def sample_template_filepaths_get_req(study_id, user_id):
    """Lists the filepaths attached to the sample template of a study

    Parameters
    ----------
    study_id : int
        The current study object id
    user_id : str
        The current user object id

    Returns
    -------
    dict
        On success
        {'status': 'success',
         'message': '',
         'filepaths': filepaths}
        where filepaths is whatever SampleTemplate.get_filepaths() returns
        (documented upstream as a list of (id, URL) tuples).
        If the template does not exist or the user has no access, the
        dict produced by the failing check is returned instead.
    """
    template_check = _check_sample_template_exists(int(study_id))
    if template_check['status'] != 'success':
        return template_check

    denied = check_access(study_id, user_id)
    if denied:
        return denied

    # Only the instantiation is guarded; errors raised while retrieving
    # the filepaths propagate to the caller
    try:
        sample_template = SampleTemplate(int(study_id))
    except QiitaDBUnknownIDError as e:
        return {'status': 'error', 'message': str(e)}
    else:
        return {'status': 'success',
                'message': '',
                'filepaths': sample_template.get_filepaths()}
Example #29
0
def sample_template_overview_handler_get_request(study_id, user):
    """Collects the information shown in the sample template overview

    Parameters
    ----------
    study_id : int
        The study whose sample template overview is requested
    user : User
        The user performing the request

    Returns
    -------
    dict
        {'exists': whether the sample template exists,
         'uploaded_files': uploaded files ending in 'txt' or 'tsv',
         'data_types': sorted data types (only when there is no template),
         'user_can_edit': whether the user can edit the study,
         'job': id of the job stored for the sample template, or None,
         'download_id': id of the current sample template file, or None,
         'old_files': basenames of the previous sample template files,
         'num_samples': number of samples in the template,
         'num_columns': number of categories in the template}
    """
    # Check if the current user has access to the sample template
    sample_template_checks(study_id, user)

    template_exists = SampleTemplate.exists(study_id)

    # Always reported: the uploaded files that could be a sample template
    candidate_files = []
    for _, fname in get_files_from_uploads_folders(study_id):
        if fname.endswith(('txt', 'tsv')):
            candidate_files.append(fname)

    # Always reported: the job stored in redis for this study, if any
    stored_job = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id)
    job_id = loads(stored_job)['job_id'] if stored_job else None

    # Defaults for the values that depend on the template existing or not
    data_types = []
    download_id = None
    previous_files = []
    sample_count = 0
    column_count = 0

    if not template_exists:
        # The data types are needed in case the user uploads a QIIME
        # mapping file
        data_types = sorted(data_types_get_req()['data_types'])
    else:
        template = SampleTemplate(study_id)
        template_files = template.get_filepaths()
        # The first entry is the current sample template file; only its id
        # is needed so the user can download it
        current_file = template_files.pop(0)
        download_id = current_file[0]
        # The remaining entries are older files, reported by basename
        previous_files = [basename(path) for _, path in template_files]
        # Count the samples one by one to avoid building a throwaway list
        for _ in template.keys():
            sample_count += 1
        column_count = len(template.categories())

    return {'exists': template_exists,
            'uploaded_files': candidate_files,
            'data_types': data_types,
            'user_can_edit': Study(study_id).can_edit(user),
            'job': job_id,
            'download_id': download_id,
            'old_files': previous_files,
            'num_samples': sample_count,
            'num_columns': column_count}
Example #30
0
    def test_delete_sample_template(self):
        """The delete_sample_template job fails or succeeds as expected"""
        # Error case: study 1 still has prep templates attached
        failing_job = self._create_job('delete_sample_template', {'study': 1})
        private_task(failing_job.id)
        self.assertEqual(failing_job.status, 'error')
        self.assertIn("Sample template cannot be erased because there are "
                      "prep templates associated", failing_job.log.msg)

        # Success case: a fresh study with just a sample template
        study_info = {
            "timeseries_type_id": '1',
            "metadata_complete": 'true',
            "mixs_compliant": 'true',
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "study_alias": "TDST",
            "study_description": "Test delete sample template",
            "study_abstract": "Test delete sample template",
            "principal_investigator_id": StudyPerson(1)}
        new_study = Study.create(User('*****@*****.**'),
                                 "Delete Sample Template test", study_info)

        sample_values = {
            'physical_specimen_location': 'location1',
            'physical_specimen_remaining': 'true',
            'dna_extracted': 'true',
            'sample_type': 'type1',
            'collection_timestamp': '2014-05-29 12:24:15',
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'latitude': '42.42',
            'longitude': '41.41',
            'taxon_id': '9606',
            'scientific_name': 'h**o sapiens'}
        sample_metadata = pd.DataFrame.from_dict(
            {'Sample1': sample_values}, orient='index', dtype=str)
        SampleTemplate.create(sample_metadata, new_study)

        passing_job = self._create_job('delete_sample_template',
                                       {'study': new_study.id})
        private_task(passing_job.id)
        self.assertEqual(passing_job.status, 'success')
        self.assertFalse(SampleTemplate.exists(new_study.id))
Example #31
0
def sample_template_filepaths_get_req(study_id, user_id):
    """Returns every filepath attached to a study's sample template

    Parameters
    ----------
    study_id : int
        The current study object id
    user_id : str
        The current user object id

    Returns
    -------
    dict
        {'status': status,
         'message': msg,
         'filepaths': filepaths}
        'filepaths' is only present on success and holds the value of
        SampleTemplate.get_filepaths() (documented upstream as
        [(id, URL), ...]). When the existence or access check fails, the
        dict returned by that check is handed back unchanged.
    """
    existence = _check_sample_template_exists(int(study_id))
    if existence['status'] != 'success':
        return existence

    no_access = check_access(study_id, user_id)
    if no_access:
        return no_access

    response = {'status': 'error'}
    try:
        st = SampleTemplate(int(study_id))
    except QiitaDBUnknownIDError as e:
        response['message'] = str(e)
        return response

    # get_filepaths is deliberately called outside the try block
    fps = st.get_filepaths()
    return {'status': 'success', 'message': '', 'filepaths': fps}
Example #32
0
def sample_template_get_req(samp_id, user_id):
    """Returns the full sample template as a dictionary

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': msg,
         'template': dict of {str: {str: object, ...}, ...}}

        template is a dictionary where the keys are the metadata samples
        and the values are a dictionary of column and value.
        Format {sample: {column: value, ...}, ...}
        If the template does not exist or the user has no access, the
        dict produced by the failing check is returned instead.
    """
    template_id = int(samp_id)

    existence = _check_sample_template_exists(template_id)
    if existence['status'] != 'success':
        return existence

    denied = check_access(template_id, user_id)
    if denied:
        return denied

    sample_template = SampleTemplate(template_id)
    # Access is verified again, this time against the study id reported by
    # the template itself
    denied = check_access(sample_template.study_id, user_id)
    if denied:
        return denied

    as_dict = sample_template.to_dataframe().to_dict(orient='index')
    return {'status': 'success', 'message': '', 'template': as_dict}
Example #33
0
    def update_sample_template(self, study, user, callback):
        """Update a sample template from the POST method

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done. It receives the tuple
            (msg, msg_level, None, None, None)

        Raises
        ------
        HTTPError
            400 if the requested file is not located inside the study's
            uploads folder or if it does not exist
        """
        # Only needed for the containment check below
        from os.path import abspath, sep

        # If we are on this function, the argument "sample_template" must
        # defined. If not, let tornado raise its error
        sample_template = self.get_argument('sample_template')

        # Define here the message and message level in case of success
        msg = "The sample template '%s' has been updated" % sample_template
        msg_level = "success"
        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the sample template in the uploads folder
        study_uploads_dir = join(base_fp, str(study.id))
        fp_rsp = join(base_fp, str(study.id), sample_template)

        # "sample_template" comes straight from the request and the file is
        # removed further down: refuse anything ('../', absolute paths) that
        # resolves outside the uploads folder of this study
        if not abspath(fp_rsp).startswith(abspath(study_uploads_dir) + sep):
            raise HTTPError(400, "Invalid sample template file: %s"
                            % sample_template)

        if not exists(fp_rsp):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)
        try:
            with warnings.catch_warnings(record=True) as warns:
                # Add the new samples/columns first, then update the values
                # that changed, and finally remove the uploaded file
                st = SampleTemplate(study.id)
                df = load_template_to_dataframe(fp_rsp)
                st.extend(df)
                st.update(df)
                remove(fp_rsp)

                # join all the warning messages into one. Note that this info
                # will be ignored if an exception is raised
                if warns:
                    msg = '\n'.join(set(str(w.message) for w in warns))
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            msg = html_error_message % ('updating the sample template:',
                                        basename(fp_rsp), str(e))
            msg = convert_text_html(msg)
            msg_level = "danger"
        callback((msg, msg_level, None, None, None))
Example #34
0
def sample_template_category_get_req(category, samp_id, user_id):
    """Retrieves the value of every sample for a metadata category

    Parameters
    ----------
    category : str
        Metadata category to get values for
    samp_id : int or str typecastable to int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns information in the form
        {'status': str,
         'message': str,
         'values': dict of {str: object}}
        'values' is only present on success. status is 'error' when the
        category is not part of the sample template.
    """
    template_id = int(samp_id)

    existence = _check_sample_template_exists(template_id)
    if existence['status'] != 'success':
        return existence

    denied = check_access(samp_id, user_id)
    if denied:
        return denied

    sample_template = SampleTemplate(template_id)
    try:
        category_values = sample_template.get_category(category)
    except QiitaDBColumnError:
        error_msg = 'Category %s does not exist in sample template' % category
        return {'status': 'error', 'message': error_msg}

    return {'status': 'success', 'message': '', 'values': category_values}
Example #35
0
def sample_template_category_get_req(category, samp_id, user_id):
    """Returns the per-sample values of one sample template category

    Parameters
    ----------
    category : str
        Metadata category to get values for
    samp_id : int or str typecastable to int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        Returns information in the form
        {'status': str,
         'message': str,
         'values': dict of {str: object}}
        The 'values' key is omitted when the request fails.
    """
    check = _check_sample_template_exists(int(samp_id))
    if check['status'] != 'success':
        return check

    access_denied = check_access(samp_id, user_id)
    if access_denied:
        return access_denied

    response = {'status': 'success', 'message': ''}
    template = SampleTemplate(int(samp_id))
    try:
        response['values'] = template.get_category(category)
    except QiitaDBColumnError:
        # Unknown category: report it rather than raising
        response = {
            'status': 'error',
            'message': ('Category %s does not exist in sample template'
                        % category)}
    return response
Example #36
0
def delete_sample_template(study_id):
    """Deletes the sample template of a study

    Parameters
    ----------
    study_id : int
        Study id whose template is going to be deleted

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}. status is
        'success' with an empty message, or 'danger' with the text of the
        error raised while deleting.
    """
    from qiita_db.metadata_template.sample_template import SampleTemplate

    try:
        SampleTemplate.delete(study_id)
    except Exception as e:
        # Any failure is reported back instead of propagated
        return {'status': 'danger', 'message': str(e)}

    return {'status': 'success', 'message': ''}
Example #37
0
def delete_sample_template(study_id):
    """Removes a study's sample template and reports the outcome

    Parameters
    ----------
    study_id : int
        Study id whose template is going to be deleted

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}; 'danger' and
        the error text if the deletion raised, 'success' and an empty
        message otherwise.
    """
    from qiita_db.metadata_template.sample_template import SampleTemplate

    outcome = {'status': 'success', 'message': ''}
    try:
        SampleTemplate.delete(study_id)
    except Exception as e:
        # Errors are turned into a 'danger' response for the caller
        outcome['status'] = 'danger'
        outcome['message'] = str(e)

    return outcome
Example #38
0
def sample_template_get_req(samp_id, user_id):
    """Gets the full sample template in dictionary form

    Parameters
    ----------
    samp_id : int or int castable string
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict of objects
        {'status': status,
         'message': msg,
         'template': dict of {str: {str: object, ...}, ...}}

        template is a dictionary where the keys are the metadata samples
        and the values are a dictionary of column and value.
        Format {sample: {column: value, ...}, ...}
        The 'template' key is omitted when a check fails; in that case the
        dict returned by the failing check is handed back unchanged.
    """
    st_id = int(samp_id)

    exists_check = _check_sample_template_exists(st_id)
    if exists_check['status'] != 'success':
        return exists_check

    # Access is checked twice: first with the requested id, then with the
    # study id that the instantiated template reports
    no_access = check_access(st_id, user_id)
    if no_access:
        return no_access
    st = SampleTemplate(st_id)
    no_access = check_access(st.study_id, user_id)
    if no_access:
        return no_access

    response = {'status': 'success', 'message': ''}
    response['template'] = st.to_dataframe().to_dict(orient='index')
    return response
Example #39
0
def _check_sample_template_exists(samp_id):
    """Reports whether a sample template exists in the system

    Parameters
    ----------
    samp_id : int or str castable to int
        SampleTemplate id to check

    Returns
    -------
    dict
        {'status': status,
         'message': msg}
        status is 'success' (empty message) when the template exists and
        'error' otherwise.
    """
    template_id = int(samp_id)
    if SampleTemplate.exists(template_id):
        return {'status': 'success', 'message': ''}

    error_msg = 'Sample template %d does not exist' % template_id
    return {'status': 'error', 'message': error_msg}
Example #40
0
def _check_sample_template_exists(samp_id):
    """Checks that the given sample template is present in the system

    Parameters
    ----------
    samp_id : int or str castable to int
        SampleTemplate id to check

    Returns
    -------
    dict
        {'status': status,
         'message': msg}
        'success' with an empty message if the template exists, 'error'
        with an explanatory message if it does not.
    """
    st_id = int(samp_id)
    found = SampleTemplate.exists(st_id)

    status = 'success' if found else 'error'
    message = '' if found else 'Sample template %d does not exist' % st_id
    return {'status': status, 'message': message}
Example #41
0
def update_sample_template(study_id, fp):
    """Applies a template file to the sample template of a study

    The template is first extended and then updated with the contents of
    the file, which is deleted afterwards.

    Parameters
    ----------
    study_id : int
        Study id whose template is going to be updated
    fp : str
        The file path to the template file

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}. 'success' has
        an empty message, 'warning' carries the warnings raised during the
        update and 'danger' carries the error text.
    """
    import warnings
    from os import remove
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_db.metadata_template.sample_template import SampleTemplate

    status, message = 'success', ''

    try:
        with warnings.catch_warnings(record=True) as recorded:
            sample_template = SampleTemplate(study_id)
            contents = load_template_to_dataframe(fp)
            sample_template.extend(contents)
            sample_template.update(contents)
            # The uploaded file is no longer needed once applied
            remove(fp)

            # Collapse duplicated warnings into a single message. Note that
            # this info is dropped if an exception is raised
            if recorded:
                distinct = set(str(w.message) for w in recorded)
                message = '\n'.join(distinct)
                status = 'warning'
    except Exception as e:
        status, message = 'danger', str(e)

    return {'status': status, 'message': message}
Example #42
0
    def test_delete_sample_or_column(self):
        """delete_sample_or_column handles samples, columns and bad input"""
        def run_delete(obj_class, obj_id, sample_or_col, name):
            # Create the private job and execute it right away
            new_job = self._create_job(
                'delete_sample_or_column',
                {'obj_class': obj_class,
                 'obj_id': obj_id,
                 'sample_or_col': sample_or_col,
                 'name': name})
            private_task(new_job.id)
            return new_job

        sample_template = SampleTemplate(1)

        # Delete a sample template column
        job = run_delete('SampleTemplate', 1, 'columns', 'season_environment')
        self.assertEqual(job.status, 'success')
        self.assertNotIn('season_environment', sample_template.categories())

        # Delete a sample template sample - need to add one
        # sample that we will remove
        extra_sample = pd.DataFrame.from_dict(
            {'Sample1': {'taxon_id': '9606'}}, orient='index', dtype=str)
        npt.assert_warns(QiitaDBWarning, sample_template.extend, extra_sample)
        self.assertIn('1.Sample1', sample_template.keys())
        job = run_delete('SampleTemplate', 1, 'samples', '1.Sample1')
        self.assertEqual(job.status, 'success')
        self.assertNotIn('1.Sample1', sample_template.keys())

        # Delete a prep template column
        prep_template = PrepTemplate(1)
        job = run_delete('PrepTemplate', 1, 'columns', 'target_subfragment')
        self.assertEqual(job.status, 'success')
        self.assertNotIn('target_subfragment', prep_template.categories())

        # Delete a prep template sample, using a brand new prep template
        prep_metadata = pd.DataFrame.from_dict(
            {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA',
                               'primer': 'GTGCCAGCMGCCGCGGTAA'},
             '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC',
                               'primer': 'GTGCCAGCMGCCGCGGTAA'}},
            orient='index', dtype=str)
        prep_template = npt.assert_warns(
            QiitaDBWarning, PrepTemplate.create, prep_metadata, Study(1),
            "16S")
        job = run_delete('PrepTemplate', prep_template.id, 'samples',
                         '1.SKD8.640184')
        self.assertNotIn('1.SKD8.640184', prep_template.keys())

        # Test exceptions: unknown object class
        job = run_delete('UnknownClass', 1, 'columns', 'column')
        self.assertEqual(job.status, 'error')
        self.assertIn(
            'Unknown value "UnknownClass". Choose between '
            '"SampleTemplate" and "PrepTemplate"', job.log.msg)

        # Test exceptions: unknown deletion target
        job = run_delete('SampleTemplate', 1, 'unknown', 'column')
        self.assertEqual(job.status, 'error')
        self.assertIn(
            'Unknown value "unknown". Choose between "samples" '
            'and "columns"', job.log.msg)
Example #43
0
    def get(self):
        """Prepares a download of public data or of an information file

        Request arguments (all optional at the tornado level):
        data : one of 'raw', 'biom', 'sample_information' or
            'prep_information'
        study_id : id of the study; required for 'raw', 'biom' and
            'sample_information'
        prep_id : id of the preparation; required for 'prep_information'
        data_type : restricts 'raw'/'biom' downloads to one data type; not
            allowed when requesting an information file

        Raises
        ------
        HTTPError
            422 if the arguments are missing, invalid or inconsistent, if
            the requested object does not exist, if raw data access is
            not allowed or if there is nothing to download
            404 if the study is not public
        """
        data = self.get_argument("data", None)
        study_id = self.get_argument("study_id",  None)
        prep_id = self.get_argument("prep_id",  None)
        data_type = self.get_argument("data_type",  None)
        dtypes = get_data_types().keys()

        templates = ['sample_information', 'prep_information']
        valid_data = ['raw', 'biom'] + templates

        to_download = []
        # NOTE: this first check already rejects requests where both
        # study_id and prep_id are missing, so no later branch needs to
        # test for that combination again
        if data is None or (study_id is None and prep_id is None) or \
                data not in valid_data:
            raise HTTPError(422, reason='You need to specify both data (the '
                            'data type you want to download - %s) and '
                            'study_id or prep_id' % '/'.join(valid_data))
        elif data_type is not None and data_type not in dtypes:
            raise HTTPError(422, reason='Not a valid data_type. Valid types '
                            'are: %s' % ', '.join(dtypes))
        elif data in templates:
            if data_type is not None:
                raise HTTPError(422, reason='If requesting an information '
                                'file you cannot specify the data_type')
            elif prep_id is not None and data == 'prep_information':
                fname = 'preparation_information_%s' % prep_id
                prep_id = int(prep_id)
                try:
                    infofile = PrepTemplate(prep_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Preparation information does not exist')
            elif study_id is not None and data == 'sample_information':
                fname = 'sample_information_%s' % study_id
                study_id = int(study_id)
                try:
                    infofile = SampleTemplate(study_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Sample information does not exist')
            else:
                raise HTTPError(422, reason='Review your parameters, not a '
                                'valid combination')
            # Files are sorted in descending order and only the first one
            # is offered for download
            x = retrieve_filepaths(
                infofile._filepath_table, infofile._id_column, infofile.id,
                sort='descending')[0]

            # Strip the base directory (and the separator that follows it)
            # from the filepath
            basedir_len = len(get_db_files_base_dir()) + 1
            fp = x['fp'][basedir_len:]
            to_download.append((fp, fp, str(x['checksum']), str(x['fp_size'])))
            self._write_nginx_file_list(to_download)

            zip_fn = '%s_%s.zip' % (
                fname, datetime.now().strftime('%m%d%y-%H%M%S'))
            self._set_nginx_headers(zip_fn)
        else:
            # raw/biom downloads are resolved through the study. A request
            # carrying only prep_id passes the first check above, so without
            # this guard int(None) below would raise a TypeError
            if study_id is None:
                raise HTTPError(422, reason='You need to specify study_id '
                                'to download raw or biom data')
            study_id = int(study_id)
            try:
                study = Study(study_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(422, reason='Study does not exist')
            else:
                public_raw_download = study.public_raw_download
                if study.status != 'public':
                    raise HTTPError(404, reason='Study is not public. If this '
                                    'is a mistake contact: '
                                    '*****@*****.**')
                elif data == 'raw' and not public_raw_download:
                    raise HTTPError(422, reason='No raw data access. If this '
                                    'is a mistake contact: '
                                    '*****@*****.**')
                else:
                    # raw data: the artifacts without parents
                    artifacts = [a for a in study.artifacts(dtype=data_type)
                                 if not a.parents]
                    # bioms
                    if data == 'biom':
                        artifacts = study.artifacts(
                            dtype=data_type, artifact_type='BIOM')
                    # Only public artifacts are ever offered for download
                    for a in artifacts:
                        if a.visibility != 'public':
                            continue
                        to_download.extend(self._list_artifact_files_nginx(a))

                if not to_download:
                    raise HTTPError(422, reason='Nothing to download. If '
                                    'this is a mistake contact: '
                                    '*****@*****.**')
                else:
                    self._write_nginx_file_list(to_download)

                    zip_fn = 'study_%d_%s_%s.zip' % (
                        study_id, data, datetime.now().strftime(
                            '%m%d%y-%H%M%S'))

                    self._set_nginx_headers(zip_fn)

        self.finish()
    def render(self, study):
        """Renders the study information tab of a study

        Parameters
        ----------
        study : Study
            The study whose information is displayed

        Returns
        -------
        The result of rendering
        "study_description_templates/study_information_tab.html" through
        self.render_string
        """
        info = study.info
        study_identifier = study.id
        abstract = info['study_abstract']
        description = info['study_description']

        # One link per available DOI / PubMed id, joined in a single string
        publication_links = []
        for doi, pmid in study.publications:
            if doi is not None:
                publication_links.append(doi_linkifier([doi]))
            if pmid is not None:
                publication_links.append(pubmed_linkifier([pmid]))
        publications = ", ".join(publication_links)

        pi = StudyPerson(info['principal_investigator_id'])
        pi_link = study_person_linkifier((pi.email, pi.name))
        samples_promised = info['number_samples_promised']
        samples_collected = info['number_samples_collected']
        metadata_complete = info['metadata_complete']

        # Data types sorted by the second element of each (key, value) pair
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

        # Files in the uploads folder that the user can choose as sample
        # template: only those ending in 'txt' or 'tsv'
        uploaded = get_files_from_uploads_folders(str(study.id))
        files = [fname for _, fname in uploaded
                 if fname.endswith(('txt', 'tsv'))]

        # All the filepaths of the sample template, or an empty list if the
        # study does not have one
        sample_templates = []
        if SampleTemplate.exists(study.id):
            sample_templates = SampleTemplate(study.id).get_filepaths()

        # Check if the request came from a local source
        is_local_request = is_localhost(self.request.headers['host'])

        # The user can choose the sample template only if the study is
        # sandboxed or the current user is an admin
        show_select_sample = (
            study.status == 'sandbox' or self.current_user.level == 'admin')

        # EBI information; the accession is turned into a link when present
        ebi_status = study.ebi_submission_status
        ebi_accession = study.ebi_study_accession
        if ebi_accession:
            ebi_accession = EBI_LINKIFIER.format(ebi_accession)

        return self.render_string(
            "study_description_templates/study_information_tab.html",
            abstract=abstract,
            description=description,
            id=study_identifier,
            publications=publications,
            principal_investigator=pi_link,
            number_samples_promised=samples_promised,
            number_samples_collected=samples_collected,
            metadata_complete=metadata_complete,
            show_select_sample=show_select_sample,
            files=files,
            study_id=study.id,
            sample_templates=sample_templates,
            is_local_request=is_local_request,
            data_types=data_types,
            ebi_status=ebi_status,
            ebi_accession=ebi_accession)
Example #45
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing

        Creates a study with three samples, attaches a sample template and a
        16S prep template to it, writes a demultiplexed sequence file under
        ``self.temp_dir`` and registers it as a "Demultiplexed" artifact.

        Returns
        -------
        object
            The value returned by ``Artifact.create`` for the new
            demultiplexed data
        """
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        # NOTE(review): the e-mail literal below looks masked; confirm the
        # intended test user before relying on this fixture
        study = Study.create(User('*****@*****.**'), "Test EBI study", info)

        # Sample information: the three samples only differ in the day of
        # collection and in their description
        sample_md = {
            'Sample%d' % num: {
                'collection_timestamp': datetime(2015, 6, num, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                # taxon 9606; the previous literal was a masked string
                'scientific_name': 'homo sapiens',
                'Description': 'Test Sample %d' % num
            }
            for num in (1, 2, 3)
        }
        metadata = pd.DataFrame.from_dict(sample_md,
                                          orient='index',
                                          dtype=str)
        SampleTemplate.create(metadata, study)

        # Prep information: the three samples only differ in the barcode and
        # in the experiment design description
        barcodes = ('CGTAGAGCTCTC', 'CGTAGAGCTCTA', 'CGTAGAGCTCTT')
        prep_md = {
            'Sample%d' % num: {
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': barcode,
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description': "Random value %d" % num
            }
            for num, barcode in enumerate(barcodes, 1)
        }
        metadata = pd.DataFrame.from_dict(prep_md,
                                          orient='index',
                                          dtype=str)
        pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')

        # Write the example sequences and convert them to the demux format
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, 'w') as f:
            to_hdf5(fna_fp, f)

        ppd = Artifact.create([(demux_fp, 6)],
                              "Demultiplexed",
                              prep_template=pt)

        return ppd
Example #46
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing

        Creates a study with three samples, attaches a sample template and a
        16S prep template to it, writes a demultiplexed sequence file under
        ``self.temp_dir`` and registers it as a "Demultiplexed" artifact.

        Returns
        -------
        object
            The value returned by ``Artifact.create`` for the new
            demultiplexed data
        """
        # ignoring warnings generated when adding templates
        simplefilter("ignore")
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        # NOTE(review): the e-mail literal below looks masked; confirm the
        # intended test user before relying on this fixture
        study = Study.create(User('*****@*****.**'), "Test EBI study", [1], info)

        # Sample information: only the collection day and the description
        # change from one sample to the next
        sample_md = {}
        for num in (1, 2, 3):
            sample_md['Sample%d' % num] = {
                'collection_timestamp': datetime(2015, 6, num, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                # taxon 9606; the previous literal was a masked string
                'scientific_name': 'homo sapiens',
                'Description': 'Test Sample %d' % num}
        metadata = pd.DataFrame.from_dict(sample_md, orient='index',
                                          dtype=str)
        SampleTemplate.create(metadata, study)

        # Prep information: only the barcode and the experiment design
        # description change from one sample to the next
        prep_md = {}
        barcodes = ('CGTAGAGCTCTC', 'CGTAGAGCTCTA', 'CGTAGAGCTCTT')
        for num, barcode in enumerate(barcodes, 1):
            prep_md['Sample%d' % num] = {
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': barcode,
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description': "Random value %d" % num}
        metadata = pd.DataFrame.from_dict(prep_md, orient='index',
                                          dtype=str)
        pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')

        # Write the example sequences and convert them to the demux format
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, 'w') as f:
            to_hdf5(fna_fp, f)

        ppd = Artifact.create(
            [(demux_fp, 6)], "Demultiplexed", prep_template=pt)

        return ppd
Example #47
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing

        Creates a study with three samples, attaches a sample template and a
        16S prep template to it, writes a demultiplexed sequence file under
        ``self.temp_dir`` and registers it as a "Demultiplexed" artifact.

        Returns
        -------
        object
            The value returned by ``Artifact.create`` for the new
            demultiplexed data
        """
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1),
        }
        # NOTE(review): the e-mail literal below looks masked; confirm the
        # intended test user before relying on this fixture
        study = Study.create(User("*****@*****.**"), "Test EBI study", [1], info)

        # Sample information: only the collection day and the description
        # change from one sample to the next
        sample_md = {
            "Sample%d" % num: {
                "collection_timestamp": datetime(2015, 6, num, 7, 0, 0),
                "physical_specimen_location": "location1",
                "taxon_id": 9606,
                # taxon 9606; the previous literal was a masked string
                "scientific_name": "homo sapiens",
                "Description": "Test Sample %d" % num,
            }
            for num in (1, 2, 3)
        }
        metadata = pd.DataFrame.from_dict(sample_md, orient="index", dtype=str)
        SampleTemplate.create(metadata, study)

        # Prep information: only the barcode and the experiment design
        # description change from one sample to the next
        barcodes = ("CGTAGAGCTCTC", "CGTAGAGCTCTA", "CGTAGAGCTCTT")
        prep_md = {
            "Sample%d" % num: {
                "primer": "GTGCCAGCMGCCGCGGTAA",
                "barcode": barcode,
                "center_name": "KnightLab",
                "platform": "ILLUMINA",
                "instrument_model": "Illumina MiSeq",
                "library_construction_protocol": "Protocol ABC",
                "experiment_design_description": "Random value %d" % num,
            }
            for num, barcode in enumerate(barcodes, 1)
        }
        metadata = pd.DataFrame.from_dict(prep_md, orient="index", dtype=str)
        pt = PrepTemplate.create(metadata, study, "16S", "Metagenomics")

        # Write the example sequences and convert them to the demux format
        fna_fp = join(self.temp_dir, "seqs.fna")
        demux_fp = join(self.temp_dir, "demux.seqs")
        with open(fna_fp, "w") as f:
            f.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)

        ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed", prep_template=pt)

        return ppd
Example #48
0
    def test_sample_template_handler_patch_request(self):
        """Check the error paths and the 'remove'/'replace' operations

        The error cases are expected to raise HTTPError with a specific
        message. The two success cases check that a job is registered in
        redis under the sample template key and wait for it to finish.
        """
        user = User('*****@*****.**')

        # Test user doesn't have access
        with self.assertRaisesRegexp(HTTPError,
                                     'User does not have access to study'):
            sample_template_handler_patch_request(
                User('*****@*****.**'), "remove",
                "/1/columns/season_environment/")

        # Test study doesn't exist
        with self.assertRaisesRegexp(HTTPError, 'Study does not exist'):
            sample_template_handler_patch_request(
                user, "remove", "/10000/columns/season_environment/")

        # Test sample template doesn't exist
        new_study = self._create_study('Patching test')
        with self.assertRaisesRegexp(HTTPError,
                                     "Study %s doesn't have sample information"
                                     % new_study.id):
            sample_template_handler_patch_request(
                user, "remove", "/%s/columns/season_environment/"
                                % new_study.id)

        # Test wrong operation value
        with self.assertRaisesRegexp(
                HTTPError, 'Operation add not supported. Current supported '
                           'operations: remove.'):
            sample_template_handler_patch_request(
                user, 'add', '/1/columns/season_environment')

        # Test wrong path parameter < 2
        with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
            sample_template_handler_patch_request(user, 'ignored', '1')

        # TESTS FOR OPERATION: remove
        # Test wrong path parameter
        with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
            sample_template_handler_patch_request(
                user, 'remove', '/1/season_environment/')

        # Add sample information to the new study so we can delete one column
        # without affecting the other tests
        md = pd.DataFrame.from_dict(
            {'Sample1': {'col1': 'val1', 'col2': 'val2'}},
            orient='index', dtype=str)
        st = SampleTemplate.create(md, new_study)

        # Test success
        obs = sample_template_handler_patch_request(
            user, "remove", "/%s/columns/col2/"
                            % new_study.id)
        # Materialize the keys: on Python 3 a dict view never compares equal
        # to a list, so the bare obs.keys() comparison would always fail
        self.assertEqual(list(obs.keys()), ['job'])
        job_info = r_client.get('sample_template_%s' % new_study.id)
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])
        self.assertNotIn('col2', st.categories())

        # TESTS FOR OPERATION: replace
        # Test incorrect path parameter with replace
        with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
            sample_template_handler_patch_request(user, "replace", "/1/")

        # Test attribute not found
        with self.assertRaisesRegexp(HTTPError, 'Attribute name not found'):
            sample_template_handler_patch_request(user, "replace", "/1/name")

        # Test missing value
        with self.assertRaisesRegexp(HTTPError,
                                     'Value is required when updating sample '
                                     'information'):
            sample_template_handler_patch_request(user, "replace", "/1/data")

        # Test file doesn't exist
        with self.assertRaisesRegexp(HTTPError, 'Filepath not found'):
            sample_template_handler_patch_request(user, "replace", "/1/data",
                                                  req_value='DoesNotExist')

        # Test success
        obs = sample_template_handler_patch_request(
            user, "replace", "/1/data", req_value='uploaded_file.txt')
        self.assertEqual(list(obs.keys()), ['job'])
        job_info = r_client.get('sample_template_1')
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])
Example #49
0
def sample_template_summary_get_req(samp_id, user_id):
    """Summarizes the metadata columns of a sample template

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        If ``check_access`` returns something truthy, that value is returned
        untouched. Otherwise a dictionary with the keys:
        'status': always 'success'
        'message': always ''
        'num_samples', 'num_columns': shape of the template dataframe, or 0
        if the sample template does not exist
        'editable': truthy if the template is not being processed and, when
        the template exists, the user can edit its study
        'alert_type', 'alert_message': as given by
        ``get_sample_template_processing_status``
        'stats': {column: [(value, count), ...]} with the values of each
        column in natural sort order; empty if the template does not exist
    """
    denied = check_access(samp_id, user_id)
    if denied:
        return denied

    status = get_sample_template_processing_status(samp_id)
    processing, alert_type, alert_msg = status

    # Start from the summary of a non-existing template and fill it in later
    summary = {'status': 'success',
               'message': '',
               'num_samples': 0,
               'num_columns': 0,
               'editable': not processing,
               'alert_type': alert_type,
               'alert_message': alert_msg,
               'stats': {}}

    template_id = int(samp_id)
    if _check_sample_template_exists(template_id)['status'] != 'success':
        return summary

    template = SampleTemplate(template_id)
    df = template.to_dataframe()

    study = Study(template.study_id)
    summary['editable'] = study.can_edit(User(user_id)) and not processing

    # The counts are taken before the study_id column is removed
    summary['num_samples'] = df.shape[0]
    summary['num_columns'] = df.shape[1]

    # drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    stats = summary['stats']
    for column in df.columns:
        counts = df[column].value_counts()
        stats[str(column)] = [(str(value), counts[value])
                              for value in natsorted(counts.index)]

    return summary
Example #50
0
    def test_delete_sample_or_column(self):
        """Run the 'delete_sample_or_column' job on samples and columns

        Covers sample and prep templates, plus the two error cases (unknown
        object class and unknown sample/column selector).
        """
        def run_delete(obj_class, obj_id, sample_or_col, name):
            # Create the job, execute it via private_task and hand it back
            new_job = self._create_job(
                'delete_sample_or_column',
                {'obj_class': obj_class, 'obj_id': obj_id,
                 'sample_or_col': sample_or_col, 'name': name})
            private_task(new_job.id)
            return new_job

        sample_template = SampleTemplate(1)

        # Delete a sample template column
        job = run_delete('SampleTemplate', 1, 'columns', 'season_environment')
        self.assertEqual(job.status, 'success')
        self.assertNotIn('season_environment', sample_template.categories())

        # Delete a sample template sample - need to add one
        # sample that we will remove
        extra_sample = pd.DataFrame.from_dict(
            {'Sample1': {'taxon_id': '9606'}}, orient='index', dtype=str)
        npt.assert_warns(QiitaDBWarning, sample_template.extend, extra_sample)
        self.assertIn('1.Sample1', sample_template.keys())
        job = run_delete('SampleTemplate', 1, 'samples', '1.Sample1')
        self.assertEqual(job.status, 'success')
        self.assertNotIn('1.Sample1', sample_template.keys())

        # Delete a prep template column
        prep_template = PrepTemplate(1)
        job = run_delete('PrepTemplate', 1, 'columns', 'target_subfragment')
        self.assertEqual(job.status, 'success')
        self.assertNotIn('target_subfragment', prep_template.categories())

        # Delete a prep template sample
        primer = 'GTGCCAGCMGCCGCGGTAA'
        prep_md = pd.DataFrame.from_dict(
            {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA', 'primer': primer},
             '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC', 'primer': primer}},
            orient='index', dtype=str)
        prep_template = npt.assert_warns(
            QiitaDBWarning, PrepTemplate.create, prep_md, Study(1), "16S")
        job = run_delete('PrepTemplate', prep_template.id, 'samples',
                         '1.SKD8.640184')
        self.assertNotIn('1.SKD8.640184', prep_template.keys())

        # Test exceptions
        job = run_delete('UnknownClass', 1, 'columns', 'column')
        self.assertEqual(job.status, 'error')
        self.assertIn('Unknown value "UnknownClass". Choose between '
                      '"SampleTemplate" and "PrepTemplate"', job.log.msg)

        job = run_delete('SampleTemplate', 1, 'unknown', 'column')
        self.assertEqual(job.status, 'error')
        self.assertIn('Unknown value "unknown". Choose between "samples" '
                      'and "columns"', job.log.msg)
Example #51
0
        # a few notes: just getting the preps with duplicated values; ignoring
        # column 'sample_id' and tables 'study_sample', 'prep_template',
        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'sample_%%'
                        AND table_name NOT IN (
                            'prep_template', 'prep_template_sample')
                    GROUP BY table_name"""
        # note that we are looking for those columns with duplicated names in
        # the headers
        TRN.add(sql, [tuple(set(cols_sample))])
        for table, columns in viewitems(dict(TRN.execute_fetchindex())):
            # [1] the format is table_# so taking the #
            st = SampleTemplate(int(table.split('_')[1]))
            # getting just the columns of interest
            st_df = st.to_dataframe()[columns]
            # converting to datetime
            for col in columns:
                st_df[col] = st_df[col].apply(transform_date)
            st.update(st_df)

if cols_prep:
    with TRN:
        # a few notes: just getting the preps with duplicated values; ignoring
        # column 'sample_id' and tables 'study_sample', 'prep_template',
        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
Example #52
0
def sample_template_summary_get_req(samp_id, user_id):
    """Summarizes the metadata columns of a sample template

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        If ``check_access`` returns something truthy, that value is returned
        untouched. Otherwise a dictionary with the keys:
        'status': always 'success'
        'message': always ''
        'num_samples', 'num_columns': shape of the template dataframe, or 0
        if the sample template does not exist
        'editable': truthy if the template is not being processed and, when
        the template exists, the user can edit its study
        'alert_type', 'alert_message': outcome of the last job recorded in
        redis for this sample template, empty strings if there is none
        'stats': {column: [(value, count), ...]} with the values of each
        column in natural sort order; empty if the template does not exist
    """
    denied = check_access(samp_id, user_id)
    if denied:
        return denied

    # Defaults used when redis holds nothing for this sample template
    processing = False
    alert_type = ''
    alert_msg = ''

    redis_key = SAMPLE_TEMPLATE_KEY_FORMAT % samp_id
    job_info = r_client.get(redis_key)
    if job_info:
        job_info = loads(job_info)
        job_id = job_info['job_id']
        if not job_id:
            # No job attached: the stored payload already has the outcome
            alert_type = job_info['status']
            alert_msg = job_info['message'].replace('\n', '</br>')
        else:
            redis_info = loads(r_client.get(job_id))
            job_status = redis_info['status_msg']
            processing = job_status == 'Running'
            if processing:
                alert_type = 'info'
                alert_msg = 'This sample template is currently being processed'
            else:
                job_return = redis_info['return']
                alert_type = job_return['status']
                alert_msg = job_return['message'].replace('\n', '</br>')
                if job_status == 'Success':
                    # Store the outcome under the template key with the
                    # job id cleared
                    payload = {'job_id': None,
                               'status': alert_type,
                               'message': alert_msg}
                    r_client.set(redis_key, dumps(payload))

    # Start from the summary of a non-existing template and fill it in later
    summary = {'status': 'success',
               'message': '',
               'num_samples': 0,
               'num_columns': 0,
               'editable': not processing,
               'alert_type': alert_type,
               'alert_message': alert_msg,
               'stats': {}}

    template_id = int(samp_id)
    if _check_sample_template_exists(template_id)['status'] != 'success':
        return summary

    template = SampleTemplate(template_id)
    df = template.to_dataframe()

    study = Study(template.study_id)
    summary['editable'] = study.can_edit(User(user_id)) and not processing

    # The counts are taken before the study_id column is removed
    summary['num_samples'] = df.shape[0]
    summary['num_columns'] = df.shape[1]

    # drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    stats = summary['stats']
    for column in df.columns:
        counts = df[column].value_counts()
        stats[str(column)] = [(str(value), counts[value])
                              for value in natsorted(counts.index)]

    return summary
Example #53
0
File: 51.py Project: mestaki/qiita
        # a few notes: just getting the preps with duplicated values; ignoring
        # column 'sample_id' and tables 'study_sample', 'prep_template',
        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'sample_%%'
                        AND table_name NOT IN (
                            'prep_template', 'prep_template_sample')
                    GROUP BY table_name"""
        # note that we are looking for those columns with duplicated names in
        # the headers
        TRN.add(sql, [tuple(set(cols_sample))])
        for table, columns in dict(TRN.execute_fetchindex()).items():
            # [1] the format is table_# so taking the #
            st = SampleTemplate(int(table.split('_')[1]))
            # getting just the columns of interest
            st_df = st.to_dataframe()[columns]
            # converting to datetime
            for col in columns:
                st_df[col] = st_df[col].apply(transform_date)
            st.update(st_df)

if cols_prep:
    with TRN:
        # a few notes: just getting the preps with duplicated values; ignoring
        # column 'sample_id' and tables 'study_sample', 'prep_template',
        # 'prep_template_sample'
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
Example #54
0
    def test_delete_sample_or_column(self):
        """Check delete_sample_or_column on samples and columns

        Covers sample and prep templates, and the two cases where the call
        reports a 'danger' status instead of deleting anything.
        """
        st = SampleTemplate(1)
        success = {'status': "success", 'message': ""}

        # Delete a sample template column
        obs = delete_sample_or_column(SampleTemplate, 1, "columns",
                                      "season_environment")
        self.assertEqual(obs, success)
        self.assertNotIn('season_environment', st.categories())

        # Delete a sample template sample - need to add one sample that we
        # will remove
        npt.assert_warns(
            QiitaDBWarning, st.extend,
            pd.DataFrame.from_dict({'Sample1': {'taxon_id': '9606'}},
                                   orient='index', dtype=str))
        self.assertIn('1.Sample1', st.keys())
        obs = delete_sample_or_column(SampleTemplate, 1, "samples",
                                      "1.Sample1")
        self.assertEqual(obs, success)
        self.assertNotIn('1.Sample1', st.keys())

        # Delete a prep template column
        pt = PrepTemplate(2)

        obs = delete_sample_or_column(PrepTemplate, 2, "columns",
                                      "target_subfragment")
        self.assertEqual(obs, success)
        self.assertNotIn('target_subfragment', pt.categories())

        # Delete a prep template sample
        metadata = pd.DataFrame.from_dict(
            {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA',
                               'primer': 'GTGCCAGCMGCCGCGGTAA'},
             '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC',
                               'primer': 'GTGCCAGCMGCCGCGGTAA'}},
            orient='index', dtype=str)
        pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                              Study(1), "16S")
        obs = delete_sample_or_column(PrepTemplate, pt.id, "samples",
                                      '1.SKD8.640184')
        self.assertEqual(obs, success)
        # A sample was removed, so look it up among the sample ids (keys),
        # as done for the sample template above; checking the column names
        # would pass regardless of the deletion
        self.assertNotIn('1.SKD8.640184', pt.keys())

        # Exception
        obs = delete_sample_or_column(PrepTemplate, 2, "samples",
                                      "1.SKM9.640192")
        exp = {'status': "danger",
               'message': "Prep info file '2' has files attached, you cannot "
                          "delete samples."}
        self.assertEqual(obs, exp)

        # No "samples" or "columns"
        obs = delete_sample_or_column(PrepTemplate, 2, "not_samples", "NOP")
        exp = {'status': 'danger',
               'message': 'Unknown value "not_samples". Choose between '
                          '"samples" and "columns"'}
        self.assertEqual(obs, exp)
Example #55
0
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Builds the sample and prep templates described by a QIIME mapping file

    The mapping file columns are split in two groups: those named by the prep
    template column restrictions go to the prep template, everything else
    goes to the sample template.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study that will own the new templates
    data_type : str or int
        The data type of the prep template; an int is translated through
        `convert_from_id`

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If the mapping file lacks any of the columns that get renamed
    """
    mapping = load_template_to_dataframe(fp, index='#SampleID')

    # QIIME-specific headers and the names they take in the templates
    renames = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # The reverse linker primer is optional: rename it only when present
    if 'ReverseLinkerPrimer' in mapping:
        renames['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    absent = set(renames).difference(mapping.columns)
    if absent:
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s" % ', '.join(absent))

    mapping.rename(columns=renames, inplace=True)

    # Lower-case only the headers that match a controlled column name
    normalized = []
    for header in mapping.columns:
        lowered = header.lower()
        normalized.append(lowered if lowered in CONTROLLED_COLS else header)
    mapping.columns = normalized

    # Every column named by the generic prep template restrictions
    prep_cols = set(
        col
        for restriction in viewvalues(PREP_TEMPLATE_COLUMNS)
        for col in viewkeys(restriction.columns))

    if isinstance(data_type, int):
        data_type_name = convert_from_id(data_type, "data_type")
    else:
        data_type_name = data_type

    # Target gene data types contribute their own restrictions
    if data_type_name in TARGET_GENE_DATA_TYPES:
        prep_cols.update(
            col
            for restriction in viewvalues(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
            for col in viewkeys(restriction.columns))
        prep_cols.add('reverselinkerprimer')

    # Only keep prep columns that are actually in the file; the remaining
    # file columns belong to the sample template
    all_cols = set(mapping.columns)
    prep_cols = all_cols.intersection(prep_cols)
    sample_cols = all_cols.difference(prep_cols)

    sample_md = mapping.ix[:, sample_cols]
    prep_md = mapping.ix[:, prep_cols]

    sample_template = SampleTemplate.create(sample_md, study)
    prep_template = PrepTemplate.create(prep_md, study, data_type)
    return sample_template, prep_template
    def process_sample_template(self, study, user, callback):
        """Creates the study's sample template from an uploaded file

        Reads the "sample_template" and "data_type" request arguments. If
        the file looks like a QIIME mapping file, both a sample and a prep
        template are created from it; otherwise only the sample template is
        created. The uploaded file is removed once the creation succeeds.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done

        Raises
        ------
        HTTPError
            If the sample template file does not exist
        """
        # Both arguments are required here; if one is missing we let tornado
        # raise its own error
        sample_template = self.get_argument('sample_template')
        data_type = self.get_argument('data_type')

        # Locate the uploaded file inside the study's uploads folder
        _, uploads_dir = get_mountpoint("uploads")[0]
        template_fp = join(uploads_dir, str(study.id), sample_template)

        if not exists(template_fp):
            # Nothing to process, fail nicely
            raise HTTPError(404, "This file doesn't exist: %s" % template_fp)

        # Outcome reported when nothing goes wrong
        msg = "The sample template '%s' has been added" % sample_template
        msg_level = "success"
        is_mapping_file = looks_like_qiime_mapping_file(template_fp)

        try:
            if is_mapping_file and not data_type:
                raise ValueError("Please, choose a data type if uploading a "
                                 "QIIME mapping file")

            with warnings.catch_warnings(record=True) as caught:
                if not is_mapping_file:
                    SampleTemplate.create(
                        load_template_to_dataframe(template_fp), study)
                else:
                    create_templates_from_qiime_mapping_file(
                        template_fp, study, int(data_type))
                remove(template_fp)

                # Collapse the recorded warnings into a single message; an
                # exception raised above skips this and reports the error
                if caught:
                    msg = '; '.join(convert_text_html(str(w.message))
                                    for w in caught)
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError,
                QiitaDBError, QiitaWareError) as e:
            # The template could not be processed: report the error to the
            # user so they can fix the file
            if is_mapping_file:
                action = 'parsing the QIIME mapping file'
            else:
                action = 'parsing the sample template'
            msg = convert_text_html(
                html_error_message % (action, basename(template_fp), str(e)))
            msg_level = "danger"

        callback((msg, msg_level, None, None, None))
Example #57
0
def sample_template_summary_get_req(samp_id, user_id):
    """Summarizes the metadata columns of a sample template

    Parameters
    ----------
    samp_id : int
        SampleTemplate id to get info for
    user_id : str
        User requesting the sample template info

    Returns
    -------
    dict
        If `check_access` reports a problem, its result is returned as is.
        Otherwise a dictionary with the keys:
        {'status': str,
         'message': str,
         'num_samples': int,
         'num_columns': int,
         'editable': bool,
         'alert_type': object,
         'alert_message': object,
         'stats': dict of {str: list of (str, count)}}
        'stats' maps each metadata category to the list of
        (observed value, number of times it is seen) tuples, naturally
        sorted by value. When the sample template does not exist the counts
        are 0 and 'stats' is empty.
    """
    access_error = check_access(samp_id, user_id)
    if access_error:
        return access_error

    processing, alert_type, alert_msg = get_sample_template_processing_status(
        samp_id)

    template_check = _check_sample_template_exists(int(samp_id))

    # Start from the answer given when there is no sample template and fill
    # in the real numbers below when there is one
    summary = {
        'status': 'success',
        'message': '',
        'num_samples': 0,
        'num_columns': 0,
        'editable': not processing,
        'alert_type': alert_type,
        'alert_message': alert_msg,
        'stats': {}
    }
    if template_check['status'] != 'success':
        return summary

    template = SampleTemplate(int(samp_id))
    df = template.to_dataframe()

    summary['editable'] = (
        Study(template.study_id).can_edit(User(user_id)) and not processing)
    # The sizes are taken before the study_id column is dropped
    summary['num_samples'] = df.shape[0]
    summary['num_columns'] = df.shape[1]

    # drop the study_id column if it exists
    if 'study_id' in df.columns:
        df.drop('study_id', axis=1, inplace=True)

    stats = summary['stats']
    for column in df.columns:
        counts = df[column].value_counts()
        ordered_values = natsorted(
            counts.index, key=lambda x: unicode(x, errors='ignore'))
        stats[str(column)] = [(str(value), counts[value])
                              for value in ordered_values]

    return summary
Example #58
0
# Look up, for each prep table, which of its columns share a name with a
# header that is present in both the prep and the sample metadata.
with TRN:
    # a few notes: just getting the preps with duplicated values; ignoring
    # column 'sample_id' and tables 'study_sample', 'prep_template',
    # 'prep_template_sample'
    # The query yields one row per table: (table_name, list of column names)
    sql = """SELECT table_name, array_agg(column_name::text)
                FROM information_schema.columns
                WHERE column_name IN %s
                    AND column_name != 'sample_id'
                    AND table_name LIKE 'prep_%%'
                    AND table_name NOT IN (
                        'prep_template', 'prep_template_sample')
                GROUP BY table_name"""
    # note that we are looking for those columns with duplicated names in
    # the headers
    headers = set(PrepTemplate.metadata_headers()) & \
        set(SampleTemplate.metadata_headers())

    # Only run the query when there is at least one shared header, so the
    # "IN %s" placeholder never receives an empty tuple
    if headers:
        TRN.add(sql, [tuple(headers)])
        # presumably execute_fetchindex returns the rows of the query just
        # added, giving {table_name: [column_name, ...]} - TODO confirm
        overlapping = dict(TRN.execute_fetchindex())
    else:
        # None marks "no shared headers, nothing was queried"
        overlapping = None

if overlapping is not None:
    # finding actual duplicates
    for table_name, cols in viewitems(overlapping):
        # leaving print so when we patch in the main system we know that
        # nothing was renamed or deal with that
        print table_name
        with TRN:
            for c in cols:
Example #59
0
    def test_delete_sample_or_column(self):
        """Checks delete_sample_or_column on sample and prep templates

        Covers deleting a column and a sample from each template type, the
        error reported when the deletion itself fails, and the error
        reported for an unknown "samples"/"columns" selector.
        """
        st = SampleTemplate(1)

        # Delete a sample template column
        obs = delete_sample_or_column(SampleTemplate, 1, "columns",
                                      "season_environment")
        exp = {'status': "success", 'message': ""}
        self.assertEqual(obs, exp)
        self.assertNotIn('season_environment', st.categories())

        # Delete a sample template sample - need to add one sample that we
        # will remove
        npt.assert_warns(
            QiitaDBWarning, st.extend,
            pd.DataFrame.from_dict({'Sample1': {'taxon_id': '9606'}},
                                   orient='index', dtype=str))
        self.assertIn('1.Sample1', st.keys())
        obs = delete_sample_or_column(SampleTemplate, 1, "samples",
                                      "1.Sample1")
        exp = {'status': "success", 'message': ""}
        self.assertEqual(obs, exp)
        self.assertNotIn('1.Sample1', st.keys())

        # Delete a prep template column
        pt = PrepTemplate(2)

        obs = delete_sample_or_column(PrepTemplate, 2, "columns",
                                      "target_subfragment")
        exp = {'status': "success", 'message': ""}
        self.assertEqual(obs, exp)
        self.assertNotIn('target_subfragment', pt.categories())

        # Delete a prep template sample - create a new prep template so we
        # can remove one of its samples
        metadata = pd.DataFrame.from_dict(
            {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA',
                               'primer': 'GTGCCAGCMGCCGCGGTAA'},
             '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC',
                               'primer': 'GTGCCAGCMGCCGCGGTAA'}},
            orient='index', dtype=str)
        pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                              Study(1), "16S")
        obs = delete_sample_or_column(PrepTemplate, pt.id, "samples",
                                      '1.SKD8.640184')
        exp = {'status': "success", 'message': ""}
        self.assertEqual(obs, exp)
        # The deleted name is a sample id, so it has to be looked up among
        # the samples (keys); checking the categories would always pass
        self.assertNotIn('1.SKD8.640184', pt.keys())

        # Exception
        obs = delete_sample_or_column(PrepTemplate, 2, "samples",
                                      "1.SKM9.640192")
        exp = {'status': "danger",
               'message': "Prep info file '2' has files attached, you cannot "
                          "delete samples."}
        self.assertEqual(obs, exp)

        # No "samples" or "columns"
        obs = delete_sample_or_column(PrepTemplate, 2, "not_samples", "NOP")
        exp = {'status': 'danger',
               'message': 'Unknown value "not_samples". Choose between '
                          '"samples" and "columns"'}
        self.assertEqual(obs, exp)
Example #60
0
def sample_template_patch_request(user_id,
                                  req_op,
                                  req_path,
                                  req_value=None,
                                  req_from=None):
    """Applies a patch operation to a sample template

    Only the 'remove' operation is supported. It creates and submits a
    'delete_sample_or_column' processing job and stores the job id under the
    sample template's key in the `r_client` store.

    Parameters
    ----------
    user_id : str
        The id of the user performing the patch operation
    req_op : str
        The operation to perform on the sample template
    req_path : str
        The sample template and attribute to patch, in the form
        study_id/row_id/columns/column_name or
        study_id/row_id/samples/sample_id
    req_value : str, optional
        The value that needs to be modified. Not used by 'remove'
    req_from : str, optional
        The original path of the element. Not used by 'remove'

    Returns
    -------
    dict of {str, object}
        If `check_access` reports a problem, its result is returned as is.
        Otherwise a dictionary with the following keys:
        - status: str, whether the request is successful or not
        - message: str, if the request is unsuccessful, a human readable error
        - row_id: the row id taken from the path, or 0 if the operation is
          not supported. Missing when the path is malformed
    """
    # 'remove' is the only operation implemented
    if req_op != 'remove':
        return {
            'status': 'error',
            'message': 'Operation "%s" not supported. '
                       'Current supported operations: remove' % req_op,
            'row_id': 0
        }

    # Expected path layout (empty chunks from extra slashes are ignored):
    #   column: study_id/row_id/columns/column_name
    #   sample: study_id/row_id/samples/sample_id
    path_parts = [part for part in req_path.split('/') if part]
    if len(path_parts) != 4:
        return {'status': 'error', 'message': 'Incorrect path parameter'}

    st_id, row_id, attribute, attr_id = path_parts

    # Check if the user actually has access to the template
    template = SampleTemplate(st_id)
    access_error = check_access(template.study_id, user_id)
    if access_error:
        return access_error

    plugin = Software.from_name_and_version('Qiita', 'alpha')
    command = plugin.get_command('delete_sample_or_column')
    job_values = {
        'obj_class': 'SampleTemplate',
        'obj_id': int(st_id),
        'sample_or_col': attribute,
        'name': attr_id
    }
    params = Parameters.load(command, values_dict=job_values)
    job = ProcessingJob.create(User(user_id), params)

    # Store the job id attaching it to the sample template id
    job_info = dumps({'job_id': job.id})
    r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % st_id, job_info)

    job.submit()

    return {'status': 'success', 'message': '', 'row_id': row_id}