예제 #1
0
    def delete_sample_template(self, study, user, callback):
        """Delete sample template

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        sample_template_id = int(self.get_argument('sample_template_id'))

        try:
            SampleTemplate.delete(sample_template_id)
            msg = ("Sample template %d has been deleted from study: "
                   "<b><i>%s</i></b>" % (sample_template_id, study.title))
            msg_level = "success"
        except Exception as e:
            msg = "Couldn't remove %d sample template: %s" % (
                sample_template_id, str(e))
            msg_level = "danger"

        callback((msg, msg_level, 'study_information_tab', None, None))
예제 #2
0
파일: commands.py 프로젝트: jenwei/qiita
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

        preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')

    targz.close()

    # submitting
    cmd = ("curl -F user=%s -F pass='******' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
예제 #3
0
    def update_sample_template(self, study, user, callback):
        """Update a sample template from the POST method

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done

        Raises
        ------
        HTTPError
            If the sample template file does not exists
        """
        # If we are on this function, the argument "sample_template" must
        # defined. If not, let tornado raise its error
        sample_template = self.get_argument('sample_template')

        # Define here the message and message level in case of success
        msg = "The sample template '%s' has been updated" % sample_template
        msg_level = "success"
        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the sample template in the uploads folder
        fp_rsp = join(base_fp, str(study.id), sample_template)

        if not exists(fp_rsp):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)
        try:
            with warnings.catch_warnings(record=True) as warns:
                # deleting previous uploads and inserting new one
                st = SampleTemplate(study.id)
                df = load_template_to_dataframe(fp_rsp)
                st.extend(df)
                st.update(df)
                remove(fp_rsp)

                # join all the warning messages into one. Note that this info
                # will be ignored if an exception is raised
                if warns:
                    msg = '\n'.join(set(str(w.message) for w in warns))
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            msg = html_error_message % ('updating the sample template:',
                                        basename(fp_rsp), str(e))
            msg = convert_text_html(msg)
            msg_level = "danger"
        callback((msg, msg_level, None, None, None))
예제 #4
0
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

        preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(
        targz_folder,
        '%d_%d_%d.tgz' % (study.id, prep_template.id, preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')

    targz.close()

    # submitting
    cmd = ("curl -F user=%s -F pass='******' -F uploadFile=@%s -F "
           "press=UploadFile %s" %
           (qiita_config.vamps_user, qiita_config.vamps_pass, targz_fp,
            qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
예제 #5
0
    def update_sample_template(self, study, user, callback):
        """Update a sample template from the POST method

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done

        Raises
        ------
        HTTPError
            If the sample template file does not exists
        """
        # If we are on this function, the argument "sample_template" must
        # defined. If not, let tornado raise its error
        sample_template = self.get_argument('sample_template')

        # Define here the message and message level in case of success
        msg = "The sample template '%s' has been updated" % sample_template
        msg_level = "success"
        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the sample template in the uploads folder
        fp_rsp = join(base_fp, str(study.id), sample_template)

        if not exists(fp_rsp):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)
        try:
            with warnings.catch_warnings(record=True) as warns:
                # deleting previous uploads and inserting new one
                st = SampleTemplate(study.id)
                st.update(load_template_to_dataframe(fp_rsp))

                # join all the warning messages into one. Note that this info
                # will be ignored if an exception is raised
                if warns:
                    msg = '; '.join([str(w.message) for w in warns])
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            msg = html_error_message % ('updating the sample template:',
                                        basename(fp_rsp), str(e))
            msg_level = "danger"
        callback((msg, msg_level, None, None, None))
예제 #6
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Replace prep templates, raw data, and sample template with a new one
        """
        for rd in raw_data():
            rd = RawData(rd)
            for pt in rd.prep_templates:
                if PrepTemplate.exists(pt):
                    PrepTemplate.delete(pt)
        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                              Study(study_id))
        remove(fp_rsp)
예제 #7
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Replace prep templates, raw data, and sample template with a new one
        """
        for rd in raw_data():
            rd = RawData(rd)
            for pt in rd.prep_templates:
                if PrepTemplate.exists(pt):
                    PrepTemplate.delete(pt)
        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                              Study(study_id))
        remove(fp_rsp)
예제 #8
0
    def render(self, study):
        study_info = study.info
        id = study.id
        abstract = study_info['study_abstract']
        description = study_info['study_description']
        pmids = ", ".join([pubmed_linkifier([pmid]) for pmid in study.pmids])
        princ_inv = StudyPerson(study_info['principal_investigator_id'])
        pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))
        number_samples_promised = study_info['number_samples_promised']
        number_samples_collected = study_info['number_samples_collected']
        metadata_complete = study_info['metadata_complete']
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

        # Retrieve the files from the uploads folder, so the user can choose
        # the sample template of the study. Filter them to only include the
        # ones that ends with 'txt' or 'tsv'.
        files = [f for _, f in get_files_from_uploads_folders(str(study.id))
                 if f.endswith(('txt', 'tsv'))]

        # If the sample template exists, retrieve all its filepaths
        if SampleTemplate.exists(study.id):
            sample_templates = SampleTemplate(study.id).get_filepaths()
        else:
            # If the sample template does not exist, just pass an empty list
            sample_templates = []

        # Check if the request came from a local source
        is_local_request = is_localhost(self.request.headers['host'])

        # The user can choose the sample template only if the study is
        # sandboxed or the current user is an admin
        show_select_sample = (
            study.status == 'sandbox' or self.current_user.level == 'admin')

        # Ebi information
        ebi_status = study.ebi_submission_status
        ebi_accession = study.ebi_study_accession
        if ebi_accession:
            ebi_accession = (EBI_LINKIFIER.format(ebi_accession))

        return self.render_string(
            "study_description_templates/study_information_tab.html",
            abstract=abstract,
            description=description,
            id=id,
            pmids=pmids,
            principal_investigator=pi_link,
            number_samples_promised=number_samples_promised,
            number_samples_collected=number_samples_collected,
            metadata_complete=metadata_complete,
            show_select_sample=show_select_sample,
            files=files,
            study_id=study.id,
            sample_templates=sample_templates,
            is_local_request=is_local_request,
            data_types=data_types,
            ebi_status=ebi_status,
            ebi_accession=ebi_accession)
예제 #9
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Simple function to avoid duplication of code"""
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            preprocessed_data = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, "PreprocessedData %d does not exist!" % preprocessed_data_id)
        else:
            user = self.current_user
            if user.level != "admin":
                raise HTTPError(403, "No permissions of admin, " "get/VAMPSSubmitHandler: %s!" % user.id)

        prep_template = PrepTemplate(preprocessed_data.prep_template)
        sample_template = SampleTemplate(preprocessed_data.study)
        study = Study(preprocessed_data.study)
        stats = [
            ("Number of samples", len(prep_template)),
            ("Number of metadata headers", len(sample_template.categories())),
        ]

        demux = [path for _, path, ftype in preprocessed_data.get_filepaths() if ftype == "preprocessed_demux"]
        demux_length = len(demux)

        if not demux_length:
            msg = "Study does not appear to have demultiplexed " "sequences associated"
            msg_level = "danger"
        elif demux_length > 1:
            msg = "Study appears to have multiple demultiplexed files!"
            msg_level = "danger"
        elif demux_length == 1:
            demux_file = demux[0]
            demux_file_stats = demux_stats(demux_file)
            stats.append(("Number of sequences", demux_file_stats.n))
            msg_level = "success"

        self.render(
            "vamps_submission.html",
            study_title=study.title,
            stats=stats,
            message=msg,
            study_id=study.id,
            level=msg_level,
            preprocessed_data_id=preprocessed_data_id,
        )
예제 #10
0
    def render(self, study):
        study_info = study.info
        abstract = study_info['study_abstract']
        description = study_info['study_description']
        pmids = ", ".join([pubmed_linkifier([pmid]) for pmid in study.pmids])
        princ_inv = StudyPerson(study_info['principal_investigator_id'])
        pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))
        number_samples_promised = study_info['number_samples_promised']
        number_samples_collected = study_info['number_samples_collected']
        metadata_complete = study_info['metadata_complete']

        # Retrieve the files from the uploads folder, so the user can choose
        # the sample template of the study
        files = [f for _, f in get_files_from_uploads_folders(str(study.id))]

        # If the sample template exists, retrieve all its filepaths
        if SampleTemplate.exists(study.id):
            sample_templates = SampleTemplate(study.id).get_filepaths()
        else:
            # If the sample template does not exist, just pass an empty list
            sample_templates = []

        # Check if the request came from a local source
        is_local_request = self._is_local()

        # The user can choose the sample template only if the study is
        # sandboxed or the current user is an admin
        show_select_sample = (
            study.status == 'sandbox' or self.current_user.level == 'admin')

        return self.render_string(
            "study_description_templates/study_information_tab.html",
            abstract=abstract,
            description=description,
            pmids=pmids,
            principal_investigator=pi_link,
            number_samples_promised=number_samples_promised,
            number_samples_collected=number_samples_collected,
            metadata_complete=metadata_complete,
            show_select_sample=show_select_sample,
            files=files,
            study_id=study.id,
            sample_templates=sample_templates,
            is_local_request=is_local_request)
예제 #11
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Simple function to avoid duplication of code"""
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            preprocessed_data = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, "PreprocessedData %d does not exist!" %
                                 preprocessed_data_id)
        else:
            user = User(self.current_user)
            if user.level != 'admin':
                raise HTTPError(403, "No permissions of admin, "
                                     "get/EBISubmitHandler: %s!" % user.id)

        prep_template = PrepTemplate(preprocessed_data.prep_template)
        sample_template = SampleTemplate(preprocessed_data.study)
        study = Study(preprocessed_data.study)
        stats = [('Number of samples', len(prep_template)),
                 ('Number of metadata headers',
                  len(sample_template.metadata_headers()))]

        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux']
        demux_length = len(demux)

        if not demux_length:
            msg = ("Study does not appear to have demultiplexed "
                   "sequences associated")
            msg_level = 'danger'
        elif demux_length > 1:
            msg = ("Study appears to have multiple demultiplexed files!")
            msg_level = 'danger'
        elif demux_length == 1:
            demux_file = demux[0]
            demux_file_stats = demux_stats(demux_file)
            stats.append(('Number of sequences', demux_file_stats.n))
            msg_level = 'success'

        self.render('ebi_submission.html', user=self.current_user,
                    study_title=study.title, stats=stats, message=msg,
                    study_id=study.id, level=msg_level,
                    preprocessed_data_id=preprocessed_data_id,
                    investigation_type=prep_template.investigation_type)
예제 #12
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Simple function to avoid duplication of code"""
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            preprocessed_data = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, "PreprocessedData %d does not exist!" %
                                 preprocessed_data_id)
        else:
            user = self.current_user
            if user.level != 'admin':
                raise HTTPError(403, "No permissions of admin, "
                                     "get/EBISubmitHandler: %s!" % user.id)

        prep_template = PrepTemplate(preprocessed_data.prep_template)
        sample_template = SampleTemplate(preprocessed_data.study)
        study = Study(preprocessed_data.study)
        stats = [('Number of samples', len(prep_template)),
                 ('Number of metadata headers',
                  len(sample_template.metadata_headers()))]

        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux']
        demux_length = len(demux)

        if not demux_length:
            msg = ("Study does not appear to have demultiplexed "
                   "sequences associated")
            msg_level = 'danger'
        elif demux_length > 1:
            msg = ("Study appears to have multiple demultiplexed files!")
            msg_level = 'danger'
        elif demux_length == 1:
            demux_file = demux[0]
            demux_file_stats = demux_stats(demux_file)
            stats.append(('Number of sequences', demux_file_stats.n))
            msg_level = 'success'

        self.render('ebi_submission.html',
                    study_title=study.title, stats=stats, message=msg,
                    study_id=study.id, level=msg_level,
                    preprocessed_data_id=preprocessed_data_id,
                    investigation_type=prep_template.investigation_type)
예제 #13
0
 def test_to_file(self):
     """to file writes a tab delimited file with all the metadata"""
     fd, fp = mkstemp()
     close(fd)
     st = SampleTemplate.create(self.metadata, self.new_study)
     st.to_file(fp)
     self._clean_up_files.append(fp)
     with open(fp, 'U') as f:
         obs = f.read()
     self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
예제 #14
0
 def get(self, message="", msg_level=None):
     all_emails_except_current = yield Task(self._get_all_emails)
     all_emails_except_current.remove(self.current_user.id)
     avail_meta = SampleTemplate.metadata_headers() +\
         get_table_cols("study")
     self.render('list_studies.html',
                 availmeta=avail_meta,
                 all_emails_except_current=all_emails_except_current,
                 message=message,
                 msg_level=msg_level)
예제 #15
0
 def test_to_file(self):
     """to file writes a tab delimited file with all the metadata"""
     fd, fp = mkstemp()
     close(fd)
     st = SampleTemplate.create(self.metadata, self.new_study)
     st.to_file(fp)
     self._clean_up_files.append(fp)
     with open(fp, 'U') as f:
         obs = f.read()
     self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
예제 #16
0
    def display_template(self, study, user, msg, msg_level, full_access, top_tab=None, sub_tab=None, prep_tab=None):
        """Simple function to avoid duplication of code"""
        study_status = study.status
        user_level = user.level
        sample_template_exists = SampleTemplate.exists(study.id)

        if sample_template_exists:
            st = SampleTemplate(study.id)
            missing_cols = st.check_restrictions([SAMPLE_TEMPLATE_COLUMNS["qiita_main"]])
            allow_approval = len(missing_cols) == 0
            approval_deny_msg = (
                "Processed data approval request is disabled due to missing "
                "columns in the sample template: %s" % ", ".join(missing_cols)
            )
        else:
            allow_approval = False
            approval_deny_msg = ""

        # The general information of the study can be changed if the study is
        # not public or if the user is an admin, in which case they can always
        # modify the information of the study
        show_edit_btn = study_status != "public" or user_level == "admin"

        # Make the error message suitable for html
        msg = msg.replace("\n", "<br/>")

        self.render(
            "study_description.html",
            message=msg,
            level=msg_level,
            study=study,
            study_title=study.title,
            study_alias=study.info["study_alias"],
            show_edit_btn=show_edit_btn,
            show_data_tabs=sample_template_exists,
            full_access=full_access,
            allow_approval=allow_approval,
            approval_deny_msg=approval_deny_msg,
            top_tab=top_tab,
            sub_tab=sub_tab,
            prep_tab=prep_tab,
        )
예제 #17
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type, is_mapping_file):
        """Replace prep templates, raw data, and sample template with a new one
        """
        if is_mapping_file and data_type == "":
            raise ValueError("Please, choose a data type if uploading a QIIME " "mapping file")

        for rd in raw_data():
            rd = RawData(rd)
            for pt in rd.prep_templates:
                if PrepTemplate.exists(pt):
                    PrepTemplate.delete(pt)
        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        if is_mapping_file:
            create_templates_from_qiime_mapping_file(fp_rsp, Study(study_id), int(data_type))
        else:
            SampleTemplate.create(load_template_to_dataframe(fp_rsp), Study(study_id))

        remove(fp_rsp)
예제 #18
0
 def setUp(self):
     self.sample_template = SampleTemplate(1)
     self.sample_id = 'SKB8.640193'
     self.tester = Sample(self.sample_id, self.sample_template)
     self.exp_categories = {
         'physical_location', 'has_physical_specimen', 'has_extracted_data',
         'sample_type', 'required_sample_info_status_id',
         'collection_timestamp', 'host_subject_id', 'description',
         'season_environment', 'assigned_from_geo', 'texture', 'taxon_id',
         'depth', 'host_taxid', 'common_name', 'water_content_soil',
         'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude',
         'env_biome', 'country', 'ph', 'anonymized_name', 'tot_org_carb',
         'description_duplicate', 'env_feature', 'latitude', 'longitude'
     }
예제 #19
0
    def get(self):
        userobj = self.current_user
        analysis = Analysis(int(self.get_argument("aid")))
        # make sure user has access to the analysis
        check_analysis_access(userobj, analysis)

        # get the dictionaries of selected samples and data types
        selproc_data, selsamples = self._selected_parser(analysis)

        self.render('search_studies.html', aid=analysis.id,
                    selsamples=selsamples, selproc_data=selproc_data,
                    counts={}, fullcounts={}, searchmsg="", query="",
                    results={}, availmeta=SampleTemplate.metadata_headers() +
                    get_table_cols("study"))
예제 #20
0
    def test_create(self):
        """Creates a new SampleTemplate"""
        st = SampleTemplate.create(self.metadata, self.new_study)
        # The returned object has the correct id
        self.assertEqual(st.id, 2)

        # The relevant rows to required_sample_info have been added.
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description
        exp = [[
            2, "Sample1", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 1", 42.42, 41.41
        ],
               [
                   2, "Sample2", "location1", True, True, "type1", 1,
                   datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                   "Test Sample 2", 4.2, 1.1
               ],
               [
                   2, "Sample3", "location1", True, True, "type1", 1,
                   datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                   "Test Sample 3", 4.8, 4.41
               ]]
        self.assertEqual(obs, exp)

        # The relevant rows have been added to the study_sample_columns
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        # study_id, column_name, column_type
        exp = [[2, "str_column", "varchar"]]
        self.assertEqual(obs, exp)

        # The new table exists
        self.assertTrue(exists_table("sample_2", self.conn_handler))

        # The new table hosts the correct values
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        # sample_id, str_column
        exp = [['Sample1', "Value for sample 1"],
               ['Sample2', "Value for sample 2"],
               ['Sample3', "Value for sample 3"]]
        self.assertEqual(obs, exp)
예제 #21
0
파일: test_util.py 프로젝트: Jorge-C/qiita
    def test_metadata_map_from_sample_and_prep_templates(self):
        obs = metadata_map_from_sample_and_prep_templates(
            SampleTemplate(1), PrepTemplate(1))

        # We don't test the specific values as this would blow up the size
        # of this file as the amount of lines would go to ~1000

        # 27 samples
        self.assertEqual(len(obs), 27)
        self.assertTrue(
            all(obs.index == pd.Index([
                u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
                u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
                u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
                u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
                u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
                u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
                u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'
            ],
                                      dtype='object')))

        self.assertTrue(
            all(obs.columns == pd.Index([
                u'tot_org_carb', u'common_name', u'has_extracted_data',
                u'water_content_soil', u'env_feature', u'assigned_from_geo',
                u'altitude', u'env_biome', u'texture',
                u'has_physical_specimen', u'description_duplicate',
                u'physical_location', u'latitude', u'ph', u'host_taxid',
                u'elevation', u'description', u'collection_timestamp',
                u'taxon_id', u'samp_salinity', u'host_subject_id',
                u'sample_type', u'season_environment',
                u'required_sample_info_status_id', u'temp', u'country',
                u'longitude', u'tot_nitro', u'depth', u'anonymized_name',
                u'experiment_center', u'center_name', u'run_center',
                u'run_prefix', u'data_type_id', u'target_gene',
                u'sequencing_meth', u'run_date', u'pcr_primers',
                u'linkerprimersequence', u'platform',
                u'library_construction_protocol',
                u'experiment_design_description', u'study_center',
                u'center_project_name', u'sample_center', u'samp_size',
                u'illumina_technology', u'experiment_title', u'emp_status_id',
                u'target_subfragment', u'barcodesequence'
            ],
                                        dtype='object')))
예제 #22
0
    def get(self):
        userobj = self.current_user
        analysis = Analysis(int(self.get_argument("aid")))
        # make sure user has access to the analysis
        check_analysis_access(userobj, analysis)

        # get the dictionaries of selected samples and data types
        selproc_data, selsamples = self._selected_parser(analysis)

        self.render('search_studies.html',
                    aid=analysis.id,
                    selsamples=selsamples,
                    selproc_data=selproc_data,
                    counts={},
                    fullcounts={},
                    searchmsg="",
                    query="",
                    results={},
                    availmeta=SampleTemplate.metadata_headers() +
                    get_table_cols("study"))
예제 #23
0
    def test_create(self):
        """Creates a new SampleTemplate"""
        st = SampleTemplate.create(self.metadata, self.new_study)
        # The returned object has the correct id
        self.assertEqual(st.id, 2)

        # The relevant rows to required_sample_info have been added.
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description
        exp = [[2, "Sample1", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 1"],
               [2, "Sample2", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 2"],
               [2, "Sample3", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 3"]]
        self.assertEqual(obs, exp)

        # The relevant rows have been added to the study_sample_columns
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        # study_id, column_name, column_type
        exp = [[2, "str_column", "varchar"]]
        self.assertEqual(obs, exp)

        # The new table exists
        self.assertTrue(exists_table("sample_2", self.conn_handler))

        # The new table hosts the correct values
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        # sample_id, str_column
        exp = [['Sample1', "Value for sample 1"],
               ['Sample2', "Value for sample 2"],
               ['Sample3', "Value for sample 3"]]
        self.assertEqual(obs, exp)
예제 #24
0
    def display_template(self, study, user, msg, msg_level, full_access,
                         top_tab=None, sub_tab=None, prep_tab=None):
        """Simple function to avoid duplication of code"""
        study_status = study.status
        user_level = user.level
        sample_template_exists = SampleTemplate.exists(study.id)

        # The general information of the study can be changed if the study is
        # not public or if the user is an admin, in which case they can always
        # modify the information of the study
        show_edit_btn = study_status != 'public' or user_level == 'admin'

        self.render('study_description.html',
                    message=msg,
                    level=msg_level,
                    study=study,
                    study_title=study.title,
                    study_alias=study.info['study_alias'],
                    show_edit_btn=show_edit_btn,
                    show_data_tabs=sample_template_exists,
                    full_access=full_access,
                    top_tab=top_tab,
                    sub_tab=sub_tab,
                    prep_tab=prep_tab)
예제 #25
0
파일: 6.py 프로젝트: BrindhaBioinfo/qiita
from os.path import join
from time import strftime

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

for study_id in conn_handler.execute_fetchall(
        "SELECT study_id FROM qiita.study"):
    study_id = study_id[0]
    if SampleTemplate.exists(study_id):
        st = SampleTemplate(study_id)
        fp = join(fp_base, '%d_%s.txt' % (study_id, strftime("%Y%m%d-%H%M%S")))
        st.to_file(fp)
        st.add_filepath(fp)

for prep_template_id in conn_handler.execute_fetchall(
        "SELECT prep_template_id FROM qiita.prep_template"):
    prep_template_id = prep_template_id[0]
    pt = PrepTemplate(prep_template_id)
    study_id = pt.study_id

    fp = join(fp_base, '%d_prep_%d_%s.txt' % (pt.study_id, prep_template_id,
              strftime("%Y%m%d-%H%M%S")))
    pt.to_file(fp)
    pt.add_filepath(fp)
예제 #26
0
 def test_init(self):
     """Init successfully instantiates the object"""
     st = SampleTemplate(1)
     self.assertTrue(st.id, 1)
예제 #27
0
    def process_sample_template(self, study, user, callback):
        """Process a sample template from the POST method

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done

        Raises
        ------
        HTTPError
            If the sample template file does not exists
        """
        # If we are on this function, the arguments "sample_template" and
        # "data_type" must be defined. If not, let tornado raise its error
        sample_template = self.get_argument('sample_template')
        data_type = self.get_argument('data_type')

        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the sample template in the uploads folder
        fp_rsp = join(base_fp, str(study.id), sample_template)

        if not exists(fp_rsp):
            # The file does not exist, fail nicely
            raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

        # Define here the message and message level in case of success
        msg = "The sample template '%s' has been added" % sample_template
        msg_level = "success"
        is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

        try:
            if is_mapping_file and not data_type:
                raise ValueError("Please, choose a data type if uploading a "
                                 "QIIME mapping file")

            with warnings.catch_warnings(record=True) as warns:
                if is_mapping_file:
                    create_templates_from_qiime_mapping_file(fp_rsp, study,
                                                             int(data_type))
                else:
                    SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                          study)
                remove(fp_rsp)

                # join all the warning messages into one. Note that this
                # info will be ignored if an exception is raised
                if warns:
                    msg = '; '.join([convert_text_html(str(w.message))
                                     for w in warns])
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError,
                QiitaDBError, QiitaWareError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            error_msg = ('parsing the QIIME mapping file'
                         if is_mapping_file
                         else 'parsing the sample template')
            msg = html_error_message % (error_msg, basename(fp_rsp),
                                        str(e))
            msg = convert_text_html(msg)
            msg_level = "danger"

        callback((msg, msg_level, None, None, None))
예제 #28
0
 def _extend_sample_template(self, st_id, fp_rpt):
     SampleTemplate(st_id).extend(load_template_to_dataframe(fp_rpt))
예제 #29
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology")

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [
            path for _, path, ftype in preprocessed_data.get_filepaths()
            if ftype == 'preprocessed_demux'
        ][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info['study_abstract'],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
예제 #30
0
 def test_table_name(self):
     """Table name return the correct string"""
     obs = SampleTemplate._table_name(self.test_study)
     self.assertEqual(obs, "sample_1")
예제 #31
0
 def test_create_duplicate(self):
     """Create raises an error when creating a duplicated SampleTemplate"""
     with self.assertRaises(QiitaDBDuplicateError):
         SampleTemplate.create(self.metadata, self.test_study)
예제 #32
0
 def test_exists_false(self):
     """Exists returns false when the SampleTemplate does not exists"""
     self.assertFalse(SampleTemplate.exists(self.new_study))
예제 #33
0
 def test_exists_true(self):
     """Exists returns true when the SampleTemplate already exists"""
     self.assertTrue(SampleTemplate.exists(self.test_study))
예제 #34
0
 def test_init_wrong_template(self):
     """Raises an error if using a SampleTemplate instead of PrepTemplate"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         PrepSample('SKB8.640193', SampleTemplate(1))
예제 #35
0
 def test_exists(self):
     """exists should raise an error if called from the base class"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         BaseSample.exists('SKM7.640188', SampleTemplate(1))
예제 #36
0
    def setUp(self):
        metadata_dict = {
            'Sample1': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 1',
                'str_column': 'Value for sample 1',
                'latitude': 42.42,
                'longitude': 41.41
            },
            'Sample2': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 2',
                'str_column': 'Value for sample 2',
                'latitude': 4.2,
                'longitude': 1.1
            },
            'Sample3': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 3',
                'str_column': 'Value for sample 3',
                'latitude': 4.8,
                'longitude': 4.41
            },
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

        self.test_study = Study(1)
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
            "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
            "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        self.new_study = Study.create(User('*****@*****.**'),
                                      "Fried Chicken Microbiome", [1], info)
        self.tester = SampleTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
        }
        self._clean_up_files = []
예제 #37
0
 def test_init(self):
     """BaseSample init should raise an error (it's a base class)"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         BaseSample('SKM7.640188', SampleTemplate(1))
예제 #38
0
 def test_create_duplicate_header(self):
     """Create raises an error when duplicate headers are present"""
     self.metadata['STR_COLUMN'] = pd.Series(['', '', ''],
                                             index=self.metadata.index)
     with self.assertRaises(QiitaDBDuplicateHeaderError):
         SampleTemplate.create(self.metadata, self.new_study)
예제 #39
0
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s" % ', '.join(missing))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        for restriction in viewvalues(restriction_set):
            for cols in viewkeys(restriction.columns):
                yield cols

    pt_cols = set(col for col in _col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, (int, long)) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(
            col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    st_md = qiime_map.ix[:, st_cols]
    pt_md = qiime_map.ix[:, pt_cols]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
예제 #40
0
 def test_create_duplicate_header(self):
     """Create raises an error when duplicate headers are present"""
     self.metadata['STR_COLUMN'] = pd.Series(['', '', ''],
                                             index=self.metadata.index)
     with self.assertRaises(QiitaDBDuplicateHeaderError):
         SampleTemplate.create(self.metadata, self.new_study)
예제 #41
0
 def test_exists_true(self):
     """Exists returns true when the SampleTemplate already exists"""
     self.assertTrue(SampleTemplate.exists(self.test_study))
예제 #42
0
 def test_exists_false(self):
     """Exists returns false when the SampleTemplate does not exists"""
     self.assertFalse(SampleTemplate.exists(self.new_study))
예제 #43
0
    def post(self):
        user = self.current_user
        action = self.get_argument("action")
        # set required template variables
        results = {}
        meta_headers = []
        counts = {}
        fullcounts = {}
        query = ""
        searchmsg = ""
        selsamples = {}
        selproc_data = {}
        # get analysis and selected samples if exists, or create if necessary
        if action == "create":
            name = self.get_argument('name')
            description = self.get_argument('description')
            analysis = Analysis.create(User(user), name, description)
            analysis_id = analysis.id
            # set to second step since this page is second step in workflow
            analysis.step = SELECT_SAMPLES
            # fill example studies by running query for specific studies
            search = QiitaStudySearch()
            def_query = 'study_id = 1 OR study_id = 2 OR study_id = 3'
            results, meta_headers = search(def_query, user)
            results, counts, fullcounts = self._parse_search_results(
                results, selsamples, meta_headers)
        else:
            analysis_id = int(self.get_argument("analysis-id"))
            check_analysis_access(User(user), analysis_id)
            analysis = Analysis(analysis_id)
            selproc_data, selsamples = self._selected_parser(analysis)

        # run through action requested
        if action == "search":
            search = QiitaStudySearch()
            query = str(self.get_argument("query"))
            try:
                results, meta_headers = search(query, user)
            except ParseException:
                searchmsg = "Malformed search query, please read search help."
            except QiitaDBIncompatibleDatatypeError as e:
                searchmsg = ''.join(e)

            if not results and not searchmsg:
                searchmsg = "No results found."
            else:
                results, counts, fullcounts = self._parse_search_results(
                    results, selsamples, meta_headers)

        elif action == "select":
            analysis.add_samples(self._parse_form_select())

            # rebuild the selected from database to reflect changes
            selproc_data, selsamples = self._selected_parser(analysis)

        elif action == "deselect":
            proc_data, samples = self._parse_form_deselect()
            if proc_data:
                analysis.remove_samples(proc_data=proc_data)
            if samples:
                analysis.remove_samples(samples=samples)
            if not proc_data and not samples:
                searchmsg = "Must select samples to remove from analysis!"

            # rebuild the selected from database to reflect changes
            selproc_data, selsamples = self._selected_parser(analysis)

        self.render('search_studies.html',
                    user=user,
                    aid=analysis_id,
                    results=results,
                    meta_headers=meta_headers,
                    selsamples=selsamples,
                    selproc_data=selproc_data,
                    counts=counts,
                    fullcounts=fullcounts,
                    searchmsg=searchmsg,
                    query=query,
                    availmeta=SampleTemplate.metadata_headers() +
                    get_table_cols("study"))
예제 #44
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Simple function to avoid duplication of code"""
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            preprocessed_data = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, "PreprocessedData %d does not exist!" %
                                 preprocessed_data_id)
        else:
            user = self.current_user
            if user.level != 'admin':
                raise HTTPError(403, "No permissions of admin, "
                                     "get/EBISubmitHandler: %s!" % user.id)

        prep_template = PrepTemplate(preprocessed_data.prep_template)
        sample_template = SampleTemplate(preprocessed_data.study)
        study = Study(preprocessed_data.study)
        stats = [('Number of samples', len(prep_template)),
                 ('Number of metadata headers',
                  len(sample_template.categories()))]

        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux']
        demux_length = len(demux)

        if not demux_length:
            msg = ("Study does not appear to have demultiplexed "
                   "sequences associated")
            msg_level = 'danger'
        elif demux_length > 1:
            msg = ("Study appears to have multiple demultiplexed files!")
            msg_level = 'danger'
        elif demux_length == 1:
            demux_file = demux[0]
            demux_file_stats = demux_stats(demux_file)
            stats.append(('Number of sequences', demux_file_stats.n))
            msg_level = 'success'

        # Check if the templates have all the required columns for EBI
        pt_missing_cols = prep_template.check_restrictions(
            [PREP_TEMPLATE_COLUMNS['EBI']])
        st_missing_cols = sample_template.check_restrictions(
            [SAMPLE_TEMPLATE_COLUMNS['EBI']])
        allow_submission = (len(pt_missing_cols) == 0 and
                            len(st_missing_cols) == 0)

        if not allow_submission:
            msg_list = ["Submission to EBI disabled due to missing columns:"]
            if len(pt_missing_cols) > 0:
                msg_list.append("Columns missing in prep template: %s"
                                % ', '.join(pt_missing_cols))
            if len(st_missing_cols) > 0:
                msg_list.append("Columns missing in sample template: %s"
                                % ', '.join(st_missing_cols))
            ebi_disabled_msg = "<br/>".join(msg_list)
        else:
            ebi_disabled_msg = None

        self.render('ebi_submission.html',
                    study_title=study.title, stats=stats, message=msg,
                    study_id=study.id, level=msg_level,
                    preprocessed_data_id=preprocessed_data_id,
                    investigation_type=prep_template.investigation_type,
                    allow_submission=allow_submission,
                    ebi_disabled_msg=ebi_disabled_msg)
예제 #45
0
class TestSampleTemplate(TestCase):
    """Tests the SampleTemplate class"""
    def setUp(self):
        metadata_dict = {
            'Sample1': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 1',
                'str_column': 'Value for sample 1',
                'latitude': 42.42,
                'longitude': 41.41
            },
            'Sample2': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 2',
                'str_column': 'Value for sample 2',
                'latitude': 4.2,
                'longitude': 1.1
            },
            'Sample3': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 3',
                'str_column': 'Value for sample 3',
                'latitude': 4.8,
                'longitude': 4.41
            },
        }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

        self.test_study = Study(1)
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
            "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
            "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        self.new_study = Study.create(User('*****@*****.**'),
                                      "Fried Chicken Microbiome", [1], info)
        self.tester = SampleTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
        }
        self._clean_up_files = []

    def tearDown(self):
        for f in self._clean_up_files:
            remove(f)

    def test_init_unknown_error(self):
        """Init raises an error if the id is not known"""
        with self.assertRaises(QiitaDBUnknownIDError):
            SampleTemplate(2)

    def test_init(self):
        """Init successfully instantiates the object"""
        st = SampleTemplate(1)
        self.assertTrue(st.id, 1)

    def test_table_name(self):
        """Table name return the correct string"""
        obs = SampleTemplate._table_name(self.test_study)
        self.assertEqual(obs, "sample_1")

    def test_create_duplicate(self):
        """Create raises an error when creating a duplicated SampleTemplate"""
        with self.assertRaises(QiitaDBDuplicateError):
            SampleTemplate.create(self.metadata, self.test_study)

    def test_create_duplicate_header(self):
        """Create raises an error when duplicate headers are present"""
        self.metadata['STR_COLUMN'] = pd.Series(['', '', ''],
                                                index=self.metadata.index)
        with self.assertRaises(QiitaDBDuplicateHeaderError):
            SampleTemplate.create(self.metadata, self.new_study)

    def test_create(self):
        """Creates a new SampleTemplate"""
        st = SampleTemplate.create(self.metadata, self.new_study)
        # The returned object has the correct id
        self.assertEqual(st.id, 2)

        # The relevant rows to required_sample_info have been added.
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description
        exp = [[
            2, "Sample1", "location1", True, True, "type1", 1,
            datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
            "Test Sample 1", 42.42, 41.41
        ],
               [
                   2, "Sample2", "location1", True, True, "type1", 1,
                   datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                   "Test Sample 2", 4.2, 1.1
               ],
               [
                   2, "Sample3", "location1", True, True, "type1", 1,
                   datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                   "Test Sample 3", 4.8, 4.41
               ]]
        self.assertEqual(obs, exp)

        # The relevant rows have been added to the study_sample_columns
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        # study_id, column_name, column_type
        exp = [[2, "str_column", "varchar"]]
        self.assertEqual(obs, exp)

        # The new table exists
        self.assertTrue(exists_table("sample_2", self.conn_handler))

        # The new table hosts the correct values
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        # sample_id, str_column
        exp = [['Sample1', "Value for sample 1"],
               ['Sample2', "Value for sample 2"],
               ['Sample3', "Value for sample 3"]]
        self.assertEqual(obs, exp)

    def test_exists_true(self):
        """Exists returns true when the SampleTemplate already exists"""
        self.assertTrue(SampleTemplate.exists(self.test_study))

    def test_exists_false(self):
        """Exists returns false when the SampleTemplate does not exists"""
        self.assertFalse(SampleTemplate.exists(self.new_study))

    def test_get_sample_ids(self):
        """get_sample_ids returns the correct set of sample ids"""
        obs = self.tester._get_sample_ids(self.conn_handler)
        self.assertEqual(obs, self.exp_sample_ids)

    def test_len(self):
        """Len returns the correct number of sample ids"""
        self.assertEqual(len(self.tester), 27)

    def test_getitem(self):
        """Get item returns the correct sample object"""
        obs = self.tester['SKM7.640188']
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_getitem_error(self):
        """Get item raises an error if key does not exists"""
        with self.assertRaises(KeyError):
            self.tester['Not_a_Sample']

    def test_setitem(self):
        """setitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            self.tester['SKM7.640188'] = Sample('SKM7.640188', self.tester)

    def test_delitem(self):
        """delitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            del self.tester['SKM7.640188']

    def test_iter(self):
        """iter returns an iterator over the sample ids"""
        obs = self.tester.__iter__()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_contains_true(self):
        """contains returns true if the sample id exists"""
        self.assertTrue('SKM7.640188' in self.tester)

    def test_contains_false(self):
        """contains returns false if the sample id does not exists"""
        self.assertFalse('Not_a_Sample' in self.tester)

    def test_keys(self):
        """keys returns an iterator over the sample ids"""
        obs = self.tester.keys()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_values(self):
        """values returns an iterator over the values"""
        obs = self.tester.values()
        self.assertTrue(isinstance(obs, Iterable))
        exp = {
            Sample('SKB1.640202', self.tester),
            Sample('SKB2.640194', self.tester),
            Sample('SKB3.640195', self.tester),
            Sample('SKB4.640189', self.tester),
            Sample('SKB5.640181', self.tester),
            Sample('SKB6.640176', self.tester),
            Sample('SKB7.640196', self.tester),
            Sample('SKB8.640193', self.tester),
            Sample('SKB9.640200', self.tester),
            Sample('SKD1.640179', self.tester),
            Sample('SKD2.640178', self.tester),
            Sample('SKD3.640198', self.tester),
            Sample('SKD4.640185', self.tester),
            Sample('SKD5.640186', self.tester),
            Sample('SKD6.640190', self.tester),
            Sample('SKD7.640191', self.tester),
            Sample('SKD8.640184', self.tester),
            Sample('SKD9.640182', self.tester),
            Sample('SKM1.640183', self.tester),
            Sample('SKM2.640199', self.tester),
            Sample('SKM3.640197', self.tester),
            Sample('SKM4.640180', self.tester),
            Sample('SKM5.640177', self.tester),
            Sample('SKM6.640187', self.tester),
            Sample('SKM7.640188', self.tester),
            Sample('SKM8.640201', self.tester),
            Sample('SKM9.640192', self.tester)
        }
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs), key=lambda x: x.id),
                        sorted(exp, key=lambda x: x.id)):
            self.assertEqual(o, e)

    def test_items(self):
        """items returns an iterator over the (key, value) tuples"""
        obs = self.tester.items()
        self.assertTrue(isinstance(obs, Iterable))
        exp = [('SKB1.640202', Sample('SKB1.640202', self.tester)),
               ('SKB2.640194', Sample('SKB2.640194', self.tester)),
               ('SKB3.640195', Sample('SKB3.640195', self.tester)),
               ('SKB4.640189', Sample('SKB4.640189', self.tester)),
               ('SKB5.640181', Sample('SKB5.640181', self.tester)),
               ('SKB6.640176', Sample('SKB6.640176', self.tester)),
               ('SKB7.640196', Sample('SKB7.640196', self.tester)),
               ('SKB8.640193', Sample('SKB8.640193', self.tester)),
               ('SKB9.640200', Sample('SKB9.640200', self.tester)),
               ('SKD1.640179', Sample('SKD1.640179', self.tester)),
               ('SKD2.640178', Sample('SKD2.640178', self.tester)),
               ('SKD3.640198', Sample('SKD3.640198', self.tester)),
               ('SKD4.640185', Sample('SKD4.640185', self.tester)),
               ('SKD5.640186', Sample('SKD5.640186', self.tester)),
               ('SKD6.640190', Sample('SKD6.640190', self.tester)),
               ('SKD7.640191', Sample('SKD7.640191', self.tester)),
               ('SKD8.640184', Sample('SKD8.640184', self.tester)),
               ('SKD9.640182', Sample('SKD9.640182', self.tester)),
               ('SKM1.640183', Sample('SKM1.640183', self.tester)),
               ('SKM2.640199', Sample('SKM2.640199', self.tester)),
               ('SKM3.640197', Sample('SKM3.640197', self.tester)),
               ('SKM4.640180', Sample('SKM4.640180', self.tester)),
               ('SKM5.640177', Sample('SKM5.640177', self.tester)),
               ('SKM6.640187', Sample('SKM6.640187', self.tester)),
               ('SKM7.640188', Sample('SKM7.640188', self.tester)),
               ('SKM8.640201', Sample('SKM8.640201', self.tester)),
               ('SKM9.640192', Sample('SKM9.640192', self.tester))]
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs)), sorted(exp)):
            self.assertEqual(o, e)

    def test_get(self):
        """get returns the correct sample object"""
        obs = self.tester.get('SKM7.640188')
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_get_none(self):
        """get returns none if the sample id is not present"""
        self.assertTrue(self.tester.get('Not_a_Sample') is None)

    def test_to_file(self):
        """to file writes a tab delimited file with all the metadata"""
        fd, fp = mkstemp()
        close(fd)
        st = SampleTemplate.create(self.metadata, self.new_study)
        st.to_file(fp)
        self._clean_up_files.append(fp)
        with open(fp, 'U') as f:
            obs = f.read()
        self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
예제 #46
0
    def test_retrieve_dropped_samples(self):
        # Create and populate second study to do test with
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
            "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
            "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        metadata_dict = {
            'SKB8.640193': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status': 'received',
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 1',
                'str_column': 'Value for sample 1',
                'latitude': 42.42,
                'longitude': 41.41
            },
            'SKD8.640184': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status': 'received',
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 2',
                'str_column': 'Value for sample 2',
                'latitude': 4.2,
                'longitude': 1.1
            },
            'SKB7.640196': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status': 'received',
                'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 3',
                'str_column': 'Value for sample 3',
                'latitude': 4.8,
                'longitude': 4.41
            },
        }
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

        Study.create(User("*****@*****.**"), "Test study 2", [1], info)

        SampleTemplate.create(metadata, Study(2))

        mp = get_mountpoint("processed_data")[0][1]
        study_fp = join(mp, "2_study_1001_closed_reference_otu_table.biom")
        ProcessedData.create("processed_params_uclust",
                             1, [(study_fp, 6)],
                             study=Study(2),
                             data_type="16S")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample (analysis_id, "
            "processed_data_id, sample_id) VALUES "
            "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
            "(1,2,'2.SKB7.640196')")

        samples = {
            1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
            2: ['2.SKB8.640193', '2.SKD8.640184']
        }
        self.analysis._build_biom_tables(samples,
                                         10000,
                                         conn_handler=self.conn_handler)
        exp = {1: {'1.SKM4.640180', '1.SKM9.640192'}, 2: {'2.SKB7.640196'}}
        self.assertEqual(self.analysis.dropped_samples, exp)
예제 #47
0
파일: test_util.py 프로젝트: Jorge-C/qiita
 def test_metadata_stats_from_sample_and_prep_templates(self):
     obs = metadata_stats_from_sample_and_prep_templates(
         SampleTemplate(1), PrepTemplate(1))
     for k in obs:
         self.assertEqual(obs[k], SUMMARY_STATS[k])
예제 #48
0
    def display_template(self, study, user, msg, msg_level, top_tab=None,
                         sub_tab=None, prep_tab=None):
        """Simple function to avoid duplication of code"""
        # getting the RawData and its prep templates
        available_raw_data = yield Task(self.get_raw_data, study.raw_data())
        available_prep_templates = yield Task(self.get_prep_templates,
                                              available_raw_data)

        # set variable holding if we have files attached to all raw data or not
        raw_files = True if available_raw_data else False
        for r in available_raw_data:
            if not r.get_filepaths():
                raw_files = False

        # set variable holding if we have all prep templates or not
        if available_prep_templates:
            _test = lambda item: not item
            prep_templates = all(
                [_test(val) for val in viewvalues(available_prep_templates)])
        else:
            prep_templates = False

        study_status = study.status
        user_level = user.level
        sample_template_exists = SampleTemplate.exists(study.id)

        # The general information of the study can be changed if the study is
        # not public or if the user is an admin, in which case they can always
        # modify the information of the study
        show_edit_btn = study_status != 'public' or user_level == 'admin'

        # Files can be added to a study only if the study is sandboxed
        # or if the user is the admin
        show_upload_btn = study_status == 'sandbox' or user_level == 'admin'

        # The request approval, approve study and make public buttons are
        # mutually exclusive. Only one of them will be shown, depending on the
        # current status of the study
        btn_to_show = None
        if (study_status == 'sandbox' and qiita_config.require_approval
                and sample_template_exists and raw_files and prep_templates):
            # The request approval button only appears if the study is
            # sandboxed, the qiita_config specifies that the approval should
            # be requested and the sample template, raw files and prep
            # prep templates have been added to the study
            btn_to_show = 'request_approval'
        elif (user_level == 'admin' and study_status == 'awaiting_approval'
                and qiita_config.require_approval):
            # The approve study button only appears if the user is an admin,
            # the study is waiting approval and the qiita config requires
            # study approval
            btn_to_show = 'approve_study'
        elif study_status == 'private':
            # The make public button only appers if the study is private
            btn_to_show = 'make_public'

        # The revert to sandbox button only appears if the study is not
        # sandboxed or public
        show_revert_btn = study_status not in {'sandbox', 'public'}

        self.render('study_description.html',
                    message=msg,
                    level=msg_level,
                    user=self.current_user,
                    study=study,
                    study_title=study.title,
                    study_alias=study.info['study_alias'],
                    show_edit_btn=show_edit_btn,
                    show_upload_btn=show_upload_btn,
                    show_revert_btn=show_revert_btn,
                    btn_to_show=btn_to_show,
                    show_data_tabs=sample_template_exists,
                    top_tab=top_tab,
                    sub_tab=sub_tab,
                    prep_tab=prep_tab)
예제 #49
0
 def test_eq_false_type(self):
     """Equality returns false if types are not equal"""
     other = Sample(self.sample_id, SampleTemplate(1))
     self.assertFalse(self.tester == other)
예제 #50
0
파일: commands.py 프로젝트: MarkBruns/qiita
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Notes
    -----
    If fastq_dir_fp is passed, it must not contain any empty files, or
    gzipped empty files
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ("submitting", "success"):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status("submitting")

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id("ENA", "ontology"))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = "Other"
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology"
        )

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths() if ftype == "preprocessed_demux"][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh, list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                wrote_sequences = False
                with gzopen(sample_fp, "w") as fh:
                    for record in iterator:
                        fh.write(record)
                        wrote_sequences = True

                if not wrote_sequences:
                    remove(sample_fp)

    output_dir = fastq_dir_fp + "_submission"

    samp_fp = join(fastq_dir_fp, "sample_metadata.txt")
    prep_fp = join(fastq_dir_fp, "prep_metadata.txt")

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp("study.xml")
    sample_fp = get_output_fp("sample.xml")
    experiment_fp = get_output_fp("experiment.xml")
    run_fp = get_output_fp("run.xml")
    submission_fp = get_output_fp("submission.xml")

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError("The output folder already exists: %s" % output_dir)

    with open(samp_fp, "U") as st, open(prep_fp, "U") as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info["study_abstract"],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids,
        )

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status("failed")

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status("success", study_accession, submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
예제 #51
0
class TestSampleTemplate(TestCase):
    """Tests the SampleTemplate class"""

    def setUp(self):
        metadata_dict = {
            'Sample1': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 1',
                        'str_column': 'Value for sample 1'},
            'Sample2': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 2',
                        'str_column': 'Value for sample 2'},
            'Sample3': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 3',
                        'str_column': 'Value for sample 3'}
            }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

        self.test_study = Study(1)
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        self.new_study = Study.create(User('*****@*****.**'),
                                      "Fried Chicken Microbiome", [1], info)
        self.tester = SampleTemplate(1)
        self.exp_sample_ids = {'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
                               'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
                               'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
                               'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
                               'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
                               'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
                               'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
                               'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
                               'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
        self._clean_up_files = []

    def tearDown(self):
        for f in self._clean_up_files:
            remove(f)

    def test_init_unknown_error(self):
        """Init raises an error if the id is not known"""
        with self.assertRaises(QiitaDBUnknownIDError):
            SampleTemplate(2)

    def test_init(self):
        """Init successfully instantiates the object"""
        st = SampleTemplate(1)
        self.assertTrue(st.id, 1)

    def test_table_name(self):
        """Table name return the correct string"""
        obs = SampleTemplate._table_name(self.test_study)
        self.assertEqual(obs, "sample_1")

    def test_create_duplicate(self):
        """Create raises an error when creating a duplicated SampleTemplate"""
        with self.assertRaises(QiitaDBDuplicateError):
            SampleTemplate.create(self.metadata, self.test_study)

    def test_create_duplicate_header(self):
        """Create raises an error when duplicate headers are present"""
        self.metadata['STR_COLUMN'] = pd.Series(['', '', ''],
                                                index=self.metadata.index)
        with self.assertRaises(QiitaDBDuplicateHeaderError):
            SampleTemplate.create(self.metadata, self.new_study)

    def test_create(self):
        """Creates a new SampleTemplate"""
        st = SampleTemplate.create(self.metadata, self.new_study)
        # The returned object has the correct id
        self.assertEqual(st.id, 2)

        # The relevant rows to required_sample_info have been added.
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
        # study_id sample_id physical_location has_physical_specimen
        # has_extracted_data sample_type required_sample_info_status_id
        # collection_timestamp host_subject_id description
        exp = [[2, "Sample1", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 1"],
               [2, "Sample2", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 2"],
               [2, "Sample3", "location1", True, True, "type1", 1,
                datetime(2014, 5, 29, 12, 24, 51), "NotIdentified",
                "Test Sample 3"]]
        self.assertEqual(obs, exp)

        # The relevant rows have been added to the study_sample_columns
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
        # study_id, column_name, column_type
        exp = [[2, "str_column", "varchar"]]
        self.assertEqual(obs, exp)

        # The new table exists
        self.assertTrue(exists_table("sample_2", self.conn_handler))

        # The new table hosts the correct values
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.sample_2")
        # sample_id, str_column
        exp = [['Sample1', "Value for sample 1"],
               ['Sample2', "Value for sample 2"],
               ['Sample3', "Value for sample 3"]]
        self.assertEqual(obs, exp)

    def test_exists_true(self):
        """Exists returns true when the SampleTemplate already exists"""
        self.assertTrue(SampleTemplate.exists(self.test_study))

    def test_exists_false(self):
        """Exists returns false when the SampleTemplate does not exists"""
        self.assertFalse(SampleTemplate.exists(self.new_study))

    def test_get_sample_ids(self):
        """get_sample_ids returns the correct set of sample ids"""
        obs = self.tester._get_sample_ids(self.conn_handler)
        self.assertEqual(obs, self.exp_sample_ids)

    def test_len(self):
        """Len returns the correct number of sample ids"""
        self.assertEqual(len(self.tester), 27)

    def test_getitem(self):
        """Get item returns the correct sample object"""
        obs = self.tester['SKM7.640188']
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_getitem_error(self):
        """Get item raises an error if key does not exists"""
        with self.assertRaises(KeyError):
            self.tester['Not_a_Sample']

    def test_setitem(self):
        """setitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            self.tester['SKM7.640188'] = Sample('SKM7.640188', self.tester)

    def test_delitem(self):
        """delitem raises an error (currently not allowed)"""
        with self.assertRaises(QiitaDBNotImplementedError):
            del self.tester['SKM7.640188']

    def test_iter(self):
        """iter returns an iterator over the sample ids"""
        obs = self.tester.__iter__()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_contains_true(self):
        """contains returns true if the sample id exists"""
        self.assertTrue('SKM7.640188' in self.tester)

    def test_contains_false(self):
        """contains returns false if the sample id does not exists"""
        self.assertFalse('Not_a_Sample' in self.tester)

    def test_keys(self):
        """keys returns an iterator over the sample ids"""
        obs = self.tester.keys()
        self.assertTrue(isinstance(obs, Iterable))
        self.assertEqual(set(obs), self.exp_sample_ids)

    def test_values(self):
        """values returns an iterator over the values"""
        obs = self.tester.values()
        self.assertTrue(isinstance(obs, Iterable))
        exp = {Sample('SKB1.640202', self.tester),
               Sample('SKB2.640194', self.tester),
               Sample('SKB3.640195', self.tester),
               Sample('SKB4.640189', self.tester),
               Sample('SKB5.640181', self.tester),
               Sample('SKB6.640176', self.tester),
               Sample('SKB7.640196', self.tester),
               Sample('SKB8.640193', self.tester),
               Sample('SKB9.640200', self.tester),
               Sample('SKD1.640179', self.tester),
               Sample('SKD2.640178', self.tester),
               Sample('SKD3.640198', self.tester),
               Sample('SKD4.640185', self.tester),
               Sample('SKD5.640186', self.tester),
               Sample('SKD6.640190', self.tester),
               Sample('SKD7.640191', self.tester),
               Sample('SKD8.640184', self.tester),
               Sample('SKD9.640182', self.tester),
               Sample('SKM1.640183', self.tester),
               Sample('SKM2.640199', self.tester),
               Sample('SKM3.640197', self.tester),
               Sample('SKM4.640180', self.tester),
               Sample('SKM5.640177', self.tester),
               Sample('SKM6.640187', self.tester),
               Sample('SKM7.640188', self.tester),
               Sample('SKM8.640201', self.tester),
               Sample('SKM9.640192', self.tester)}
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs), key=lambda x: x.id),
                        sorted(exp, key=lambda x: x.id)):
            self.assertEqual(o, e)

    def test_items(self):
        """items returns an iterator over the (key, value) tuples"""
        obs = self.tester.items()
        self.assertTrue(isinstance(obs, Iterable))
        exp = [('SKB1.640202', Sample('SKB1.640202', self.tester)),
               ('SKB2.640194', Sample('SKB2.640194', self.tester)),
               ('SKB3.640195', Sample('SKB3.640195', self.tester)),
               ('SKB4.640189', Sample('SKB4.640189', self.tester)),
               ('SKB5.640181', Sample('SKB5.640181', self.tester)),
               ('SKB6.640176', Sample('SKB6.640176', self.tester)),
               ('SKB7.640196', Sample('SKB7.640196', self.tester)),
               ('SKB8.640193', Sample('SKB8.640193', self.tester)),
               ('SKB9.640200', Sample('SKB9.640200', self.tester)),
               ('SKD1.640179', Sample('SKD1.640179', self.tester)),
               ('SKD2.640178', Sample('SKD2.640178', self.tester)),
               ('SKD3.640198', Sample('SKD3.640198', self.tester)),
               ('SKD4.640185', Sample('SKD4.640185', self.tester)),
               ('SKD5.640186', Sample('SKD5.640186', self.tester)),
               ('SKD6.640190', Sample('SKD6.640190', self.tester)),
               ('SKD7.640191', Sample('SKD7.640191', self.tester)),
               ('SKD8.640184', Sample('SKD8.640184', self.tester)),
               ('SKD9.640182', Sample('SKD9.640182', self.tester)),
               ('SKM1.640183', Sample('SKM1.640183', self.tester)),
               ('SKM2.640199', Sample('SKM2.640199', self.tester)),
               ('SKM3.640197', Sample('SKM3.640197', self.tester)),
               ('SKM4.640180', Sample('SKM4.640180', self.tester)),
               ('SKM5.640177', Sample('SKM5.640177', self.tester)),
               ('SKM6.640187', Sample('SKM6.640187', self.tester)),
               ('SKM7.640188', Sample('SKM7.640188', self.tester)),
               ('SKM8.640201', Sample('SKM8.640201', self.tester)),
               ('SKM9.640192', Sample('SKM9.640192', self.tester))]
        # Creating a list and looping over it since unittest does not call
        # the __eq__ function on the objects
        for o, e in zip(sorted(list(obs)), sorted(exp)):
            self.assertEqual(o, e)

    def test_get(self):
        """get returns the correct sample object"""
        obs = self.tester.get('SKM7.640188')
        exp = Sample('SKM7.640188', self.tester)
        self.assertEqual(obs, exp)

    def test_get_none(self):
        """get returns none if the sample id is not present"""
        self.assertTrue(self.tester.get('Not_a_Sample') is None)

    def test_to_file(self):
        """to file writes a tab delimited file with all the metadata"""
        fd, fp = mkstemp()
        close(fd)
        st = SampleTemplate.create(self.metadata, self.new_study)
        st.to_file(fp)
        self._clean_up_files.append(fp)
        with open(fp, 'U') as f:
            obs = f.read()
        self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
예제 #52
0
    def post(self):
        user = self.current_user
        action = self.get_argument("action")
        # set required template variables
        results = {}
        meta_headers = []
        counts = {}
        fullcounts = {}
        query = ""
        searchmsg = ""
        selsamples = {}
        selproc_data = {}
        # get analysis and selected samples if exists, or create if necessary
        if action == "create":
            name = self.get_argument('name')
            description = self.get_argument('description')
            analysis = Analysis.create(user, name, description)
            analysis_id = analysis.id
            # set to second step since this page is second step in workflow
            analysis.step = SELECT_SAMPLES
            # fill example studies by running query for specific studies
            search = QiitaStudySearch()
            def_query = 'study_id = 1 OR study_id = 2 OR study_id = 3'
            results, meta_headers = search(def_query, user)
            results, counts, fullcounts = self._parse_search_results(
                results, selsamples, meta_headers)
        else:
            analysis_id = int(self.get_argument("analysis-id"))
            analysis = Analysis(analysis_id)
            check_analysis_access(user, analysis)
            selproc_data, selsamples = self._selected_parser(analysis)

        # run through action requested
        if action == "search":
            search = QiitaStudySearch()
            query = str(self.get_argument("query"))
            try:
                results, meta_headers = search(query, user)
            except ParseException:
                searchmsg = "Malformed search query, please read search help."
            except QiitaDBIncompatibleDatatypeError as e:
                searchmsg = ''.join(e)

            if not results and not searchmsg:
                searchmsg = "No results found."
            else:
                results, counts, fullcounts = self._parse_search_results(
                    results, selsamples, meta_headers)

        elif action == "select":
            analysis.add_samples(self._parse_form_select())

            # rebuild the selected from database to reflect changes
            selproc_data, selsamples = self._selected_parser(analysis)

        elif action == "deselect":
            proc_data, samples = self._parse_form_deselect()
            if proc_data:
                analysis.remove_samples(proc_data=proc_data)
            if samples:
                analysis.remove_samples(samples=samples)
            if not proc_data and not samples:
                searchmsg = "Must select samples to remove from analysis!"

            # rebuild the selected from database to reflect changes
            selproc_data, selsamples = self._selected_parser(analysis)

        self.render('search_studies.html', user=user, aid=analysis_id,
                    results=results, meta_headers=meta_headers,
                    selsamples=selsamples, selproc_data=selproc_data,
                    counts=counts, fullcounts=fullcounts, searchmsg=searchmsg,
                    query=query, availmeta=SampleTemplate.metadata_headers() +
                    get_table_cols("study"))
예제 #53
0
    def setUp(self):
        metadata_dict = {
            'Sample1': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 1',
                        'str_column': 'Value for sample 1'},
            'Sample2': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 2',
                        'str_column': 'Value for sample 2'},
            'Sample3': {'physical_location': 'location1',
                        'has_physical_specimen': True,
                        'has_extracted_data': True,
                        'sample_type': 'type1',
                        'required_sample_info_status_id': 1,
                        'collection_timestamp':
                        datetime(2014, 5, 29, 12, 24, 51),
                        'host_subject_id': 'NotIdentified',
                        'Description': 'Test Sample 3',
                        'str_column': 'Value for sample 3'}
            }
        self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

        self.test_study = Study(1)
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        self.new_study = Study.create(User('*****@*****.**'),
                                      "Fried Chicken Microbiome", [1], info)
        self.tester = SampleTemplate(1)
        self.exp_sample_ids = {'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
                               'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
                               'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
                               'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
                               'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
                               'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
                               'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
                               'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
                               'SKM7.640188', 'SKM8.640201', 'SKM9.640192'}
        self._clean_up_files = []
예제 #54
0
    def display_template(self, study, user, msg, msg_level, top_tab=None,
                         sub_tab=None, prep_tab=None):
        """Simple function to avoid duplication of code"""
        # getting the RawData and its prep templates
        available_raw_data = yield Task(self.get_raw_data, study.raw_data())
        available_prep_templates = yield Task(self.get_prep_templates,
                                              available_raw_data)

        # set variable holding if we have files attached to all raw data or not
        raw_files = True if available_raw_data else False
        for r in available_raw_data:
            if not r.get_filepaths():
                raw_files = False

        # set variable holding if we have all prep templates or not
        if available_prep_templates:
            def _test(item):
                return not item
            prep_templates = all(
                [_test(val) for val in viewvalues(available_prep_templates)])
        else:
            prep_templates = False

        study_status = study.status
        user_level = user.level
        sample_template_exists = SampleTemplate.exists(study.id)

        # The general information of the study can be changed if the study is
        # not public or if the user is an admin, in which case they can always
        # modify the information of the study
        show_edit_btn = study_status != 'public' or user_level == 'admin'

        # Files can be added to a study only if the study is sandboxed
        # or if the user is the admin
        show_upload_btn = study_status == 'sandbox' or user_level == 'admin'

        # The request approval, approve study and make public buttons are
        # mutually exclusive. Only one of them will be shown, depending on the
        # current status of the study
        btn_to_show = None
        if (study_status == 'sandbox' and
                qiita_config.require_approval and sample_template_exists and
                raw_files and prep_templates):
            # The request approval button only appears if the study is
            # sandboxed, the qiita_config specifies that the approval should
            # be requested and the sample template, raw files and prep
            # prep templates have been added to the study
            btn_to_show = 'request_approval'
        elif (user_level == 'admin' and
                study_status == 'awaiting_approval' and
                qiita_config.require_approval):
            # The approve study button only appears if the user is an admin,
            # the study is waiting approval and the qiita config requires
            # study approval
            btn_to_show = 'approve_study'
        elif study_status == 'private':
            # The make public button only appers if the study is private
            btn_to_show = 'make_public'

        # The revert to sandbox button only appears if the study is not
        # sandboxed or public
        show_revert_btn = study_status not in {'sandbox', 'public'}

        self.render('study_description.html',
                    message=msg,
                    level=msg_level,
                    study=study,
                    study_title=study.title,
                    study_alias=study.info['study_alias'],
                    show_edit_btn=show_edit_btn,
                    show_upload_btn=show_upload_btn,
                    show_revert_btn=show_revert_btn,
                    btn_to_show=btn_to_show,
                    show_data_tabs=sample_template_exists,
                    top_tab=top_tab,
                    sub_tab=sub_tab,
                    prep_tab=prep_tab)
예제 #55
0
 def test_table_name(self):
     """Table name return the correct string"""
     obs = SampleTemplate._table_name(self.test_study)
     self.assertEqual(obs, "sample_1")
예제 #56
0
 def test_stats_from_df(self):
     obs = stats_from_df(dataframe_from_template(SampleTemplate(1)))
     for k in obs:
         self.assertEqual(obs[k], SUMMARY_STATS[k])
예제 #57
0
 def test_create_duplicate(self):
     """Create raises an error when creating a duplicated SampleTemplate"""
     with self.assertRaises(QiitaDBDuplicateError):
         SampleTemplate.create(self.metadata, self.test_study)
예제 #58
0
    def test_retrieve_dropped_samples(self):
        # Create and populate second study to do test with
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        metadata_dict = {
            'SKB8.640193': {'physical_location': 'location1',
                            'has_physical_specimen': True,
                            'has_extracted_data': True,
                            'sample_type': 'type1',
                            'required_sample_info_status': 'received',
                            'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                            'host_subject_id': 'NotIdentified',
                            'Description': 'Test Sample 1',
                            'str_column': 'Value for sample 1',
                            'latitude': 42.42,
                            'longitude': 41.41},
            'SKD8.640184': {'physical_location': 'location1',
                            'has_physical_specimen': True,
                            'has_extracted_data': True,
                            'sample_type': 'type1',
                            'required_sample_info_status': 'received',
                            'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                            'host_subject_id': 'NotIdentified',
                            'Description': 'Test Sample 2',
                            'str_column': 'Value for sample 2',
                            'latitude': 4.2,
                            'longitude': 1.1},
            'SKB7.640196': {'physical_location': 'location1',
                            'has_physical_specimen': True,
                            'has_extracted_data': True,
                            'sample_type': 'type1',
                            'required_sample_info_status': 'received',
                            'collection_timestamp':
                            datetime(2014, 5, 29, 12, 24, 51),
                            'host_subject_id': 'NotIdentified',
                            'Description': 'Test Sample 3',
                            'str_column': 'Value for sample 3',
                            'latitude': 4.8,
                            'longitude': 4.41},
            }
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')

        Study.create(User("*****@*****.**"), "Test study 2", [1], info)

        SampleTemplate.create(metadata, Study(2))

        mp = get_mountpoint("processed_data")[0][1]
        study_fp = join(mp, "2_study_1001_closed_reference_otu_table.biom")
        ProcessedData.create("processed_params_uclust", 1, [(study_fp, 6)],
                             study=Study(2), data_type="16S")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample (analysis_id, "
            "processed_data_id, sample_id) VALUES "
            "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
            "(1,2,'2.SKB7.640196')")

        samples = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
                   2: ['2.SKB8.640193', '2.SKD8.640184']}
        self.analysis._build_biom_tables(samples, 10000,
                                         conn_handler=self.conn_handler)
        exp = {1: {'1.SKM4.640180', '1.SKM9.640192'},
               2: {'2.SKB7.640196'}}
        self.assertEqual(self.analysis.dropped_samples, exp)
예제 #59
0
 def test_init_unknown_error(self):
     """Init raises an error if the id is not known"""
     with self.assertRaises(QiitaDBUnknownIDError):
         SampleTemplate(2)