Esempio n. 1
0
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id

    Returns
    -------
    bool
        True if the response of the upload matches the expected page,
        False otherwise

    Raises
    ------
    ValueError
        If the current VAMPS status is 'submitting' or 'success'
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    # Mark the submission as in progress. This has to live outside of the
    # check above, otherwise it is unreachable (it would follow the raise)
    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(
        targz_folder,
        '%d_%d_%d.tgz' % (study.id, prep_template.id, preprocessed_data.id))

    # The context manager guarantees that the archive is closed even if
    # writing or adding one of the files fails
    with taropen(targz_fp, mode='w:gz') as targz:
        # adding sample/prep
        samp_fp = join(targz_folder, 'sample_metadata.txt')
        sample_template.to_file(samp_fp)
        targz.add(samp_fp, arcname='sample_metadata.txt')
        prep_fp = join(targz_folder, 'prep_metadata.txt')
        prep_template.to_file(prep_fp)
        targz.add(prep_fp, arcname='prep_metadata.txt')

        # adding preprocessed data
        for _, fp, fp_type in preprocessed_data.get_filepaths():
            if fp_type == 'preprocessed_fasta':
                targz.add(fp, arcname='preprocessed_fasta.fna')

    # submitting
    # The template needs one placeholder per value in the tuple below
    # (user, password, file and url); a missing one raises a TypeError
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" %
           (qiita_config.vamps_user, qiita_config.vamps_pass, targz_fp,
            qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # The submission is considered successful only if the output of the
    # command is exactly this page
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
Esempio n. 2
0
    def update_sample_template(self, study, user, callback):
        """Update the sample template of a study with an uploaded file

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The function called with the 5-tuple
            ``(msg, msg_level, None, None, None)`` once the processing is done

        Raises
        ------
        HTTPError
            If the requested file is not present in the uploads folder
        """
        # "sample_template" is a required argument: if it is missing we let
        # tornado raise its own error
        sample_template = self.get_argument('sample_template')

        # Build the path of the file inside the study's uploads folder
        _, uploads_dir = get_mountpoint("uploads")[0]
        template_fp = join(uploads_dir, str(study.id), sample_template)
        if not exists(template_fp):
            # The file does not exist, fail nicely
            raise HTTPError(400, "This file doesn't exist: %s" % template_fp)

        # Message and message level reported if everything goes fine
        msg = "The sample template '%s' has been updated" % sample_template
        msg_level = "success"

        try:
            with warnings.catch_warnings(record=True) as caught:
                # Replace the previous contents with the new file
                SampleTemplate(study.id).update(
                    load_template_to_dataframe(template_fp))

                # Report the warnings (if any) as a single message. This is
                # overwritten below if an exception is raised
                if caught:
                    msg = '; '.join([str(w.message) for w in caught])
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
            # Processing failed: show the error to the user so they can fix
            # the template
            msg = html_error_message % ('updating the sample template:',
                                        basename(template_fp), str(e))
            msg_level = "danger"

        callback((msg, msg_level, None, None, None))
Esempio n. 3
0
 def setUp(self):
     """Create the sample under test and the categories expected for it"""
     self.sample_id = 'SKB8.640193'
     self.sample_template = SampleTemplate(1)
     self.tester = Sample(self.sample_id, self.sample_template)
     # Expected categories, as an unordered set
     self.exp_categories = {
         'physical_location',
         'has_physical_specimen',
         'has_extracted_data',
         'sample_type',
         'required_sample_info_status_id',
         'collection_timestamp',
         'host_subject_id',
         'description',
         'season_environment',
         'assigned_from_geo',
         'texture',
         'taxon_id',
         'depth',
         'host_taxid',
         'common_name',
         'water_content_soil',
         'elevation',
         'temp',
         'tot_nitro',
         'samp_salinity',
         'altitude',
         'env_biome',
         'country',
         'ph',
         'anonymized_name',
         'tot_org_carb',
         'description_duplicate',
         'env_feature',
         'latitude',
         'longitude',
     }
Esempio n. 4
0
    def test_metadata_map_from_sample_and_prep_templates(self):
        """The merged metadata map has the expected sample ids and columns"""
        obs = metadata_map_from_sample_and_prep_templates(
            SampleTemplate(1), PrepTemplate(1))

        # Only the shape and the labels are checked: testing every value
        # would add ~1000 lines to this file

        # 27 samples
        self.assertEqual(len(obs), 27)

        exp_sample_ids = pd.Index(
            [u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
             u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
             u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
             u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
             u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
             u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
             u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'],
            dtype='object')
        self.assertTrue(all(obs.index == exp_sample_ids))

        exp_columns = pd.Index(
            [u'tot_org_carb', u'common_name', u'has_extracted_data',
             u'water_content_soil', u'env_feature', u'assigned_from_geo',
             u'altitude', u'env_biome', u'texture',
             u'has_physical_specimen', u'description_duplicate',
             u'physical_location', u'latitude', u'ph', u'host_taxid',
             u'elevation', u'description', u'collection_timestamp',
             u'taxon_id', u'samp_salinity', u'host_subject_id',
             u'sample_type', u'season_environment',
             u'required_sample_info_status_id', u'temp', u'country',
             u'longitude', u'tot_nitro', u'depth', u'anonymized_name',
             u'experiment_center', u'center_name', u'run_center',
             u'run_prefix', u'data_type_id', u'target_gene',
             u'sequencing_meth', u'run_date', u'pcr_primers',
             u'linkerprimersequence', u'platform',
             u'library_construction_protocol',
             u'experiment_design_description', u'study_center',
             u'center_project_name', u'sample_center', u'samp_size',
             u'illumina_technology', u'experiment_title', u'emp_status_id',
             u'target_subfragment', u'barcodesequence'],
            dtype='object')
        self.assertTrue(all(obs.columns == exp_columns))
Esempio n. 5
0
    def render(self, study):
        """Render the study information tab of the given study"""
        info = study.info
        abstract = info['study_abstract']
        description = info['study_description']

        # One link per PMID, shown as a comma separated list
        pmid_links = [pubmed_linkifier([pmid]) for pmid in study.pmids]
        pmids = ", ".join(pmid_links)

        pi = StudyPerson(info['principal_investigator_id'])
        pi_link = study_person_linkifier((pi.email, pi.name))

        samples_promised = info['number_samples_promised']
        samples_collected = info['number_samples_collected']
        metadata_complete = info['metadata_complete']

        # Files available in the uploads folder, so the user can choose the
        # sample template of the study
        uploaded = get_files_from_uploads_folders(str(study.id))
        files = [f for _, f in uploaded]

        # All the filepaths of the sample template, or an empty list if the
        # study does not have a sample template yet
        sample_templates = (SampleTemplate(study.id).get_filepaths()
                            if SampleTemplate.exists(study.id) else [])

        # Check if the request came from a local source
        is_local_request = self._is_local()

        # The sample template can be chosen only if the study is sandboxed
        # or the current user is an admin
        show_select_sample = (
            study.status == 'sandbox' or self.current_user.level == 'admin')

        template_args = {
            'abstract': abstract,
            'description': description,
            'pmids': pmids,
            'principal_investigator': pi_link,
            'number_samples_promised': samples_promised,
            'number_samples_collected': samples_collected,
            'metadata_complete': metadata_complete,
            'show_select_sample': show_select_sample,
            'files': files,
            'study_id': study.id,
            'sample_templates': sample_templates,
            'is_local_request': is_local_request,
        }
        return self.render_string(
            "study_description_templates/study_information_tab.html",
            **template_args)
Esempio n. 6
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Render the EBI submission page of a preprocessed data

        Shared by the callers so the rendering code is not duplicated.

        Raises
        ------
        HTTPError
            404 if the preprocessed data does not exist, 403 if the current
            user is not an admin
        """
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            preprocessed_data = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, "PreprocessedData %d does not exist!" %
                                 preprocessed_data_id)

        # Only reached when the object exists: admins only from here on
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                                 "get/EBISubmitHandler: %s!" % user.id)

        prep_template = PrepTemplate(preprocessed_data.prep_template)
        sample_template = SampleTemplate(preprocessed_data.study)
        study = Study(preprocessed_data.study)

        n_headers = len(sample_template.metadata_headers())
        stats = [('Number of samples', len(prep_template)),
                 ('Number of metadata headers', n_headers)]

        demux = [fp for _, fp, fp_type in preprocessed_data.get_filepaths()
                 if fp_type == 'preprocessed_demux']
        n_demux = len(demux)

        if n_demux == 0:
            msg = ("Study does not appear to have demultiplexed "
                   "sequences associated")
            msg_level = 'danger'
        elif n_demux > 1:
            msg = ("Study appears to have multiple demultiplexed files!")
            msg_level = 'danger'
        else:
            # Exactly one demux file: add its sequence count to the stats
            # and keep the message received from the caller
            demux_file_stats = demux_stats(demux[0])
            stats.append(('Number of sequences', demux_file_stats.n))
            msg_level = 'success'

        self.render('ebi_submission.html',
                    study_title=study.title, stats=stats, message=msg,
                    study_id=study.id, level=msg_level,
                    preprocessed_data_id=preprocessed_data_id,
                    investigation_type=prep_template.investigation_type)
Esempio n. 7
0
 def test_metadata_stats_from_sample_and_prep_templates(self):
     """Every computed stat matches its entry in SUMMARY_STATS"""
     sample_template = SampleTemplate(1)
     prep_template = PrepTemplate(1)
     stats = metadata_stats_from_sample_and_prep_templates(
         sample_template, prep_template)
     for key in stats:
         self.assertEqual(stats[key], SUMMARY_STATS[key])
Esempio n. 8
0
 def test_exists(self):
     """Calling exists on the base class raises an error"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         template = SampleTemplate(1)
         BaseSample.exists('SKM7.640188', template)
Esempio n. 9
0
 def test_init(self):
     """Instantiating BaseSample raises an error (it's a base class)"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         template = SampleTemplate(1)
         BaseSample('SKM7.640188', template)
Esempio n. 10
0
from os.path import join
from time import strftime

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

conn_handler = SQLConnectionHandler()

# Base directory of the 'templates' mountpoint: all files are written there
_id, fp_base = get_mountpoint('templates')[0]

# Write every existing sample template to a timestamped file and attach the
# resulting filepath to the template
for study_id in conn_handler.execute_fetchall(
        "SELECT study_id FROM qiita.study"):
    # Each row holds a single column: the study id
    study_id = study_id[0]
    if SampleTemplate.exists(study_id):
        st = SampleTemplate(study_id)
        fp = join(fp_base, '%d_%s.txt' % (study_id, strftime("%Y%m%d-%H%M%S")))
        st.to_file(fp)
        st.add_filepath(fp)

# Do the same for every prep template
for prep_template_id in conn_handler.execute_fetchall(
        "SELECT prep_template_id FROM qiita.prep_template"):
    prep_template_id = prep_template_id[0]
    pt = PrepTemplate(prep_template_id)
    # Read the study id once and reuse it to build the file name
    study_id = pt.study_id

    fp = join(
        fp_base, '%d_prep_%d_%s.txt' %
        (study_id, prep_template_id, strftime("%Y%m%d-%H%M%S")))
    pt.to_file(fp)
    pt.add_filepath(fp)
Esempio n. 11
0
 def test_init_unknown_error(self):
     """Instantiating with an unknown id raises QiitaDBUnknownIDError"""
     self.assertRaises(QiitaDBUnknownIDError, SampleTemplate, 2)
Esempio n. 12
0
    def setUp(self):
        """Build the metadata, the studies and the template used by the tests"""
        # Files registered here are presumably removed on tearDown
        # (not visible from this method)
        self._clean_up_files = []

        # All the samples share the same collection timestamp
        collected_on = datetime(2014, 5, 29, 12, 24, 51)
        sample_rows = {
            'Sample1': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': collected_on,
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 1',
                'str_column': 'Value for sample 1',
                'latitude': 42.42,
                'longitude': 41.41
            },
            'Sample2': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': collected_on,
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 2',
                'str_column': 'Value for sample 2',
                'latitude': 4.2,
                'longitude': 1.1
            },
            'Sample3': {
                'physical_location': 'location1',
                'has_physical_specimen': True,
                'has_extracted_data': True,
                'sample_type': 'type1',
                'required_sample_info_status_id': 1,
                'collection_timestamp': collected_on,
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 3',
                'str_column': 'Value for sample 3',
                'latitude': 4.8,
                'longitude': 4.41
            },
        }
        # One row per sample, one column per metadata category
        self.metadata = pd.DataFrame.from_dict(sample_rows, orient='index')

        # Sample ids expected in the template with id 1
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195', 'SKB4.640189',
            'SKB5.640181', 'SKB6.640176', 'SKB7.640196', 'SKB8.640193',
            'SKB9.640200', 'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190', 'SKD7.640191',
            'SKD8.640184', 'SKD9.640182', 'SKM1.640183', 'SKM2.640199',
            'SKM3.640197', 'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192'
        }

        self.test_study = Study(1)

        # A second study, created from scratch
        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
            "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
            "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        owner = User('*****@*****.**')
        self.new_study = Study.create(owner, "Fried Chicken Microbiome", [1],
                                      study_info)

        self.tester = SampleTemplate(1)
Esempio n. 13
0
 def test_eq_false_type(self):
     """Objects of different types do not compare equal"""
     template = SampleTemplate(1)
     other = Sample(self.sample_id, template)
     are_equal = self.tester == other
     self.assertFalse(are_equal)
Esempio n. 14
0
 def test_init_wrong_template(self):
     """PrepSample rejects a SampleTemplate (a PrepTemplate is expected)"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         wrong_template = SampleTemplate(1)
         PrepSample('SKB8.640193', wrong_template)
Esempio n. 15
0
 def _extend_sample_template(self, st_id, fp_rpt):
     """Extend the sample template `st_id` with the file at `fp_rpt`"""
     template = SampleTemplate(st_id)
     loaded_df = load_template_to_dataframe(fp_rpt)
     template.extend(loaded_df)
Esempio n. 16
0
 def test_stats_from_df(self):
     """Every stat computed from the dataframe matches SUMMARY_STATS"""
     df = dataframe_from_template(SampleTemplate(1))
     stats = stats_from_df(df)
     for key in stats:
         self.assertEqual(stats[key], SUMMARY_STATS[key])
Esempio n. 17
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple
        The study accession and the submission accession. Both are None
        if `send` is False

    Raises
    ------
    ValueError
        If the data is being submitted or was already submitted, or if the
        investigation type of the prep template is not in the ENA ontology
    IOError
        If the output folder already exists
    ComputeError
        If `send` is True and no accessions are obtained
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # The offending term is interpolated so the message is actionable
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology"
                         % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [
            path for _, path, ftype in preprocessed_data.get_filepaths()
            if ftype == 'preprocessed_demux'
        ][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    # The XML files are written to a sibling folder of the FASTQ directory
    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info['study_abstract'],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        # Nothing was sent, so there are no accessions to report
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
Esempio n. 18
0
 def test_init(self):
     """Init successfully instantiates the object"""
     st = SampleTemplate(1)
     # assertTrue(st.id, 1) would use 1 as the failure message and pass for
     # any truthy id; assertEqual actually compares the id
     self.assertEqual(st.id, 1)