Beispiel #1
0
    def delete_prep_template(self, study, user, callback):
        """Remove the prep template named in the request arguments

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            Called exactly once with the 5-item tuple
            ``(msg, msg_level, 'raw_data_tab', sub_tab, None)``
        """
        pt_id = int(self.get_argument('prep_template_id'))
        # Read before deleting; this value is what the callback receives as
        # its fourth item when the delete fails
        sub_tab = PrepTemplate(pt_id).raw_data

        try:
            PrepTemplate.delete(pt_id)
        except Exception as err:
            level = "danger"
            text = "Couldn't remove prep template: %s" % str(err)
        else:
            level = "success"
            text = "Prep template %d has been deleted" % pt_id
            # A successful delete reports None instead of the raw data value
            sub_tab = None

        callback((text, level, 'raw_data_tab', sub_tab, None))
Beispiel #2
0
    def setUp(self):
        """Prepare two temporary fastq files and two prep templates

        Sets ``seqs_fp``, ``barcodes_fp``, ``filetype``, ``filepaths``,
        ``db_test_raw_dir``, ``_clean_up_files``, ``pt1``, ``pt2`` and
        ``prep_templates`` on the test instance.
        """
        # mkstemp returns an open descriptor; close it immediately because
        # the files are reopened by path further down
        handle, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(handle)
        handle, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(handle)

        self.filetype = 2
        self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
        _, self.db_test_raw_dir = get_mountpoint('raw_data')[0]

        # Both files hold a single newline
        for path in (self.seqs_fp, self.barcodes_fp):
            with open(path, "w") as out:
                out.write("\n")
        self._clean_up_files = []

        # One-sample metadata shared by both new prep templates
        sample_row = {
            'center_name': 'ANL',
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'GTCCGCAAGTTA',
            'run_prefix': "s_G1_L001_sequences",
            'platform': 'ILLUMINA',
            'library_construction_protocol': 'AAAA',
            'experiment_design_description': 'BBBB',
        }
        metadata = pd.DataFrame.from_dict({'SKB8.640193': sample_row},
                                          orient='index')

        self.pt1 = PrepTemplate.create(metadata, Study(1), "16S")
        self.pt2 = PrepTemplate.create(metadata, Study(1), "18S")
        self.prep_templates = [self.pt1, self.pt2]
Beispiel #3
0
    def delete_prep_template(self, study, user, callback):
        """Delete the prep template selected in the request

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            Invoked once with the 5-item tuple
            ``(msg, msg_level, 'prep_template_tab', sub_tab, None)``
        """
        template_id = int(self.get_argument('prep_template_id'))
        # Fetched up front; it is passed to the callback only when the
        # delete fails
        # NOTE(review): this is the template's ``raw_data`` value, yet it is
        # reported under 'prep_template_tab' - confirm that is intended
        sub_tab = PrepTemplate(template_id).raw_data

        try:
            PrepTemplate.delete(template_id)
        except Exception as err:
            outcome = ("Couldn't remove prep template: %s" % str(err),
                       "danger")
        else:
            outcome = ("Prep template %d has been deleted" % template_id,
                       "success")
            # On success the fourth callback item is None
            sub_tab = None

        callback(outcome + ('prep_template_tab', sub_tab, None))
    def test_get_qiime_minimal_mapping_single_reverse_primer(self):
        """Minimal mapping file for a prep with a reverse linker primer

        Rewrites prep template 1 in the database so that it has a
        ``reverselinkerprimer`` column and no ``run_prefix`` column, then
        checks that ``_get_qiime_minimal_mapping`` returns a single file
        whose contents equal ``EXP_PREP_RLP``.
        """
        # Local import so the block does not depend on a file-level import
        from shutil import rmtree

        conn_handler = SQLConnectionHandler()
        sql = """INSERT INTO qiita.prep_columns
                        (prep_template_id, column_name, column_type)
                    VALUES (1, 'reverselinkerprimer', 'varchar');
                 ALTER TABLE qiita.prep_1
                    ADD COLUMN reverselinkerprimer varchar;
                 DELETE FROM qiita.prep_columns
                 WHERE prep_template_id = 1 AND column_name = 'run_prefix';
                 ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix;
                 UPDATE qiita.prep_1 SET reverselinkerprimer = %s
                 """
        conn_handler.execute(sql, ('GTGCCAGCM',))
        prep_template = PrepTemplate(1)
        prep_template.generate_files()

        out_dir = mkdtemp()
        # Remove the scratch directory even when an assertion below fails
        self.addCleanup(rmtree, out_dir, ignore_errors=True)

        obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
        exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        self.assertTrue(exists(exp_fps[0]))
        # Check the contents of the file
        # NOTE(review): the "U" open mode is rejected by Python 3.11+; it is
        # kept here because the surrounding code targets Python 2
        with open(exp_fps[0], "U") as f:
            self.assertEqual(f.read(), EXP_PREP_RLP)
Beispiel #5
0
    def test_dataframe_from_template(self):
        """Check the dataframe produced by ``PrepTemplate(1).to_dataframe()``

        Verifies the number of rows, the set of index labels and the set of
        column names.
        """
        template = PrepTemplate(1)
        obs = template.to_dataframe()

        # 27 samples
        self.assertEqual(len(obs), 27)

        # assertEqual is required here: assertTrue(first, second) treats
        # ``second`` as the failure message, so it never compares the sets
        # and passes for any non-empty ``first``.
        # NOTE(review): since nothing was compared before, confirm both
        # expected sets against the test database fixture.
        self.assertEqual(set(obs.index), {
            u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
            u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
            u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
            u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
            u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
            u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
            u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'})

        self.assertEqual(set(obs.columns), {
            u'tot_org_carb', u'common_name', u'has_extracted_data',
            u'required_sample_info_status', u'water_content_soil',
            u'env_feature', u'assigned_from_geo', u'altitude', u'env_biome',
            u'texture', u'has_physical_specimen', u'description_duplicate',
            u'physical_location', u'latitude', u'ph', u'host_taxid',
            u'elevation', u'description', u'collection_timestamp',
            u'taxon_id', u'samp_salinity', u'host_subject_id', u'sample_type',
            u'season_environment', u'temp', u'country', u'longitude',
            u'tot_nitro', u'depth', u'anonymized_name', u'target_subfragment',
            u'sample_center', u'samp_size', u'run_date', u'experiment_center',
            u'pcr_primers', u'center_name', u'barcodesequence', u'run_center',
            u'run_prefix', u'library_construction_protocol', u'emp_status',
            u'linkerprimersequence', u'experiment_design_description',
            u'target_gene', u'center_project_name', u'illumina_technology',
            u'sequencing_meth', u'platform', u'experiment_title',
            u'study_center'})
Beispiel #6
0
    def add_raw_data(self, study, user, callback):
        """Adds an existing raw data to the study

        Reads ``prep_template_id`` and ``raw_data_id`` from the request and
        assigns the raw data to the prep template.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done. It receives the 5-item tuple
            ``(msg, msg_level, 'prep_template_tab', pt_id, None)``
        """
        msg = "Raw data successfully added"
        msg_level = "success"

        # Get the arguments to add the raw data
        pt_id = self.get_argument('prep_template_id')
        raw_data_id = self.get_argument('raw_data_id')

        prep_template = PrepTemplate(pt_id)
        raw_data = RawData(raw_data_id)

        try:
            prep_template.raw_data = raw_data
        except QiitaDBError as e:
            msg = html_error_message % ("adding the raw data",
                                        str(raw_data_id), str(e))
            msg = convert_text_html(msg)
            # Report the failure as an error; without this the error text
            # would be delivered with the "success" level set above
            msg_level = "danger"

        callback((msg, msg_level, 'prep_template_tab', pt_id, None))
    def update_prep_template(self, study, user, callback):
        """Apply an uploaded file to an existing prep template

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            Called once with the 5-item tuple
            ``(msg, msg_level, 'prep_template_tab', pt_id, None)``

        Raises
        ------
        HTTPError
            If the named file is not present in the study's uploads folder
        """
        # Both arguments are mandatory; tornado raises if either is missing
        pt_id = int(self.get_argument('prep_template_id'))
        uploaded_name = self.get_argument('update_prep_template_file')

        # Outcome reported when the update finishes without warnings
        msg_level = "success"
        msg = "The prep template '%s' has been updated" % uploaded_name

        # Locate the file under <uploads mountpoint>/<study id>/
        _, uploads_dir = get_mountpoint("uploads")[0]
        fp = join(uploads_dir, str(study.id), uploaded_name)

        # Using 400 because we want the user to get the error in the GUI
        if not exists(fp):
            raise HTTPError(400, "This file doesn't exist: %s" % fp)

        try:
            with warnings.catch_warnings(record=True) as caught:
                PrepTemplate(pt_id).update(load_template_to_dataframe(fp))
                remove(fp)

                # Any recorded warnings replace the success message; they
                # are discarded if an exception is raised instead
                if caught:
                    msg_level = 'warning'
                    msg = '; '.join(str(w.message) for w in caught)
        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError,
                QiitaDBError) as err:
            # Show the error to the user so they can fix the template
            msg_level = "danger"
            msg = convert_text_html(
                html_error_message % ('updating the prep template:',
                                      basename(fp), str(err)))

        callback((msg, msg_level, 'prep_template_tab', pt_id, None))
Beispiel #8
0
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Packs the sample metadata, the prep metadata and every
    'preprocessed_fasta' file into a .tgz archive and uploads it with curl.

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id

    Returns
    -------
    bool
        True if the upload output equals the expected HTML page, False
        otherwise. The VAMPS status is set to 'success' or 'failure' to match.

    Raises
    ------
    ValueError
        If the current VAMPS status is 'submitting' or 'success'
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    # Must sit outside the ``if`` above: nested under the ``raise`` it could
    # never run and the status would never become 'submitting'.
    # NOTE(review): if packaging or the upload raises, the status stays
    # 'submitting' and the guard above blocks a retry.
    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')
    try:
        # adding sample/prep
        samp_fp = join(targz_folder, 'sample_metadata.txt')
        sample_template.to_file(samp_fp)
        targz.add(samp_fp, arcname='sample_metadata.txt')
        prep_fp = join(targz_folder, 'prep_metadata.txt')
        prep_template.to_file(prep_fp)
        targz.add(prep_fp, arcname='prep_metadata.txt')

        # adding preprocessed data
        for _, fp, fp_type in preprocessed_data.get_filepaths():
            if fp_type == 'preprocessed_fasta':
                targz.add(fp, arcname='preprocessed_fasta.fna')
    finally:
        # Close the archive even if adding one of its members fails
        targz.close()

    # submitting
    # The template needs one %s per value: with only three placeholders for
    # these four values the % operator raises TypeError.
    # NOTE(review): the values are interpolated unescaped into a command
    # string - confirm they always come from trusted configuration.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
    def test_get_sample_names_by_run_prefix(self):
        """Map run prefixes to sample names and reject repeated prefixes

        With three distinct run prefixes the helper returns a dict from
        prefix to sample name; when two samples share a prefix it raises
        ValueError.
        """
        def build_template(run_prefixes):
            # Three samples that differ only in their run_prefix value
            sample_ids = ('SKB8.640193', 'SKD8.640184', 'SKB7.640196')
            rows = {}
            for sample_id, run_prefix in zip(sample_ids, run_prefixes):
                rows[sample_id] = {
                    'run_prefix': run_prefix, 'primer': 'A',
                    'barcode': 'A', 'center_name': 'ANL',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'A',
                    'experiment_design_description': 'A'}
            frame = pd.DataFrame.from_dict(rows, orient='index')
            created = PrepTemplate.create(frame, Study(1), '16S')
            # Register the files of the new template for removal
            self.files_to_remove.extend(
                path for _, path in created.get_filepaths())
            return created

        unique_prefixes = build_template(("s1", "s2", "s3"))
        observed = _get_sample_names_by_run_prefix(unique_prefixes)
        expected = {'s1': '1.SKB8.640193', 's2': '1.SKD8.640184',
                    's3': '1.SKB7.640196'}
        self.assertEqual(observed, expected)

        # Two samples sharing "s1" should raise an error
        repeated_prefix = build_template(("s1", "s1", "s3"))
        with self.assertRaises(ValueError):
            _get_sample_names_by_run_prefix(repeated_prefix)
Beispiel #10
0
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Builds a .tgz archive holding the sample metadata, the prep metadata and
    every 'preprocessed_fasta' file, then uploads it with curl.

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id

    Returns
    -------
    bool
        True if the upload output equals the expected HTML page, False
        otherwise. The VAMPS status is set to 'success' or 'failure' to match.

    Raises
    ------
    ValueError
        If the current VAMPS status is 'submitting' or 'success'
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    # Kept at function level on purpose: placed inside the ``if`` it would
    # follow the ``raise`` and never execute.
    # NOTE(review): a failure further down leaves the status at 'submitting',
    # which the check above then treats as non-resubmittable.
    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(
        targz_folder,
        '%d_%d_%d.tgz' % (study.id, prep_template.id, preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')
    try:
        # adding sample/prep
        samp_fp = join(targz_folder, 'sample_metadata.txt')
        sample_template.to_file(samp_fp)
        targz.add(samp_fp, arcname='sample_metadata.txt')
        prep_fp = join(targz_folder, 'prep_metadata.txt')
        prep_template.to_file(prep_fp)
        targz.add(prep_fp, arcname='prep_metadata.txt')

        # adding preprocessed data
        for _, fp, fp_type in preprocessed_data.get_filepaths():
            if fp_type == 'preprocessed_fasta':
                targz.add(fp, arcname='preprocessed_fasta.fna')
    finally:
        # Always release the archive handle, including on errors
        targz.close()

    # submitting
    # Four values need four placeholders; a literal in place of the password
    # placeholder makes the % operator raise TypeError.
    # NOTE(review): values are placed unescaped into a command string -
    # confirm they only ever come from trusted configuration.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" %
           (qiita_config.vamps_user, qiita_config.vamps_pass, targz_fp,
            qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
Beispiel #11
0
 def get_prep_templates(self, raw_data, callback):
     """Collect the existing prep templates of each raw data object

     ``callback`` receives one dict keyed by raw data id; each value is the
     list of PrepTemplate objects for that raw data, in sorted id order.
     """
     by_raw_data = {}
     for raw in raw_data:
         existing = []
         for pt_id in sorted(raw.prep_templates):
             # Skip ids that no longer have a prep template, otherwise
             # PrepTemplate(pt_id) would fail: #554
             if PrepTemplate.exists(pt_id):
                 existing.append(PrepTemplate(pt_id))
         by_raw_data[raw.id] = existing
     callback(by_raw_data)
Beispiel #12
0
    def remove_add_prep_template(self, fp_rpt, raw_data_id, study,
                                 data_type_id, investigation_type, callback):
        """Create a prep template from a file, then delete that file

        The template at ``fp_rpt`` is loaded into a dataframe and passed to
        ``PrepTemplate.create`` together with the raw data, the study, the
        integer data type id and the investigation type. ``callback`` is
        called with no arguments afterwards.
        """
        template_frame = load_template_to_dataframe(fp_rpt)
        raw_data = RawData(raw_data_id)
        data_type = int(data_type_id)

        PrepTemplate.create(template_frame, raw_data, study, data_type,
                            investigation_type=investigation_type)
        # The source file is only removed once creation has succeeded
        remove(fp_rpt)

        callback()
Beispiel #13
0
    def setUp(self):
        """Build the metadata frame, raw data objects and expected ids

        Sets ``metadata``, ``test_raw_data``, ``new_raw_data``,
        ``_clean_up_files``, ``tester`` and ``exp_sample_ids`` on the test
        instance.
        """
        def sample_row(str_value):
            # All samples share every field except 'str_column'
            return {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status_id': 1,
                'data_type_id': 2,
                'str_column': str_value,
            }

        rows = {
            'SKB8.640193': sample_row('Value for sample 1'),
            'SKD8.640184': sample_row('Value for sample 2'),
            'SKB7.640196': sample_row('Value for sample 3'),
        }
        self.metadata = pd.DataFrame.from_dict(rows, orient='index')
        self.test_raw_data = RawData(1)

        # Two temporary files; the descriptors are closed straight away
        # because the files are reopened by path below
        temp_paths = []
        for suffix in ('_seqs.fastq', '_barcodes.fastq'):
            handle, path = mkstemp(suffix=suffix)
            close(handle)
            temp_paths.append(path)
        seqs_fp, barcodes_fp = temp_paths
        filepaths = [(seqs_fp, 1), (barcodes_fp, 2)]

        # Each file holds a single newline
        for path in temp_paths:
            with open(path, "w") as out:
                out.write("\n")

        self.new_raw_data = RawData.create(2, filepaths, [Study(1)])

        # Paths recorded for clean-up: "3_" + the temp file's base name,
        # under the 'raw_data' folder of the db files base dir
        db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self._clean_up_files = [
            join(db_test_raw_dir, "3_%s" % basename(path))
            for path in temp_paths]

        self.tester = PrepTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
            'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
            'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
            'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
            'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
            'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
            'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192',
        }
Beispiel #14
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Replace a study's sample template with the one stored at fp_rsp

        Every existing prep template of each raw data id returned by calling
        ``raw_data`` is deleted, then the study's sample template (if there
        is one), before the new sample template is created and ``fp_rsp``
        is removed.
        """
        # ``raw_data`` is called here, so it must be a callable yielding ids
        for raw_data_id in raw_data():
            for prep_id in RawData(raw_data_id).prep_templates:
                if not PrepTemplate.exists(prep_id):
                    continue
                PrepTemplate.delete(prep_id)

        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        new_template = load_template_to_dataframe(fp_rsp)
        SampleTemplate.create(new_template, Study(study_id))
        remove(fp_rsp)
Beispiel #15
0
    def update_investigation_type(self, study, user, callback):
        """Set a new investigation type on a prep template

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            Called once with the 5-item tuple
            ``(msg, msg_level, top_tab, sub_tab, prep_tab)``
        """
        ppd_id = int(self.get_argument('ppd_id'))
        prep_id = self.get_argument('prep_id')

        # The three form fields are optional and default to None
        chosen = self.get_argument('edit-investigation-type', None)
        user_defined = self.get_argument(
            'edit-user-defined-investigation-type', None)
        brand_new = self.get_argument('edit-new-investigation-type', None)

        pt = PrepTemplate(prep_id)
        investigation_type = self._process_investigation_type(
            chosen, user_defined, brand_new)

        try:
            pt.investigation_type = investigation_type
        except QiitaDBColumnError as err:
            msg_level = "danger"
            msg = convert_text_html(
                html_error_message % (", invalid investigation type: ",
                                      investigation_type, str(err)))
        else:
            msg_level = "success"
            msg = "investigation type successfully updated"

        # ppd_id == 0 reports the prep template tab; any other value
        # reports the preprocessed data tab
        if ppd_id == 0:
            destination = ("prep_template_tab", prep_id, None)
        else:
            destination = ("preprocessed_data_tab", ppd_id, None)

        callback((msg, msg_level) + destination)
Beispiel #16
0
    def update_investigation_type(self, study, user, callback):
        """Change the investigation type stored on a prep template

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            Called once with the 5-item tuple
            ``(msg, msg_level, top_tab, sub_tab, prep_tab)``
        """
        ppd_id = int(self.get_argument('ppd_id'))
        prep_id = self.get_argument('prep_id')

        # Optional form fields; each one is None when absent
        selected = self.get_argument('edit-investigation-type', None)
        user_defined = self.get_argument(
            'edit-user-defined-investigation-type', None)
        newly_added = self.get_argument('edit-new-investigation-type', None)

        pt = PrepTemplate(prep_id)
        # Used as the sub tab when the raw data tab is reported
        rd_id = pt.raw_data

        investigation_type = self._process_investigation_type(
            selected, user_defined, newly_added)

        try:
            pt.investigation_type = investigation_type
        except QiitaDBColumnError as err:
            msg_level = "danger"
            msg = html_error_message % (", invalid investigation type: ",
                                        investigation_type, str(err))
        else:
            msg_level = "success"
            msg = "investigation type successfully updated"

        # ppd_id == 0 reports the raw data tab with the prep template as
        # nested tab; any other value reports the preprocessed data tab
        if ppd_id == 0:
            destination = ("raw_data_tab", rd_id, prep_id)
        else:
            destination = ("preprocessed_data_tab", ppd_id, None)

        callback((msg, msg_level) + destination)
Beispiel #17
0
    def remove_add_study_template(self, raw_data, study_id, fp_rsp):
        """Swap the study's sample template for the one in ``fp_rsp``

        Deletes the existing prep templates of every raw data id produced by
        calling ``raw_data``, deletes the current sample template when one
        exists, creates the new sample template from ``fp_rsp`` and finally
        removes that file.
        """
        # ``raw_data`` is invoked, so callers must pass a callable
        for rd_id in raw_data():
            existing_ids = [pt_id for pt_id in RawData(rd_id).prep_templates]
            for pt_id in existing_ids:
                if PrepTemplate.exists(pt_id):
                    PrepTemplate.delete(pt_id)

        has_sample_template = SampleTemplate.exists(study_id)
        if has_sample_template:
            SampleTemplate.delete(study_id)

        frame = load_template_to_dataframe(fp_rsp)
        SampleTemplate.create(frame, Study(study_id))
        remove(fp_rsp)
Beispiel #18
0
def preprocessor(study_id, prep_template_id, param_id, param_constructor):
    """Dispatch for preprocessor work

    Parameters
    ----------
    study_id : int
        The id used to instantiate the Study to preprocess
    prep_template_id : int
        The id used to instantiate the PrepTemplate to preprocess
    param_id : int
        The id passed to `param_constructor`
    param_constructor : callable
        Called with `param_id` to build the preprocessing parameters

    Returns
    -------
    object or None
        Whatever the StudyPreprocessor call returns, or None if it raised.
        In that case the prep template's `preprocessing_status` is set to
        "failed: <error message>"
    """
    study = Study(study_id)
    prep_template = PrepTemplate(prep_template_id)
    params = param_constructor(param_id)

    sp = StudyPreprocessor()
    try:
        preprocess_out = sp(study, prep_template, params)
    except Exception as e:
        # format_exception_only takes the exception type and the exception
        # value, not the exception instance plus the exc_info() tuple
        error_msg = ''.join(format_exception_only(type(e), e))
        prep_template.preprocessing_status = "failed: %s" % error_msg
        preprocess_out = None

    return preprocess_out
 def remove_add_prep_template(self, fp_rpt, study, data_type_id,
                              investigation_type):
     """Create a prep template from `fp_rpt` and remove that file

     Parameters
     ----------
     fp_rpt : str
         Path to the file holding the prep template
     study : Study
         The study passed to `PrepTemplate.create`
     data_type_id : object
         Converted with `_to_int` before being passed to
         `PrepTemplate.create`
     investigation_type : str
         Forwarded to `PrepTemplate.create` as `investigation_type`

     Returns
     -------
     The id of the newly created prep template
     """
     template_df = load_template_to_dataframe(fp_rpt)
     data_type = _to_int(data_type_id)
     new_prep = PrepTemplate.create(template_df, study, data_type,
                                    investigation_type=investigation_type)
     new_id = new_prep.id
     remove(fp_rpt)
     return new_id
    def test_create(self):
        """PrepTemplate.create adds the new template to the database"""
        new_template = PrepTemplate.create(self.metadata, self.new_raw_data)
        sample_ids = ['SKB8.640193', 'SKD8.640184', 'SKB7.640196']

        # The new object gets the expected id
        self.assertEqual(new_template.id, 3)

        # One row per sample has been added to common_prep_info. Columns:
        # raw_data_id, sample_id, center_name, center_project_name,
        # ebi_submission_accession, ebi_study_accession, emp_status_id,
        # data_type_id
        common_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.common_prep_info WHERE raw_data_id=3")
        exp_common_rows = [
            [3, s_id, 'ANL', 'Test Project', None, None, 1, 2]
            for s_id in sample_ids]
        self.assertEqual(sorted(common_rows), sorted(exp_common_rows))

        # The extra column has been registered in raw_data_prep_columns
        # (raw_data_id, column_name, column_type)
        column_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.raw_data_prep_columns WHERE raw_data_id=3")
        self.assertEqual(column_rows, [[3, "str_column", "varchar"]])

        # The table for the new template has been created ...
        self.assertTrue(exists_table("prep_3", self.conn_handler))

        # ... and it holds one (sample_id, str_column) row per sample
        prep_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.prep_3")
        exp_prep_rows = [
            ['SKB8.640193', "Value for sample 1"],
            ['SKD8.640184', "Value for sample 2"],
            ['SKB7.640196', "Value for sample 3"],
        ]
        self.assertEqual(sorted(prep_rows), sorted(exp_prep_rows))
    def test_insert_preprocessed_data(self):
        """_insert_preprocessed_data copies the files and adds the DB row"""
        study = Study(1)
        params = PreprocessedIlluminaParams(1)
        prep_template = PrepTemplate(1)

        # Directory that plays the role of the preprocessing output
        prep_out_dir = mkdtemp()
        self.dirs_to_remove.append(prep_out_dir)
        db_ppd_dir = join(self.db_dir, "preprocessed_data")

        db_files = []
        for suffix in ('seqs.fna', 'seqs.fastq', 'seqs.demux',
                       'split_library_log.txt'):
            out_fp = join(prep_out_dir, suffix)
            with open(out_fp, 'w') as out_f:
                out_f.write("\n")
            self.files_to_remove.append(out_fp)
            # Path where the file is expected once it has been inserted
            db_files.append(join(db_ppd_dir, "3_%s" % suffix))
        self.files_to_remove.extend(db_files)

        _insert_preprocessed_data(study, params, prep_template, prep_out_dir)

        # Every file must be present in the DB directory
        for db_fp in db_files:
            self.assertTrue(exists(db_fp))

        # The new preprocessed data row must exist
        row = self.conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.preprocessed_data WHERE "
            "preprocessed_data_id=%s)", (3, ))
        self.assertTrue(row[0])
    def test_get_preprocess_fastq_cmd(self):
        """_get_preprocess_fastq_cmd builds the split_libraries_fastq call"""
        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            RawData(1), PrepTemplate(1), PreprocessedIlluminaParams(1))

        raw_dir = join(self.db_dir, 'raw_data')
        seqs_fp = join(raw_dir, '1_s_G1_L001_sequences.fastq.gz')
        bc_fp = join(raw_dir, '1_s_G1_L001_sequences_barcodes.fastq.gz')

        exp_head = ("split_libraries_fastq.py --store_demultiplexed_fastq -i "
                    "{} -b {} "
                    "-m ".format(seqs_fp, bc_fp))
        exp_tail = ("-o {0} --barcode_type golay_12 --max_bad_run_length 3 "
                    "--max_barcode_errors 1.5 "
                    "--min_per_read_length_fraction 0.75 "
                    "--phred_quality_threshold 3 "
                    "--sequence_max_n 0".format(obs_output_dir))

        # The path of the mapping file cannot be known beforehand, so the
        # command is compared in two pieces: everything up to the mapping
        # file path and everything that follows it
        head_len = len(exp_head)
        obs_head = obs_cmd[:head_len]
        after_head = obs_cmd[head_len:]
        # Drop the mapping file path, i.e. the first space-separated token
        obs_tail = after_head.split(" ", 1)[1]

        self.assertEqual(obs_head, exp_head)
        self.assertEqual(obs_tail, exp_tail)
Beispiel #23
0
    def test_create(self):
        """Creates a new PrepTemplate"""
        pt = PrepTemplate.create(self.metadata, self.new_raw_data)
        # The returned object has the correct id
        self.assertEqual(pt.id, 3)

        # The relevant rows to common_prep_info have been added.
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.common_prep_info WHERE raw_data_id=3")
        # Six values per row; presumably raw_data_id, sample_id, center_name,
        # center_project_name, emp_status_id, data_type_id
        exp = [[3, 'SKB8.640193', 'ANL', 'Test Project', 1, 2],
               [3, 'SKD8.640184', 'ANL', 'Test Project', 1, 2],
               [3, 'SKB7.640196', 'ANL', 'Test Project', 1, 2]]
        self.assertEqual(sorted(obs), sorted(exp))

        # The relevant rows have been added to the raw_data_prep_columns
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.raw_data_prep_columns WHERE raw_data_id=3")
        # raw_data_id, column_name, column_type
        exp = [[3, 'str_column', 'varchar'],
               [3, 'ebi_submission_accession', 'varchar']]
        # The query has no ORDER BY, so the row order is not guaranteed:
        # compare sorted lists, as the other assertions of this test do
        self.assertEqual(sorted(obs), sorted(exp))

        # The new table exists
        self.assertTrue(exists_table("prep_3", self.conn_handler))

        # The new table hosts the correct values
        obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.prep_3")
        # sample_id, str_column, ebi_submission_accession (presumably in
        # this order, given the expected values below)
        exp = [['SKB8.640193', "Value for sample 1", None],
               ['SKD8.640184', "Value for sample 2", None],
               ['SKB7.640196', "Value for sample 3", None]]
        self.assertEqual(sorted(obs), sorted(exp))
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
        """_get_preprocess_fastq_cmd raises ValueError for this raw data"""
        md_template = pd.DataFrame.from_dict(
            {'SKB8.640193': {'run_prefix': "sample1_failure", 'primer': 'A',
                             'barcode': 'A', 'center_name': 'ANL',
                             'platform': 'ILLUMINA',
                             'library_construction_protocol': 'A',
                             'experiment_design_description': 'A'}},
            orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # Raw files attached to the raw data; the command builder is
        # expected to reject them (see the assertRaises below)
        raw_fps = []
        for fname in ('sample1_failure.fastq',
                      'sample1_failure.barcodes.fastq.gz'):
            raw_fp = self.path_builder(fname)
            with open(raw_fp, 'w') as raw_f:
                raw_f.write('\n')
            self.files_to_remove.append(raw_fp)
            raw_fps.append(raw_fp)

        forward_type_id = convert_to_id('raw_forward_seqs', 'filepath_type')
        barcode_type_id = convert_to_id('raw_barcodes', 'filepath_type')
        fps = [(raw_fps[0], forward_type_id), (raw_fps[1], barcode_type_id)]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)

        all_params = list(PreprocessedIlluminaParams.iter())
        matching = [p for p in all_params
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]

        with self.assertRaises(ValueError):
            _get_preprocess_fastq_cmd(raw_data, prep_template, params)
    def test_get_qiime_minimal_mapping_multiple(self):
        """One minimal mapping file is generated per run_prefix value"""
        # Local import: only needed to clean up the temporary directory
        from shutil import rmtree

        # We need to create a prep template in which we have different run
        # prefix values, so we can test this case
        metadata_dict = {
            'SKB8.640193': {'center_name': 'ANL',
                            'center_project_name': 'Test Project',
                            'ebi_submission_accession': None,
                            'EMP_status': 'EMP',
                            'str_column': 'Value for sample 1',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'GTCCGCAAGTTA',
                            'run_prefix': "s_G1_L001_sequences",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAA',
                            'experiment_design_description': 'BBB'},
            'SKD8.640184': {'center_name': 'ANL',
                            'center_project_name': 'Test Project',
                            'ebi_submission_accession': None,
                            'EMP_status': 'EMP',
                            'str_column': 'Value for sample 2',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'CGTAGAGCTCTC',
                            'run_prefix': "s_G1_L001_sequences",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAA',
                            'experiment_design_description': 'BBB'},
            'SKB7.640196': {'center_name': 'ANL',
                            'center_project_name': 'Test Project',
                            'ebi_submission_accession': None,
                            'EMP_status': 'EMP',
                            'str_column': 'Value for sample 3',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'CCTCTGAGAGCT',
                            'run_prefix': "s_G1_L002_sequences",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAA',
                            'experiment_design_description': 'BBB'}
            }
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')
        for _, fp in prep_template.get_filepaths():
            self.files_to_remove.append(fp)

        out_dir = mkdtemp()
        # Make sure the temporary directory (and the mapping files written
        # in it) does not outlive the test, even if an assertion fails
        self.addCleanup(rmtree, out_dir, ignore_errors=True)

        obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
        exp_fps = sorted([join(out_dir, 's_G1_L001_sequences_MMF.txt'),
                          join(out_dir, 's_G1_L002_sequences_MMF.txt')])

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        for fp in exp_fps:
            self.assertTrue(exists(fp))
        # Check the contents of the file
        for fp, contents in zip(exp_fps, [EXP_PREP_1, EXP_PREP_2]):
            with open(fp, "U") as f:
                self.assertEqual(f.read(), contents)
    def test_get_preprocess_fasta_cmd_sff(self):
        """_get_preprocess_fasta_cmd chains the commands for SFF raw data"""
        params = Preprocessed454Params(1)
        obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
            RawData(3), PrepTemplate(1), params)

        raw_dir = join(self.db_dir, 'raw_data')
        base_names = ("preprocess_test1", "preprocess_test2")

        # One process_sff.py call per SFF file
        exp_sff_cmds = [
            ' '.join(["process_sff.py",
                      "-i %s" % join(raw_dir, sff_name),
                      "-o %s" % obs_output_dir])
            for sff_name in ('preprocess_test1.sff', 'preprocess_test2.sff')]

        fasta_files = ','.join(
            [join(obs_output_dir, "preprocess_test1.fna"),
             join(obs_output_dir, "preprocess_test2.fna")])
        qual_files = ','.join(
            [join(obs_output_dir, "preprocess_test1.qual"),
             join(obs_output_dir, "preprocess_test2.qual")])

        exp_split_head = ' '.join(["split_libraries.py", "-f %s" % fasta_files])
        exp_split_tail = ' '.join(["-q %s" % qual_files,
                                   "-d",
                                   "-o %s" % obs_output_dir,
                                   params.to_str()])
        exp_convert = ' '.join(["convert_fastaqual_fastq.py",
                                "-f %s/seqs.fna" % obs_output_dir,
                                "-q %s/seqs_filtered.qual" % obs_output_dir,
                                "-o %s" % obs_output_dir,
                                "-F"])

        obs_cmds = obs_cmd.split('; ')

        # The path of the mapping file cannot be known beforehand, so the
        # split_libraries.py command is compared in two pieces: what comes
        # before ' -m ' and what follows the mapping file path
        obs_split_head, after_flag = obs_cmds[2].split(' -m ', 1)
        obs_split_tail = after_flag.split(' ', 1)[1]

        self.assertEqual(obs_cmds[0], exp_sff_cmds[0])
        self.assertEqual(obs_cmds[1], exp_sff_cmds[1])
        self.assertEqual(obs_split_head, exp_split_head)
        self.assertEqual(obs_split_tail, exp_split_tail)
        self.assertEqual(obs_cmds[3], exp_convert)
Beispiel #27
0
 def remove_add_prep_template(self, fp_rpt, raw_data_id, study,
                              data_type_id, investigation_type):
     """Create a prep template from `fp_rpt` and remove that file

     Parameters
     ----------
     fp_rpt : str
         Path to the file holding the prep template
     raw_data_id : int
         The id used to instantiate the RawData passed to
         `PrepTemplate.create`
     study : Study
         The study passed to `PrepTemplate.create`
     data_type_id : object
         Converted with `_to_int` before being passed to
         `PrepTemplate.create`
     investigation_type : str
         Forwarded to `PrepTemplate.create` as `investigation_type`

     Returns
     -------
     The id of the newly created prep template
     """
     template_df = load_template_to_dataframe(fp_rpt)
     raw_data = RawData(raw_data_id)
     data_type = _to_int(data_type_id)
     new_prep = PrepTemplate.create(template_df, raw_data, study, data_type,
                                    investigation_type=investigation_type)
     new_id = new_prep.id
     remove(fp_rpt)
     return new_id
 def test_to_file(self):
     """to_file writes the template metadata to the given path"""
     handle, out_fp = mkstemp()
     close(handle)
     PrepTemplate.create(self.metadata, self.new_raw_data).to_file(out_fp)
     self._clean_up_files.append(out_fp)
     with open(out_fp, 'U') as template_f:
         contents = template_f.read()
     # The file contents must match the expected tab delimited template
     self.assertEqual(contents, EXP_PREP_TEMPLATE)
Beispiel #29
0
 def test_to_file(self):
     """to_file dumps the new template as a tab delimited file"""
     tmp_fd, tmp_fp = mkstemp()
     # Only the path is needed: to_file writes to it by name
     close(tmp_fd)
     template = PrepTemplate.create(self.metadata, self.new_raw_data)
     template.to_file(tmp_fp)
     self._clean_up_files.append(tmp_fp)
     with open(tmp_fp, 'U') as written:
         written_text = written.read()
     self.assertEqual(written_text, EXP_PREP_TEMPLATE)
 def get_prep_templates(self, raw_data, callback):
     """Collect the prep templates of each raw data and hand them over

     Parameters
     ----------
     raw_data : iterable of RawData
         The raw data objects; their `id` and `prep_templates` are read
     callback : function
         Called once with a dict of {raw data id: list of PrepTemplate},
         each list following the sorted order of the prep template ids
     """
     templates_by_raw_data = {}
     for raw in raw_data:
         existing = []
         for prep_id in sorted(raw.prep_templates):
             # Skip the ids that the raw data still lists but whose prep
             # template does not exist, so PrepTemplate(prep_id) doesn't
             # fail: #554
             if PrepTemplate.exists(prep_id):
                 existing.append(PrepTemplate(prep_id))
         templates_by_raw_data[raw.id] = existing
     callback(templates_by_raw_data)
    def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type,
                                  is_mapping_file):
        """Replace the study's templates with the ones defined in `fp_rsp`

        Every existing prep template reachable through `raw_data` and the
        current sample template (if there is one) are deleted. The new
        templates are then created from `fp_rsp` and that file is removed.

        Parameters
        ----------
        raw_data : callable
            Called without arguments; returns the ids used to instantiate
            the RawData objects whose prep templates are deleted
        study_id : int
            The study id, as accepted by `Study` and `SampleTemplate`
        fp_rsp : str
            Path to the uploaded template or QIIME mapping file
        data_type : str
            The data type; converted with `int` when a mapping file is used
        is_mapping_file : bool
            Whether `fp_rsp` is a QIIME mapping file

        Raises
        ------
        ValueError
            If a mapping file is given and `data_type` is the empty string
        """
        # Validate before anything gets deleted
        if is_mapping_file:
            if data_type == "":
                raise ValueError("Please, choose a data type if uploading a "
                                 "QIIME mapping file")

        for raw_data_id in raw_data():
            for prep_id in RawData(raw_data_id).prep_templates:
                if not PrepTemplate.exists(prep_id):
                    continue
                PrepTemplate.delete(prep_id)

        if SampleTemplate.exists(study_id):
            SampleTemplate.delete(study_id)

        if not is_mapping_file:
            new_template = load_template_to_dataframe(fp_rsp)
            SampleTemplate.create(new_template, Study(study_id))
        else:
            create_templates_from_qiime_mapping_file(
                fp_rsp, Study(study_id), int(data_type))

        remove(fp_rsp)
    def test_get_qiime_minimal_mapping_single_no_run_prefix(self):
        """Without a run_prefix column a single prep_1_MMF.txt is written"""
        # Local import: only needed to clean up the temporary directory
        from shutil import rmtree

        # Remove the run_prefix column from prep template 1
        conn_handler = SQLConnectionHandler()
        sql = """DELETE FROM qiita.prep_columns
                 WHERE prep_template_id = 1 AND column_name = 'run_prefix';
                 ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix"""
        conn_handler.execute(sql)
        prep_template = PrepTemplate(1)
        prep_template.generate_files()

        out_dir = mkdtemp()
        # Make sure the temporary directory (and the mapping file written
        # in it) does not outlive the test, even if an assertion fails
        self.addCleanup(rmtree, out_dir, ignore_errors=True)

        obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
        exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        self.assertTrue(exists(exp_fps[0]))
        # Check the contents of the file
        with open(exp_fps[0], "U") as f:
            self.assertEqual(f.read(), EXP_PREP)
Beispiel #33
0
def _template_generator(study, full_access):
    """Generates tuples of prep template information

    Parameters
    ----------
    study : Study
        The study to get all the prep templates
    full_access : boolean
        A boolean that indicates if the user has full access to the study

    Returns
    -------
    Generator of tuples of (int, str, PrepTemplate, (str, str, str))
        Each tuple contains the prep template id, the prep template data_type
        the PrepTemplate object and a tuple with 3 strings for the style of
        the prep template status icons
    """

    for pt_id in sorted(study.prep_templates()):
        pt = PrepTemplate(pt_id)
        if full_access or pt.status == 'public':
            yield (pt.id, pt.data_type(), pt, STATUS_STYLER[pt.status])
Beispiel #34
0
 def setUp(self):
     """Create the PrepSample under test and its expected categories"""
     prep = PrepTemplate(1)
     sample_id = 'SKB8.640193'

     self.prep_template = prep
     self.sample_id = sample_id
     self.tester = PrepSample(sample_id, prep)
     # Categories that the tests expect for the sample
     self.exp_categories = {
         'center_name',
         'center_project_name',
         'emp_status_id',
         'data_type_id',
         'barcodesequence',
         'library_construction_protocol',
         'linkerprimersequence',
         'target_subfragment',
         'target_gene',
         'run_center',
         'run_prefix',
         'run_date',
         'experiment_center',
         'experiment_design_description',
         'experiment_title',
         'platform',
         'samp_size',
         'sequencing_meth',
         'illumina_technology',
         'sample_center',
         'pcr_primers',
         'study_center',
     }
    def test_get_qiime_minimal_mapping_single(self):
        """A single run_prefix value yields a single minimal mapping file"""
        # Local import: only needed to clean up the temporary directory
        from shutil import rmtree

        prep_template = PrepTemplate(1)
        out_dir = mkdtemp()
        # Make sure the temporary directory (and the mapping file written
        # in it) does not outlive the test, even if an assertion fails
        self.addCleanup(rmtree, out_dir, ignore_errors=True)

        obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
        exp_fps = [join(out_dir, 's_G1_L001_sequences_MMF.txt')]

        # Check that the returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Check that the file exists
        self.assertTrue(exists(exp_fps[0]))
        # Check the contents of the file
        with open(exp_fps[0], "U") as f:
            self.assertEqual(f.read(), EXP_PREP)
    def setUp(self):
        """Create the metadata, raw data and expected values for the tests"""
        def sample_metadata(str_value):
            # The samples share every value except `str_column`
            return {'center_name': 'ANL',
                    'center_project_name': 'Test Project',
                    'ebi_submission_accession': None,
                    'EMP_status_id': 1,
                    'data_type_id': 2,
                    'str_column': str_value}

        self.metadata = pd.DataFrame.from_dict(
            {'SKB8.640193': sample_metadata('Value for sample 1'),
             'SKD8.640184': sample_metadata('Value for sample 2'),
             'SKB7.640196': sample_metadata('Value for sample 3')},
            orient='index')
        self.test_raw_data = RawData(1)

        # Two one-line temporary files: the sequences and the barcodes
        tmp_fps = []
        for suffix in ('_seqs.fastq', '_barcodes.fastq'):
            handle, tmp_fp = mkstemp(suffix=suffix)
            close(handle)
            tmp_fps.append(tmp_fp)
        for tmp_fp in tmp_fps:
            with open(tmp_fp, "w") as tmp_f:
                tmp_f.write("\n")
        seqs_fp, barcodes_fp = tmp_fps

        self.new_raw_data = RawData.create(
            2, [(seqs_fp, 1), (barcodes_fp, 2)], [Study(1)])

        # Paths that have to be cleaned up after the test; the "3_" prefix
        # presumably matches the id of the raw data just created
        db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')
        self._clean_up_files = [
            join(db_test_raw_dir, "3_%s" % basename(seqs_fp)),
            join(db_test_raw_dir, "3_%s" % basename(barcodes_fp)),
        ]

        self.tester = PrepTemplate(1)
        self.exp_sample_ids = {
            'SKB1.640202', 'SKB2.640194', 'SKB3.640195',
            'SKB4.640189', 'SKB5.640181', 'SKB6.640176',
            'SKB7.640196', 'SKB8.640193', 'SKB9.640200',
            'SKD1.640179', 'SKD2.640178', 'SKD3.640198',
            'SKD4.640185', 'SKD5.640186', 'SKD6.640190',
            'SKD7.640191', 'SKD8.640184', 'SKD9.640182',
            'SKM1.640183', 'SKM2.640199', 'SKM3.640197',
            'SKM4.640180', 'SKM5.640177', 'SKM6.640187',
            'SKM7.640188', 'SKM8.640201', 'SKM9.640192',
        }
    def render(self, study_id, preprocessed_data):
        """Render the information tab of a preprocessed data

        Parameters
        ----------
        study_id : int
            Not used by this method
        preprocessed_data : PreprocessedData
            The preprocessed data to show

        Returns
        -------
        The result of rendering
        "study_description_templates/preprocessed_data_info_tab.html"
        """
        user = User(self.current_user)
        ppd_id = preprocessed_data.id
        ebi_status = preprocessed_data.submitted_to_insdc_status()
        ebi_study_accession = preprocessed_data.ebi_study_accession
        ebi_submission_accession = preprocessed_data.ebi_submission_accession
        vamps_status = preprocessed_data.submitted_to_vamps_status()
        filepaths = preprocessed_data.get_filepaths()
        is_local_request = self._is_local()
        # Only admins get the EBI button
        show_ebi_btn = user.level == "admin"

        # ENA terms for the investigation type drop down menu; "Other" is
        # left out of the sorted terms so it shows at the bottom
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ontology.terms)
                     if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # "New Type" lets users add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # Values used when the prep template does not exist
        prep_template_id = None
        raw_data_id = None
        inv_type = "None Selected"
        if PrepTemplate.exists(preprocessed_data.prep_template):
            prep_template_id = preprocessed_data.prep_template
            prep = PrepTemplate(prep_template_id)
            raw_data_id = prep.raw_data
            inv_type = prep.investigation_type or "None Selected"

        return self.render_string(
            "study_description_templates/preprocessed_data_info_tab.html",
            ppd_id=ppd_id,
            show_ebi_btn=show_ebi_btn,
            ebi_status=ebi_status,
            ebi_study_accession=ebi_study_accession,
            ebi_submission_accession=ebi_submission_accession,
            filepaths=filepaths,
            is_local_request=is_local_request,
            prep_template_id=prep_template_id,
            raw_data_id=raw_data_id,
            inv_type=inv_type,
            ena_terms=ena_terms,
            vamps_status=vamps_status,
            user_defined_terms=user_defined_terms)
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
        """_get_preprocess_fastq_cmd handles per sample FASTQ raw data"""
        def sample_metadata(run_prefix):
            # The two samples only differ in their run prefix
            return {'run_prefix': run_prefix, 'primer': 'A',
                    'barcode': 'A', 'center_name': 'ANL',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'A',
                    'experiment_design_description': 'A'}

        md_template = pd.DataFrame.from_dict(
            {'SKB8.640193': sample_metadata("sample1"),
             'SKD8.640184': sample_metadata("sample2")},
            orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # One raw file per sample
        sample_fps = []
        for fname in ('sample1.fastq', 'sample2.fastq.gz'):
            sample_fp = self.path_builder(fname)
            with open(sample_fp, 'w') as sample_f:
                sample_f.write('\n')
            self.files_to_remove.append(sample_fp)
            sample_fps.append(sample_fp)

        filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
        fps = [(sample_fps[0], filepath_id), (sample_fps[1], filepath_id)]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)

        all_params = list(PreprocessedIlluminaParams.iter())
        matching = [p for p in all_params
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]

        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        # The input files are listed following the sorted raw filepaths
        sorted_fps = sorted(raw_data.get_filepaths())
        raw_fps = ','.join([fp for _, fp, _ in sorted_fps])
        exp_cmd = (
            "split_libraries_fastq.py --store_demultiplexed_fastq -i "
            "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
            "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
            "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
            "--sequence_max_n 0").format(raw_fps, obs_output_dir)
        self.assertEqual(obs_cmd, exp_cmd)
Beispiel #39
0
    def test_load_data_from_cmd(self):
        """load_raw_data_cmd adds the raw data and its filepaths"""
        filepaths = [self.forward_fp, self.reverse_fp, self.barcodes_fp]
        filepath_types = ['raw_forward_seqs', 'raw_reverse_seqs',
                          'raw_barcodes']
        filetype = 'FASTQ'

        metadata = pd.DataFrame.from_dict(
            {'SKB8.640193': {'center_name': 'ANL',
                             'primer': 'GTGCCAGCMGCCGCGGTAA',
                             'barcode': 'GTCCGCAAGTTA',
                             'run_prefix': "s_G1_L001_sequences",
                             'platform': 'ILLUMINA',
                             'instrument_model': 'Illumina MiSeq',
                             'library_construction_protocol': 'AAAA',
                             'experiment_design_description': 'BBBB'}},
            orient='index')
        prep_templates = [PrepTemplate.create(metadata, Study(1), "16S").id]

        # Row counts before loading the raw data
        raw_count_before = get_count('qiita.raw_data')
        fp_count_before = get_count('qiita.filepath')
        raw_fp_count_before = get_count('qiita.raw_filepath')

        raw_data_id = load_raw_data_cmd(filepaths, filepath_types, filetype,
                                        prep_templates).id

        # Register the copies expected in the raw data directory so they
        # get removed
        for src_fp in (self.forward_fp, self.reverse_fp, self.barcodes_fp):
            db_name = '%d_%s' % (raw_data_id, basename(src_fp))
            self.files_to_remove.append(join(self.db_test_raw_dir, db_name))

        # One new raw data and three new filepaths
        self.assertTrue(check_count('qiita.raw_data', raw_count_before + 1))
        self.assertTrue(check_count('qiita.filepath', fp_count_before + 3))
        self.assertTrue(check_count('qiita.raw_filepath',
                                    raw_fp_count_before + 3))

        # A ValueError is raised when there is not a filepath_type for each
        # and every filepath
        too_few_types = filepath_types[:-1]
        with self.assertRaises(ValueError):
            load_raw_data_cmd(filepaths, too_few_types, filetype,
                              prep_templates)
Beispiel #40
0
    def test_metadata_map_from_sample_and_prep_templates(self):
        """The merged metadata map has the expected samples and columns"""
        obs = metadata_map_from_sample_and_prep_templates(
            SampleTemplate(1), PrepTemplate(1))

        # Only the index and the columns are checked: testing the specific
        # values would add ~1000 lines to this file

        exp_samples = [
            u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
            u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
            u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
            u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
            u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
            u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
            u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'
        ]
        exp_columns = [
            u'tot_org_carb', u'common_name', u'has_extracted_data',
            u'water_content_soil', u'env_feature', u'assigned_from_geo',
            u'altitude', u'env_biome', u'texture',
            u'has_physical_specimen', u'description_duplicate',
            u'physical_location', u'latitude', u'ph', u'host_taxid',
            u'elevation', u'description', u'collection_timestamp',
            u'taxon_id', u'samp_salinity', u'host_subject_id',
            u'sample_type', u'season_environment',
            u'required_sample_info_status_id', u'temp', u'country',
            u'longitude', u'tot_nitro', u'depth', u'anonymized_name',
            u'experiment_center', u'center_name', u'run_center',
            u'run_prefix', u'data_type_id', u'target_gene',
            u'sequencing_meth', u'run_date', u'pcr_primers',
            u'linkerprimersequence', u'platform',
            u'library_construction_protocol',
            u'experiment_design_description', u'study_center',
            u'center_project_name', u'sample_center', u'samp_size',
            u'illumina_technology', u'experiment_title', u'emp_status_id',
            u'target_subfragment', u'barcodesequence'
        ]

        # 27 samples
        self.assertEqual(len(obs), 27)

        index_matches = obs.index == pd.Index(exp_samples, dtype='object')
        self.assertTrue(all(index_matches))

        columns_match = obs.columns == pd.Index(exp_columns, dtype='object')
        self.assertTrue(all(columns_match))
Beispiel #41
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Render the EBI submission page of a preprocessed data

        Parameters
        ----------
        preprocessed_data_id : int or str
            The preprocessed data id; converted with `int`
        msg : str
            The message to show; replaced when the preprocessed data does
            not have exactly one demultiplexed file
        msg_level : str
            The level of the message; always replaced by 'success' or
            'danger' depending on the number of demultiplexed files

        Raises
        ------
        HTTPError
            404 if the preprocessed data does not exist, 403 if the current
            user is not an admin
        """
        ppd_id = int(preprocessed_data_id)
        try:
            ppd = PreprocessedData(ppd_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, "PreprocessedData %d does not exist!" %
                                 ppd_id)

        # Only reached when the preprocessed data exists
        current = self.current_user
        if current.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                                 "get/EBISubmitHandler: %s!" % current.id)

        prep_template = PrepTemplate(ppd.prep_template)
        sample_template = SampleTemplate(ppd.study)
        study = Study(ppd.study)

        n_samples = len(prep_template)
        n_headers = len(sample_template.metadata_headers())
        stats = [('Number of samples', n_samples),
                 ('Number of metadata headers', n_headers)]

        demux_fps = []
        for _, path, ftype in ppd.get_filepaths():
            if ftype == 'preprocessed_demux':
                demux_fps.append(path)

        n_demux = len(demux_fps)
        if n_demux == 1:
            # Exactly one demux file: report its number of sequences
            stats.append(('Number of sequences', demux_stats(demux_fps[0]).n))
            msg_level = 'success'
        elif n_demux > 1:
            msg = ("Study appears to have multiple demultiplexed files!")
            msg_level = 'danger'
        else:
            msg = ("Study does not appear to have demultiplexed "
                   "sequences associated")
            msg_level = 'danger'

        self.render('ebi_submission.html',
                    study_title=study.title, stats=stats, message=msg,
                    study_id=study.id, level=msg_level,
                    preprocessed_data_id=ppd_id,
                    investigation_type=prep_template.investigation_type)
Beispiel #42
0
    def setUp(self):
        """Set up DB objects, parameters and two temporary sequence files"""
        self.prep_template = PrepTemplate(1)
        self.study = Study(1)
        self.params_table = "preprocessed_sequence_illumina_params"
        self.params_id = 1

        # mkstemp hands back an open descriptor; only the path is kept
        handle, self.fna_fp = mkstemp(suffix='_seqs.fna')
        close(handle)
        handle, self.qual_fp = mkstemp(suffix='_seqs.qual')
        close(handle)
        self.filepaths = [(self.fna_fp, 4), (self.qual_fp, 5)]

        mountpoint = get_mountpoint('preprocessed_data')[0]
        _, self.db_test_ppd_dir = mountpoint

        self.ebi_submission_accession = "EBI123456-A"
        self.ebi_study_accession = "EBI123456-B"

        # Both temporary files get a single newline as content
        for tmp_fp in (self.fna_fp, self.qual_fp):
            with open(tmp_fp, "w") as fh:
                fh.write("\n")

        self._clean_up_files = []
Beispiel #43
0
    def setUp(self):
        """Prepare the fixtures shared by the tests of this class"""
        # Objects and parameters
        self.prep_template = PrepTemplate(1)
        self.study = Study(1)
        self.params_table = "preprocessed_sequence_illumina_params"
        self.params_id = 1

        # Temporary fna/qual files; the descriptors are closed right away
        fna_fd, self.fna_fp = mkstemp(suffix='_seqs.fna')
        close(fna_fd)
        qual_fd, self.qual_fp = mkstemp(suffix='_seqs.qual')
        close(qual_fd)
        self.filepaths = [(self.fna_fp, 4), (self.qual_fp, 5)]

        first_mountpoint = get_mountpoint('preprocessed_data')[0]
        _, self.db_test_ppd_dir = first_mountpoint

        self.ebi_submission_accession = "EBI123456-A"
        self.ebi_study_accession = "EBI123456-B"

        with open(self.fna_fp, "w") as fna_fh:
            fna_fh.write("\n")
        with open(self.qual_fp, "w") as qual_fh:
            qual_fh.write("\n")

        self._clean_up_files = []
Beispiel #44
0
    def test_dataframe_from_template(self):
        """dataframe_from_template yields the expected index and columns"""
        template = PrepTemplate(1)
        obs = dataframe_from_template(template)

        # 27 samples
        self.assertEqual(len(obs), 27)

        # assertEqual is required here: assertTrue(a, b) only checks that
        # `a` is truthy and uses `b` as the failure message, so the expected
        # set would never be compared against the observed one.
        self.assertEqual(
            set(obs.index), {
                u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
                u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
                u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
                u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
                u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
                u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
                u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'
            })

        self.assertEqual(
            set(obs.columns), {
                u'tot_org_carb', u'common_name', u'has_extracted_data',
                u'required_sample_info_status', u'water_content_soil',
                u'env_feature', u'assigned_from_geo', u'altitude',
                u'env_biome', u'texture', u'has_physical_specimen',
                u'description_duplicate', u'physical_location', u'latitude',
                u'ph', u'host_taxid', u'elevation', u'description',
                u'collection_timestamp', u'taxon_id', u'samp_salinity',
                u'host_subject_id', u'sample_type', u'season_environment',
                u'temp', u'country', u'longitude', u'tot_nitro', u'depth',
                u'anonymized_name', u'target_subfragment', u'sample_center',
                u'samp_size', u'run_date', u'experiment_center',
                u'pcr_primers', u'center_name', u'barcodesequence',
                u'run_center', u'run_prefix', u'library_construction_protocol',
                u'emp_status', u'linkerprimersequence',
                u'experiment_design_description', u'target_gene',
                u'center_project_name', u'illumina_technology',
                u'sequencing_meth', u'platform', u'experiment_title',
                u'study_center'
            })
Beispiel #45
0
    def test_move_filepaths_to_upload_folder(self):
        """Moved filepaths leave their old location for the uploads folder"""
        # The fixtures are built here because no other test uses these files
        handle, seqs_fp = mkstemp(suffix="_seqs.fastq")
        close(handle)
        st = Study(1)

        sample_metadata = {
            "center_name": "ANL",
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": "GTCCGCAAGTTA",
            "run_prefix": "s_G1_L001_sequences",
            "platform": "ILLUMINA",
            "library_construction_protocol": "AAAA",
            "experiment_design_description": "BBBB",
        }
        metadata = pd.DataFrame.from_dict(
            {"SKB8.640193": sample_metadata}, orient="index")
        pt = PrepTemplate.create(metadata, Study(1), "16S")

        rd = RawData.create(2, [pt], [(seqs_fp, 1)])
        filepaths = rd.get_filepaths()

        # Drop the raw_filepath rows so move_filepaths_to_upload_folder can
        # be called directly
        delete_sql = "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s"
        for filepath_id, _, _ in filepaths:
            self.conn_handler.execute(delete_sql, (filepath_id,))

        move_filepaths_to_upload_folder(st.id, filepaths)

        # Every file must be gone from its old path and present in the
        # uploads folder of the study
        uploads_dir = join(get_mountpoint("uploads")[0][1], str(st.id))
        for _, old_fp, _ in filepaths:
            self.assertFalse(exists(old_fp))
            # The new name is the old basename without its first
            # underscore-delimited prefix
            moved_name = basename(old_fp).split("_", 1)[1]
            moved_fp = join(uploads_dir, moved_name)
            self.assertTrue(exists(moved_fp))

            self.files_to_remove.append(moved_fp)
    def post(self):
        """Submit a preprocessing job and render the waiting page

        Reads the 'study_id', 'prep_template_id' and
        'preprocessing_parameters_id' request arguments.

        Raises
        ------
        ValueError
            If the filetype of the raw data is not FASTQ, FASTA or SFF
        """
        study_id = int(self.get_argument('study_id'))
        prep_template_id = int(self.get_argument('prep_template_id'))
        raw_data = RawData(PrepTemplate(prep_template_id).raw_data)
        param_id = int(self.get_argument('preprocessing_parameters_id'))

        # The parameters class depends on the filetype of the raw data
        filetype = raw_data.filetype
        if filetype in ('FASTA', 'SFF'):
            param_constructor = Preprocessed454Params
        elif filetype == 'FASTQ':
            param_constructor = PreprocessedIlluminaParams
        else:
            raise ValueError('Unknown filetype')

        job_id = submit(self.current_user.id, preprocessor, study_id,
                        prep_template_id, param_id, param_constructor)

        redirect_url = (
            '/study/description/%d?top_tab='
            'raw_data_tab&sub_tab=%s&prep_tab=%s'
            % (study_id, raw_data.id, prep_template_id))

        self.render('compute_wait.html',
                    job_id=job_id,
                    title='Preprocessing',
                    completion_redirect=redirect_url)
Beispiel #47
0
    def template_to_dict(self):
        """Check the sample count and column names from template_to_dict"""
        # NOTE(review): the name has no `test` prefix, so the default unittest
        # loader would not collect this method - confirm this is intended
        template = PrepTemplate(1)
        obs = template_to_dict(template)

        # The specific values are not tested: doing so would add roughly
        # 1000 lines to this file

        # The 24 column names every sample must have
        expected_columns = [
            'experiment_center', 'center_name', 'run_center', 'run_prefix',
            'data_type_id', 'target_gene', 'sequencing_meth', 'run_date',
            'pcr_primers', 'ebi_submission_accession',
            'linkerprimersequence', 'platform',
            'library_construction_protocol',
            'experiment_design_description', 'study_center',
            'center_project_name', 'sample_center', 'samp_size',
            'illumina_technology', 'experiment_title', 'emp_status_id',
            'target_subfragment', 'barcodesequence', 'ebi_study_accession'
        ]

        # Twenty seven samples
        self.assertEqual(len(obs.keys()), 27)

        for _, sample_values in obs.items():
            # Exactly the expected column names must be present
            self.assertItemsEqual(sample_values.keys(), expected_columns)
Beispiel #48
0
    def render(self, study_id, preprocessed_data):
        """Build the preprocessed data info tab for `preprocessed_data`

        Parameters
        ----------
        study_id
            Not read by this method; the study id given to the template is
            taken from `preprocessed_data`
        preprocessed_data
            The object whose information is displayed (presumably a
            PreprocessedData)

        Returns
        -------
        The value returned by ``self.render_string``
        """
        requester = self.current_user
        ppd = preprocessed_data

        # Values read straight from the preprocessed data / request
        template_args = {
            'ppd_id': ppd.id,
            'ebi_status': ppd.submitted_to_insdc_status(),
            'ebi_study_accession': ppd.ebi_study_accession,
            'ebi_submission_accession': ppd.ebi_submission_accession,
            'vamps_status': ppd.submitted_to_vamps_status(),
            'filepaths': ppd.get_filepaths(),
            'is_local_request': self._is_local(),
            'show_ebi_btn': requester.level == "admin",
            'processing_status': ppd.processing_status,
            'processed_data': ppd.processed_data,
        }

        # All the ENA terms for the investigation type, with "Other" forced
        # to the bottom of the drop down menu
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # "New Type" lets users add a user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # Defaults used when the prep template does not exist
        prep_template_id = None
        raw_data_id = None
        inv_type = "None Selected"
        if PrepTemplate.exists(ppd.prep_template):
            prep_template_id = ppd.prep_template
            prep_template = PrepTemplate(prep_template_id)
            raw_data_id = prep_template.raw_data
            inv_type = prep_template.investigation_type or "None Selected"

        process_params = {}
        for param in ProcessedSortmernaParams.iter():
            process_params[param.id] = (generate_param_str(param), param.name)

        # Only an id is needed for the default parameters, so the interface
        # can be initialized
        default_params = 1

        template_args.update(
            prep_template_id=prep_template_id,
            raw_data_id=raw_data_id,
            inv_type=inv_type,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            process_params=process_params,
            default_params=default_params,
            study_id=ppd.study)

        return self.render_string(
            "study_description_templates/preprocessed_data_info_tab.html",
            **template_args)
Beispiel #49
0
from os.path import join
from time import strftime

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

# For every study with a sample template, and for every prep template, write
# a timestamped file under the 'templates' mountpoint and register its path
# on the template object.
conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

study_rows = conn_handler.execute_fetchall(
    "SELECT study_id FROM qiita.study")
for row in study_rows:
    study_id = row[0]
    if not SampleTemplate.exists(study_id):
        # Nothing to generate for studies without a sample template
        continue
    st = SampleTemplate(study_id)
    fname = '%d_%s.txt' % (study_id, strftime("%Y%m%d-%H%M%S"))
    fp = join(fp_base, fname)
    st.to_file(fp)
    st.add_filepath(fp)

prep_rows = conn_handler.execute_fetchall(
    "SELECT prep_template_id FROM qiita.prep_template")
for row in prep_rows:
    prep_template_id = row[0]
    pt = PrepTemplate(prep_template_id)
    study_id = pt.study_id

    fname = '%d_prep_%d_%s.txt' % (pt.study_id, prep_template_id,
                                   strftime("%Y%m%d-%H%M%S"))
    fp = join(fp_base, fname)
    pt.to_file(fp)
    pt.add_filepath(fp)
Beispiel #50
0
# 23 Nov, 2014
# This patch creates all the qiime mapping files for the existing
# prep templates

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.metadata_template import PrepTemplate

# Call create_qiime_mapping_file once per filepath of every prep template
# found in the database.
conn_handler = SQLConnectionHandler()

_id, fp_base = get_mountpoint('templates')[0]

prep_rows = conn_handler.execute_fetchall(
    "SELECT prep_template_id FROM qiita.prep_template")
for row in prep_rows:
    prep_template_id = row[0]
    pt = PrepTemplate(prep_template_id)
    study_id = pt.study_id

    # Each entry is a pair; only its second element is used
    for _, template_fp in pt.get_filepaths():
        pt.create_qiime_mapping_file(template_fp)
Beispiel #51
0
 def test_create_duplicate(self):
     """PrepTemplate.create raises QiitaDBDuplicateError on a duplicate"""
     # Callable form of assertRaises: the call is made by the assertion
     self.assertRaises(QiitaDBDuplicateError, PrepTemplate.create,
                       self.metadata, self.test_raw_data)
Beispiel #52
0
 def test_create_duplicate_header(self):
     """PrepTemplate.create rejects metadata with duplicate headers"""
     # Add an extra 'STR_COLUMN' column holding three empty strings
     # (presumably it clashes with an existing header - verify in fixture)
     empty_column = pd.Series([''] * 3, index=self.metadata.index)
     self.metadata['STR_COLUMN'] = empty_column
     self.assertRaises(QiitaDBDuplicateHeaderError, PrepTemplate.create,
                       self.metadata, self.new_raw_data)
Beispiel #53
0
 def test_exists_true(self):
     """PrepTemplate.exists is truthy for the already existing template"""
     found = PrepTemplate.exists(self.test_raw_data)
     self.assertTrue(found)
    def test_get_qiime_minimal_mapping_multiple(self):
        """One minimal mapping file is written per distinct run prefix"""
        def sample_metadata(str_value, barcode, run_prefix):
            # Only these three values differ between the samples below
            return {
                'center_name': 'ANL',
                'center_project_name': 'Test Project',
                'ebi_submission_accession': None,
                'EMP_status': 'EMP',
                'str_column': str_value,
                'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
                'barcodesequence': barcode,
                'run_prefix': run_prefix,
                'platform': 'ILLUMINA',
                'library_construction_protocol': 'AAA',
                'experiment_design_description': 'BBB'
            }

        # A prep template with different run prefix values is needed to
        # exercise this case
        metadata_dict = {
            'SKB8.640193': sample_metadata(
                'Value for sample 1', 'GTCCGCAAGTTA', "s_G1_L001_sequences"),
            'SKD8.640184': sample_metadata(
                'Value for sample 2', 'CGTAGAGCTCTC', "s_G1_L001_sequences"),
            'SKB7.640196': sample_metadata(
                'Value for sample 3', 'CCTCTGAGAGCT', "s_G1_L002_sequences")
        }
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, RawData(2), Study(1),
                                            '16S')

        out_dir = mkdtemp()

        obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
        expected_names = ['s_G1_L001_sequences_MMF.txt',
                          's_G1_L002_sequences_MMF.txt']
        exp_fps = sorted([join(out_dir, name) for name in expected_names])

        # The returned list is as expected
        self.assertEqual(obs_fps, exp_fps)
        # Every expected file exists
        for mapping_fp in exp_fps:
            self.assertTrue(exists(mapping_fp))
        # Every file has the expected contents
        expected_contents = [EXP_PREP_1, EXP_PREP_2]
        for mapping_fp, contents in zip(exp_fps, expected_contents):
            with open(mapping_fp, "U") as fh:
                self.assertEqual(fh.read(), contents)
Beispiel #55
0
 def test_exists_false(self):
     """PrepTemplate.exists is falsy when the PrepTemplate does not exist"""
     found = PrepTemplate.exists(self.new_raw_data)
     self.assertFalse(found)
Beispiel #56
0
 def test_metadata_stats_from_sample_and_prep_templates(self):
     """Every observed stats entry matches its SUMMARY_STATS counterpart"""
     sample_template = SampleTemplate(1)
     prep_template = PrepTemplate(1)
     obs = metadata_stats_from_sample_and_prep_templates(
         sample_template, prep_template)
     for header in obs:
         self.assertEqual(obs[header], SUMMARY_STATS[header])
Beispiel #57
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple
        The study accession and the submission accession; both are None
        when `send` is False

    Raises
    ------
    ValueError
        If the current submission status is 'submitting' or 'success', or
        if the investigation type of the prep template is neither an
        official nor a user-defined term of the ENA ontology
    IOError
        If the output folder already exists
    ComputeError
        If either accession comes back as None after sending
    """
    # NOTE(review): the ``%s`` in the docstring is presumably filled in
    # elsewhere in the module - confirm before editing it
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        # NOTE(review): an exception raised between here and the final
        # status update leaves the status as 'submitting', which the check
        # above then treats as not resubmittable - confirm this is intended
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # The offending value is interpolated so the message names it,
        # instead of carrying a literal, unformatted '%s'
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology"
                         % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [
            path for _, path, ftype in preprocessed_data.get_filepaths()
            if ftype == 'preprocessed_demux'
        ][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    # The XML files go to a sibling folder of the FASTQ directory
    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info['study_abstract'],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession