def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    observed = PreprocessedData(1).get_filepaths()
    expected = [
        (join(self.db_test_ppd_dir, '1_seqs.fna'), 4),
        (join(self.db_test_ppd_dir, '1_seqs.qual'), 5),
    ]
    self.assertEqual(observed, expected)
def post(self, preprocessed_data_id):
    """Queue a VAMPS submission job for the given preprocessed data.

    Only admins may submit. Refuses to resubmit data whose state is
    already 'submitting'/'success', or that does not have exactly one
    demultiplexed file attached.
    """
    # make sure user is admin and can therefore actually submit to VAMPS
    if self.current_user.level != 'admin':
        raise HTTPError(403, "User %s cannot submit to VAMPS!" %
                        self.current_user.id)

    msg = ''
    msg_level = 'success'
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    state = preprocessed_data.submitted_to_vamps_status()
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if state in ('submitting', 'success'):
        msg = "Cannot resubmit! Current state is: %s" % state
        msg_level = 'danger'
    elif demux_length != 1:
        # BUG FIX: the original did '"..." % state' on a format string with
        # no placeholder, which raises TypeError at runtime.
        msg = "The study doesn't have demux files or have too many"
        msg_level = 'danger'
    else:
        channel = self.current_user.id
        job_id = submit(channel, submit_to_VAMPS, int(preprocessed_data_id))
        # BUG FIX: return after rendering the wait page; falling through to
        # display_template() would attempt a second render on the same
        # (already finished) request.
        return self.render(
            'compute_wait.html', job_id=job_id, title='VAMPS Submission',
            completion_redirect='/compute_complete/%s' % job_id)

    return self.display_template(preprocessed_data_id, msg, msg_level)
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id

    Returns
    -------
    bool
        True if VAMPS acknowledged the upload, False otherwise

    Raises
    ------
    ValueError
        If the data has already been submitted (or is being submitted)
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')
    targz.close()

    # submitting
    # BUG FIX: the original format string had 3 '%s' placeholders but 4
    # arguments, raising TypeError before curl was ever invoked; the
    # password now has its own placeholder.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS replies with this exact page on a successful upload
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")
    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id

    Returns
    -------
    bool
        True if VAMPS acknowledged the upload, False otherwise

    Raises
    ------
    ValueError
        If the data has already been submitted (or is being submitted)
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(
        targz_folder,
        '%d_%d_%d.tgz' % (study.id, prep_template.id, preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')
    targz.close()

    # submitting
    # BUG FIX: the original format string had 3 '%s' placeholders but 4
    # arguments, raising TypeError before curl was ever invoked; the
    # password now has its own placeholder.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS replies with this exact page on a successful upload
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")
    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    observed = PreprocessedData(1).get_filepaths()
    expected = [
        (join(self.db_test_ppd_dir, '1_seqs.fna'),
         "preprocessed_sequences"),
        (join(self.db_test_ppd_dir, '1_seqs.qual'),
         "preprocessed_sequences_qual"),
    ]
    self.assertEqual(observed, expected)
def test_update_preprocessed_data_from_cmd(self):
    expected_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
    expected_fps = expected_ppd.get_filepaths()
    # The original paths mush exist, but they're not included in the test
    # so create them here
    for _, filepath, _ in expected_fps:
        with open(filepath, 'w') as fh:
            fh.write("")
    next_fp_id = get_count('qiita.filepath') + 1
    log_fp = join(self.db_ppd_dir,
                  "%s_split_library_log.txt" % expected_ppd.id)
    expected_fps.append((next_fp_id, log_fp, 'log'))

    ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

    # Check that the modified preprocessed data is the correct one
    self.assertEqual(ppd.id, expected_ppd.id)

    # Check that the filepaths returned are correct
    # We need to sort the list returned from the db because the ordering
    # on that list is based on db modification time, rather than id
    observed_fps = sorted(ppd.get_filepaths())
    self.assertEqual(observed_fps, sorted(expected_fps))

    # Check that the checksums have been updated
    sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

    def _checksum(fp_id):
        # Fetch the stored checksum for a single filepath id
        return self.conn_handler.execute_fetchone(sql, (fp_id,))[0]

    # Checksum of the fasta file
    self.assertEqual(_checksum(observed_fps[0][0]), '3532748626')

    # Checksum of the fastq file
    self.assertEqual(_checksum(observed_fps[1][0]), '2958832064')

    # Checksum of the demux file
    # The checksum is generated dynamically, so the checksum changes
    # We are going to test that the checksum is not the one that was
    # before, which corresponds to an empty file
    demux_checksum = _checksum(observed_fps[2][0])
    self.assertTrue(isinstance(demux_checksum, str))
    self.assertNotEqual(demux_checksum, '852952723')
    self.assertTrue(len(demux_checksum) > 0)

    # Checksum of the log file
    self.assertEqual(_checksum(observed_fps[3][0]), '626839734')
def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    observed = PreprocessedData(1).get_filepaths()
    expected = [
        (5, join(self.db_test_ppd_dir, '1_seqs.fna'),
         "preprocessed_fasta"),
        (6, join(self.db_test_ppd_dir, '1_seqs.qual'),
         "preprocessed_fastq"),
        (7, join(self.db_test_ppd_dir, '1_seqs.demux'),
         "preprocessed_demux"),
    ]
    self.assertEqual(observed, expected)
def test_update_preprocessed_data_from_cmd(self):
    """update_preprocessed_data_from_cmd replaces files and checksums."""
    exp_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
    exp_fps = exp_ppd.get_filepaths()
    # The original paths mush exist, but they're not included in the test
    # so create them here
    for _, fp, _ in exp_fps:
        with open(fp, 'w') as f:
            f.write("")
    next_fp_id = get_count('qiita.filepath') + 1
    exp_fps.append((next_fp_id,
                    join(self.db_ppd_dir,
                         "%s_split_library_log.txt" % exp_ppd.id),
                    'log'))

    ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

    # Check that the modified preprocessed data is the correct one
    self.assertEqual(ppd.id, exp_ppd.id)

    # Check that the filepaths returned are correct
    # We need to sort the list returned from the db because the ordering
    # on that list is based on db modification time, rather than id
    obs_fps = sorted(ppd.get_filepaths())
    # CONSISTENCY FIX: sort the expected list too (as the sibling version
    # of this test does); comparing a sorted list against an unsorted one
    # makes the assertion order-dependent.
    self.assertEqual(obs_fps, sorted(exp_fps))

    # Check that the checksums have been updated
    sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

    # Checksum of the fasta file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[0][0], ))[0]
    self.assertEqual(obs_checksum, '3532748626')

    # Checksum of the fastq file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[1][0], ))[0]
    self.assertEqual(obs_checksum, '2958832064')

    # Checksum of the demux file
    # The checksum is generated dynamically, so the checksum changes
    # We are going to test that the checksum is not the one that was
    # before, which corresponds to an empty file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[2][0], ))[0]
    self.assertTrue(isinstance(obs_checksum, str))
    self.assertNotEqual(obs_checksum, '852952723')
    self.assertTrue(len(obs_checksum) > 0)

    # Checksum of the log file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[3][0], ))[0]
    self.assertEqual(obs_checksum, '626839734')
def _get_template_variables(self, preprocessed_data_id, callback):
    """Generates all the variables needed to render the template

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data identifier
    callback : function
        The callback function to call with the results once the
        processing is done

    Raises
    ------
    HTTPError
        If the preprocessed data does not have a log file
    """
    # Get the objects and check user privileges
    ppd = PreprocessedData(preprocessed_data_id)
    study = Study(ppd.study)
    check_access(self.current_user, study, raise_error=True)

    # Get the return address
    default_path = (
        '/study/description/%d?top_tab=preprocessed_data_tab&sub_tab=%s'
        % (study.id, preprocessed_data_id))
    back_button_path = self.get_argument('back_button_path', default_path)

    # Group every filepath attached to the preprocessed data by its type
    files = defaultdict(list)
    for _, fp, fpt in ppd.get_filepaths():
        files[fpt].append(fp)

    try:
        log_path = files['log'][0]
    except KeyError:
        raise HTTPError(500, "Log file not found in preprocessed data %s"
                        % preprocessed_data_id)

    with open(log_path, 'U') as f:
        contents = f.read()
    # Escape whitespace for HTML display
    contents = contents.replace('\n', '<br/>')
    contents = contents.replace('\t', ' ')

    title = 'Preprocessed Data: %d' % preprocessed_data_id
    callback((title, contents, back_button_path))
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = self.current_user
        if user.level != "admin":
            raise HTTPError(403, "No permissions of admin, "
                            "get/VAMPSSubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ("Number of samples", len(prep_template)),
        ("Number of metadata headers", len(sample_template.categories())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == "preprocessed_demux"]
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = "danger"
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = "danger"
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(("Number of sequences", demux_file_stats.n))
        msg_level = "success"

    self.render(
        "vamps_submission.html",
        study_title=study.title,
        stats=stats,
        message=msg,
        study_id=study.id,
        level=msg_level,
        preprocessed_data_id=preprocessed_data_id,
    )
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = User(self.current_user)
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                            "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ('Number of samples', len(prep_template)),
        ('Number of metadata headers',
         len(sample_template.metadata_headers())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    self.render('ebi_submission.html',
                user=self.current_user,
                study_title=study.title,
                stats=stats,
                message=msg,
                study_id=study.id,
                level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                            "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ('Number of samples', len(prep_template)),
        ('Number of metadata headers',
         len(sample_template.metadata_headers())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    self.render('ebi_submission.html',
                study_title=study.title,
                stats=stats,
                message=msg,
                study_id=study.id,
                level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def get(self, preprocessed_data_id):
    """Render the preprocessed-data log file as an HTML text page."""
    ppd_id = int(preprocessed_data_id)
    ppd = PreprocessedData(ppd_id)
    study = Study(ppd.study)
    check_access(User(self.current_user), study, raise_error=True)

    back_button_path = self.get_argument(
        'back_button_path', '/study/description/%d' % study.id)

    # Bucket the attached filepaths by filepath type
    files = defaultdict(list)
    for fpid, fp, fpt in ppd.get_filepaths():
        files[fpt].append(fp)

    with open(files['log'][0], 'U') as f:
        contents = f.read()
    # Escape whitespace for HTML display
    contents = contents.replace('\n', '<br/>')
    contents = contents.replace('\t', ' ')

    self.render('text_file.html',
                title='Preprocessed Data: %d' % ppd_id,
                contents=contents,
                user=self.current_user,
                back_button_path=back_button_path)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                            "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ('Number of samples', len(prep_template)),
        ('Number of metadata headers',
         len(sample_template.categories())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    # Check if the templates have all the required columns for EBI
    pt_missing_cols = prep_template.check_restrictions(
        [PREP_TEMPLATE_COLUMNS['EBI']])
    st_missing_cols = sample_template.check_restrictions(
        [SAMPLE_TEMPLATE_COLUMNS['EBI']])
    allow_submission = (len(pt_missing_cols) == 0 and
                        len(st_missing_cols) == 0)

    if allow_submission:
        ebi_disabled_msg = None
    else:
        # Build the explanation shown to the user when EBI submission is
        # blocked by missing template columns
        msg_list = ["Submission to EBI disabled due to missing columns:"]
        if len(pt_missing_cols) > 0:
            msg_list.append("Columns missing in prep template: %s"
                            % ', '.join(pt_missing_cols))
        if len(st_missing_cols) > 0:
            msg_list.append("Columns missing in sample template: %s"
                            % ', '.join(st_missing_cols))
        ebi_disabled_msg = "<br/>".join(msg_list)

    self.render('ebi_submission.html',
                study_title=study.title,
                stats=stats,
                message=msg,
                study_id=study.id,
                level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type,
                allow_submission=allow_submission,
                ebi_disabled_msg=ebi_disabled_msg)
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple of (str or None, str or None)
        The study accession and submission accession (None, None when
        ``send`` is False)

    Raises
    ------
    ValueError
        If the data is already submitting/submitted, or the investigation
        type is not recognized
    IOError
        If the submission output folder already exists

    Notes
    -----
    If fastq_dir_fp is passed, it must not contain any empty files, or
    gzipped empty files
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ("submitting", "success"):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status("submitting")

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id("ENA", "ontology"))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = "Other"
        new_investigation_type = current_type
    else:
        # This should never happen
        # BUG FIX: the original never applied "% current_type", so the
        # error message contained a literal '%s'.
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology" % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == "preprocessed_demux"][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()
        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                wrote_sequences = False
                with gzopen(sample_fp, "w") as fh:
                    for record in iterator:
                        fh.write(record)
                        wrote_sequences = True
                # EBI rejects empty (gzipped) files, so drop them
                if not wrote_sequences:
                    remove(sample_fp)

    output_dir = fastq_dir_fp + "_submission"

    samp_fp = join(fastq_dir_fp, "sample_metadata.txt")
    prep_fp = join(fastq_dir_fp, "prep_metadata.txt")

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp("study.xml")
    sample_fp = get_output_fp("sample.xml")
    experiment_fp = get_output_fp("experiment.xml")
    run_fp = get_output_fp("run.xml")
    submission_fp = get_output_fp("submission.xml")

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError("The output folder already exists: %s" % output_dir)

    with open(samp_fp, "U") as st, open(prep_fp, "U") as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info["study_abstract"], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids,
        )

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status("failed")
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status("success",
                                                  study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple of (str or None, str or None)
        The study accession and submission accession (None, None when
        ``send`` is False)

    Raises
    ------
    ValueError
        If the data is already submitting/submitted, or the investigation
        type is not recognized
    IOError
        If the submission output folder already exists
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # BUG FIX: the original never applied "% current_type", so the
        # error message contained a literal '%s'.
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology" % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux'][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()
        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info['study_abstract'], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success',
                                                  study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession