Example #1
0
    def post(self, preprocessed_data_id):
        """Handle a request to submit preprocessed data to VAMPS

        Parameters
        ----------
        preprocessed_data_id : str or int
            The preprocessed data identifier; it is converted with ``int``
            before being handed to the submission job

        Raises
        ------
        HTTPError
            403 if the current user's level is not 'admin'
        """
        # make sure user is admin and can therefore actually submit to VAMPS
        if self.current_user.level != 'admin':
            raise HTTPError(403, "User %s cannot submit to VAMPS!" %
                            self.current_user.id)
        msg = ''
        msg_level = 'success'
        preprocessed_data = PreprocessedData(preprocessed_data_id)
        state = preprocessed_data.submitted_to_vamps_status()

        # Exactly one demux file must be attached to the preprocessed data
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux']

        if state in ('submitting', 'success'):
            msg = "Cannot resubmit! Current state is: %s" % state
            msg_level = 'danger'
        elif len(demux) != 1:
            # This message has no placeholder: formatting it with ``% state``
            # raised TypeError instead of reporting the problem to the user
            msg = "The study doesn't have demux files or have too many"
            msg_level = 'danger'
        else:
            channel = self.current_user.id
            job_id = submit(channel, submit_to_VAMPS,
                            int(preprocessed_data_id))

            self.render('compute_wait.html',
                        job_id=job_id, title='VAMPS Submission',
                        completion_redirect='/compute_complete/%s' % job_id)
            return

        self.display_template(preprocessed_data_id, msg, msg_level)
Example #2
0
    def post(self, preprocessed_data_id):
        """Handle a request to submit preprocessed data to EBI

        Parameters
        ----------
        preprocessed_data_id : str or int
            The preprocessed data identifier; it is converted with ``int``
            before being handed to the submission job

        Raises
        ------
        HTTPError
            403 if the current user is not an admin or if the
            'submission_type' argument is neither 'ADD' nor 'MODIFY'
        """
        requester = self.current_user

        # Only admins are allowed to submit to EBI
        if User(requester).level != 'admin':
            raise HTTPError(403, "User %s cannot submit to EBI!" % requester)

        submission_type = self.get_argument('submission_type')
        if submission_type not in ('ADD', 'MODIFY'):
            raise HTTPError(403, "User: %s, %s is not a recognized submission "
                            "type" % (requester, submission_type))

        state = PreprocessedData(
            preprocessed_data_id).submitted_to_insdc_status()

        # Work out whether the current state forbids this submission
        if state == 'submitting':
            refusal = "Cannot resubmit! Current state is: %s" % state
        elif state == 'success' and submission_type == "ADD":
            refusal = ("Cannot resubmit! Current state is: %s, use MODIFY"
                       % state)
        else:
            refusal = None

        if refusal is not None:
            self.display_template(preprocessed_data_id, refusal, 'danger')
            return

        # Launch the submission job and show the waiting page
        job_id = submit(requester, submit_to_ebi, int(preprocessed_data_id),
                        submission_type)
        self.render('compute_wait.html', user=requester,
                    job_id=job_id, title='EBI Submission',
                    completion_redirect='/compute_complete/%s' % job_id)
Example #3
0
    def post(self, preprocessed_data_id):
        """Handle a request to submit preprocessed data to VAMPS

        Parameters
        ----------
        preprocessed_data_id : str or int
            The preprocessed data identifier; it is converted with ``int``
            before being handed to the submission job

        Raises
        ------
        HTTPError
            403 if the current user's level is not 'admin'
        """
        # make sure user is admin and can therefore actually submit to VAMPS
        if self.current_user.level != 'admin':
            raise HTTPError(403, "User %s cannot submit to VAMPS!" %
                            self.current_user.id)
        msg = ''
        msg_level = 'success'
        preprocessed_data = PreprocessedData(preprocessed_data_id)
        state = preprocessed_data.submitted_to_vamps_status()

        # Exactly one demux file must be attached to the preprocessed data
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux']

        if state in ('submitting', 'success'):
            msg = "Cannot resubmit! Current state is: %s" % state
            msg_level = 'danger'
        elif len(demux) != 1:
            # This message has no placeholder: formatting it with ``% state``
            # raised TypeError instead of reporting the problem to the user
            msg = "The study doesn't have demux files or have too many"
            msg_level = 'danger'
        else:
            channel = self.current_user.id
            job_id = submit(channel, submit_to_VAMPS,
                            int(preprocessed_data_id))

            self.render('compute_wait.html',
                        job_id=job_id, title='VAMPS Submission',
                        completion_redirect='/compute_complete/%s' % job_id)
            return

        self.display_template(preprocessed_data_id, msg, msg_level)
Example #4
0
 def test_get_filepaths(self):
     """get_filepaths returns the expected (path, type) pairs"""
     observed = PreprocessedData(1).get_filepaths()

     fna_fp = join(self.db_test_ppd_dir, '1_seqs.fna')
     qual_fp = join(self.db_test_ppd_dir, '1_seqs.qual')
     expected = [(fna_fp, 4), (qual_fp, 5)]

     self.assertEqual(observed, expected)
Example #5
0
    def post(self, preprocessed_data_id):
        """Handle a request to submit preprocessed data to EBI

        Parameters
        ----------
        preprocessed_data_id : str or int
            The preprocessed data identifier; it is converted with ``int``
            before being handed to the submission job

        Raises
        ------
        HTTPError
            403 if the current user is not an admin or if the
            'submission_type' argument is neither 'ADD' nor 'MODIFY'
        """
        user = self.current_user

        # Only admins are allowed to submit to EBI
        if user.level != 'admin':
            raise HTTPError(403, "User %s cannot submit to EBI!" % user.id)

        submission_type = self.get_argument('submission_type')
        if submission_type not in ('ADD', 'MODIFY'):
            raise HTTPError(403, "User: %s, %s is not a recognized submission "
                            "type" % (user.id, submission_type))

        state = PreprocessedData(
            preprocessed_data_id).submitted_to_insdc_status()

        # Work out whether the current state forbids this submission
        if state == 'submitting':
            refusal = "Cannot resubmit! Current state is: %s" % state
        elif state == 'success' and submission_type == "ADD":
            refusal = ("Cannot resubmit! Current state is: %s, use MODIFY"
                       % state)
        else:
            refusal = None

        if refusal is not None:
            self.display_template(preprocessed_data_id, refusal, 'danger')
            return

        # Launch the submission job and show the waiting page
        job_id = submit(user.id, submit_to_ebi, int(preprocessed_data_id),
                        submission_type)
        self.render('compute_wait.html',
                    job_id=job_id, title='EBI Submission',
                    completion_redirect='/compute_complete/%s' % job_id)
Example #6
0
    def delete_preprocessed_data(self, study, user, callback):
        """Delete the preprocessed data selected in the request

        The id is read from the 'preprocessed_data_id' request argument.

        Parameters
        ----------
        study : Study
            The current study object (not used by this method)
        user : User
            The current user object (not used by this method)
        callback : function
            Called with a single tuple
            ``(msg, msg_level, 'preprocessed_data_tab', ppd_id, None)``, where
            ``ppd_id`` is None when the deletion succeeded
        """
        ppd_id = int(self.get_argument('preprocessed_data_id'))
        tab = 'preprocessed_data_tab'

        try:
            PreprocessedData.delete(ppd_id)
        except Exception as e:
            # Any failure is reported to the user; the id is kept so the
            # caller can still refer to the preprocessed data
            outcome = ("Couldn't remove preprocessed data %d: %s" %
                       (ppd_id, str(e)),
                       "danger", tab, ppd_id, None)
        else:
            outcome = ("Preprocessed data %d has been deleted" % ppd_id,
                       "success", tab, None, None)

        callback(outcome)
Example #7
0
 def test_is_submitted_to_insdc(self):
     """is_submitted_to_insdc works correctly"""
     # Preprocessed data 1 is expected to be reported as submitted
     submitted = PreprocessedData(1)
     self.assertTrue(submitted.is_submitted_to_insdc())

     # Preprocessed data 2 is expected to be reported as not submitted
     not_submitted = PreprocessedData(2)
     self.assertFalse(not_submitted.is_submitted_to_insdc())
Example #8
0
 def test_submitted_to_insdc_status(self):
     """submitted_to_insdc_status works correctly"""
     # Preprocessed data 1 is expected to be in the 'submitting' state
     first = PreprocessedData(1)
     self.assertEqual(first.submitted_to_insdc_status(), 'submitting')

     # Preprocessed data 2 is expected to be in the 'not submitted' state
     second = PreprocessedData(2)
     self.assertEqual(second.submitted_to_insdc_status(), 'not submitted')
Example #9
0
 def test_get_filepaths(self):
     """get_filepaths returns the expected (path, type name) pairs"""
     observed = PreprocessedData(1).get_filepaths()

     fna_fp = join(self.db_test_ppd_dir, '1_seqs.fna')
     qual_fp = join(self.db_test_ppd_dir, '1_seqs.qual')
     expected = [(fna_fp, "preprocessed_sequences"),
                 (qual_fp, "preprocessed_sequences_qual")]

     self.assertEqual(observed, expected)
Example #10
0
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Builds a tgz with the sample metadata, the prep metadata and the
    preprocessed fasta file(s), uploads it with curl and records the outcome
    in the VAMPS status of the preprocessed data.

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id

    Returns
    -------
    bool
        True if the upload output matches the expected response page,
        False otherwise

    Raises
    ------
    ValueError
        If the current VAMPS status is 'submitting' or 'success'
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    # Mark the submission as in progress. This call used to sit inside the
    # ``if`` above, after the ``raise``, so it never ran.
    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(
        targz_folder,
        '%d_%d_%d.tgz' % (study.id, prep_template.id, preprocessed_data.id))

    # The context manager closes the archive even if adding a file fails
    with taropen(targz_fp, mode='w:gz') as targz:
        # adding sample/prep
        samp_fp = join(targz_folder, 'sample_metadata.txt')
        sample_template.to_file(samp_fp)
        targz.add(samp_fp, arcname='sample_metadata.txt')
        prep_fp = join(targz_folder, 'prep_metadata.txt')
        prep_template.to_file(prep_fp)
        targz.add(prep_fp, arcname='prep_metadata.txt')

        # adding preprocessed data
        for _, fp, fp_type in preprocessed_data.get_filepaths():
            if fp_type == 'preprocessed_fasta':
                targz.add(fp, arcname='preprocessed_fasta.fna')

    # submitting
    # The format string needs one placeholder per argument: with the password
    # placeholder missing, the interpolation raised TypeError.
    # NOTE(review): the values are interpolated into a shell command without
    # quoting/escaping; confirm they can never contain shell metacharacters.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" %
           (qiita_config.vamps_user, qiita_config.vamps_pass, targz_fp,
            qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    # Anything other than the expected page is treated as a failure
    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
Example #11
0
def _insert_preprocessed_data(study, params, prep_template, slq_out,
                              **kwargs):
    """Register the split libraries output as preprocessed data

    Parameters
    ----------
    study : Study
        The study to preprocess
    params : BaseParameters
        The parameters to use for preprocessing
    prep_template : PrepTemplate
        The prep template to use for the preprocessing
    slq_out : str
        Path to the split_libraries_fastq.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the preprocessed output directory does not contain all the expected
        files
    """
    from os.path import exists, join
    from qiita_db.data import PreprocessedData

    # Mandatory outputs:
    #   seqs.fna -> demultiplexed fasta file
    #   seqs.demux -> demultiplexed HDF5 file
    #   split_library_log.txt -> log file
    filepaths = [(join(slq_out, 'seqs.fna'), "preprocessed_fasta"),
                 (join(slq_out, 'seqs.demux'), "preprocessed_demux"),
                 (join(slq_out, 'split_library_log.txt'), "log")]

    if not all(exists(required_fp) for required_fp, _ in filepaths):
        raise ValueError("The output directory %s does not contain all the "
                         "expected files." % slq_out)

    # Optional output: seqs.fastq -> demultiplexed fastq file
    optional_fastq = join(slq_out, 'seqs.fastq')
    if exists(optional_fastq):
        filepaths.append((optional_fastq, "preprocessed_fastq"))

    PreprocessedData.create(study, params._table, params.id, filepaths,
                            prep_template)

    # Change the prep_template status to success
    prep_template.preprocessing_status = 'success'
Example #12
0
    def test_update_preprocessed_data_from_cmd(self):
        """update_preprocessed_data_from_cmd refreshes files and checksums"""
        exp_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
        exp_fps = exp_ppd.get_filepaths()

        # The original paths must exist, but they're not included in the test
        # so create them here as empty files
        for _, existing_fp, _ in exp_fps:
            with open(existing_fp, 'w'):
                pass

        # The log file is expected to be added as a new filepath
        log_fp = join(self.db_ppd_dir,
                      "%s_split_library_log.txt" % exp_ppd.id)
        exp_fps.append((get_count('qiita.filepath') + 1, log_fp, 'log'))

        ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

        # Check that the modified preprocessed data is the correct one
        self.assertEqual(ppd.id, exp_ppd.id)

        # Check that the filepaths returned are correct
        # We need to sort the list returned from the db because the ordering
        # on that list is based on db modification time, rather than id
        obs_fps = sorted(ppd.get_filepaths())
        self.assertEqual(obs_fps, sorted(exp_fps))

        # Check that the checksums have been updated
        sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

        def stored_checksum(position):
            """Fetch the checksum of the file at `position` in obs_fps"""
            filepath_id = obs_fps[position][0]
            return self.conn_handler.execute_fetchone(
                sql, (filepath_id,))[0]

        # Checksum of the fasta file
        self.assertEqual(stored_checksum(0), '3532748626')

        # Checksum of the fastq file
        self.assertEqual(stored_checksum(1), '2958832064')

        # Checksum of the demux file
        # The checksum is generated dynamically, so the checksum changes
        # We are going to test that the checksum is not the one that was
        # before, which corresponds to an empty file
        demux_checksum = stored_checksum(2)
        self.assertTrue(isinstance(demux_checksum, str))
        self.assertNotEqual(demux_checksum, '852952723')
        self.assertTrue(len(demux_checksum) > 0)

        # Checksum of the log file
        self.assertEqual(stored_checksum(3), '626839734')
Example #13
0
 def test_get_filepaths(self):
     """get_filepaths returns the expected (id, path, type) tuples"""
     observed = PreprocessedData(1).get_filepaths()

     ppd_dir = self.db_test_ppd_dir
     expected = [
         (5, join(ppd_dir, '1_seqs.fna'), "preprocessed_fasta"),
         (6, join(ppd_dir, '1_seqs.qual'), "preprocessed_fastq"),
         (7, join(ppd_dir, '1_seqs.demux'), "preprocessed_demux")]

     self.assertEqual(observed, expected)
Example #14
0
    def test_processing_status(self):
        """processing_status works correctly"""
        # Preprocessed data with id 1
        existing = PreprocessedData(1)
        self.assertEqual(existing.processing_status, 'not_processed')

        # Preprocessed data created by this test
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(created.processing_status, 'not_processed')
Example #15
0
 def test_get_filepaths(self):
     """get_filepaths returns the expected (id, path, type) tuples"""
     observed = PreprocessedData(1).get_filepaths()

     ppd_dir = self.db_test_ppd_dir
     expected = [
         (5, join(ppd_dir, '1_seqs.fna'), "preprocessed_fasta"),
         (6, join(ppd_dir, '1_seqs.qual'), "preprocessed_fastq"),
         (7, join(ppd_dir, '1_seqs.demux'), "preprocessed_demux")]

     self.assertEqual(observed, expected)
Example #16
0
    def test_update_preprocessed_data_from_cmd(self):
        """update_preprocessed_data_from_cmd refreshes files and checksums"""
        exp_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
        exp_fps = exp_ppd.get_filepaths()

        # The original paths must exist, but they're not included in the test
        # so create them here
        for _, fp, _ in exp_fps:
            with open(fp, 'w') as f:
                f.write("")

        # The log file is expected to be added as a new filepath
        next_fp_id = get_count('qiita.filepath') + 1
        exp_fps.append((next_fp_id,
                        join(self.db_ppd_dir,
                             "%s_split_library_log.txt" % exp_ppd.id), 'log'))

        ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

        # Check that the modified preprocessed data is the correct one
        self.assertEqual(ppd.id, exp_ppd.id)

        # Check that the filepaths returned are correct
        # We need to sort the list returned from the db because the ordering
        # on that list is based on db modification time, rather than id.
        # The expected list is sorted too so that the comparison does not
        # depend on the order in which it was built.
        obs_fps = sorted(ppd.get_filepaths())
        self.assertEqual(obs_fps, sorted(exp_fps))

        # Check that the checksums have been updated
        sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

        # Checksum of the fasta file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[0][0], ))[0]
        self.assertEqual(obs_checksum, '3532748626')

        # Checksum of the fastq file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[1][0], ))[0]
        self.assertEqual(obs_checksum, '2958832064')

        # Checksum of the demux file
        # The checksum is generated dynamically, so the checksum changes
        # We are going to test that the checksum is not the one that was
        # before, which corresponds to an empty file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[2][0], ))[0]
        self.assertTrue(isinstance(obs_checksum, str))
        self.assertNotEqual(obs_checksum, '852952723')
        self.assertTrue(len(obs_checksum) > 0)

        # Checksum of the log file
        obs_checksum = self.conn_handler.execute_fetchone(
            sql, (obs_fps[3][0], ))[0]
        self.assertEqual(obs_checksum, '626839734')
Example #17
0
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Builds a tgz with the sample metadata, the prep metadata and the
    preprocessed fasta file(s), uploads it with curl and records the outcome
    in the VAMPS status of the preprocessed data.

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id

    Returns
    -------
    bool
        True if the upload output matches the expected response page,
        False otherwise

    Raises
    ------
    ValueError
        If the current VAMPS status is 'submitting' or 'success'
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    # Mark the submission as in progress. This call used to sit inside the
    # ``if`` above, after the ``raise``, so it never ran.
    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))

    # The context manager closes the archive even if adding a file fails
    with taropen(targz_fp, mode='w:gz') as targz:
        # adding sample/prep
        samp_fp = join(targz_folder, 'sample_metadata.txt')
        sample_template.to_file(samp_fp)
        targz.add(samp_fp, arcname='sample_metadata.txt')
        prep_fp = join(targz_folder, 'prep_metadata.txt')
        prep_template.to_file(prep_fp)
        targz.add(prep_fp, arcname='prep_metadata.txt')

        # adding preprocessed data
        for _, fp, fp_type in preprocessed_data.get_filepaths():
            if fp_type == 'preprocessed_fasta':
                targz.add(fp, arcname='preprocessed_fasta.fna')

    # submitting
    # The format string needs one placeholder per argument: with the password
    # placeholder missing, the interpolation raised TypeError.
    # NOTE(review): the values are interpolated into a shell command without
    # quoting/escaping; confirm they can never contain shell metacharacters.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    # Anything other than the expected page is treated as a failure
    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
Example #18
0
 def test_create_error_data_type(self):
     """create raises an error for the "Metabolomics" data type"""
     params_table = "preprocessed_sequence_illumina_params"

     # Without a prep template
     with self.assertRaises(IncompetentQiitaDeveloperError):
         PreprocessedData.create(
             self.study, params_table, self.params_id, self.filepaths,
             data_type="Metabolomics")

     # With both a data type and a prep template
     with self.assertRaises(IncompetentQiitaDeveloperError):
         PreprocessedData.create(
             self.study, params_table, self.params_id, self.filepaths,
             data_type="Metabolomics", prep_template=self.prep_template)
Example #19
0
 def test_create_error_data_type(self):
     """create raises an error for the "Metabolomics" data type"""
     params_table = "preprocessed_sequence_illumina_params"

     # Without a prep template
     with self.assertRaises(IncompetentQiitaDeveloperError):
         PreprocessedData.create(
             self.study, params_table, self.params_id, self.filepaths,
             data_type="Metabolomics")

     # With both a data type and a prep template
     with self.assertRaises(IncompetentQiitaDeveloperError):
         PreprocessedData.create(
             self.study, params_table, self.params_id, self.filepaths,
             data_type="Metabolomics", prep_template=self.prep_template)
Example #20
0
def _insert_preprocessed_data(study, params, prep_template, slq_out, **kwargs):
    """Register the split libraries output as preprocessed data

    Parameters
    ----------
    study : Study
        The study to preprocess
    params : BaseParameters
        The parameters to use for preprocessing
    prep_template : PrepTemplate
        The prep template to use for the preprocessing
    slq_out : str
        Path to the split_libraries_fastq.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the preprocessed output directory does not contain all the expected
        files
    """
    from os.path import exists, join
    from qiita_db.data import PreprocessedData

    # Mandatory outputs:
    #   seqs.fna -> demultiplexed fasta file
    #   seqs.demux -> demultiplexed HDF5 file
    #   split_library_log.txt -> log file
    filepaths = [(join(slq_out, 'seqs.fna'), "preprocessed_fasta"),
                 (join(slq_out, 'seqs.demux'), "preprocessed_demux"),
                 (join(slq_out, 'split_library_log.txt'), "log")]

    if not all(exists(required_fp) for required_fp, _ in filepaths):
        raise ValueError("The output directory %s does not contain all the "
                         "expected files." % slq_out)

    # Optional output: seqs.fastq -> demultiplexed fastq file
    optional_fastq = join(slq_out, 'seqs.fastq')
    if exists(optional_fastq):
        filepaths.append((optional_fastq, "preprocessed_fastq"))

    PreprocessedData.create(study, params._table, params.id, filepaths,
                            prep_template)

    # Change the prep_template status to success
    prep_template.preprocessing_status = 'success'
Example #21
0
def processor(preprocessed_data_id, param_id, param_constructor):
    """Dispatch the processor work

    Parameters
    ----------
    preprocessed_data_id : int
        The id used to instantiate the PreprocessedData to process
    param_id : int
        The id passed to `param_constructor`
    param_constructor : callable
        Called with `param_id` to build the processing parameters

    Returns
    -------
    object or None
        Whatever the StudyProcessor call returns, or None if it raised; in
        that case the error is stored in the processing status of the
        preprocessed data
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    params = param_constructor(param_id)

    sp = StudyProcessor()
    try:
        process_out = sp(preprocessed_data, params)
    except Exception as e:
        # format_exception_only expects (exception type, exception value);
        # passing the instance and the exc_info() tuple gave a garbled message
        error_msg = ''.join(format_exception_only(type(e), e))
        preprocessed_data.processing_status = "failed: %s" % error_msg
        process_out = None

    return process_out
Example #22
0
 def test_create_error_dynamic_table(self):
     """Raises an error if the preprocessed_params_table does not exist"""
     bad_table_names = ("foo",
                        "preprocessed_foo",
                        "foo_params",
                        "preprocessed_foo_params")

     # Each name must be rejected, checked in the order listed above
     for table_name in bad_table_names:
         with self.assertRaises(IncompetentQiitaDeveloperError):
             PreprocessedData.create(
                 self.study, table_name, self.params_id, self.filepaths,
                 data_type="18S")
Example #23
0
    def _get_template_variables(self, preprocessed_data_id, callback):
        """Generates all the variables needed to render the template

        Parameters
        ----------
        preprocessed_data_id : int
            The preprocessed data identifier
        callback : function
            The callback function to call with the results once the processing
            is done. It receives a single tuple
            ``(title, contents, back_button_path)``

        Raises
        ------
        HTTPError
            If the preprocessed data does not have a log file
        """
        # Get the objects and check user privileges
        ppd = PreprocessedData(preprocessed_data_id)
        study = Study(ppd.study)
        check_access(self.current_user, study, raise_error=True)

        # Get the return address
        back_button_path = self.get_argument(
            'back_button_path',
            '/study/description/%d?top_tab=preprocessed_data_tab&sub_tab=%s'
            % (study.id, preprocessed_data_id))

        # Get all the filepaths attached to the preprocessed data
        files_tuples = ppd.get_filepaths()

        # Group the files by filepath type
        files = defaultdict(list)
        for _, fp, fpt in files_tuples:
            files[fpt].append(fp)

        # `files` is a defaultdict, so indexing a missing 'log' entry yields
        # an empty list (IndexError on [0]) rather than a KeyError; test for
        # the missing log explicitly so the documented HTTPError is raised
        log_paths = files.get('log')
        if not log_paths:
            raise HTTPError(500, "Log file not found in preprocessed data %s"
                                 % preprocessed_data_id)
        log_path = log_paths[0]

        # NOTE(review): only newlines and tabs are converted below; the rest
        # of the log is passed through as-is - confirm the template escapes it
        with open(log_path, 'U') as f:
            contents = f.read()
            contents = contents.replace('\n', '<br/>')
            contents = contents.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')

        title = 'Preprocessed Data: %d' % preprocessed_data_id

        callback((title, contents, back_button_path))
Example #24
0
    def _get_template_variables(self, preprocessed_data_id, callback):
        """Generates all the variables needed to render the template

        Parameters
        ----------
        preprocessed_data_id : int
            The preprocessed data identifier
        callback : function
            The callback function to call with the results once the processing
            is done. It receives a single tuple
            ``(title, contents, back_button_path)``

        Raises
        ------
        HTTPError
            If the preprocessed data does not have a log file
        """
        # Get the objects and check user privileges
        ppd = PreprocessedData(preprocessed_data_id)
        study = Study(ppd.study)
        check_access(self.current_user, study, raise_error=True)

        # Get the return address
        back_button_path = self.get_argument(
            'back_button_path',
            '/study/description/%d?top_tab=preprocessed_data_tab&sub_tab=%s'
            % (study.id, preprocessed_data_id))

        # Get all the filepaths attached to the preprocessed data
        files_tuples = ppd.get_filepaths()

        # Group the files by filepath type
        files = defaultdict(list)
        for _, fp, fpt in files_tuples:
            files[fpt].append(fp)

        # `files` is a defaultdict, so indexing a missing 'log' entry yields
        # an empty list (IndexError on [0]) rather than a KeyError; test for
        # the missing log explicitly so the documented HTTPError is raised
        log_paths = files.get('log')
        if not log_paths:
            raise HTTPError(500, "Log file not found in preprocessed data %s"
                                 % preprocessed_data_id)
        log_path = log_paths[0]

        # NOTE(review): only newlines and tabs are converted below; the rest
        # of the log is passed through as-is - confirm the template escapes it
        with open(log_path, 'U') as f:
            contents = f.read()
            contents = contents.replace('\n', '<br/>')
            contents = contents.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')

        title = 'Preprocessed Data: %d' % preprocessed_data_id

        callback((title, contents, back_button_path))
Example #25
0
 def test_processing_status_setter_valueerror(self):
     """Setting an unrecognized processing status raises ValueError"""
     new_ppd = PreprocessedData.create(
         self.study, self.params_table, self.params_id, self.filepaths,
         data_type="18S")

     with self.assertRaises(ValueError):
         new_ppd.processing_status = 'not a valid state'
Example #26
0
    def test_delete_advanced(self):
        """delete refuses to remove preprocessed data that was submitted"""
        # Preprocessed data can not be removed once it has been submitted
        # to EBI or VAMPS
        new_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            prep_template=self.prep_template)

        # fails due to VAMPS submission
        new_ppd.update_vamps_status('success')
        with self.assertRaises(QiitaDBStatusError):
            PreprocessedData.delete(new_ppd.id)
        new_ppd.update_vamps_status('failed')

        # Preprocessed data 1 must be rejected too (presumably because of its
        # EBI submission status)
        existing_ppd = PreprocessedData(1)
        with self.assertRaises(QiitaDBStatusError):
            PreprocessedData.delete(existing_ppd.id)
Example #27
0
 def render(self, study):
     """Render the preprocessed data tab of the study description page

     Parameters
     ----------
     study : Study
         The study whose preprocessed data is listed

     Returns
     -------
     The value returned by ``self.render_string`` for the tab template
     """
     # Pair every preprocessed data id of the study with its object
     avail_ppd = []
     for ppd_id in study.preprocessed_data():
         avail_ppd.append((ppd_id, PreprocessedData(ppd_id)))

     template = "study_description_templates/preprocessed_data_tab.html"
     return self.render_string(template,
                               available_preprocessed_data=avail_ppd,
                               study_id=study.id)
Example #28
0
 def test_processing_status_setter_valueerror(self):
     """Setting an unrecognized processing status raises ValueError"""
     new_ppd = PreprocessedData.create(
         self.study, self.params_table, self.params_id, self.filepaths,
         data_type="18S")

     with self.assertRaises(ValueError):
         new_ppd.processing_status = 'not a valid state'
    def test_get_process_target_gene_cmd(self):
        """The generated command matches the expected one, token by token"""
        obs_cmd, obs_output_dir = _get_process_target_gene_cmd(
            PreprocessedData(1), ProcessedSortmernaParams(1))

        _, ref_dir = get_mountpoint('reference')[0]
        _, preprocessed_dir = get_mountpoint('preprocessed_data')[0]

        exp_cmd = ("pick_closed_reference_otus.py -i {}1_seqs.fna -r "
                   "{}GreenGenes_13_8_97_otus.fasta -o {} -p placeholder -t "
                   "{}GreenGenes_13_8_97_otu_taxonomy.txt".format(
                       preprocessed_dir, ref_dir, obs_output_dir, ref_dir))

        obs_tokens = obs_cmd.split()
        exp_tokens = exp_cmd.split()
        self.assertEqual(len(obs_tokens), len(exp_tokens))

        # Consume both token lists from the front, in lockstep
        while obs_tokens:
            obs_tok = obs_tokens.pop(0)
            exp_tok = exp_tokens.pop(0)
            if obs_tok != '-p':
                self.assertEqual(obs_tok, exp_tok)
                continue
            # skip parameters file: the token following '-p' is not compared
            obs_tokens.pop(0)
            exp_tokens.pop(0)
Example #30
0
 def test_link_filepaths_status_setter(self):
     """_set_link_filepaths_status updates link_filepaths_status"""
     ppd = PreprocessedData(1)

     # Status before any change made by this test
     self.assertEqual(ppd.link_filepaths_status, 'idle')

     # Every status set must be read back unchanged
     for new_status in ('linking', 'unlinking', 'failed: error'):
         ppd._set_link_filepaths_status(new_status)
         self.assertEqual(ppd.link_filepaths_status, new_status)
Example #31
0
    def test_create_data_type_only(self):
        """Create preprocessed data passing only ``data_type`` and check
        the resulting DB rows and copied files.
        """
        # The returned object must carry the expected id
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(created.id, 3)

        # Row added to qiita.preprocessed_data. Columns, in order:
        # preprocessed_data_id, preprocessed_params_table,
        # preprocessed_params_id, submitted_to_insdc_status,
        # ebi_submission_accession, ebi_study_accession, data_type_id,
        # link_filepaths_status, vamps_status, processing_status
        ppd_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_data "
            "WHERE preprocessed_data_id=3")
        expected_ppd_rows = [[
            3, "preprocessed_sequence_illumina_params", 1, 'not submitted',
            None, None, 2, 'idle', 'not submitted', 'not_processed']]
        self.assertEqual(ppd_rows, expected_ppd_rows)

        # Link between the preprocessed data and its study
        study_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_preprocessed_data "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Files copied to the expected location (and queued for clean up)
        fna_name = "3_%s" % basename(self.fna_fp)
        qual_name = "3_%s" % basename(self.qual_fp)
        for file_name in (fna_name, qual_name):
            copied_fp = join(self.db_test_ppd_dir, file_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)

        # Rows added to qiita.filepath: the two highest ids, using the row
        # count as the last id
        last_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.filepath")[0]
        fp_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
            "filepath_id=%d" % (last_id - 1, last_id))
        # The first three columns are filepath_id, path, filepath_type_id
        expected_fp_rows = [
            [last_id - 1, fna_name, 4, '852952723', 1, 3],
            [last_id, qual_name, 5, '852952723', 1, 3]]
        self.assertEqual(fp_rows, expected_fp_rows)

        # Link rows (preprocessed_data_id, filepath_id)
        link_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_filepath "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(link_rows, [[3, last_id - 1], [3, last_id]])
Example #32
0
    def test_create_data_type_only(self):
        """Create preprocessed data passing only ``data_type`` and check
        the resulting DB rows and copied files.
        """
        # The returned object must carry the expected id
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(created.id, 3)

        # Row added to qiita.preprocessed_data. Columns, in order:
        # preprocessed_data_id, preprocessed_params_table,
        # preprocessed_params_id, submitted_to_insdc_status,
        # ebi_submission_accession, ebi_study_accession, data_type_id,
        # link_filepaths_status, vamps_status, processing_status
        ppd_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_data "
            "WHERE preprocessed_data_id=3")
        expected_ppd_rows = [[
            3, "preprocessed_sequence_illumina_params", 1, 'not submitted',
            None, None, 2, 'idle', 'not submitted', 'not_processed']]
        self.assertEqual(ppd_rows, expected_ppd_rows)

        # Link between the preprocessed data and its study
        study_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_preprocessed_data "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Files copied to the expected location (and queued for clean up)
        fna_name = "3_%s" % basename(self.fna_fp)
        qual_name = "3_%s" % basename(self.qual_fp)
        for file_name in (fna_name, qual_name):
            copied_fp = join(self.db_test_ppd_dir, file_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)

        # Rows added to qiita.filepath: the two highest ids, using the row
        # count as the last id
        last_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.filepath")[0]
        fp_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
            "filepath_id=%d" % (last_id - 1, last_id))
        # The first three columns are filepath_id, path, filepath_type_id
        expected_fp_rows = [
            [last_id - 1, fna_name, 4, '852952723', 1, 3],
            [last_id, qual_name, 5, '852952723', 1, 3]]
        self.assertEqual(fp_rows, expected_fp_rows)

        # Link rows (preprocessed_data_id, filepath_id)
        link_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_filepath "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(link_rows, [[3, last_id - 1], [3, last_id]])
Example #33
0
    def test_set_ebi_submission_accession(self):
        """The EBI submission accession can be overwritten and read back."""
        ppd = PreprocessedData.create(
            self.study,
            self.params_table,
            self.params_id,
            self.filepaths,
            raw_data=self.raw_data,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        updated_accession = 'EBI12345-CC'
        ppd.ebi_submission_accession = updated_accession
        self.assertEqual(ppd.ebi_submission_accession, updated_accession)
Example #34
0
    def test_ebi_study_accession(self):
        """The EBI study accession can be overwritten and read back."""
        ppd = PreprocessedData.create(
            self.study,
            self.params_table,
            self.params_id,
            self.filepaths,
            prep_template=self.prep_template,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        updated_accession = 'EBI12345-DD'
        ppd.ebi_study_accession = updated_accession
        self.assertEqual(ppd.ebi_study_accession, updated_accession)
Example #35
0
    def test_ebi_study_accession(self):
        """The EBI study accession can be overwritten and read back."""
        ppd = PreprocessedData.create(
            self.study,
            self.params_table,
            self.params_id,
            self.filepaths,
            prep_template=self.prep_template,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        updated_accession = 'EBI12345-DD'
        ppd.ebi_study_accession = updated_accession
        self.assertEqual(ppd.ebi_study_accession, updated_accession)
Example #36
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Render the VAMPS submission page for a preprocessed data id.

        Raises ``HTTPError`` 404 when the id is unknown and 403 when the
        current user's level is not ``admin``. ``msg`` and ``msg_level`` are
        passed to the template, but are replaced here depending on how many
        ``preprocessed_demux`` files the preprocessed data has.
        """
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            ppd = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(
                404,
                "PreprocessedData %d does not exist!" % preprocessed_data_id)

        # Only reached when the lookup above succeeded
        requester = self.current_user
        if requester.level != "admin":
            raise HTTPError(
                403,
                "No permissions of admin, get/VAMPSSubmitHandler: %s!"
                % requester.id)

        prep_template = PrepTemplate(ppd.prep_template)
        sample_template = SampleTemplate(ppd.study)
        study = Study(ppd.study)
        stats = [
            ("Number of samples", len(prep_template)),
            ("Number of metadata headers", len(sample_template.categories())),
        ]

        demux = []
        for _, path, ftype in ppd.get_filepaths():
            if ftype == "preprocessed_demux":
                demux.append(path)

        if len(demux) == 1:
            # NOTE(review): this overwrites the caller's msg_level even when
            # the caller passed an error message - confirm this is intended
            stats.append(("Number of sequences", demux_stats(demux[0]).n))
            msg_level = "success"
        elif demux:
            msg = "Study appears to have multiple demultiplexed files!"
            msg_level = "danger"
        else:
            msg = ("Study does not appear to have demultiplexed sequences "
                   "associated")
            msg_level = "danger"

        self.render(
            "vamps_submission.html",
            study_title=study.title,
            stats=stats,
            message=msg,
            study_id=study.id,
            level=msg_level,
            preprocessed_data_id=preprocessed_data_id,
        )
Example #37
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Render the EBI submission page for a preprocessed data id.

        Raises ``HTTPError`` 404 when the id is unknown and 403 when the
        user built from ``self.current_user`` is not at ``admin`` level.
        ``msg`` and ``msg_level`` are passed to the template, but are
        replaced here depending on how many ``preprocessed_demux`` files
        the preprocessed data has.
        """
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            ppd = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(
                404,
                "PreprocessedData %d does not exist!" % preprocessed_data_id)

        # Only reached when the lookup above succeeded
        requester = User(self.current_user)
        if requester.level != 'admin':
            raise HTTPError(
                403,
                "No permissions of admin, get/EBISubmitHandler: %s!"
                % requester.id)

        prep_template = PrepTemplate(ppd.prep_template)
        sample_template = SampleTemplate(ppd.study)
        study = Study(ppd.study)
        stats = [
            ('Number of samples', len(prep_template)),
            ('Number of metadata headers',
             len(sample_template.metadata_headers())),
        ]

        demux = []
        for _, path, ftype in ppd.get_filepaths():
            if ftype == 'preprocessed_demux':
                demux.append(path)

        if len(demux) == 1:
            # NOTE(review): this overwrites the caller's msg_level even when
            # the caller passed an error message - confirm this is intended
            stats.append(('Number of sequences', demux_stats(demux[0]).n))
            msg_level = 'success'
        elif demux:
            msg = "Study appears to have multiple demultiplexed files!"
            msg_level = 'danger'
        else:
            msg = ("Study does not appear to have demultiplexed sequences "
                   "associated")
            msg_level = 'danger'

        self.render(
            'ebi_submission.html',
            user=self.current_user,
            study_title=study.title,
            stats=stats,
            message=msg,
            study_id=study.id,
            level=msg_level,
            preprocessed_data_id=preprocessed_data_id,
            investigation_type=prep_template.investigation_type)
Example #38
0
    def display_template(self, preprocessed_data_id, msg, msg_level):
        """Render the EBI submission page for a preprocessed data id.

        Raises ``HTTPError`` 404 when the id is unknown and 403 when the
        current user's level is not ``admin``. ``msg`` and ``msg_level`` are
        passed to the template, but are replaced here depending on how many
        ``preprocessed_demux`` files the preprocessed data has.
        """
        preprocessed_data_id = int(preprocessed_data_id)
        try:
            ppd = PreprocessedData(preprocessed_data_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(
                404,
                "PreprocessedData %d does not exist!" % preprocessed_data_id)

        # Only reached when the lookup above succeeded
        requester = self.current_user
        if requester.level != 'admin':
            raise HTTPError(
                403,
                "No permissions of admin, get/EBISubmitHandler: %s!"
                % requester.id)

        prep_template = PrepTemplate(ppd.prep_template)
        sample_template = SampleTemplate(ppd.study)
        study = Study(ppd.study)
        stats = [
            ('Number of samples', len(prep_template)),
            ('Number of metadata headers',
             len(sample_template.metadata_headers())),
        ]

        demux = []
        for _, path, ftype in ppd.get_filepaths():
            if ftype == 'preprocessed_demux':
                demux.append(path)

        if len(demux) == 1:
            # NOTE(review): this overwrites the caller's msg_level even when
            # the caller passed an error message - confirm this is intended
            stats.append(('Number of sequences', demux_stats(demux[0]).n))
            msg_level = 'success'
        elif demux:
            msg = "Study appears to have multiple demultiplexed files!"
            msg_level = 'danger'
        else:
            msg = ("Study does not appear to have demultiplexed sequences "
                   "associated")
            msg_level = 'danger'

        self.render(
            'ebi_submission.html',
            study_title=study.title,
            stats=stats,
            message=msg,
            study_id=study.id,
            level=msg_level,
            preprocessed_data_id=preprocessed_data_id,
            investigation_type=prep_template.investigation_type)
Example #39
0
 def test_link_filepaths_status_setter(self):
     """``_set_link_filepaths_status`` is reflected by the property."""
     data = PreprocessedData(1)
     self.assertEqual(data.link_filepaths_status, 'idle')

     # Each status is set and then read back, in this order
     for new_status in ('linking', 'unlinking', 'failed: error'):
         data._set_link_filepaths_status(new_status)
         self.assertEqual(data.link_filepaths_status, new_status)
Example #40
0
    def test_vamps_status(self):
        """The VAMPS status can be read and updated; bad values are refused."""
        data = PreprocessedData(1)

        # Value before any update
        self.assertEqual(data.submitted_to_vamps_status(), 'not submitted')

        # A valid update is visible through the getter
        new_status = 'failed'
        data.update_vamps_status(new_status)
        self.assertEqual(data.submitted_to_vamps_status(), new_status)

        # An unknown status raises ValueError
        self.assertRaises(ValueError, data.update_vamps_status,
                          'not a valid status')
Example #41
0
    def test_processing_status(self):
        """processing_status works correctly"""
        # Preprocessed data 1, already present in the test DB
        existing = PreprocessedData(1)
        self.assertEqual(existing.processing_status, 'not_processed')

        # Freshly created preprocessed data
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(created.processing_status, 'not_processed')
    def test_insert_processed_data_target_gene(self):
        """Inserting picked OTU results copies the files and adds a
        ``qiita.processed_data`` row.
        """
        # Two empty temporary files stand in for the sequence files
        fna_handle, fna_fp = mkstemp(suffix='_seqs.fna')
        close(fna_handle)
        qual_handle, qual_fp = mkstemp(suffix='_seqs.qual')
        close(qual_handle)
        fasta_type = convert_to_id('preprocessed_fasta', 'filepath_type')
        fastq_type = convert_to_id('preprocessed_fastq', 'filepath_type')
        filepaths = [(fna_fp, fasta_type), (qual_fp, fastq_type)]

        preprocessed_data = PreprocessedData.create(
            Study(1), "preprocessed_sequence_illumina_params", 1, filepaths,
            data_type="18S")

        params = ProcessedSortmernaParams(1)
        pick_dir = mkdtemp()
        db_dir = get_mountpoint('processed_data')[0][1]

        # Placeholder OTU table
        with open(join(pick_dir, 'otu_table.biom'), 'w') as biom_file:
            biom_file.write('\n')

        # Placeholder picked-OTUs directory
        mkdir(join(pick_dir, 'sortmerna_picked_otus'))

        # Log file
        log_handle, log_fp = mkstemp(dir=pick_dir, prefix='log_',
                                     suffix='.txt')
        close(log_handle)
        with open(log_fp, 'w') as log_file:
            log_file.write('\n')

        _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

        # The row count after the insert is used as the new id
        new_id = get_count('qiita.processed_data')

        # The three artifacts must exist under the DB mountpoint
        expected_names = [
            "%s_otu_table.biom" % new_id,
            "%s_sortmerna_picked_otus" % new_id,
            "%s_%s" % (new_id, basename(log_fp)),
        ]
        for name in expected_names:
            self.assertTrue(exists(join(db_dir, name)))

        # A processed_data row with the new id must exist
        row_exists = self.conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.processed_data "
            "WHERE processed_data_id=%s)", (new_id, ))[0]
        self.assertTrue(row_exists)
Example #43
0
    def test_update_preprocessed_data_from_cmd_ppd(self):
        """Updating preprocessed data 2 from the command output registers
        the expected filepaths and checksums.
        """
        exp_ppd = PreprocessedData(2)

        # The new filepath ids are expected to follow the current row count
        next_fp_id = get_count('qiita.filepath') + 1
        suffix_types = [("seqs.fna", "preprocessed_fasta"),
                        ("seqs.fastq", "preprocessed_fastq"),
                        ("seqs.demux", "preprocessed_demux"),
                        ("split_library_log.txt", "log")]
        exp_fps = [
            (fp_id,
             join(self.db_ppd_dir, "%s_%s" % (exp_ppd.id, suffix)),
             fp_type)
            for fp_id, (suffix, fp_type) in enumerate(suffix_types,
                                                      start=next_fp_id)]

        ppd = update_preprocessed_data_from_cmd(self.test_slo, 1, 2)

        # The object returned must be the one that was meant to be modified
        self.assertEqual(ppd.id, exp_ppd.id)

        # The list returned from the db is ordered by db modification time
        # rather than id, so it is sorted before comparing
        obs_fps = sorted(ppd.get_filepaths())
        self.assertEqual(obs_fps, exp_fps)

        sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

        def fetch_checksum(position):
            # Checksum stored for the filepath at ``position`` in obs_fps
            return self.conn_handler.execute_fetchone(
                sql, (obs_fps[position][0], ))[0]

        # fasta file
        self.assertEqual(fetch_checksum(0), '3532748626')

        # fastq file
        self.assertEqual(fetch_checksum(1), '2958832064')

        # demux file: its checksum is generated dynamically, so only check
        # that it is a non-empty string different from the previous value,
        # which corresponds to an empty file
        demux_checksum = fetch_checksum(2)
        self.assertTrue(isinstance(demux_checksum, str))
        self.assertNotEqual(demux_checksum, '852952723')
        self.assertTrue(len(demux_checksum) > 0)

        # log file
        self.assertEqual(fetch_checksum(3), '626839734')
Example #44
0
    def test_set_ebi_submission_accession(self):
        """The EBI submission accession can be overwritten and read back."""
        ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            raw_data=self.raw_data,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        updated_accession = 'EBI12345-CC'
        ppd.ebi_submission_accession = updated_accession
        self.assertEqual(ppd.ebi_submission_accession, updated_accession)
Example #45
0
 def test_create_error(self):
     """Raises an error if the preprocessed_params_table does not exist"""
     bad_tables = ("foo", "preprocessed_foo", "foo_params",
                   "preprocessed_foo_params")
     # Every name must be refused, checked in this order
     for table_name in bad_tables:
         with self.assertRaises(IncompetentQiitaDeveloperError):
             PreprocessedData.create(self.study, table_name,
                                     self.params_id, self.filepaths)
Example #46
0
    def setUp(self):
        """Prepare the fixtures: preprocessed data 1, parameter settings,
        a temporary biom file containing one newline, and an empty clean-up
        list.
        """
        self.preprocessed_data = PreprocessedData(1)
        self.params_table = "processed_params_uclust"
        self.params_id = 1

        # mkstemp returns an open descriptor; it is closed right away and
        # the file is rewritten by path below
        handle, biom_fp = mkstemp(suffix='_table.biom')
        self.biom_fp = biom_fp
        close(handle)

        self.filepaths = [(biom_fp, 6)]
        self.date = datetime(2014, 5, 29, 12, 24, 51)
        base_dir = get_db_files_base_dir()
        self.db_test_pd_dir = join(base_dir, 'processed_data')

        with open(biom_fp, "w") as biom_file:
            biom_file.write("\n")
        self._clean_up_files = []
Example #47
0
 def test_processing_status_setter(self):
     """Able to update the processing status"""
     new_ppd = PreprocessedData.create(
         self.study, self.params_table, self.params_id, self.filepaths,
         data_type="18S")
     self.assertEqual(new_ppd.processing_status, 'not_processed')

     # Each state is assigned and then read back, in this order
     states = ('processing', 'processed', 'failed: some error message')
     for state in states:
         new_ppd.processing_status = state
         self.assertEqual(new_ppd.processing_status, state)
Example #48
0
 def test_processing_status_setter(self):
     """Able to update the processing status"""
     new_ppd = PreprocessedData.create(
         self.study, self.params_table, self.params_id, self.filepaths,
         data_type="18S")
     self.assertEqual(new_ppd.processing_status, 'not_processed')

     # Each state is assigned and then read back, in this order
     states = ('processing', 'processed', 'failed: some error message')
     for state in states:
         new_ppd.processing_status = state
         self.assertEqual(new_ppd.processing_status, state)
Example #49
0
    def test_create_data_type_only(self):
        """Create preprocessed data passing only ``data_type`` and check
        the resulting DB rows and copied files.
        """
        # The returned object must carry the expected id
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(created.id, 3)

        # Row added to qiita.preprocessed_data
        ppd_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_data "
            "WHERE preprocessed_data_id=3")
        expected_ppd_rows = [
            [3, "preprocessed_sequence_illumina_params", 1, False, None,
             None, 2]]
        self.assertEqual(ppd_rows, expected_ppd_rows)

        # Link between the preprocessed data and its study
        study_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_preprocessed_data "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Files copied to the expected location (and queued for clean up)
        fna_copy = join(self.db_test_ppd_dir, "3_%s" % basename(self.fna_fp))
        self.assertTrue(exists(fna_copy))
        self._clean_up_files.append(fna_copy)

        qual_copy = join(self.db_test_ppd_dir,
                         "3_%s" % basename(self.qual_fp))
        self.assertTrue(exists(qual_copy))
        self._clean_up_files.append(qual_copy)

        # Rows added to qiita.filepath; the first three columns are
        # filepath_id, path, filepath_type_id
        fp_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath "
            "WHERE filepath_id=15 or filepath_id=16")
        expected_fp_rows = [[15, fna_copy, 4, '852952723', 1],
                            [16, qual_copy, 5, '852952723', 1]]
        self.assertEqual(fp_rows, expected_fp_rows)

        # Link rows (preprocessed_data_id, filepath_id)
        link_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_filepath "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(link_rows, [[3, 15], [3, 16]])
Example #50
0
    def test_create(self):
        """Correctly creates all the rows in the DB for preprocessed data"""
        # The returned object must carry the expected id
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            raw_data=self.raw_data)
        self.assertEqual(created.id, 3)

        # Row added to qiita.preprocessed_data
        # NOTE(review): the previous comment listed the columns as
        # preprocessed_data_id, raw_data_id, preprocessed_params_tables,
        # preprocessed_params_id, which does not match the four values
        # expected below - confirm against the schema
        ppd_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_data "
            "WHERE preprocessed_data_id=3")
        expected_ppd_rows = [
            [3, "preprocessed_sequence_illumina_params", 1, False]]
        self.assertEqual(ppd_rows, expected_ppd_rows)

        # Link between the preprocessed data and its study
        study_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_preprocessed_data "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Files copied to the expected location (and queued for clean up)
        fna_copy = join(self.db_test_ppd_dir, "3_%s" % basename(self.fna_fp))
        self.assertTrue(exists(fna_copy))
        self._clean_up_files.append(fna_copy)

        qual_copy = join(self.db_test_ppd_dir,
                         "3_%s" % basename(self.qual_fp))
        self.assertTrue(exists(qual_copy))
        self._clean_up_files.append(qual_copy)

        # Rows added to qiita.filepath; the first three columns are
        # filepath_id, path, filepath_type_id
        fp_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath "
            "WHERE filepath_id=10 or filepath_id=11")
        expected_fp_rows = [[10, fna_copy, 4, '852952723', 1],
                            [11, qual_copy, 5, '852952723', 1]]
        self.assertEqual(fp_rows, expected_fp_rows)

        # Link rows (preprocessed_data_id, filepath_id)
        link_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_filepath "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(link_rows, [[3, 10], [3, 11]])
Example #51
0
    def test_create(self):
        """Correctly creates all the rows in the DB for preprocessed data"""
        # The returned object must carry the expected id
        created = PreprocessedData.create(
            self.study,
            self.params_table,
            self.params_id,
            self.filepaths,
            prep_template=self.prep_template,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)
        self.assertEqual(created.id, 3)

        # Row added to qiita.preprocessed_data. Columns, in order:
        # preprocessed_data_id, preprocessed_params_table,
        # preprocessed_params_id, submitted_to_insdc_status,
        # ebi_submission_accession, ebi_study_accession, data_type_id,
        # link_filepaths_status, vamps_status, processing_status
        ppd_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.preprocessed_data "
            "WHERE preprocessed_data_id=3")
        expected_ppd_rows = [[
            3, "preprocessed_sequence_illumina_params", 1, 'not submitted',
            "EBI123456-A", "EBI123456-B", 2, 'idle', 'not submitted',
            'not_processed']]
        self.assertEqual(ppd_rows, expected_ppd_rows)

        # Link between the preprocessed data and its study
        study_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_preprocessed_data "
            "WHERE preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Files copied to the expected location (and queued for clean up)
        fna_name = "3_%s" % basename(self.fna_fp)
        qual_name = "3_%s" % basename(self.qual_fp)
        for file_name in (fna_name, qual_name):
            copied_fp = join(self.db_test_ppd_dir, file_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)

        # Rows added to qiita.filepath; the first three columns are
        # filepath_id, path, filepath_type_id
        fp_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath "
            "WHERE filepath_id=17 or filepath_id=18")
        expected_fp_rows = [[17, fna_name, 4, '852952723', 1, 3],
                            [18, qual_name, 5, '852952723', 1, 3]]
        self.assertEqual(fp_rows, expected_fp_rows)
Example #52
0
 def test_submitted_to_insdc_status(self):
     """submitted_to_insdc_status works correctly"""
     # (preprocessed data id, status expected for it)
     expectations = ((1, 'submitting'), (2, 'not submitted'))
     for ppd_id, exp_status in expectations:
         ppd = PreprocessedData(ppd_id)
         self.assertEqual(ppd.submitted_to_insdc_status(), exp_status)
Example #53
0
 def test_is_submitted_to_insdc(self):
     """is_submitted_to_insdc works correctly"""
     # Preprocessed data 1 is expected to report True
     first = PreprocessedData(1)
     self.assertTrue(first.is_submitted_to_insdc())

     # Preprocessed data 2 is expected to report False
     second = PreprocessedData(2)
     self.assertFalse(second.is_submitted_to_insdc())
Example #54
0
    def test_vamps_status(self):
        """The VAMPS status can be read and updated; bad values are refused."""
        data = PreprocessedData(1)

        # Value before any update
        self.assertEqual(data.submitted_to_vamps_status(), 'not submitted')

        # A valid update is visible through the getter
        new_status = 'failed'
        data.update_vamps_status(new_status)
        self.assertEqual(data.submitted_to_vamps_status(), new_status)

        # An unknown status raises ValueError
        self.assertRaises(ValueError, data.update_vamps_status,
                          'not a valid status')
Example #55
0
    def get(self, preprocessed_data_id):
        """Render the first ``log`` file of a preprocessed data as HTML text.

        Access to the owning study is checked with ``check_access`` (called
        with ``raise_error=True``) before anything is read. Newlines in the
        log become ``<br/>`` and tabs become four ``&nbsp;`` entities.
        """
        ppd_id = int(preprocessed_data_id)
        ppd = PreprocessedData(ppd_id)
        study = Study(ppd.study)
        check_access(User(self.current_user), study, raise_error=True)

        default_back = '/study/description/%d' % study.id
        back_button_path = self.get_argument('back_button_path', default_back)

        # Group the file paths by their filepath type
        paths_by_type = defaultdict(list)
        for _, path, fp_type in ppd.get_filepaths():
            paths_by_type[fp_type].append(path)

        # NOTE(review): the 'U' open mode was removed in Python 3.11 -
        # confirm which interpreter versions this handler must support
        with open(paths_by_type['log'][0], 'U') as log_file:
            raw_log = log_file.read()
        contents = raw_log.replace('\n', '<br/>')
        contents = contents.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')

        self.render('text_file.html',
                    title='Preprocessed Data: %d' % ppd_id,
                    contents=contents,
                    user=self.current_user,
                    back_button_path=back_button_path)
Example #56
0
    def test_ebi_run_accessions_setter(self):
        """Run accessions cannot be set on preprocessed data 1 but can be
        set on preprocessed data 2, where they are read back sorted.
        """
        # Sample ids in the order their accessions are numbered:
        # ERR1000001 for the first one up to ERR1000027 for the last one
        sample_ids = [
            '1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195',
            '1.SKB4.640189', '1.SKB5.640181', '1.SKB6.640176',
            '1.SKB7.640196', '1.SKB8.640193', '1.SKB9.640200',
            '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198',
            '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190',
            '1.SKD7.640191', '1.SKD8.640184', '1.SKD9.640182',
            '1.SKM1.640183', '1.SKM2.640199', '1.SKM3.640197',
            '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187',
            '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192',
        ]
        new_vals = {
            sample_id: 'ERR%d' % number
            for number, sample_id in enumerate(sample_ids, start=1000001)}

        # Preprocessed data 1 refuses the assignment
        with self.assertRaises(QiitaDBError):
            PreprocessedData(1).ebi_run_accessions = new_vals

        # Preprocessed data 2 starts empty and accepts the assignment
        target = PreprocessedData(2)
        self.assertEqual(target.ebi_run_accessions, [])
        target.ebi_run_accessions = new_vals
        self.assertEqual(target.ebi_run_accessions,
                         sorted(new_vals.values()))
Example #57
0
    def test_create(self):
        """Correctly creates all the rows in the DB for preprocessed data"""
        new_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            raw_data=self.raw_data,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)
        # The returned object must carry the expected id
        self.assertEqual(new_ppd.id, 3)

        fetch_all = self.conn_handler.execute_fetchall

        # The preprocessed data row has been added to the DB
        ppd_rows = fetch_all(
            "SELECT * FROM qiita.preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(
            ppd_rows,
            [[3, "preprocessed_sequence_illumina_params", 1, False,
              "EBI123456-A", "EBI123456-B", 2]])

        # The preprocessed data has been linked with its study
        link_rows = fetch_all(
            "SELECT * FROM qiita.study_preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(link_rows, [[1, 3]])

        # Each source file must have been copied to the test directory with
        # the new id as prefix; register the copies for later clean up
        copied_fps = []
        for source_fp in (self.fna_fp, self.qual_fp):
            copied_fp = join(self.db_test_ppd_dir,
                             "3_%s" % basename(source_fp))
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            copied_fps.append(copied_fp)
        exp_fna_fp, exp_qual_fp = copied_fps

        # The filepaths have been added to the DB. Columns are filepath_id,
        # path and filepath_type_id, followed by two more values (presumably
        # checksum and checksum algorithm id -- confirm against the schema)
        filepath_rows = fetch_all(
            "SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
            "filepath_id=16")
        self.assertEqual(
            filepath_rows,
            [[15, exp_fna_fp, 4, '852952723', 1],
             [16, exp_qual_fp, 5, '852952723', 1]])
Example #58
0
    def test_insert_processed_data_target_gene(self):
        """Inserting picked OTUs copies the files and adds the DB row"""
        # Empty sequence and quality files backing the preprocessed data
        fna_fd, fna_fp = mkstemp(suffix='_seqs.fna')
        close(fna_fd)
        qual_fd, qual_fp = mkstemp(suffix='_seqs.qual')
        close(qual_fd)
        fna_type = convert_to_id('preprocessed_fasta', 'filepath_type')
        qual_type = convert_to_id('preprocessed_fastq', 'filepath_type')
        filepaths = [(fna_fp, fna_type), (qual_fp, qual_type)]

        preprocessed_data = PreprocessedData.create(
            Study(1), "preprocessed_sequence_illumina_params", 1,
            filepaths, data_type="18S")

        params = ProcessedSortmernaParams(1)
        pick_dir = mkdtemp()
        processed_mount = get_mountpoint('processed_data')[0][1]

        # Placeholder for the OTU table
        with open(join(pick_dir, 'otu_table.biom'), 'w') as otu_fh:
            otu_fh.write('\n')

        # Placeholder for the picked OTUs directory
        mkdir(join(pick_dir, 'sortmerna_picked_otus'))

        # Log file, named log_<random>.txt inside the picking directory
        log_fd, log_fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
        close(log_fd)
        with open(log_fp, 'w') as log_fh:
            log_fh.write('\n')

        _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

        new_id = get_count('qiita.processed_data')

        # Every file must have been copied under the processed data
        # mountpoint, prefixed with the new processed data id
        expected_names = ["%s_otu_table.biom" % new_id,
                          "%s_sortmerna_picked_otus" % new_id,
                          "%s_%s" % (new_id, basename(log_fp))]
        for name in expected_names:
            self.assertTrue(exists(join(processed_mount, name)))

        # A new processed data row must exist in the DB
        row_exists = self.conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
            "processed_data_id=%s)", (new_id, ))[0]
        self.assertTrue(row_exists)
Example #59
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple
        The (study accession, submission accession) pair returned by the
        submission; both values are None when `send` is False

    Raises
    ------
    ValueError
        If the current INSDC status is 'submitting' or 'success', or if the
        investigation type of the prep template is neither an official nor a
        user-defined term of the ENA ontology
    IOError
        If the output folder for the XML files already exists
    ComputeError
        If the files were sent but no study or submission accession was
        returned
    """
    # NOTE(review): the placeholder in the `action` line of the docstring is
    # kept untouched; it looks like it is meant to be filled in elsewhere
    # with the list of valid actions -- confirm.
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        # NOTE(review): an exception raised between this point and the final
        # status update (e.g. the ValueError or IOError below) leaves the
        # status as 'submitting', which the check above refuses to resubmit
        # -- confirm whether callers reset it.
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology"
                         % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        # NOTE(review): the working directory is passed as `prefix` rather
        # than `dir`; presumably this relies on it ending with a path
        # separator -- confirm.
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        # The first 'preprocessed_demux' filepath is used; an IndexError is
        # raised here if the preprocessed data has none
        demux = [
            path for _, path, ftype in preprocessed_data.get_filepaths()
            if ftype == 'preprocessed_demux'
        ][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    # The XML files are written to a sibling folder of the FASTQ directory
    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    # Refuse to reuse an existing output folder
    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info['study_abstract'],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        # A missing accession is treated as a failed submission
        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        # Nothing was sent, so there are no accessions to report
        study_accession, submission_accession = None, None

    return study_accession, submission_accession