Ejemplo n.º 1
0
    def get(self, analysis_id):
        """Render the results page of an analysis and drop its messages

        Parameters
        ----------
        analysis_id : str or int
            Identifier of the analysis to show; it is cast to int
        """
        user = self.current_user
        analysis_id = int(analysis_id)
        check_analysis_access(User(user), analysis_id)

        analysis = Analysis(analysis_id)

        # (first command element, results) of every job, grouped by datatype
        jobres = defaultdict(list)
        for job_id in analysis.jobs:
            job = Job(job_id)
            jobres[job.datatype].append((job.command[0], job.results))

        # dropped samples keyed by a label naming their data type and study
        dropped = {}
        for pd_id, samples in viewitems(analysis.dropped_samples):
            proc_data = ProcessedData(pd_id)
            label_values = (proc_data.data_type(), proc_data.study)
            dropped["Data type %s, Study: %s" % label_values] = samples

        self.render("analysis_results.html", user=self.current_user,
                    jobres=jobres, aname=analysis.name, dropped=dropped,
                    basefolder=get_db_files_base_dir())

        # wipe out cached messages for this analysis
        redis_conn = Redis()
        queue = '%s:messages' % self.current_user
        cached = redis_conn.lrange(queue, 0, -1)
        if cached is None:
            return
        marker = '"analysis": %d' % analysis_id
        for cached_msg in cached:
            if marker in cached_msg:
                redis_conn.lrem(queue, cached_msg, 1)
Ejemplo n.º 2
0
    def get(self, analysis_id):
        """Show the results of an analysis, then clear its cached messages

        Parameters
        ----------
        analysis_id : str or int
            Identifier of the analysis to show; it is cast to int
        """
        user = self.current_user
        analysis_id = int(analysis_id)
        check_analysis_access(User(user), analysis_id)

        analysis = Analysis(analysis_id)

        # job results grouped by the datatype of each job
        jobres = defaultdict(list)
        for job_id in analysis.jobs:
            current_job = Job(job_id)
            per_type = jobres[current_job.datatype]
            per_type.append((current_job.command[0], current_job.results))

        # one entry per "data type / study" label with its dropped samples
        dropped = {}
        for pd_id, dropped_ids in viewitems(analysis.dropped_samples):
            pdata = ProcessedData(pd_id)
            label = "Data type %s, Study: %s" % (pdata.data_type(),
                                                 pdata.study)
            dropped[label] = dropped_ids

        self.render("analysis_results.html",
                    user=self.current_user,
                    jobres=jobres,
                    aname=analysis.name,
                    dropped=dropped,
                    basefolder=get_db_files_base_dir())

        # wipe out cached messages for this analysis
        server = Redis()
        msg_list = '%s:messages' % self.current_user
        stale = server.lrange(msg_list, 0, -1)
        if stale is None:
            return
        needle = '"analysis": %d' % analysis_id
        for entry in stale:
            if needle in entry:
                server.lrem(msg_list, entry, 1)
Ejemplo n.º 3
0
    def test_status_setter_error(self):
        """Setting a public processed data back to sandbox raises an error"""
        proc_data = ProcessedData(1)
        proc_data.status = 'public'
        self.assertEqual(proc_data.status, 'public')

        # callable form of assertRaises; setattr performs the assignment
        self.assertRaises(QiitaDBStatusError, setattr, proc_data, 'status',
                          'sandbox')
Ejemplo n.º 4
0
    def post(self, analysis_id):
        """Create the requested jobs for an analysis and run the analysis

        Every "commands" request argument is split on "#" into a data type
        and a command name.

        Parameters
        ----------
        analysis_id
            Identifier of the analysis; handed to ``Analysis`` unchanged
        """
        requested = [arg.split("#") for arg in self.get_arguments("commands")]
        analysis = Analysis(analysis_id)

        commands = []
        # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
        handle, mapping_file = mkstemp(suffix="_map_file.txt")
        close(handle)
        SampleTemplate(1).to_file(mapping_file)
        study_fps = {}
        for proc_data_id in Study(1).processed_data:
            processed = ProcessedData(proc_data_id)
            first_fp = processed.get_filepaths()[0][0]
            study_fps[processed.data_type] = first_fp

        for data_type, command in requested:
            opts = {"--otu_table_fp": study_fps[data_type],
                    "--mapping_fp": mapping_file}
            if command == "Beta Diversity":
                # 16S/18S get a tree file, anything else a parameters file
                if data_type in {'16S', '18S'}:
                    opts["--tree_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "gg_97_otus_4feb2011.tre")
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")
            Job.create(data_type, command, opts, analysis)
            commands.append("%s: %s" % (data_type, command))

        user = self.get_current_user()
        self.render("analysis_waiting.html",
                    user=user,
                    aid=analysis_id,
                    aname=analysis.name,
                    commands=commands)
        # fire off analysis run here
        # currently synch run so redirect done here. Will remove after demo
        run_analysis(user, analysis)
Ejemplo n.º 5
0
    def get(self, analysis_id):
        """Render the results page of an analysis

        Parameters
        ----------
        analysis_id : str
            Analysis identifier; only the text before the first "/" is used
        """
        analysis_id = int(analysis_id.partition("/")[0])
        analysis = Analysis(analysis_id)
        check_analysis_access(self.current_user, analysis)

        # (first command element, results) of every job, grouped by datatype
        jobres = defaultdict(list)
        for job_id in analysis.jobs:
            job = Job(job_id)
            jobres[job.datatype].append((job.command[0], job.results))

        # per data type: (study title, number of samples, joined sample ids)
        dropped = defaultdict(list)
        for pd_id, samples in viewitems(analysis.dropped_samples):
            if samples:
                pdata = ProcessedData(pd_id)
                dtype = pdata.data_type()
                study_id = pdata.study
                summary = (Study(study_id).title, len(samples),
                           ', '.join(samples))
                dropped[dtype].append(summary)

        self.render("analysis_results.html",
                    analysis_id=analysis_id,
                    jobres=jobres,
                    aname=analysis.name,
                    dropped=dropped,
                    basefolder=get_db_files_base_dir())
Ejemplo n.º 6
0
    def get(self, analysis_id):
        """Render the results page of an analysis

        Parameters
        ----------
        analysis_id : str
            Analysis identifier; only the text before the first "/" is used
        """
        analysis_id = int(analysis_id.partition("/")[0])
        analysis = Analysis(analysis_id)
        check_analysis_access(self.current_user, analysis)

        # job results grouped by the datatype of each job
        jobres = defaultdict(list)
        for job_id in analysis.jobs:
            job = Job(job_id)
            jobres[job.datatype].append((job.command[0], job.results))

        dropped = {}
        dropped_samples = analysis.dropped_samples
        # a falsy value (e.g. nothing dropped) is treated as an empty mapping
        for pd_id, samples in viewitems(dropped_samples or {}):
            pdata = ProcessedData(pd_id)
            label_values = (pdata.data_type(), pdata.study)
            dropped["Data type %s, Study: %s" % label_values] = samples

        self.render("analysis_results.html", jobres=jobres,
                    aname=analysis.name, dropped=dropped,
                    basefolder=get_db_files_base_dir())
Ejemplo n.º 7
0
 def test_create_preprocessed_and_study_error(self):
     """Supplying both a preprocessed data and a study raises an error"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         # both origins are given at once on purpose
         origins = {'preprocessed_data': self.preprocessed_data,
                    'study': Study(1)}
         ProcessedData.create(self.params_table, self.params_id,
                              self.filepaths, **origins)
Ejemplo n.º 8
0
    def delete_processed_data(self, study, user, callback):
        """Delete the processed data selected in the request

        The id is read from the ``processed_data_id`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        pd_id = int(self.get_argument('processed_data_id'))

        try:
            ProcessedData.delete(pd_id)
        except Exception as e:
            # report the failure and keep pointing at the processed data
            msg = "Couldn't remove processed data %d: %s" % (pd_id, str(e))
            msg_level = "danger"
            reported_id = pd_id
        else:
            msg = "Processed data %d has been deleted" % pd_id
            msg_level = "success"
            # the processed data is gone, so no id is reported back
            reported_id = None

        callback((msg, msg_level, 'processed_data_tab', reported_id, None))
Ejemplo n.º 9
0
 def test_get_filepath(self):
     """Correctly returns the filepaths to the processed files"""
     # check the test data
     observed = ProcessedData(1).get_filepaths()
     biom_fp = join(self.db_test_pd_dir,
                    '1_study_1001_closed_reference_otu_table.biom')
     self.assertEqual(observed, [(biom_fp, "biom")])
Ejemplo n.º 10
0
 def test_get_filepath(self):
     """Correctly returns the filepaths to the processed files"""
     # check the test data
     observed = ProcessedData(1).get_filepaths()
     biom_fp = join(self.db_test_pd_dir,
                    '1_study_1001_closed_reference_otu_table.biom')
     # each entry is (leading integer, path, type label)
     self.assertEqual(observed, [(11, biom_fp, "biom")])
Ejemplo n.º 11
0
 def test_create_preprocessed_and_data_type_error(self):
     """Supplying both a preprocessed data and a data type raises an error"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         # both keyword arguments are given at once on purpose
         extra = {'preprocessed_data': self.preprocessed_data,
                  'data_type': "Metabolomics"}
         ProcessedData.create(self.params_table, self.params_id,
                              self.filepaths, **extra)
Ejemplo n.º 12
0
    def test_get_by_status_grouped_by_study(self):
        """Processed data ids are grouped by study for the given status"""
        grouped = ProcessedData.get_by_status_grouped_by_study
        self.assertEqual(grouped('sandbox'), {})
        self.assertEqual(grouped('private'), {1: [1]})

        # a newly created processed data is then listed under 'sandbox'
        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        self.assertEqual(grouped('sandbox'), {1: [2]})
Ejemplo n.º 13
0
    def filter_by_processed_data(self, datatypes=None):
        """Filters results to what is available in each processed data

        Parameters
        ----------
        datatypes : list of str, optional
            Datatypes to selectively return. Default all datatypes available

        Returns
        -------
        study_proc_ids : dict of dicts of lists
            Processed data ids with samples for each study, in the format
            {study_id: {datatype: [proc_id, proc_id, ...], ...}, ...}
        proc_data_samples : dict of lists
            Samples available in each processed data id, in the format
            {proc_data_id: [samp_id1, samp_id2, ...], ...}
        samples_meta : dict of pandas DataFrames
            metadata for the found samples, keyed by study. Pandas indexed on
            sample_id, column headers are the metadata categories searched
            over
        """
        with TRN:
            # a set gives cheap membership tests; None means "no filtering"
            wanted = None if datatypes is None else set(datatypes)
            study_proc_ids = {}
            proc_data_samples = {}
            samples_meta = {}
            # positional column index -> metadata header
            headers = dict(enumerate(self.meta_headers))
            for study_id, study_meta in viewitems(self.results):
                # add metadata to dataframe and dict
                # use from_dict because pandas doesn't like cursor objects
                by_sample = {row[0]: row[1:] for row in study_meta}
                frame = pd.DataFrame.from_dict(by_sample, orient='index')
                samples_meta[study_id] = frame
                frame.rename(columns=headers, inplace=True)

                # set up study-based data needed
                study = Study(study_id)
                study_sample_ids = {row[0] for row in study_meta}
                ids_by_type = defaultdict(list)
                study_proc_ids[study_id] = ids_by_type
                for proc_data_id in study.processed_data():
                    proc_data = ProcessedData(proc_data_id)
                    datatype = proc_data.data_type()
                    # only keep processed data that fits the given datatypes
                    if wanted is None or datatype in wanted:
                        shared = proc_data.samples.intersection(
                            study_sample_ids)
                        if shared:
                            proc_data_samples[proc_data_id] = sorted(shared)
                            ids_by_type[datatype].append(proc_data_id)

            return study_proc_ids, proc_data_samples, samples_meta
Ejemplo n.º 14
0
 def get(self):
     """Render the samples selected in the user's default analysis"""
     # Format sel_data to get study IDs for the processed data
     # sel_data: {study: {processed data id: samples}}
     sel_data = defaultdict(dict)
     proc_data_info = {}
     default_analysis = Analysis(self.current_user.default_analysis)
     for pd_id, samples in viewitems(default_analysis.samples):
         proc_data = ProcessedData(pd_id)
         sel_data[proc_data.study][pd_id] = samples
         # Also get processed data info, extended with the data type
         info = proc_data.processing_info
         proc_data_info[pd_id] = info
         info['data_type'] = proc_data.data_type()
     self.render("analysis_selected.html",
                 sel_data=sel_data,
                 proc_info=proc_data_info)
Ejemplo n.º 15
0
def _insert_processed_data_target_gene(preprocessed_data, params,
                                       pick_otus_out, **kwargs):
    """Inserts the processed data to the database

    Parameters
    ----------
    preprocessed_data : PreprocessedData
        The preprocessed_data to process
    params : ProcessedSortmernaParams
        The parameters to use for the processing
    pick_otus_out : str
        Path to the pick_closed_reference_otus.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the processed output directory does not contain all the expected
        files
    """
    from os.path import exists, join, isdir
    from glob import glob
    from functools import partial

    # The filepaths that we are interested in are:
    #   1) otu_table.biom -> the output OTU table
    #   2) sortmerna_picked_otus -> intermediate output of pick_otus.py
    #   3) log_*.txt -> log file (the first match is used)

    path_builder = partial(join, pick_otus_out)
    biom_fp = path_builder('otu_table.biom')
    otus_dp = path_builder('sortmerna_picked_otus')
    # glob returns an empty list when there is no log file, so the list is
    # kept as is and only indexed once it is known to be non-empty
    log_fps = glob(path_builder('log_*.txt'))

    # Check that all the files exist
    if not (log_fps and exists(biom_fp) and isdir(otus_dp) and
            exists(log_fps[0])):
        raise ValueError("The output directory %s does not contain all the "
                         "expected files." % pick_otus_out)
    log_fp = log_fps[0]

    # Imported only once the output directory has been validated
    from qiita_db.data import ProcessedData

    filepaths = [(biom_fp, "biom"),
                 (otus_dp, "directory"),
                 (log_fp, "log")]

    ProcessedData.create(params._table, params.id, filepaths,
                         preprocessed_data=preprocessed_data)

    # Change the preprocessed_data status to processed
    preprocessed_data.processing_status = 'processed'
Ejemplo n.º 16
0
def _insert_processed_data_target_gene(preprocessed_data, params,
                                       pick_otus_out, **kwargs):
    """Inserts the processed data to the database

    Parameters
    ----------
    preprocessed_data : PreprocessedData
        The preprocessed_data to process
    params : ProcessedSortmernaParams
        The parameters to use for the processing
    pick_otus_out : str
        Path to the pick_closed_reference_otus.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the processed output directory does not contain all the expected
        files
    """
    from os.path import exists, join, isdir
    from glob import glob
    from functools import partial

    # The filepaths that we are interested in are:
    #   1) otu_table.biom -> the output OTU table
    #   2) sortmerna_picked_otus -> intermediate output of pick_otus.py
    #   3) log_*.txt -> log file (the first match is used)

    path_builder = partial(join, pick_otus_out)
    biom_fp = path_builder('otu_table.biom')
    otus_dp = path_builder('sortmerna_picked_otus')
    # An empty glob result means the log file is missing; indexing it
    # directly would raise IndexError instead of the documented ValueError
    log_candidates = glob(path_builder('log_*.txt'))

    # Check that all the files exist
    all_present = (bool(log_candidates) and exists(biom_fp) and
                   isdir(otus_dp) and exists(log_candidates[0]))
    if not all_present:
        raise ValueError("The output directory %s does not contain all the "
                         "expected files." % pick_otus_out)
    log_fp = log_candidates[0]

    # Imported only once the output directory has been validated
    from qiita_db.data import ProcessedData

    filepaths = [(biom_fp, "biom"), (otus_dp, "directory"), (log_fp, "log")]

    ProcessedData.create(params._table,
                         params.id,
                         filepaths,
                         preprocessed_data=preprocessed_data)

    # Change the preprocessed_data status to processed
    preprocessed_data.processing_status = 'processed'
Ejemplo n.º 17
0
    def test_status(self):
        """The raw data status within a study follows its processed data"""
        raw_data = RawData(1)
        study = Study(1)
        self.assertEqual(raw_data.status(study), 'private')

        # Since the status is inferred from the processed data, change the
        # status of the processed data so we can check how it changes in the
        # raw data
        ProcessedData(1).status = 'public'
        self.assertEqual(raw_data.status(study), 'public')

        # Check that new raw data has sandbox as status since no
        # processed data exists for them
        new_raw_data = RawData.create(self.filetype, self.studies,
                                      self.filepaths)
        self.assertEqual(new_raw_data.status(study), 'sandbox')
Ejemplo n.º 18
0
 def render(self, study):
     """Render the processed data tab with the study's processed data

     Parameters
     ----------
     study : Study
         The study whose processed data are listed
     """
     # (processed data id, ProcessedData object) for each id in the study
     avail_pd = []
     for pd_id in study.processed_data():
         avail_pd.append((pd_id, ProcessedData(pd_id)))
     template = "study_description_templates/processed_data_tab.html"
     return self.render_string(template,
                               available_processed_data=avail_pd,
                               study_id=study.id)
Ejemplo n.º 19
0
 def test_link_filepaths_status_setter(self):
     """The link filepaths status reflects each value that is set"""
     proc_data = ProcessedData(1)
     self.assertEqual(proc_data.link_filepaths_status, 'idle')
     # set every status in turn and read it back
     for status in ('linking', 'unlinking', 'failed: error'):
         proc_data._set_link_filepaths_status(status)
         self.assertEqual(proc_data.link_filepaths_status, status)
Ejemplo n.º 20
0
    def test_status(self):
        """The preprocessed data status follows its processed data"""
        preprocessed = PreprocessedData(1)
        self.assertEqual(preprocessed.status, 'private')

        # Since the status is inferred from the processed data, change the
        # status of the processed data so we can check how it changes in the
        # preprocessed data
        ProcessedData(1).status = 'public'
        self.assertEqual(preprocessed.status, 'public')

        # Check that new preprocessed data has sandbox as status since no
        # processed data exists for them
        fresh = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="16S")
        self.assertEqual(fresh.status, 'sandbox')
Ejemplo n.º 21
0
    def test_status(self):
        """Existing processed data is private, a new one starts as sandbox"""
        self.assertEqual(ProcessedData(1).status, 'private')

        created = ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        self.assertEqual(created.status, 'sandbox')
Ejemplo n.º 22
0
 def _selected_parser(self, analysis):
     """builds dictionaries of selected samples from analysis object

     Returns the processed data ids grouped by study and, per study, a set
     of sample ids.
     """
     samples_by_study = {}
     proc_data_by_study = defaultdict(list)
     for pd_id, sample_ids in viewitems(analysis.samples):
         study_id = ProcessedData(pd_id).study
         proc_data_by_study[study_id].append(pd_id)
         # NOTE(review): a study with several processed data keeps only the
         # samples of the one visited last - confirm this is intended
         samples_by_study[study_id] = set(sample_ids)
     return proc_data_by_study, samples_by_study
Ejemplo n.º 23
0
    def test_create_no_date(self):
        """Correctly adds a processed data with no date on it"""
        # All the other settings have been already tested on test_create
        # here we will only check that the code added a good date
        lower = datetime.now()
        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        upper = datetime.now()
        row = self.conn_handler.execute_fetchone(
            "SELECT processed_date FROM qiita.processed_data WHERE "
            "processed_data_id=2")
        stored_date = row[0]

        # Make sure that we clean up the environment
        self._clean_up_files.append(
            join(self.db_test_pd_dir, "2_%s" % basename(self.biom_fp)))

        # the stored date must fall between the two timestamps taken above
        self.assertTrue(lower <= stored_date <= upper)
Ejemplo n.º 24
0
    def test_create_no_date(self):
        """Correctly adds a processed data with no date on it"""
        # All the other settings have been already tested on test_create
        # here we will only check that the code added a good date
        started = datetime.now()
        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)
        finished = datetime.now()
        fetched = self.conn_handler.execute_fetchone(
            "SELECT processed_date FROM qiita.processed_data WHERE "
            "processed_data_id=2")
        processed_date = fetched[0]

        # Make sure that we clean up the environment
        copied_biom = join(self.db_test_pd_dir,
                           "2_%s" % basename(self.biom_fp))
        self._clean_up_files.append(copied_biom)

        # the stored date lies within the window around the create call
        self.assertTrue(started <= processed_date <= finished)
Ejemplo n.º 25
0
    def test_create(self):
        """Correctly creates all the rows in the DB for the processed data"""
        fetchall = self.conn_handler.execute_fetchall

        # Check that the returned object has the correct id
        new_pd = ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data,
            processed_date=self.date)
        self.assertEqual(new_pd.id, 2)

        # Check that the processed data have been correctly added to the DB
        pd_rows = fetchall(
            "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
        # processed_data_id, processed_params_table, processed_params_id,
        # processed_date, data_type_id, link_filepaths_status,
        # processed_data_status_id
        self.assertEqual(
            pd_rows,
            [[2, "processed_params_uclust", 1, self.date, 2, 'idle', 4]])

        # Check that the files have been copied to right location
        biom_name = "2_%s" % basename(self.biom_fp)
        copied_fp = join(self.db_test_pd_dir, biom_name)
        self.assertTrue(exists(copied_fp))
        self._clean_up_files.append(copied_fp)

        # Check that the filepaths have been correctly added to the DB
        # The row count of the table is used as the id of the newest filepath
        fp_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.filepath")[0]
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % fp_id)
        # filepath_id, path, filepath_type_id, then three further columns
        self.assertEqual(fp_rows, [[fp_id, biom_name, 6, '852952723', 1, 4]])

        # Check that the processed data have been correctly linked
        # with the filepaths
        link_rows = fetchall(
            "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
        # processed_data_id, filepath_id
        self.assertEqual(link_rows, [[2, fp_id]])

        # Check that the processed data have been correctly linked with the
        # study
        study_rows = fetchall(
            "SELECT * FROM qiita.study_processed_data WHERE "
            "processed_data_id=2")
        # study_id, processed_data
        self.assertEqual(study_rows, [[1, 2]])

        # Check that the processed data have been correctly linked with the
        # preprocessed data
        preprocessed_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_processed_data WHERE "
            "processed_data_id=2")
        # preprocessed_data_id, processed_data_id
        self.assertEqual(preprocessed_rows, [[1, 2]])
Ejemplo n.º 26
0
    def make_public(self, study, user, callback):
        """Sets the selected processed data to the 'public' status

        The processed data id is read from the ``pd_id`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        pd_id = int(self.get_argument('pd_id'))
        ProcessedData(pd_id).status = 'public'
        result = ("Processed data set to public", "success",
                  "processed_data_tab", pd_id, None)
        callback(result)
Ejemplo n.º 27
0
    def make_sandbox(self, study, user, callback):
        """Reverts the selected processed data to the 'sandbox' status

        The processed data id is read from the ``pd_id`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        pd_id = int(self.get_argument('pd_id'))
        ProcessedData(pd_id).status = 'sandbox'
        result = ("Processed data reverted to sandbox", "success",
                  "processed_data_tab", pd_id, None)
        callback(result)
Ejemplo n.º 28
0
    def request_approval(self, study, user, callback):
        """Sets the selected processed data to "awaiting_approval"

        The processed data id is read from the ``pd_id`` request argument.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        pd_id = int(self.get_argument('pd_id'))
        ProcessedData(pd_id).status = 'awaiting_approval'
        result = ("Processed data sent to admin for approval", "success",
                  "processed_data_tab", pd_id, None)
        callback(result)
Ejemplo n.º 29
0
    def test_create(self):
        """Correctly creates all the rows in the DB for the processed data"""
        fetchall = self.conn_handler.execute_fetchall

        # Check that the returned object has the correct id
        created = ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data,
            processed_date=self.date)
        self.assertEqual(created.id, 2)

        # Check that the processed data have been correctly added to the DB
        pd_rows = fetchall(
            "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
        # processed_data_id, processed_params_table, processed_params_id,
        # processed_date, data_type_id, link_filepaths_status
        self.assertEqual(
            pd_rows,
            [[2, "processed_params_uclust", 1, self.date, 2, 'idle']])

        # Check that the files have been copied to right location
        biom_name = "2_%s" % basename(self.biom_fp)
        copied_fp = join(self.db_test_pd_dir, biom_name)
        self.assertTrue(exists(copied_fp))
        self._clean_up_files.append(copied_fp)

        # Check that the filepaths have been correctly added to the DB
        # The row count of the table is used as the id of the newest filepath
        fp_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.filepath")[0]
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % fp_id)
        # filepath_id, path, filepath_type_id, then three further columns
        self.assertEqual(fp_rows, [[fp_id, biom_name, 6, '852952723', 1, 4]])

        # Check that the processed data have been correctly linked
        # with the filepaths
        link_rows = fetchall(
            "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
        # processed_data_id, filepath_id
        self.assertEqual(link_rows, [[2, fp_id]])

        # Check that the processed data have been correctly linked with the
        # study
        study_rows = fetchall(
            "SELECT * FROM qiita.study_processed_data WHERE "
            "processed_data_id=2")
        # study_id, processed_data
        self.assertEqual(study_rows, [[1, 2]])

        # Check that the processed data have been correctly linked with the
        # preprocessed data
        preprocessed_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_processed_data WHERE "
            "processed_data_id=2")
        # preprocessed_data_id, processed_data_id
        self.assertEqual(preprocessed_rows, [[1, 2]])
Ejemplo n.º 30
0
 def test_link_filepaths_status_setter(self):
     """Each status passed to the setter is the one read back afterwards"""
     processed = ProcessedData(1)
     self.assertEqual(processed.link_filepaths_status, 'idle')
     # walk through the statuses in order, checking each after setting it
     for new_status in ('linking', 'unlinking', 'failed: error'):
         processed._set_link_filepaths_status(new_status)
         self.assertEqual(processed.link_filepaths_status, new_status)
Ejemplo n.º 31
0
    def get(self, analysis_id):
        """Render the results page of an analysis

        Parameters
        ----------
        analysis_id : str
            Analysis identifier; only the text before the first "/" is used
        """
        user = self.current_user
        analysis_id = int(analysis_id.partition("/")[0])
        analysis = Analysis(analysis_id)
        check_analysis_access(User(user), analysis)

        # (first command element, results) of every job, grouped by datatype
        jobres = defaultdict(list)
        for job_id in analysis.jobs:
            job = Job(job_id)
            jobres[job.datatype].append((job.command[0], job.results))

        # dropped samples keyed by a label naming their data type and study
        dropped = {}
        for pd_id, samples in viewitems(analysis.dropped_samples):
            pdata = ProcessedData(pd_id)
            label_values = (pdata.data_type(), pdata.study)
            dropped["Data type %s, Study: %s" % label_values] = samples

        self.render("analysis_results.html",
                    user=self.current_user,
                    jobres=jobres,
                    aname=analysis.name,
                    dropped=dropped,
                    basefolder=get_db_files_base_dir())
Ejemplo n.º 32
0
    def get(self):
        """Render the admin page listing processed data awaiting approval

        Raises
        ------
        HTTPError
            With code 403 if the current user's level is not 'admin'
        """
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, 'User %s is not admin' % self.current_user)

        awaiting = ProcessedData.get_by_status_grouped_by_study(
            'awaiting_approval')
        # (study id, title, owner, processed data ids) for every study
        parsed_studies = []
        for study_id, pd_ids in viewitems(awaiting):
            study = Study(study_id)
            parsed_studies.append(
                (study.id, study.title, study.owner, pd_ids))

        self.render('admin_approval.html', study_info=parsed_studies)
Ejemplo n.º 33
0
    def approve_study(self, study, user, callback):
        """Approves the selected processed data if the user is allowed to

        The processed data identified by the ``pd_id`` request argument is set
        to the "private" status when ``_approve(user.level)`` is true.
        Otherwise nothing is changed and an error message is reported through
        the callback.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done
        """
        # Read the id up front: it is reported through the callback on both
        # branches, so it must be bound even when the approval is denied
        pd_id = int(self.get_argument("pd_id"))
        if _approve(user.level):
            pd = ProcessedData(pd_id)
            pd.status = "private"
            msg = "Processed data approved"
            msg_level = "success"
        else:
            msg = ("The current user does not have permission to approve "
                   "the processed data")
            msg_level = "danger"
        callback((msg, msg_level, "processed_data_tab", pd_id, None))
Ejemplo n.º 34
0
 def test_create_params_table_error(self):
     """Raises an error if the processed_params_table does not exist"""
     # every one of these table names must be rejected
     for bad_table in ("foo", "processed_params_foo", "processed_params_"):
         with self.assertRaises(IncompetentQiitaDeveloperError):
             ProcessedData.create(
                 bad_table, self.params_id, self.filepaths,
                 preprocessed_data=self.preprocessed_data)
Ejemplo n.º 35
0
    def test_create_w_study(self):
        """Correctly adds a processed data passing a study"""
        ProcessedData.create(self.params_table, self.params_id,
                             self.filepaths, study=Study(1),
                             processed_date=self.date, data_type="18S")

        # Check that the processed data have been correctly added to the DB
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
        # processed_data_id, processed_params_table, processed_params_id,
        # processed_date; the last value is presumably the data_type_id
        exp = [[2, "processed_params_uclust", 1, self.date, 2]]
        self.assertEqual(obs, exp)

        # Check that the files have been copied to right location
        exp_biom_fp = join(self.db_test_pd_dir,
                           "2_%s" % basename(self.biom_fp))
        self.assertTrue(exists(exp_biom_fp))
        self._clean_up_files.append(exp_biom_fp)

        # Check that the filepaths have been correctly added to the DB
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=15")
        # filepath_id, path, filepath_type_id; the two trailing values are
        # presumably the checksum and the checksum algorithm id - TODO confirm
        exp = [[15, exp_biom_fp, 6, '852952723', 1]]
        self.assertEqual(obs, exp)

        # Check that the processed data have been correctly linked
        # with the filepaths
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
        # processed_data_id, filepath_id
        # assertEqual is required here: assertTrue(obs, x) takes x as the
        # failure message and never compares it against obs. The linked
        # filepath is the one checked above (filepath_id=15).
        self.assertEqual(obs, [[2, 15]])

        # Check that the processed data have been correctly linked with the
        # study
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_processed_data WHERE "
            "processed_data_id=2")
        # study_id, processed_data
        self.assertEqual(obs, [[1, 2]])
Ejemplo n.º 36
0
 def test_create_params_table_error(self):
     """An invalid processed params table name makes the creation fail"""
     def assert_rejected(table_name):
         # The remaining arguments are the same for every attempt
         with self.assertRaises(IncompetentQiitaDeveloperError):
             ProcessedData.create(
                 table_name, self.params_id, self.filepaths,
                 preprocessed_data=self.preprocessed_data)

     assert_rejected("foo")
     assert_rejected("processed_params_foo")
     assert_rejected("processed_params_")
Ejemplo n.º 37
0
    def test_create_w_study(self):
        """Correctly adds a processed data passing a study"""
        ProcessedData.create(self.params_table, self.params_id,
                             self.filepaths, study=Study(1),
                             processed_date=self.date, data_type="18S")

        # Check that the processed data have been correctly added to the DB
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.processed_data WHERE processed_data_id=2")
        # processed_data_id, processed_params_table, processed_params_id,
        # processed_date, data_type_id, link_filepaths_status
        exp = [[2, "processed_params_uclust", 1, self.date, 2, 'idle']]
        self.assertEqual(obs, exp)

        # Check that the files have been copied to right location
        exp_biom_fp = join(self.db_test_pd_dir,
                           "2_%s" % basename(self.biom_fp))
        self.assertTrue(exists(exp_biom_fp))
        self._clean_up_files.append(exp_biom_fp)

        # Check that the filepaths have been correctly added to the DB
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=17")
        # The DB row is compared against the file name only, not the full
        # path on disk
        exp_biom_fp = "2_%s" % basename(self.biom_fp)
        # filepath_id, path, filepath_type_id; the meaning of the three
        # trailing values is not visible here - presumably checksum, checksum
        # algorithm id and mountpoint id, TODO confirm
        exp = [[17, exp_biom_fp, 6, '852952723', 1, 4]]
        self.assertEqual(obs, exp)

        # Check that the processed data have been correctly linked
        # with the filepaths
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.processed_filepath WHERE processed_data_id=2")
        # processed_data_id, filepath_id
        # assertEqual is required here: assertTrue(obs, x) takes x as the
        # failure message and never compares it against obs. The linked
        # filepath is the one checked above (filepath_id=17).
        self.assertEqual(obs, [[2, 17]])

        # Check that the processed data have been correctly linked with the
        # study
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.study_processed_data WHERE "
            "processed_data_id=2")
        # study_id, processed_data
        self.assertEqual(obs, [[1, 2]])
Ejemplo n.º 38
0
    def test_get_by_status(self):
        """Processed data ids are retrieved according to their status"""
        self.assertEqual(ProcessedData.get_by_status('sandbox'), set())
        self.assertEqual(ProcessedData.get_by_status('private'), {1})

        # After creating a new processed data, it is listed as 'sandbox'
        # while the 'private' listing stays untouched
        ProcessedData.create(
            self.params_table, self.params_id, self.filepaths,
            preprocessed_data=self.preprocessed_data)

        self.assertEqual(ProcessedData.get_by_status('sandbox'), {2})
        self.assertEqual(ProcessedData.get_by_status('private'), {1})
Ejemplo n.º 39
0
 def test_get_filepath_ids(self):
     """Processed data 1 reports filepath id 11"""
     obs = ProcessedData(1).get_filepath_ids()
     self.assertEqual(obs, [11])
Ejemplo n.º 40
0
 def test_link_filepaths_status(self):
     """Processed data 1 reports 'idle' as its link_filepaths_status"""
     obs = ProcessedData(1).link_filepaths_status
     self.assertEqual(obs, 'idle')
Ejemplo n.º 41
0
 def test_data_type_id(self):
     """data_type(ret_id=True) of processed data 1 is 2"""
     obs = ProcessedData(1).data_type(ret_id=True)
     self.assertEqual(obs, 2)
Ejemplo n.º 42
0
 def test_data_type(self):
     """data_type() of processed data 1 is "18S\""""
     obs = ProcessedData(1).data_type()
     self.assertEqual(obs, "18S")
Ejemplo n.º 43
0
 def test_get_filepath_ids(self):
     """Processed data 1 reports filepath id 10"""
     processed = ProcessedData(1)
     obs = processed.get_filepath_ids()
     exp = [10]
     self.assertEqual(obs, exp)
Ejemplo n.º 44
0
 def test_data_type(self):
     """Correctly returns the data_type of the processed data"""
     processed = ProcessedData(1)
     obs = processed.data_type()
     self.assertEqual(obs, "18S")
Ejemplo n.º 45
0
 def test_data_type_id(self):
     """Correctly returns the data type id of the processed data"""
     processed = ProcessedData(1)
     obs = processed.data_type(ret_id=True)
     self.assertEqual(obs, 2)
Ejemplo n.º 46
0
 def test_data_type(self):
     """The data type of processed data 1 is retrieved as a string"""
     exp = "18S"
     self.assertEqual(ProcessedData(1).data_type(), exp)
Ejemplo n.º 47
0
 def test_preprocessed_data(self):
     """The preprocessed_data attribute of processed data 1 is 1"""
     obs = ProcessedData(1).preprocessed_data
     self.assertEqual(obs, 1)
Ejemplo n.º 48
0
 def test_data_type_id(self):
     """The data type of processed data 1 is retrieved as an id"""
     exp = 2
     self.assertEqual(ProcessedData(1).data_type(ret_id=True), exp)
Ejemplo n.º 49
0
 def test_processed_date(self):
     """The processed_date attribute of processed data 1 is retrieved"""
     obs = ProcessedData(1).processed_date
     exp = datetime(2012, 10, 1, 9, 30, 27)
     self.assertEqual(obs, exp)
Ejemplo n.º 50
0
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Collects the per-study information shown in the studies table

    Parameters
    ----------
    user : User object
        logged in user; the studies listed are its own studies, the public
        ones and the ones shared with it
    study_proc : dict of lists, optional
        Processed data of each study, keyed on study_id. Must be passed
        together with proc_samples.
    proc_samples : dict of lists, optional
        Samples of each processed data, keyed on proc_data_id. Must be passed
        together with study_proc.

    Returns
    -------
    list
        One entry per study, each being the value returned by
        _build_single_study_info. Empty if there are no studies to list.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If only one of study_proc and proc_samples is passed

    Notes
    -----
    When neither study_proc nor proc_samples is passed, both are built for
    each study from the processed data attached to that study.
    """
    have_study_proc = study_proc is not None
    have_proc_samples = proc_samples is not None
    if have_study_proc and not have_proc_samples:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    if have_proc_samples and not have_study_proc:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    # Nothing was passed, so the mappings are derived study by study below
    build_samples = not have_study_proc

    # Studies to list: own, public and shared
    visible = user.user_studies.union(Study.get_by_status('public'))
    visible = visible.union(user.shared_studies)
    if have_study_proc:
        # Only keep the studies the caller asked for
        visible = visible.intersection(study_proc)
    if not visible:
        # No studies left so no need to continue
        return []

    columns = ['study_id', 'email', 'principal_investigator_id',
               'pmid', 'study_title', 'metadata_complete',
               'number_samples_collected', 'study_abstract']

    infolist = []
    for row in Study.get_info(visible, columns):
        # Convert DictCursor to proper dict
        row = dict(row)
        study = Study(row['study_id'])
        if build_samples:
            proc_data_ids = study.processed_data()
            proc_samples = {}
            by_data_type = defaultdict(list)
            study_proc = {study.id: by_data_type}
            for pid in proc_data_ids:
                proc_data = ProcessedData(pid)
                by_data_type[proc_data.data_type()].append(pid)
                proc_samples[pid] = proc_data.samples

        infolist.append(
            _build_single_study_info(study, row, study_proc, proc_samples))
    return infolist
Ejemplo n.º 51
0
 def test_create_no_preprocessed_no_study_error(self):
     """Creation fails when neither preprocessed data nor study is given"""
     self.assertRaises(IncompetentQiitaDeveloperError,
                       ProcessedData.create,
                       self.params_table, self.params_id, self.filepaths)
Ejemplo n.º 52
0
    def test_retrieve_dropped_samples(self):
        """dropped_samples reports the expected samples after the biom tables
        are built"""
        def sample_metadata(description, str_value, latitude, longitude):
            # Every sample shares the first seven values; only the last four
            # differ from one sample to the next
            return {'physical_location': 'location1',
                    'has_physical_specimen': True,
                    'has_extracted_data': True,
                    'sample_type': 'type1',
                    'required_sample_info_status': 'received',
                    'collection_timestamp':
                    datetime(2014, 5, 29, 12, 24, 51),
                    'host_subject_id': 'NotIdentified',
                    'Description': description,
                    'str_column': str_value,
                    'latitude': latitude,
                    'longitude': longitude}

        # Create and populate second study to do test with
        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": "Microbiome of people who eat nothing but "
                                 "fried chicken",
            "study_abstract": "Exploring how a high fat diet changes the "
                              "gut microbiome",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        sample_rows = {
            'SKB8.640193': sample_metadata(
                'Test Sample 1', 'Value for sample 1', 42.42, 41.41),
            'SKD8.640184': sample_metadata(
                'Test Sample 2', 'Value for sample 2', 4.2, 1.1),
            'SKB7.640196': sample_metadata(
                'Test Sample 3', 'Value for sample 3', 4.8, 4.41),
        }
        sample_frame = pd.DataFrame.from_dict(sample_rows, orient='index')

        Study.create(User("*****@*****.**"), "Test study 2", [1],
                     study_info)
        SampleTemplate.create(sample_frame, Study(2))

        # Attach a processed data with three analysis samples to the new study
        mountpoint = get_mountpoint("processed_data")[0][1]
        biom_fp = join(mountpoint,
                       "2_study_1001_closed_reference_otu_table.biom")
        ProcessedData.create("processed_params_uclust", 1, [(biom_fp, 6)],
                             study=Study(2), data_type="16S")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample (analysis_id, "
            "processed_data_id, sample_id) VALUES "
            "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
            "(1,2,'2.SKB7.640196')")

        # '2.SKB7.640196' is deliberately left out of the samples passed in
        kept = {1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
                2: ['2.SKB8.640193', '2.SKD8.640184']}
        self.analysis._build_biom_tables(kept, 10000,
                                         conn_handler=self.conn_handler)

        exp = {1: {'1.SKM4.640180', '1.SKM9.640192'},
               2: {'2.SKB7.640196'}}
        self.assertEqual(self.analysis.dropped_samples, exp)
Ejemplo n.º 53
0
    def test_retrieve_dropped_samples(self):
        """Samples not passed when building the biom tables show up in
        dropped_samples"""
        # Create and populate second study to do test with
        study_fields = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "portal_type_id": 3,
            "study_alias": "FCM",
            "study_description": ("Microbiome of people who eat nothing but "
                                  "fried chicken"),
            "study_abstract": ("Exploring how a high fat diet changes the "
                               "gut microbiome"),
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }

        # Values shared by every sample, in column order
        shared_values = [
            ('physical_location', 'location1'),
            ('has_physical_specimen', True),
            ('has_extracted_data', True),
            ('sample_type', 'type1'),
            ('required_sample_info_status', 'received'),
            ('collection_timestamp', datetime(2014, 5, 29, 12, 24, 51)),
            ('host_subject_id', 'NotIdentified'),
        ]
        # sample name, Description, str_column, latitude, longitude
        per_sample = [
            ('SKB8.640193', 'Test Sample 1', 'Value for sample 1',
             42.42, 41.41),
            ('SKD8.640184', 'Test Sample 2', 'Value for sample 2',
             4.2, 1.1),
            ('SKB7.640196', 'Test Sample 3', 'Value for sample 3',
             4.8, 4.41),
        ]
        metadata_by_sample = {}
        for name, description, str_value, latitude, longitude in per_sample:
            row = dict(shared_values)
            row['Description'] = description
            row['str_column'] = str_value
            row['latitude'] = latitude
            row['longitude'] = longitude
            metadata_by_sample[name] = row
        template = pd.DataFrame.from_dict(metadata_by_sample, orient='index')

        Study.create(User("*****@*****.**"), "Test study 2", [1],
                     study_fields)

        SampleTemplate.create(template, Study(2))

        pd_mountpoint = get_mountpoint("processed_data")[0][1]
        table_fp = join(pd_mountpoint,
                        "2_study_1001_closed_reference_otu_table.biom")
        ProcessedData.create("processed_params_uclust", 1, [(table_fp, 6)],
                             study=Study(2), data_type="16S")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample (analysis_id, "
            "processed_data_id, sample_id) VALUES "
            "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
            "(1,2,'2.SKB7.640196')")

        # Sample '2.SKB7.640196' was inserted above but is not requested here
        requested = {}
        requested[1] = ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']
        requested[2] = ['2.SKB8.640193', '2.SKD8.640184']
        self.analysis._build_biom_tables(requested, 10000,
                                         conn_handler=self.conn_handler)

        exp = {1: {'1.SKM4.640180', '1.SKM9.640192'},
               2: {'2.SKB7.640196'}}
        self.assertEqual(self.analysis.dropped_samples, exp)