Exemple #1
0
    def post(self, analysis_id):
        """Create a job per submitted command, show the wait page, run them

        Parameters
        ----------
        analysis_id
            Identifier passed to ``Analysis`` to load the analysis that the
            new jobs are attached to.

        Notes
        -----
        Every "commands" request argument is split on "#" and unpacked as a
        (data type, command name) pair.
        """
        requested = [arg.split("#")
                     for arg in self.get_arguments("commands")]
        analysis = Analysis(analysis_id)

        # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
        # The mapping file and the input tables always come from study 1.
        handle, mapping_file = mkstemp(suffix="_map_file.txt")
        close(handle)
        SampleTemplate(1).to_file(mapping_file)
        study_fps = {}
        for pd_id in Study(1).processed_data:
            proc = ProcessedData(pd_id)
            study_fps[proc.data_type] = proc.get_filepaths()[0][0]

        commands = []
        for data_type, command in requested:
            opts = {"--otu_table_fp": study_fps[data_type],
                    "--mapping_fp": mapping_file}
            if command == "Beta Diversity":
                # 16S/18S get the reference tree, anything else a params file
                if data_type in {'16S', '18S'}:
                    opts["--tree_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "gg_97_otus_4feb2011.tre")
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")
            Job.create(data_type, command, opts, analysis)
            commands.append("%s: %s" % (data_type, command))

        user = self.get_current_user()
        self.render("analysis_waiting.html",
                    user=user,
                    aid=analysis_id,
                    aname=analysis.name,
                    commands=commands)
        # fire off analysis run here
        # currently synch run so redirect done here. Will remove after demo
        run_analysis(user, analysis)
Exemple #2
0
    def test_create(self):
        """Makes sure creation works as expected"""
        # second case uses a different datatype and command to test the
        # column insert
        cases = [
            ("18S", "Alpha Rarefaction", 4,
             [[4, 2, 1, 3, '{"opt1":4}', None]]),
            ("16S", "Beta Diversity", 5,
             [[5, 1, 1, 2, '{"opt1":4}', None]]),
        ]
        for data_type, command, job_id, exp_job in cases:
            created = Job.create(data_type, command, {"opt1": 4},
                                 Analysis(1))
            self.assertEqual(created.id, job_id)

            # make sure job inserted correctly
            job_rows = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.job WHERE job_id = %d" % job_id)
            self.assertEqual(job_rows, exp_job)

            # make sure job added to analysis correctly
            link_rows = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.analysis_job WHERE job_id = %d"
                % job_id)
            self.assertEqual(link_rows, [[1, job_id]])
Exemple #3
0
    def test_delete_files(self):
        """Deleting job 1 removes its database rows and its result file"""
        result_fp = join(self._job_folder, "1_job_result.txt")
        must_be_empty = (
            "SELECT * FROM qiita.filepath WHERE filepath_id = 12 OR "
            "filepath_id = 19",
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1",
            "SELECT * FROM qiita.analysis_job WHERE job_id = 1",
        )
        try:
            Job.delete(1)
            with self.assertRaises(QiitaDBUnknownIDError):
                Job(1)

            for sql in must_be_empty:
                rows = self.conn_handler.execute_fetchall(sql)
                self.assertEqual(rows, [])

            self.assertFalse(exists(result_fp))
        finally:
            # put the result file back if the delete removed it
            if not exists(result_fp):
                with open(result_fp, 'w') as handle:
                    handle.write("job1result.txt")
Exemple #4
0
def _build_analysis_files(analysis, r_depth=None):
    """Build the analysis files and point every queued job at them

    Parameters
    ----------
    analysis : Analysis object
        The analysis to build files for
    r_depth : int, optional
        Rarefaction depth for biom table creation. Default None
    """
    # mark as running, then create the biom tables and mapping file
    analysis.status = "running"
    analysis.build_files(r_depth)
    mapping_fp = analysis.mapping_file
    tables_by_type = analysis.biom_tables

    # only jobs still waiting to run receive the freshly built inputs
    for job in (Job(jid) for jid in analysis.jobs):
        if job.status != 'queued':
            continue
        input_opts = {"--otu_table_fp": tables_by_type[job.datatype],
                      "--mapping_fp": mapping_fp}
        merged = job.options
        merged.update(input_opts)
        job.options = merged
def _build_analysis_files(analysis, r_depth=None, **kwargs):
    """Build the analysis files and attach them to the queued jobs

    Parameters
    ----------
    analysis : Analysis object
        The analysis to build files for
    r_depth : int, optional
        Rarefaction depth for biom table creation. Default None
    kwargs : ignored
        Accepted but never read by this function.
    """
    # flag the analysis as running and generate its input files
    analysis.status = "running"
    analysis.build_files(r_depth)
    mapping_fp = analysis.mapping_file
    biom_by_datatype = analysis.biom_tables

    # add the generated files to the jobs that have not run yet
    for jid in analysis.jobs:
        queued_job = Job(jid)
        if queued_job.status != 'queued':
            continue
        file_opts = {
            "--otu_table_fp": biom_by_datatype[queued_job.datatype],
            "--mapping_fp": mapping_fp,
        }
        current = queued_job.options
        current.update(file_opts)
        queued_job.options = current
Exemple #6
0
    def test_delete_folders(self):
        """Deleting job 2 removes its database rows and its result folder"""
        must_be_empty = (
            "SELECT * FROM qiita.filepath WHERE filepath_id = 13",
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 2",
            "SELECT * FROM qiita.analysis_job WHERE job_id = 2",
        )
        try:
            Job.delete(2)
            with self.assertRaises(QiitaDBUnknownIDError):
                Job(2)

            for sql in must_be_empty:
                rows = self.conn_handler.execute_fetchall(sql)
                self.assertEqual(rows, [])

            self.assertFalse(exists(join(self._job_folder, "2_test_folder")))
        finally:
            # put the test data back
            folder = join(self._job_folder, "2_test_folder")
            if not exists(folder):
                mkdir(folder)
                mkdir(join(folder, "subdir"))
                restored = (("testfile.txt",),
                            ("testres.htm",),
                            ("subdir", "subres.html"))
                for parts in restored:
                    with open(join(folder, *parts), 'w') as handle:
                        handle.write("DATA")
Exemple #7
0
 def test_create_exists(self):
     """Creating an already existing job raises QiitaDBDuplicateError"""
     duplicate_opts = {"--otu_table_fp": 1, "--mapping_fp": 1}
     with self.assertRaises(QiitaDBDuplicateError):
         Job.create("18S", "Beta Diversity", duplicate_opts, Analysis(1))
Exemple #8
0
    def _failure_callback(self, msg=None):
        """Flag the analysis and its unfinished jobs as errored and log it

        Parameters
        ----------
        msg : optional
            Passed as the message of the 'Runtime' log entry. Default None
        """
        # set the analysis to errored
        self.analysis.status = 'error'

        notify = self._update_status
        if notify is not None:
            notify("Failed")

        # jobs that already reached a final state keep their status
        final_states = ('error', 'completed')
        for job in (Job(jid) for jid in self.analysis.jobs):
            if job.status in final_states:
                continue
            job.status = 'error'

        log_info = {'analysis': self.analysis.id}
        LogEntry.create('Runtime', msg, info=log_info)
    def _failure_callback(self, msg=None):
        """Record a failure on the analysis, its pending jobs and the log

        Parameters
        ----------
        msg : optional
            Message handed to the 'Runtime' log entry. Default None
        """
        analysis = self.analysis
        # set the analysis to errored
        analysis.status = 'error'

        if self._update_status is not None:
            self._update_status("Failed")

        # set any jobs to errored if they didn't execute
        for jid in analysis.jobs:
            pending = Job(jid)
            already_final = pending.status in {'error', 'completed'}
            if not already_final:
                pending.status = 'error'

        LogEntry.create('Runtime', msg, info={'analysis': analysis.id})
 def test_add_jobs_in_construct_job_graphs(self):
     """Building the job graph adds the requested command as job 4"""
     analysis = Analysis(2)
     requested = [('18S', 'Summarize Taxa')]
     user_opts = {'Summarize Taxa': {'opt1': 5}}
     RunAnalysis()._construct_job_graph(
         analysis, requested, comm_opts=user_opts)
     self.assertEqual(analysis.jobs, [Job(3), Job(4)])

     created = Job(4)
     self.assertEqual(created.datatype, '18S')
     self.assertEqual(
         created.command,
         ['Summarize Taxa', 'summarize_taxa_through_plots.py'])

     out_dir = join(get_db_files_base_dir(), 'job',
                    '4_summarize_taxa_through_plots.py_output_dir')
     self.assertEqual(created.options,
                      {'--output_dir': out_dir, 'opt1': 5})
Exemple #11
0
 def test_set_options(self):
     """Options read back equal the ones set plus an '--output_dir'"""
     job = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
     job.options = self.options
     # the expected dict is extended only after the setter has run
     out_dir = join(get_db_files_base_dir(),
                    'job/4_alpha_rarefaction.py_output_dir')
     self.options['--output_dir'] = out_dir
     self.assertEqual(job.options, self.options)
Exemple #12
0
 def test_set_options(self):
     """Options read back equal the ones set plus an '--output_dir'"""
     job = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
     job.options = self.options
     # the expected dict is extended only after the setter has run
     out_dir = join(self._job_folder, '4_alpha_rarefaction.py_output_dir')
     self.options['--output_dir'] = out_dir
     self.assertEqual(job.options, self.options)
Exemple #13
0
 def test_exists_return_jobid(self):
     """tests that existing job returns true"""
     # need to insert matching sample data into analysis 2
     sample_ids = ('1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196',
                   '1.SKM9.640192', '1.SKM4.640180')
     value_rows = ", ".join("(2, 1,'%s')" % sid for sid in sample_ids)
     self.conn_handler.execute(
         "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
     self.conn_handler.execute(
         "INSERT INTO qiita.analysis_sample "
         "(analysis_id, processed_data_id, sample_id) VALUES " + value_rows)

     job_opts = {"--otu_table_fp": 1, "--mapping_fp": 1}
     found, existing = Job.exists("18S", "Beta Diversity", job_opts,
                                  Analysis(1), return_existing=True)
     self.assertTrue(found)
     self.assertEqual(existing, Job(2))
Exemple #14
0
    def get(self, analysis_id):
        """Render the results page of an analysis

        Parameters
        ----------
        analysis_id : str
            Text whose part before the first "/" is the integer analysis id.
        """
        analysis_id = int(analysis_id.split("/")[0])
        analysis = Analysis(analysis_id)
        check_analysis_access(self.current_user, analysis)

        # group (command name, results) pairs by the job's data type
        jobres = defaultdict(list)
        for job_id in analysis.jobs:
            job = Job(job_id)
            per_type = jobres[job.datatype]
            per_type.append((job.command[0], job.results))

        # label the dropped samples by the processed data they belong to
        dropped = {}
        for pd_id, samples in viewitems(analysis.dropped_samples or {}):
            proc_data = ProcessedData(pd_id)
            label = "Data type %s, Study: %s" % (proc_data.data_type(),
                                                 proc_data.study)
            dropped[label] = samples

        self.render("analysis_results.html",
                    jobres=jobres,
                    aname=analysis.name,
                    dropped=dropped,
                    basefolder=get_db_files_base_dir())
Exemple #15
0
 def test_set_options(self):
     """Options read back equal the ones set plus an '--output_dir'"""
     created = Job.create("18S", "Alpha Rarefaction", {"opt1": 4},
                          Analysis(1))
     created.options = self.options
     # the expected dict is extended only after the setter has run
     expected_dir = join(get_db_files_base_dir(),
                         'job/4_alpha_rarefaction.py_output_dir')
     self.options['--output_dir'] = expected_dir
     self.assertEqual(created.options, self.options)
Exemple #16
0
    def get(self, analysis_id):
        """Render the results page and drop this analysis' cached messages

        Parameters
        ----------
        analysis_id : str or int
            Value converted with ``int`` to obtain the analysis id.
        """
        user = self.current_user
        analysis_id = int(analysis_id)
        check_analysis_access(User(user), analysis_id)

        analysis = Analysis(analysis_id)

        # group (command name, results) pairs by the job's data type
        jobres = defaultdict(list)
        for job_id in analysis.jobs:
            job = Job(job_id)
            per_type = jobres[job.datatype]
            per_type.append((job.command[0], job.results))

        # label the dropped samples by the processed data they belong to
        dropped = {}
        for pd_id, samples in viewitems(analysis.dropped_samples):
            proc_data = ProcessedData(pd_id)
            label = "Data type %s, Study: %s" % (proc_data.data_type(),
                                                 proc_data.study)
            dropped[label] = samples

        self.render("analysis_results.html",
                    user=self.current_user,
                    jobres=jobres,
                    aname=analysis.name,
                    dropped=dropped,
                    basefolder=get_db_files_base_dir())

        # wipe out cached messages for this analysis
        r_server = Redis()
        key = '%s:messages' % self.current_user
        marker = '"analysis": %d' % analysis_id
        cached = r_server.lrange(key, 0, -1)
        for message in (cached or ()):
            if marker in message:
                r_server.lrem(key, message, 1)
Exemple #17
0
def _finish_analysis(user, analysis):
    """Sets the final analysis status and tells the user everything is done

    Parameters
    ----------
    user : str
        user running this analysis.
    analysis: Analysis object
        Analysis to finalize.
    """
    from qiita_ware import r_server

    # a single errored job is enough to mark the whole analysis as errored
    any_failed = any(Job(jid).status == "error" for jid in analysis.jobs)
    analysis.status = "error" if any_failed else "completed"

    # send websockets message that we are done running all jobs
    payload = dumps({"msg": "allcomplete", "analysis": analysis.id})
    r_server.rpush(user + ":messages", payload)
    r_server.publish(user, payload)
Exemple #18
0
 def test_exists_noexist_return_jobid(self):
     """tests that non-existant job with bad samples returns false"""
     job_opts = {"--otu_table_fp": 1, "--mapping_fp": 27}
     found, existing = Job.exists("16S", "Beta Diversity", job_opts,
                                  Analysis(1), return_existing=True)
     self.assertFalse(found)
     self.assertEqual(existing, None)
Exemple #19
0
 def test_create_exists_return_existing(self):
     """Makes sure creation doesn't duplicate a job by returning existing"""
     Analysis.create(User("*****@*****.**"), "new", "desc")
     # give the new analysis (id 3) its samples
     insert_sql = (
         "INSERT INTO qiita.analysis_sample (analysis_id, "
         "processed_data_id, sample_id) VALUES (3,1,'SKB8.640193'), "
         "(3,1,'SKD8.640184'), (3,1,'SKB7.640196'), (3,1,'SKM9.640192'),"
         "(3,1,'SKM4.640180')")
     self.conn_handler.execute(insert_sql)

     job_opts = {"--otu_table_fp": 1, "--mapping_fp": 1}
     returned = Job.create("18S", "Beta Diversity", job_opts, Analysis(3),
                           return_existing=True)
     self.assertEqual(returned.id, 2)
Exemple #20
0
 def test_exists(self):
     """tests that existing job returns true"""
     # The docstring must be the first statement of the function; placed
     # after the SQL setup it was only a discarded string expression.
     # need to insert matching sample data into analysis 2
     self.conn_handler.execute(
         "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
     self.conn_handler.execute(
         "INSERT INTO qiita.analysis_sample (analysis_id, "
         "processed_data_id, sample_id) VALUES (2,1,'SKB8.640193'), "
         "(2,1,'SKD8.640184'), (2,1,'SKB7.640196'), (2,1,'SKM9.640192'),"
         "(2,1,'SKM4.640180')")
     self.assertTrue(Job.exists("18S", "Beta Diversity",
                                {"--otu_table_fp": 1,
                                 "--mapping_fp": 1}, Analysis(1)))
 def test_add_jobs_in_construct_job_graphs(self):
     """Building the job graph adds job 4 with the analysis file options"""
     analysis = Analysis(2)
     npt.assert_warns(QiitaDBWarning, analysis.build_files)
     requested = [('18S', 'Summarize Taxa')]
     user_opts = {'Summarize Taxa': {'opt1': 5}}
     RunAnalysis()._construct_job_graph(
         analysis, requested, comm_opts=user_opts)
     self.assertEqual(analysis.jobs, [Job(3), Job(4)])

     created = Job(4)
     self.assertEqual(created.datatype, '18S')
     self.assertEqual(
         created.command,
         ['Summarize Taxa', 'summarize_taxa_through_plots.py'])

     base = get_db_files_base_dir()
     expopts = {
         'opt1': 5,
         '--mapping_fp': join(base, 'analysis/2_analysis_mapping.txt'),
         '--otu_table_fp': join(
             base, 'analysis/2_analysis_dt-18S_r-1_c-3.biom'),
         '--output_dir': join(
             base, 'job', '4_summarize_taxa_through_plots.py_output_dir'),
     }
     self.assertEqual(created.options, expopts)
Exemple #22
0
 def test_create_exists_return_existing(self):
     """Makes sure creation doesn't duplicate a job by returning existing"""
     Analysis.create(User("*****@*****.**"), "new", "desc")
     # give the new analysis (id 3) its samples
     sample_ids = ('1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196',
                   '1.SKM9.640192', '1.SKM4.640180')
     value_rows = ", ".join("(3, 1, '%s')" % sid for sid in sample_ids)
     self.conn_handler.execute(
         "INSERT INTO qiita.analysis_sample "
         "(analysis_id, processed_data_id, sample_id) VALUES " + value_rows)

     job_opts = {"--otu_table_fp": 1, "--mapping_fp": 1}
     returned = Job.create("18S", "Beta Diversity", job_opts, Analysis(3),
                           return_existing=True)
     self.assertEqual(returned.id, 2)
Exemple #23
0
def run_analysis(user, analysis):
    """Run the commands within an Analysis object and sends user messages

    Every job of the analysis whose status is 'queued' is run synchronously.
    Progress messages are appended to the "<user>:messages" redis list and
    published on the `user` channel.

    Parameters
    ----------
    user : str
        Name of the redis channel and prefix of the messages list.
    analysis : Analysis object
        The analysis whose queued jobs are run.
    """
    def _notify(message):
        # every message is both cached in the list and pushed live
        payload = dumps(message)
        r_server.rpush(user + ":messages", payload)
        r_server.publish(user, payload)

    analysis.status = "running"
    all_good = True
    pubsub = r_server.pubsub()
    pubsub.subscribe(user)
    try:
        for job_id in analysis.jobs:
            job = Job(job_id)
            if job.status != 'queued':
                continue

            name, command = job.command
            options = job.options
            # create json base for websocket messages
            msg = {
                "analysis": analysis.id,
                "msg": None,
                "command": "%s: %s" % (job.datatype, name)
            }

            o_fmt = ' '.join(['%s %s' % (k, v) for k, v in options.items()])
            c_fmt = str("%s %s" % (command, o_fmt))

            # send running message to user wait page
            job.status = 'running'
            msg["msg"] = "Running"
            _notify(msg)

            # run the command
            try:
                qiita_compute.submit_sync(c_fmt)
            except Exception as e:
                all_good = False
                job.status = 'error'
                msg["msg"] = "ERROR"
                _notify(msg)
                print("Failed compute on job id %d: %s\n%s" %
                      (job_id, e, c_fmt))
                continue

            msg["msg"] = "Completed"
            _notify(msg)
            # FIX THIS Should not be hard coded
            job.add_results([(options["--output_dir"], "directory")])
            job.status = 'completed'

        # send websockets message that we are done. The message is built
        # from scratch: the per-job `msg` only exists if at least one job
        # was queued, so reusing it failed when nothing was run.
        _notify({"analysis": analysis.id, "msg": "allcomplete",
                 "command": ""})
    finally:
        # always release the subscription, even on an unexpected error
        pubsub.unsubscribe()

    # set final analysis status
    if all_good:
        analysis.status = "completed"
    else:
        analysis.status = "error"
Exemple #24
0
 def test_exists_noexist_options(self):
     """tests that non-existant job with bad options returns false"""
     # The docstring must be the first statement of the function; placed
     # after the SQL setup it was only a discarded string expression.
     # need to insert matching sample data into analysis 2
     # makes sure failure is because options and not samples
     self.conn_handler.execute(
         "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
     self.conn_handler.execute(
         "INSERT INTO qiita.analysis_sample (analysis_id, "
         "processed_data_id, sample_id) VALUES (2,1,'SKB8.640193'), "
         "(2,1,'SKD8.640184'), (2,1,'SKB7.640196'), (2,1,'SKM9.640192'),"
         "(2,1,'SKM4.640180')")
     self.assertFalse(Job.exists("18S", "Beta Diversity",
                                 {"--otu_table_fp": 1,
                                  "--mapping_fp": 27}, Analysis(1)))
Exemple #25
0
 def test_exists_noexist_options(self):
     """tests that non-existant job with bad options returns false"""
     # need to insert matching sample data into analysis 2
     # makes sure failure is because options and not samples
     sample_ids = ('1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196',
                   '1.SKM9.640192', '1.SKM4.640180')
     value_rows = ", ".join("(2, 1,'%s')" % sid for sid in sample_ids)
     self.conn_handler.execute(
         "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
     self.conn_handler.execute(
         "INSERT INTO qiita.analysis_sample "
         "(analysis_id, processed_data_id, sample_id) VALUES " + value_rows)

     bad_opts = {"--otu_table_fp": 1, "--mapping_fp": 27}
     found = Job.exists("18S", "Beta Diversity", bad_opts, Analysis(1))
     self.assertFalse(found)
Exemple #26
0
 def test_exists(self):
     """tests that existing job returns true"""
     # The docstring must be the first statement of the function; placed
     # after the SQL setup it was only a discarded string expression.
     # need to insert matching sample data into analysis 2
     self.conn_handler.execute(
         "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
     self.conn_handler.execute(
         "INSERT INTO qiita.analysis_sample (analysis_id, "
         "processed_data_id, sample_id) VALUES (2,1,'SKB8.640193'), "
         "(2,1,'SKD8.640184'), (2,1,'SKB7.640196'), (2,1,'SKM9.640192'),"
         "(2,1,'SKM4.640180')")
     self.assertTrue(
         Job.exists("18S", "Beta Diversity", {
             "--otu_table_fp": 1,
             "--mapping_fp": 1
         }, Analysis(1)))
Exemple #27
0
def system_call_from_job(job_id, **kwargs):
    """Runs the command line described by a Job and records its output dir

    Parameters
    ----------
    job_id : int
        The job object ID
    kwargs : ignored
        Accepted but never read by this function.
    """
    job = Job(job_id)
    _name, executable = job.command

    # command line: the executable followed by every option key and value
    tokens = [executable]
    tokens.extend(flatten(job.options.items()))
    cmd_fmt = ' '.join(map(str, tokens))

    try:
        _stdout, _stderr, _status = system_call(cmd_fmt)
    except Exception as err:
        # store the failure on the job, then let the caller see it too
        job.set_error(str(err))
        raise

    # FIX THIS add_results should not be hard coded  Issue #269
    output_dir = job.options["--output_dir"]
    job.add_results([(output_dir, "directory")])
Exemple #28
0
    def get(self, analysis_id):
        """Render the waiting page listing the commands of an analysis

        Parameters
        ----------
        analysis_id : str or int
            Value converted with ``int`` to obtain the analysis id.
        """
        user = self.current_user
        analysis_id = int(analysis_id)
        check_analysis_access(User(user), analysis_id)

        analysis = Analysis(analysis_id)

        # one "<data type>: <command name>" label per job
        jobs = (Job(job_id) for job_id in analysis.jobs)
        commands = ["%s: %s" % (job.datatype, job.command[0])
                    for job in jobs]

        self.render("analysis_waiting.html", user=user, aid=analysis_id,
                    aname=analysis.name, commands=commands)
Exemple #29
0
 def test_get_commands(self):
     """Job.get_commands returns the three expected Command objects"""
     output_opt = '{"--output_dir":null}'
     table_and_map = '{"--otu_table_fp":null,"--mapping_fp":null}'

     summarize_taxa = Command(
         'Summarize Taxa', 'summarize_taxa_through_plots.py',
         '{"--otu_table_fp":null}', '{}',
         '{"--mapping_category":null, "--mapping_fp":null,'
         '"--sort":null}',
         output_opt)
     beta_diversity = Command(
         'Beta Diversity', 'beta_diversity_through_plots.py',
         table_and_map, '{}',
         '{"--tree_fp":null,"--color_by_all_fields":null,'
         '"--seqs_per_sample":null}',
         output_opt)
     alpha_rarefaction = Command(
         'Alpha Rarefaction', 'alpha_rarefaction.py',
         table_and_map, '{}',
         '{"--tree_fp":null,"--num_steps":null,'
         '"--min_rare_depth":null,"--max_rare_depth":null,'
         '"--retain_intermediate_files":false}',
         output_opt)

     exp = [summarize_taxa, beta_diversity, alpha_rarefaction]
     self.assertEqual(Job.get_commands(), exp)
Exemple #30
0
    def test_build_files_job_comm_wrapper(self):
        """Analysis 2 files get built, then running job 3 ends in error"""
        # basic setup needed for test
        job = Job(3)

        # create the files needed for job, testing _build_analysis_files
        analysis = Analysis(2)
        _build_analysis_files(analysis, 100)

        analysis_dir = join(get_db_files_base_dir(), "analysis")
        built = [join(analysis_dir, "2_analysis_mapping.txt"),
                 join(analysis_dir, "2_analysis_18S.biom")]
        # queued on self._del_files before asserting anything about them
        # (presumably removed during tearDown - confirm)
        self._del_files.extend(built)
        for fp in built:
            self.assertTrue(exists(fp))
        self.assertEqual([3], analysis.jobs)

        _job_comm_wrapper("*****@*****.**", 2, job)

        self.assertEqual(job.status, "error")
Exemple #31
0
 def test_failure_callback(self):
     """Make sure failure at file creation step doesn't hang everything"""
     processed_dir = join(get_db_files_base_dir(), "processed_data")
     biom_fp = join(processed_dir,
                    "1_study_1001_closed_reference_otu_table.biom")
     backup_fp = join(processed_dir, "1_study_1001.bak")

     # rename a needed file for creating the biom table
     rename(biom_fp, backup_fp)
     analysis = Analysis(2)
     group = get_id_from_user("*****@*****.**")
     try:
         runner = RunAnalysis(moi_context=ctx_default,
                              moi_parent_id=group)
         runner(analysis, [], rarefaction_depth=100)
         self.assertEqual(analysis.status, 'error')
         for jid in analysis.jobs:
             self.assertEqual(Job(jid).status, 'error')
     finally:
         # restore the original file name whatever happened above
         rename(backup_fp, biom_fp)
Exemple #32
0
def _finish_analysis(analysis, **kwargs):
    """Sets the final analysis status from the statuses of its jobs

    Parameters
    ----------
    analysis: Analysis
        Analysis to finalize.
    kwargs : ignored
        Necessary to have in parameters to support execution via moi.
    """
    # a single errored job is enough to mark the whole analysis as errored
    any_failed = any(Job(jid).status == "error" for jid in analysis.jobs)

    # set final analysis status
    analysis.status = "error" if any_failed else "completed"
Exemple #33
0
def system_call_from_job(job_id, **kwargs):
    """Builds a command line from a Job, executes it and stores the result

    Parameters
    ----------
    job_id : int
        The job object ID
    kwargs : ignored
        Accepted but never read by this function.
    """
    job = Job(job_id)
    _label, script = job.command
    job_options = job.options

    # the script followed by each option name and value, space separated
    pieces = [script] + list(flatten(job_options.items()))
    cmd_fmt = ' '.join(str(piece) for piece in pieces)

    try:
        _out, _err, _exit_status = system_call(cmd_fmt)
    except Exception as exc:
        # keep the error message on the job before propagating
        job.set_error(str(exc))
        raise

    # FIX THIS add_results should not be hard coded  Issue #269
    results = [(job.options["--output_dir"], "directory")]
    job.add_results(results)
Exemple #34
0
    def _construct_job_graph(self, user, analysis, commands, comm_opts=None,
                             rarefaction_depth=None):
        """Builds the job graph for running an analysis

        The graph has one node that builds the analysis files, one node per
        job of the analysis (each depending on the files node) and a final
        node that finishes the analysis (depending on every job node).

        Parameters
        ----------
        user : str
            user running this analysis.
        analysis: Analysis object
            Analysis to build the job graph for.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options). Never modified.
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        """
        self._logger = stderr
        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}
        for data_type, command in commands:
            # get opts set by user, else make it empty dict. A copy is used
            # so the hard-coded entries added below neither change the
            # caller's comm_opts nor carry over to another job that uses
            # the same command with a different data type.
            opts = dict(comm_opts.get(command, {}))
            # Add commands to analysis as jobs
            # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
            if (command == "Beta Diversity" or command == "Alpha Rarefaction"):
                if data_type in {'16S', '18S'}:
                    opts["--tree_fp"] = join(get_db_files_base_dir(),
                                             "reference",
                                             "gg_97_otus_4feb2011.tre")
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")
            if command == "Alpha Rarefaction":
                opts["-n"] = 4
            Job.create(data_type, command, opts, analysis,
                       return_existing=True)

        # Create the files for the jobs
        files_node_name = "%d_ANALYSISFILES" % analysis.id
        self._job_graph.add_node(files_node_name,
                                 job=(_build_analysis_files,
                                      analysis, rarefaction_depth),
                                 requires_deps=False)
        # Add the jobs
        job_nodes = []
        for job_id in analysis.jobs:
            job = Job(job_id)
            node_name = "%d_JOB_%d" % (analysis.id, job.id)
            job_nodes.append(node_name)
            self._job_graph.add_node(node_name,
                                     job=(_job_comm_wrapper, user, analysis.id,
                                          job),
                                     requires_deps=False)
            # Adding the dependency edges to the graph
            self._job_graph.add_edge(files_node_name, node_name)

        # Finalize the analysis
        node_name = "FINISH_ANALYSIS_%d" % analysis.id
        self._job_graph.add_node(node_name,
                                 job=(_finish_analysis, user, analysis),
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, node_name)
Exemple #35
0
 def test_not_equal(self):
     """`!=` is False for the same command, True for anything else"""
     commands = Command.create_list()
     first, other = commands[1], commands[2]
     self.assertFalse(first != first)
     self.assertTrue(first != other)
     # an object of a different class never compares equal
     self.assertTrue(first != Job(1))
Exemple #36
0
 def setUp(self):
     """Load job 1 and prepare the options and paths used by the tests"""
     self.job = Job(1)
     self.options = {
         "option1": False,
         "option2": 25,
         "option3": "NEW",
     }
     self._delete_path = []
     self._delete_dir = []
     # the first "job" mountpoint entry is a pair; keep its second item
     _first_item, job_folder = get_mountpoint("job")[0]
     self._job_folder = job_folder
Exemple #37
0
 def test_retrieve_results_empty(self):
     """A freshly created job has an empty results list"""
     fresh_job = Job.create("18S", "Beta Diversity", {"opt1": 4},
                            Analysis(1))
     self.assertEqual(fresh_job.results, [])
Exemple #38
0
    def _construct_job_graph(self, analysis, commands, comm_opts=None,
                             rarefaction_depth=None,
                             merge_duplicated_sample_ids=False):
        """Builds the job graph for running an analysis

        Marks the analysis as running, builds its files, creates one job for
        every requested (data_type, command) pair whose data type has a biom
        table, and then registers in the job graph: one node per job of the
        analysis, a node that tgz-s the results (depends on every job node)
        and a node that finalizes the analysis (depends on the tgz node).

        Parameters
        ----------
        analysis: Analysis object
            Analysis to run.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options). The dictionaries are copied
            for every job, so they are never modified by this method.
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        merge_duplicated_sample_ids : bool, optional
            If the duplicated sample ids in the selected studies should be
            merged or prepended with the artifact ids. False (default) prepends
            the artifact id
        """
        self._logger = stderr
        self.analysis = analysis
        analysis_id = analysis.id

        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}

        analysis.status = "running"
        # creating bioms at this point cause all this section runs on a worker
        # node, currently an ipython job
        analysis.build_files(rarefaction_depth, merge_duplicated_sample_ids)
        mapping_file = analysis.mapping_file

        tree_commands = ("Beta Diversity", "Alpha Rarefaction")
        for data_type, biom_fp in viewitems(analysis.biom_tables):
            biom_table = load_table(biom_fp)
            # getting reference_id and software_command_id from the first
            # sample of the biom. This decision was discussed on the qiita
            # meeting on 02/24/16
            metadata = biom_table.metadata(biom_table.ids()[0])
            rid = metadata['reference_id']
            sci = metadata['command_id']

            # 'na' marks a missing reference / command in the biom metadata
            if rid != 'na':
                reference = Reference(rid)
                tree = reference.tree_fp
            else:
                reference = None
                tree = ''

            cmd = Command(sci) if sci != 'na' else None

            for cmd_data_type, command in commands:
                if data_type != cmd_data_type:
                    continue

                # get opts set by user, else make it empty dict. Work on a
                # copy: the same command can be requested for several data
                # types, and mutating the shared dict would leak options
                # (e.g. a stale --tree_fp) from one job into the next one and
                # into the caller's comm_opts
                opts = dict(comm_opts.get(command, {}))
                opts["--otu_table_fp"] = biom_fp
                opts["--mapping_fp"] = mapping_file

                if command in tree_commands:
                    if tree != '':
                        opts["--tree_fp"] = tree
                    else:
                        # no tree available: fall back to the parameters file
                        opts["--parameter_fp"] = join(
                            get_db_files_base_dir(), "reference",
                            "params_qiime.txt")

                if command == "Alpha Rarefaction":
                    opts["-n"] = 4

                Job.create(data_type, command, opts, analysis, reference, cmd,
                           return_existing=True)

        # Add the jobs
        job_nodes = []
        for job in analysis.jobs:
            node_name = "%d_JOB_%d" % (analysis_id, job.id)
            job_nodes.append(node_name)
            job_name = "%s: %s" % (job.datatype, job.command[0])
            self._job_graph.add_node(node_name,
                                     func=system_call_from_job,
                                     args=(job.id,),
                                     job_name=job_name,
                                     requires_deps=False)

        # tgz-ing the analysis results
        tgz_node_name = "TGZ_ANALYSIS_%d" % (analysis_id)
        job_name = "tgz_analysis_%d" % (analysis_id)
        self._job_graph.add_node(tgz_node_name,
                                 func=_generate_analysis_tgz,
                                 args=(analysis,),
                                 job_name=job_name,
                                 requires_deps=False)
        # Adding the dependency edges to the graph: the tgz node runs after
        # every job node
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, tgz_node_name)

        # Finalize the analysis, once the tgz has been generated
        node_name = "FINISH_ANALYSIS_%d" % analysis_id
        self._job_graph.add_node(node_name,
                                 func=_finish_analysis,
                                 args=(analysis,),
                                 job_name='Finalize analysis',
                                 requires_deps=False)
        self._job_graph.add_edge(tgz_node_name, node_name)
Exemple #39
0
class JobTest(TestCase):
    """Tests that the job object works as expected"""

    def setUp(self):
        """Loads job 1 and prepares the bookkeeping used by the tests"""
        self.job = Job(1)
        self.options = {"option1": False, "option2": 25, "option3": "NEW"}
        self._delete_path = []
        self._delete_dir = []
        _, self._job_folder = get_mountpoint("job")[0]

    def tearDown(self):
        """Removes the files and directories registered for deletion"""
        # needs to be this way because map does not play well with remove and
        # rmtree for python3
        for item in self._delete_path:
            remove(item)
        for item in self._delete_dir:
            rmtree(item)

    def _reset_analysis_2_samples(self):
        """Replaces the samples of analysis 2 with a fixed set of five"""
        self.conn_handler.execute(
            "DELETE FROM qiita.analysis_sample WHERE analysis_id = 2")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(2, 1,'1.SKB8.640193'), (2, 1,'1.SKD8.640184'), "
            "(2, 1,'1.SKB7.640196'), (2, 1,'1.SKM9.640192'), "
            "(2, 1,'1.SKM4.640180')")

    def test_exists(self):
        """tests that existing job returns true"""
        # need to insert matching sample data into analysis 2
        self._reset_analysis_2_samples()
        self.assertTrue(Job.exists("18S", "Beta Diversity",
                                   {"--otu_table_fp": 1,
                                    "--mapping_fp": 1}, Analysis(1)))

    def test_exists_return_jobid(self):
        """tests that existing job returns true and the existing job"""
        # need to insert matching sample data into analysis 2
        self._reset_analysis_2_samples()
        exists, jid = Job.exists("18S", "Beta Diversity",
                                 {"--otu_table_fp": 1, "--mapping_fp": 1},
                                 Analysis(1), return_existing=True)
        self.assertTrue(exists)
        self.assertEqual(jid, Job(2))

    def test_exists_noexist_options(self):
        """tests that non-existant job with bad options returns false"""
        # need to insert matching sample data into analysis 2
        # makes sure failure is because options and not samples
        self._reset_analysis_2_samples()
        self.assertFalse(Job.exists("18S", "Beta Diversity",
                                    {"--otu_table_fp": 1,
                                     "--mapping_fp": 27}, Analysis(1)))

    def test_exists_noexist_return_jobid(self):
        """tests that non-existant job returns false and no job"""
        exists, jid = Job.exists(
            "16S", "Beta Diversity",
            {"--otu_table_fp": 1, "--mapping_fp": 27}, Analysis(1),
            return_existing=True)
        self.assertFalse(exists)
        self.assertIsNone(jid)

    def test_get_commands(self):
        """Makes sure the list of available commands is the expected one"""
        exp = [
            Command('Summarize Taxa', 'summarize_taxa_through_plots.py',
                    '{"--otu_table_fp":null}', '{}',
                    '{"--mapping_category":null, "--mapping_fp":null,'
                    '"--sort":null}', '{"--output_dir":null}'),
            Command('Beta Diversity', 'beta_diversity_through_plots.py',
                    '{"--otu_table_fp":null,"--mapping_fp":null}', '{}',
                    '{"--tree_fp":null,"--color_by_all_fields":null,'
                    '"--seqs_per_sample":null}', '{"--output_dir":null}'),
            Command('Alpha Rarefaction', 'alpha_rarefaction.py',
                    '{"--otu_table_fp":null,"--mapping_fp":null}', '{}',
                    '{"--tree_fp":null,"--num_steps":null,"--min_rare_depth"'
                    ':null,"--max_rare_depth":null,'
                    '"--retain_intermediate_files":false}',
                    '{"--output_dir":null}')
            ]
        self.assertEqual(Job.get_commands(), exp)

    def test_delete_files(self):
        """Makes sure deleting job 1 removes its rows and its result file"""
        try:
            Job.delete(1)
            with self.assertRaises(QiitaDBUnknownIDError):
                Job(1)

            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.filepath WHERE filepath_id = 12 OR "
                "filepath_id = 19")
            self.assertEqual(obs, [])

            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
            self.assertEqual(obs, [])

            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.analysis_job WHERE job_id = 1")
            self.assertEqual(obs, [])

            self.assertFalse(exists(join(self._job_folder,
                             "1_job_result.txt")))
        finally:
            # put the test data back, whatever the outcome of the test
            result_fp = join(self._job_folder, "1_job_result.txt")
            if not exists(result_fp):
                with open(result_fp, 'w') as fh:
                    fh.write("job1result.txt")

    def test_delete_folders(self):
        """Makes sure deleting job 2 removes its rows and its result folder"""
        try:
            Job.delete(2)
            with self.assertRaises(QiitaDBUnknownIDError):
                Job(2)

            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.filepath WHERE filepath_id = 13")
            self.assertEqual(obs, [])

            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.job_results_filepath WHERE job_id = 2")
            self.assertEqual(obs, [])

            obs = self.conn_handler.execute_fetchall(
                "SELECT * FROM qiita.analysis_job WHERE job_id = 2")
            self.assertEqual(obs, [])

            self.assertFalse(exists(join(self._job_folder, "2_test_folder")))
        finally:
            # put the test data back
            basedir = self._job_folder
            if not exists(join(basedir, "2_test_folder")):
                mkdir(join(basedir, "2_test_folder"))
                mkdir(join(basedir, "2_test_folder", "subdir"))
                with open(join(basedir, "2_test_folder",
                               "testfile.txt"), 'w') as f:
                    f.write("DATA")
                with open(join(basedir, "2_test_folder",
                               "testres.htm"), 'w') as f:
                    f.write("DATA")
                with open(join(basedir, "2_test_folder",
                               "subdir", "subres.html"), 'w') as f:
                    f.write("DATA")

    def test_create(self):
        """Makes sure creation works as expected"""
        # make first job
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.id, 4)
        # make sure job inserted correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                                 "WHERE job_id = 4")
        exp = [[4, 2, 1, 3, '{"opt1":4}', None]]
        self.assertEqual(obs, exp)
        # make sure job added to analysis correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                                 "qiita.analysis_job WHERE "
                                                 "job_id = 4")
        exp = [[1, 4]]
        self.assertEqual(obs, exp)

        # make second job with diff datatype and command to test column insert
        new = Job.create("16S", "Beta Diversity", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.id, 5)
        # make sure job inserted correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                                 "WHERE job_id = 5")
        exp = [[5, 1, 1, 2, '{"opt1":4}', None]]
        self.assertEqual(obs, exp)
        # make sure job added to analysis correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                                 "qiita.analysis_job WHERE "
                                                 "job_id = 5")
        exp = [[1, 5]]
        self.assertEqual(obs, exp)

    def test_create_exists(self):
        """Makes sure creation doesn't duplicate a job"""
        with self.assertRaises(QiitaDBDuplicateError):
            Job.create("18S", "Beta Diversity",
                       {"--otu_table_fp": 1, "--mapping_fp": 1},
                       Analysis(1))

    def test_create_exists_return_existing(self):
        """Makes sure creation doesn't duplicate a job by returning existing"""
        Analysis.create(User("*****@*****.**"), "new", "desc")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(3, 1, '1.SKB8.640193'), (3, 1, '1.SKD8.640184'), "
            "(3, 1, '1.SKB7.640196'), (3, 1, '1.SKM9.640192'), "
            "(3, 1, '1.SKM4.640180')")
        new = Job.create("18S", "Beta Diversity",
                         {"--otu_table_fp": 1, "--mapping_fp": 1},
                         Analysis(3), return_existing=True)
        self.assertEqual(new.id, 2)

    def test_retrieve_datatype(self):
        """Makes sure datatype retrieval is correct"""
        self.assertEqual(self.job.datatype, '18S')

    def test_retrieve_command(self):
        """Makes sure command retrieval is correct"""
        self.assertEqual(self.job.command, ['Summarize Taxa',
                                            'summarize_taxa_through_plots.py'])

    def test_retrieve_options(self):
        """Makes sure options retrieval is correct"""
        self.assertEqual(self.job.options, {
            '--otu_table_fp': 1,
            '--output_dir': join(
                self._job_folder,
                '1_summarize_taxa_through_plots.py_output_dir')})

    def test_set_options(self):
        """Makes sure options can be set on a newly created job"""
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
        new.options = self.options
        # the options read back are expected to also carry the output dir
        self.options['--output_dir'] = join(self._job_folder,
                                            '4_alpha_rarefaction.'
                                            'py_output_dir')
        self.assertEqual(new.options, self.options)

    def test_retrieve_results(self):
        """Makes sure results retrieval is correct for a single file"""
        self.assertEqual(self.job.results, ["1_job_result.txt"])

    def test_retrieve_results_folder(self):
        """Makes sure results retrieval is correct for a folder"""
        job = Job(2)
        self.assertEqual(job.results, ['2_test_folder/testres.htm',
                                       '2_test_folder/subdir/subres.html'])

    def test_retrieve_results_empty(self):
        """Makes sure a newly created job has no results"""
        new = Job.create("18S", "Beta Diversity", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.results, [])

    def test_set_error(self):
        """Makes sure setting an error updates the status and the error"""
        before = datetime.now()
        self.job.set_error("TESTERROR")
        after = datetime.now()
        self.assertEqual(self.job.status, "error")

        error = self.job.error

        self.assertEqual(error.severity, 2)
        self.assertEqual(error.msg, 'TESTERROR')
        # the error must have been logged while set_error was running
        self.assertTrue(before < error.time < after)

    def test_retrieve_error_blank(self):
        """Makes sure a job without error reports None"""
        self.assertIsNone(self.job.error)

    def test_set_error_completed(self):
        """Makes sure an error can't be set on a job already in error"""
        self.job.status = "error"
        with self.assertRaises(QiitaDBStatusError):
            self.job.set_error("TESTERROR")

    def test_retrieve_error_exists(self):
        """Makes sure the error message set on a job can be retrieved"""
        self.job.set_error("TESTERROR")
        self.assertEqual(self.job.error.msg, "TESTERROR")

    def test_add_results(self):
        """Makes sure a result file is attached to the job"""
        self.job.add_results([(join(self._job_folder, "1_job_result.txt"),
                             "plain_text")])

        # make sure files attached to job properly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")

        self.assertEqual(obs, [[1, 12], [1, 19]])

    def test_add_results_dir(self):
        """Makes sure a result directory is attached to the job"""
        # Path of the test directory (it is not created here)
        test_dir = join(self._job_folder, "2_test_folder")

        # add folder to job
        self.job.add_results([(test_dir, "directory")])

        # make sure files attached to job properly
        obs = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
        self.assertEqual(obs, [[1, 12], [1, 19]])

    def test_add_results_completed(self):
        """Makes sure results can't be added to a completed job"""
        self.job.status = "completed"
        with self.assertRaises(QiitaDBStatusError):
            self.job.add_results([("/fake/dir/", "directory")])
Exemple #40
0
 def setUp(self):
     """Loads job 1 and prepares the bookkeeping used by the tests"""
     self.job = Job(1)
     self.options = {
         "option1": False,
         "option2": 25,
         "option3": "NEW",
     }
     # paths registered here are meant to be cleaned up after each test
     self._delete_path, self._delete_dir = [], []
Exemple #41
0
 def test_create_exists(self):
     """Creating an already existing job raises QiitaDBDuplicateError"""
     duplicated_opts = {"--otu_table_fp": 1, "--mapping_fp": 1}
     with self.assertRaises(QiitaDBDuplicateError):
         Job.create("18S", "Beta Diversity", duplicated_opts, Analysis(1))
    def _construct_job_graph(self, analysis, commands, comm_opts=None,
                             rarefaction_depth=None,
                             merge_duplicated_sample_ids=False):
        """Builds the job graph for running an analysis

        Creates one job per requested (data_type, command) pair and then
        registers in the job graph: a node that builds the analysis files,
        one node per job of the analysis (each depends on the files node) and
        a node that finalizes the analysis (depends on every job node).

        Parameters
        ----------
        analysis: Analysis object
            Analysis to run.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options). The dictionaries are copied
            for every job, so they are never modified by this method.
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        merge_duplicated_sample_ids : bool, optional
            If the duplicated sample ids in the selected studies should be
            merged or prepended with the artifact ids. False (default) prepends
            the artifact id
        """
        self._logger = stderr
        self.analysis = analysis

        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}

        tree_commands = ("Beta Diversity", "Alpha Rarefaction")
        for data_type, command in commands:
            # get opts set by user, else make it empty dict. Work on a copy:
            # the same command can be requested for several data types, and
            # mutating the shared dict would leak options (e.g. a stale
            # --tree_fp) from one job into the next one and into the caller's
            # comm_opts
            opts = dict(comm_opts.get(command, {}))

            # Add commands to analysis as jobs
            # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
            if command in tree_commands:
                if data_type in {'16S', '18S'}:
                    opts["--tree_fp"] = join(get_db_files_base_dir(),
                                             "reference",
                                             "gg_97_otus_4feb2011.tre")
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")

            if command == "Alpha Rarefaction":
                opts["-n"] = 4

            Job.create(data_type, command, opts, analysis,
                       return_existing=True)

        # Create the files for the jobs
        files_node_name = "%d_ANALYSISFILES" % analysis.id
        self._job_graph.add_node(files_node_name,
                                 func=_build_analysis_files,
                                 args=(analysis, rarefaction_depth,
                                       merge_duplicated_sample_ids),
                                 job_name='Build analysis',
                                 requires_deps=False)

        # Add the jobs
        job_nodes = []
        for job in analysis.jobs:
            node_name = "%d_JOB_%d" % (analysis.id, job.id)
            job_nodes.append(node_name)
            job_name = "%s: %s" % (job.datatype, job.command[0])
            self._job_graph.add_node(node_name,
                                     func=system_call_from_job,
                                     args=(job.id,),
                                     job_name=job_name,
                                     requires_deps=False)

            # Adding the dependency edges to the graph: every job runs after
            # the analysis files have been built
            self._job_graph.add_edge(files_node_name, node_name)

        # Finalize the analysis.
        node_name = "FINISH_ANALYSIS_%d" % analysis.id
        self._job_graph.add_node(node_name,
                                 func=_finish_analysis,
                                 args=(analysis,),
                                 job_name='Finalize analysis',
                                 requires_deps=False)

        # Adding the dependency edges to the graph: the finish node runs
        # after every job node
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, node_name)
Exemple #43
0
 def test_retrieve_results_folder(self):
     """Checks the result paths reported for job 2"""
     expected = ['2_test_folder/testres.htm',
                 '2_test_folder/subdir/subres.html']
     folder_job = Job(2)
     self.assertEqual(folder_job.results, expected)