Example #1
    def test_create(self):
        """Makes sure creation works as expected"""
        # make first job
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.id, 4)
        # make sure job inserted correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                                 "WHERE job_id = 4")
        exp = [[4, 2, 1, 3, '{"opt1":4}', None]]
        self.assertEqual(obs, exp)
        # make sure job added to analysis correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                                 "qiita.analysis_job WHERE "
                                                 "job_id = 4")
        exp = [[1, 4]]
        self.assertEqual(obs, exp)

        # make second job with diff datatype and command to test column insert
        new = Job.create("16S", "Beta Diversity", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.id, 5)
        # make sure job inserted correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                                 "WHERE job_id = 5")
        exp = [[5, 1, 1, 2, '{"opt1":4}', None]]
        self.assertEqual(obs, exp)
        # make sure job added to analysis correctly
        obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                                 "qiita.analysis_job WHERE "
                                                 "job_id = 5")
        exp = [[1, 5]]
        self.assertEqual(obs, exp)
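
The test above implies the creation signature: Job.create(data_type, command_name, options_dict, analysis). A minimal usage sketch under that reading; the import paths are assumptions inferred from the qiita.job/qiita.analysis table names, not confirmed by the snippet:

from qiita_db.analysis import Analysis  # assumed module path
from qiita_db.job import Job            # assumed module path

# Create a job attached to analysis 1; the returned object exposes the
# new row's primary key as .id (job_id 4 in the test's fixture data).
new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
print(new.id)
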
Example #2
    def post(self, analysis_id):
        command_args = self.get_arguments("commands")
        split = [x.split("#") for x in command_args]
        analysis = Analysis(analysis_id)

        commands = []
        # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
        fp, mapping_file = mkstemp(suffix="_map_file.txt")
        close(fp)
        SampleTemplate(1).to_file(mapping_file)
        study_fps = {}
        for pd in Study(1).processed_data:
            processed = ProcessedData(pd)
            study_fps[processed.data_type] = processed.get_filepaths()[0][0]
        for data_type, command in split:
            opts = {
                "--otu_table_fp": study_fps[data_type],
                "--mapping_fp": mapping_file
            }
            if command == "Beta Diversity" and data_type in {'16S', '18S'}:
                opts["--tree_fp"] = join(get_db_files_base_dir(), "reference",
                                         "gg_97_otus_4feb2011.tre")
            elif command == "Beta Diversity":
                opts["--parameter_fp"] = join(get_db_files_base_dir(),
                                              "reference", "params_qiime.txt")
            Job.create(data_type, command, opts, analysis)
            commands.append("%s: %s" % (data_type, command))
        user = self.get_current_user()
        self.render("analysis_waiting.html", user=user,
                    aid=analysis_id, aname=analysis.name,
                    commands=commands)
        # fire off analysis run here
        # currently synch run so redirect done here. Will remove after demo
        run_analysis(user, analysis)
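
One detail worth flagging in the handler above: mkstemp returns an OS-level file descriptor plus a path, not a file object, so the descriptor must be closed explicitly before the path is handed off. A standalone sketch of that pattern:

from os import close
from tempfile import mkstemp

# mkstemp returns (file_descriptor, absolute_path); nothing wraps the
# descriptor, so close it by hand or it leaks.
fd, mapping_file = mkstemp(suffix="_map_file.txt")
close(fd)
# mapping_file now names an empty temp file that to_file() can write into
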
Example #3
    def test_create_exists(self):
        """Makes sure creation doesn't duplicate a job"""
        with self.assertRaises(QiitaDBDuplicateError):
            Job.create("18S", "Beta Diversity", {
                "--otu_table_fp": 1,
                "--mapping_fp": 1
            }, Analysis(1))
Example #4
    def test_set_options(self):
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
        new.options = self.options
        self.options['--output_dir'] = join(self._job_folder,
                                            '4_alpha_rarefaction.'
                                            'py_output_dir')
        self.assertEqual(new.options, self.options)
Example #5
    def test_set_options(self):
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
        new.options = self.options
        self.options['--output_dir'] = join(get_db_files_base_dir(),
                                            'job/4_alpha_rarefaction.'
                                            'py_output_dir')
        self.assertEqual(new.options, self.options)
Example #6
    def test_set_options(self):
        new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
        new.options = self.options
        self.options['--output_dir'] = join(
            get_db_files_base_dir(), 'job/4_alpha_rarefaction.'
            'py_output_dir')
        self.assertEqual(new.options, self.options)
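
Examples #4 through #6 are revisions of the same test; all three show that assigning to .options does not store the dict verbatim, since the expected dict must gain an --output_dir entry derived from the job id and command name before it compares equal. A hypothetical stand-in illustrating that setter behavior (JobSketch is not Qiita's class):

from os.path import join

class JobSketch:
    """Hypothetical stand-in, not Qiita's Job; illustrates the setter only."""
    def __init__(self, job_id, base_dir):
        self._id = job_id
        self._base = base_dir
        self._options = {}

    @property
    def options(self):
        return self._options

    @options.setter
    def options(self, opts):
        opts = dict(opts)
        # mirror the tests: the setter injects a derived output directory
        opts['--output_dir'] = join(
            self._base, 'job/%d_alpha_rarefaction.py_output_dir' % self._id)
        self._options = opts
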
Example #7
    def test_create_exists_return_existing(self):
        """Makes sure creation doesn't duplicate a job by returning existing"""
        Analysis.create(User("*****@*****.**"), "new", "desc")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample (analysis_id, "
            "processed_data_id, sample_id) VALUES (3,1,'SKB8.640193'), "
            "(3,1,'SKD8.640184'), (3,1,'SKB7.640196'), (3,1,'SKM9.640192'),"
            "(3,1,'SKM4.640180')")
        new = Job.create("18S", "Beta Diversity",
                         {"--otu_table_fp": 1, "--mapping_fp": 1},
                         Analysis(3), return_existing=True)
        self.assertEqual(new.id, 2)
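
Compare this with Examples #3 and #12: with the default arguments a duplicate raises QiitaDBDuplicateError, while return_existing=True hands back the existing job instead. A sketch of the two behaviors side by side (the exception's import path is an assumption):

from qiita_db.exceptions import QiitaDBDuplicateError  # assumed module path

opts = {"--otu_table_fp": 1, "--mapping_fp": 1}

# default behavior: creating a duplicate job raises
try:
    Job.create("18S", "Beta Diversity", opts, Analysis(1))
except QiitaDBDuplicateError:
    pass  # a job with these options already exists for this analysis

# opt-in behavior: the existing job is returned instead of raising
existing = Job.create("18S", "Beta Diversity", opts, Analysis(3),
                      return_existing=True)
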
Example #8
    def test_create_exists_return_existing(self):
        """Makes sure creation doesn't duplicate a job by returning existing"""
        Analysis.create(User("*****@*****.**"), "new", "desc")
        self.conn_handler.execute(
            "INSERT INTO qiita.analysis_sample "
            "(analysis_id, processed_data_id, sample_id) VALUES "
            "(3, 1, '1.SKB8.640193'), (3, 1, '1.SKD8.640184'), "
            "(3, 1, '1.SKB7.640196'), (3, 1, '1.SKM9.640192'), "
            "(3, 1, '1.SKM4.640180')")
        new = Job.create("18S", "Beta Diversity",
                         {"--otu_table_fp": 1, "--mapping_fp": 1},
                         Analysis(3), return_existing=True)
        self.assertEqual(new.id, 2)
Example #9
    def _construct_job_graph(self, analysis, commands, comm_opts=None,
                             rarefaction_depth=None,
                             merge_duplicated_sample_ids=False):
        """Builds the job graph for running an analysis

        Parameters
        ----------
        analysis : Analysis object
            Analysis to finalize.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options).
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        merge_duplicated_sample_ids : bool, optional
            Whether duplicated sample ids in the selected studies should be
            merged or prepended with the artifact ids. False (default) prepends
            the artifact id.
        """
        self._logger = stderr
        self.analysis = analysis

        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}

        for data_type, command in commands:
            # get opts set by user, else make it empty dict
            opts = comm_opts.get(command, {})

            # Add commands to analysis as jobs
            # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
            if (command == "Beta Diversity" or command == "Alpha Rarefaction"):
                if data_type in {'16S', '18S'}:
                    opts["--tree_fp"] = join(get_db_files_base_dir(),
                                             "reference",
                                             "gg_97_otus_4feb2011.tre")
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")

            if command == "Alpha Rarefaction":
                opts["-n"] = 4

            Job.create(data_type, command, opts, analysis,
                       return_existing=True)

        # Create the files for the jobs
        files_node_name = "%d_ANALYSISFILES" % analysis.id
        self._job_graph.add_node(files_node_name,
                                 func=_build_analysis_files,
                                 args=(analysis, rarefaction_depth,
                                       merge_duplicated_sample_ids),
                                 job_name='Build analysis',
                                 requires_deps=False)

        # Add the jobs
        job_nodes = []
        for job in analysis.jobs:
            node_name = "%d_JOB_%d" % (analysis.id, job.id)
            job_nodes.append(node_name)
            job_name = "%s: %s" % (job.datatype, job.command[0])
            self._job_graph.add_node(node_name,
                                     func=system_call_from_job,
                                     args=(job.id,),
                                     job_name=job_name,
                                     requires_deps=False)

            # Adding the dependency edges to the graph
            self._job_graph.add_edge(files_node_name, node_name)

        # Finalize the analysis.
        node_name = "FINISH_ANALYSIS_%d" % analysis.id
        self._job_graph.add_node(node_name,
                                 func=_finish_analysis,
                                 args=(analysis,),
                                 job_name='Finalize analysis',
                                 requires_deps=False)

        # Adding the dependency edges to the graph
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, node_name)
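
The shape built here is a plain fan-out/fan-in dependency DAG: one file-building node feeds every job node, and all job nodes converge on a finalize node. A minimal sketch of the same shape, assuming _job_graph is a networkx.DiGraph (an assumption; the snippet never names the library):

import networkx as nx

g = nx.DiGraph()
g.add_node("1_ANALYSISFILES")                # build the analysis files first
for job_id in (4, 5):                        # hypothetical job ids
    node = "1_JOB_%d" % job_id
    g.add_node(node)
    g.add_edge("1_ANALYSISFILES", node)      # every job waits on the files
g.add_node("FINISH_ANALYSIS_1")
for job_id in (4, 5):
    g.add_edge("1_JOB_%d" % job_id, "FINISH_ANALYSIS_1")

# any topological order of the DAG is a valid execution order
print(list(nx.topological_sort(g)))
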
Example #10
    def _construct_job_graph(self, analysis, commands, comm_opts=None,
                             rarefaction_depth=None,
                             merge_duplicated_sample_ids=False):
        """Builds the job graph for running an analysis

        Parameters
        ----------
        analysis : Analysis object
            Analysis to finalize.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options).
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        merge_duplicated_sample_ids : bool, optional
            Whether duplicated sample ids in the selected studies should be
            merged or prepended with the artifact ids. False (default) prepends
            the artifact id.
        """
        self._logger = stderr
        self.analysis = analysis
        analysis_id = analysis.id

        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}

        analysis.status = "running"
        # creating the bioms at this point because this entire section runs on
        # a worker node, currently as an ipython job
        analysis.build_files(rarefaction_depth, merge_duplicated_sample_ids)
        mapping_file = analysis.mapping_file

        tree_commands = ["Beta Diversity", "Alpha Rarefaction"]
        for data_type, biom_fp in viewitems(analysis.biom_tables):
            biom_table = load_table(biom_fp)
            # getting reference_id and software_command_id from the first
            # sample of the biom. This decision was discussed on the qiita
            # meeting on 02/24/16
            metadata = biom_table.metadata(biom_table.ids()[0])
            rid = metadata['reference_id']
            sci = metadata['command_id']

            if rid != 'na':
                reference = Reference(rid)
                tree = reference.tree_fp
            else:
                reference = None
                tree = ''

            cmd = Command(sci) if sci != 'na' else None

            for cmd_data_type, command in commands:
                if data_type != cmd_data_type:
                    continue

                # get opts set by user, else make it empty dict
                opts = comm_opts.get(command, {})
                opts["--otu_table_fp"] = biom_fp
                opts["--mapping_fp"] = mapping_file

                if command in tree_commands:
                    if tree != '':
                        opts["--tree_fp"] = tree
                    else:
                        opts["--parameter_fp"] = join(
                            get_db_files_base_dir(), "reference",
                            "params_qiime.txt")

                if command == "Alpha Rarefaction":
                    opts["-n"] = 4

                Job.create(data_type, command, opts, analysis, reference, cmd,
                           return_existing=True)

        # Add the jobs
        job_nodes = []
        for job in analysis.jobs:
            node_name = "%d_JOB_%d" % (analysis_id, job.id)
            job_nodes.append(node_name)
            job_name = "%s: %s" % (job.datatype, job.command[0])
            self._job_graph.add_node(node_name,
                                     func=system_call_from_job,
                                     args=(job.id,),
                                     job_name=job_name,
                                     requires_deps=False)

        # tgz-ing the analysis results
        tgz_node_name = "TGZ_ANALYSIS_%d" % (analysis_id)
        job_name = "tgz_analysis_%d" % (analysis_id)
        self._job_graph.add_node(tgz_node_name,
                                 func=_generate_analysis_tgz,
                                 args=(analysis,),
                                 job_name=job_name,
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, tgz_node_name)

        # Finalize the analysis.
        node_name = "FINISH_ANALYSIS_%d" % analysis.id
        self._job_graph.add_node(node_name,
                                 func=_finish_analysis,
                                 args=(analysis,),
                                 job_name='Finalize analysis',
                                 requires_deps=False)
        self._job_graph.add_edge(tgz_node_name, node_name)
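
Example #10 drops the separate file-building node: the worker builds the biom tables inline, then reads reference_id and command_id off the first sample's metadata. A sketch of that biom lookup in isolation (the file path is hypothetical):

from biom import load_table

table = load_table("analysis_18S.biom")   # hypothetical path
first_sample = table.ids()[0]             # ids() defaults to the sample axis
metadata = table.metadata(first_sample)   # per-sample metadata dict, or None
if metadata is not None:
    print(metadata['reference_id'], metadata['command_id'])
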
Example #11
    def test_retrieve_results_empty(self):
        new = Job.create("18S", "Beta Diversity", {"opt1": 4}, Analysis(1))
        self.assertEqual(new.results, [])
Example #12
    def test_create_exists(self):
        """Makes sure creation doesn't duplicate a job"""
        with self.assertRaises(QiitaDBDuplicateError):
            Job.create("18S", "Beta Diversity",
                       {"--otu_table_fp": 1, "--mapping_fp": 1},
                       Analysis(1))
Example #13
    def _construct_job_graph(self, analysis, commands, comm_opts=None,
                             rarefaction_depth=None):
        """Builds the job graph for running an analysis

        Parameters
        ----------
        analysis : Analysis object
            Analysis to finalize.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options).
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        """
        self._logger = stderr
        self.analysis = analysis

        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}

        for data_type, command in commands:
            # get opts set by user, else make it empty dict
            opts = comm_opts.get(command, {})

            # Add commands to analysis as jobs
            # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
            if (command == "Beta Diversity" or command == "Alpha Rarefaction"):
                if data_type in {'16S', '18S'}:
                    opts["--tree_fp"] = join(get_db_files_base_dir(),
                                             "reference",
                                             "gg_97_otus_4feb2011.tre")
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")

            if command == "Alpha Rarefaction":
                opts["-n"] = 4

            Job.create(data_type, command, opts, analysis,
                       return_existing=True)

        # Create the files for the jobs
        files_node_name = "%d_ANALYSISFILES" % analysis.id
        self._job_graph.add_node(files_node_name,
                                 func=_build_analysis_files,
                                 args=(analysis, rarefaction_depth),
                                 job_name='Build analysis',
                                 requires_deps=False)

        # Add the jobs
        job_nodes = []
        for job in analysis.jobs:
            node_name = "%d_JOB_%d" % (analysis.id, job.id)
            job_nodes.append(node_name)
            job_name = "%s: %s" % (job.datatype, job.command[0])
            self._job_graph.add_node(node_name,
                                     func=system_call_from_job,
                                     args=(job.id,),
                                     job_name=job_name,
                                     requires_deps=False)

            # Adding the dependency edges to the graph
            self._job_graph.add_edge(files_node_name, node_name)

        # Finalize the analysis.
        node_name = "FINISH_ANALYSIS_%d" % analysis.id
        self._job_graph.add_node(node_name,
                                 func=_finish_analysis,
                                 args=(analysis,),
                                 job_name='Finalize analysis',
                                 requires_deps=False)

        # Adding the dependency edges to the graph
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, node_name)
Example #14
    def _construct_job_graph(self, user, analysis, commands, comm_opts=None,
                             rarefaction_depth=None):
        """Builds the job graph for running an analysis

        Parameters
        ----------
        user : str
            user running this analysis.
        analysis : Analysis object
            Analysis to finalize.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options).
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        """
        self._logger = stderr
        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}
        for data_type, command in commands:
            # get opts set by user, else make it empty dict
            opts = comm_opts.get(command, {})
            # Add commands to analysis as jobs
            # HARD CODED HACKY THING FOR DEMO, FIX  Issue #164
            if (command == "Beta Diversity" or command == "Alpha Rarefaction"):
                if data_type in {'16S', '18S'}:
                    opts["--tree_fp"] = join(get_db_files_base_dir(),
                                             "reference",
                                             "gg_97_otus_4feb2011.tre")
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")
            if command == "Alpha Rarefaction":
                opts["-n"] = 4
            Job.create(data_type, command, opts, analysis,
                       return_existing=True)

        # Create the files for the jobs
        files_node_name = "%d_ANALYSISFILES" % analysis.id
        self._job_graph.add_node(files_node_name,
                                 job=(_build_analysis_files,
                                      analysis, rarefaction_depth),
                                 requires_deps=False)
        # Add the jobs
        job_nodes = []
        for job_id in analysis.jobs:
            job = Job(job_id)
            node_name = "%d_JOB_%d" % (analysis.id, job.id)
            job_nodes.append(node_name)
            self._job_graph.add_node(node_name,
                                     job=(_job_comm_wrapper, user, analysis.id,
                                          job),
                                     requires_deps=False)
            # Adding the dependency edges to the graph
            self._job_graph.add_edge(files_node_name, node_name)

        # Finalize the analysis
        node_name = "FINISH_ANALYSIS_%d" % analysis.id
        self._job_graph.add_node(node_name,
                                 job=(_finish_analysis, user, analysis),
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, node_name)
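
Example #14 predates the node API of the others: dependencies are packed into a single job=(callable, *args) tuple, and analysis.jobs yields ids that must be rehydrated with Job(job_id), whereas Examples #9, #10, and #13 pass func=, args=, and job_name= separately and iterate over Job objects directly. The underlying fan-out/fan-in graph shape is unchanged.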