def test_create(self):
    """Makes sure creation works as expected"""
    # make first job
    new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    self.assertEqual(new.id, 4)
    # make sure job inserted correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                             "WHERE job_id = 4")
    exp = [[4, 2, 1, 3, '{"opt1":4}', None]]
    self.assertEqual(obs, exp)
    # make sure job added to analysis correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                             "qiita.analysis_job WHERE "
                                             "job_id = 4")
    exp = [[1, 4]]
    self.assertEqual(obs, exp)

    # make second job with diff datatype and command to test column insert
    new = Job.create("16S", "Beta Diversity", {"opt1": 4}, Analysis(1))
    self.assertEqual(new.id, 5)
    # make sure job inserted correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.job "
                                             "WHERE job_id = 5")
    exp = [[5, 1, 1, 2, '{"opt1":4}', None]]
    self.assertEqual(obs, exp)
    # make sure job added to analysis correctly
    obs = self.conn_handler.execute_fetchall("SELECT * FROM "
                                             "qiita.analysis_job WHERE "
                                             "job_id = 5")
    exp = [[1, 5]]
    self.assertEqual(obs, exp)
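# Reading the expected rows above (an inference from the two Job.create calls,
# not a documented schema): the columns appear to line up as [job_id,
# data_type_id, job_status_id, command_id, options, log_id]. That would make
# "18S" -> data_type_id 2 and "Alpha Rarefaction" -> command_id 3 in the first
# row, and "16S" -> 1 and "Beta Diversity" -> 2 in the second.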
def post(self, analysis_id):
    command_args = self.get_arguments("commands")
    split = [x.split("#") for x in command_args]
    analysis = Analysis(analysis_id)

    commands = []
    # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
    fp, mapping_file = mkstemp(suffix="_map_file.txt")
    close(fp)
    SampleTemplate(1).to_file(mapping_file)
    study_fps = {}
    for pd in Study(1).processed_data:
        processed = ProcessedData(pd)
        study_fps[processed.data_type] = processed.get_filepaths()[0][0]
    for data_type, command in split:
        opts = {
            "--otu_table_fp": study_fps[data_type],
            "--mapping_fp": mapping_file
        }
        if command == "Beta Diversity" and data_type in {'16S', '18S'}:
            opts["--tree_fp"] = join(get_db_files_base_dir(), "reference",
                                     "gg_97_otus_4feb2011.tre")
        elif command == "Beta Diversity":
            opts["--parameter_fp"] = join(get_db_files_base_dir(),
                                          "reference", "params_qiime.txt")
        Job.create(data_type, command, opts, analysis)
        commands.append("%s: %s" % (data_type, command))
    user = self.get_current_user()
    self.render("analysis_waiting.html", user=user, aid=analysis_id,
                aname=analysis.name, commands=commands)
    # fire off analysis run here
    # currently a synchronous run, so the redirect is done here.
    # Will remove after demo
    run_analysis(user, analysis)
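# A minimal, dependency-free sketch of how the "commands" form field above is
# parsed: each argument arrives as "<data_type>#<command name>", so split("#")
# yields (data_type, command) pairs. The sample values here are hypothetical.
command_args = ["16S#Beta Diversity", "18S#Alpha Rarefaction"]
split = [x.split("#") for x in command_args]
assert split == [["16S", "Beta Diversity"], ["18S", "Alpha Rarefaction"]]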
def test_create_exists(self):
    """Makes sure creation doesn't duplicate a job"""
    with self.assertRaises(QiitaDBDuplicateError):
        Job.create("18S", "Beta Diversity",
                   {"--otu_table_fp": 1, "--mapping_fp": 1},
                   Analysis(1))
def test_set_options(self):
    new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    new.options = self.options
    self.options['--output_dir'] = join(self._job_folder,
                                        '4_alpha_rarefaction.'
                                        'py_output_dir')
    self.assertEqual(new.options, self.options)
def test_set_options(self):
    new = Job.create("18S", "Alpha Rarefaction", {"opt1": 4}, Analysis(1))
    new.options = self.options
    self.options['--output_dir'] = join(get_db_files_base_dir(),
                                        'job/4_alpha_rarefaction.'
                                        'py_output_dir')
    self.assertEqual(new.options, self.options)
def test_create_exists_return_existing(self):
    """Makes sure creation doesn't duplicate a job by returning existing"""
    Analysis.create(User("*****@*****.**"), "new", "desc")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES (3,1,'SKB8.640193'), "
        "(3,1,'SKD8.640184'), (3,1,'SKB7.640196'), (3,1,'SKM9.640192'),"
        "(3,1,'SKM4.640180')")
    new = Job.create("18S", "Beta Diversity",
                     {"--otu_table_fp": 1, "--mapping_fp": 1},
                     Analysis(3), return_existing=True)
    self.assertEqual(new.id, 2)
def test_create_exists_return_existing(self):
    """Makes sure creation doesn't duplicate a job by returning existing"""
    Analysis.create(User("*****@*****.**"), "new", "desc")
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample "
        "(analysis_id, processed_data_id, sample_id) VALUES "
        "(3, 1, '1.SKB8.640193'), (3, 1, '1.SKD8.640184'), "
        "(3, 1, '1.SKB7.640196'), (3, 1, '1.SKM9.640192'), "
        "(3, 1, '1.SKM4.640180')")
    new = Job.create("18S", "Beta Diversity",
                     {"--otu_table_fp": 1, "--mapping_fp": 1},
                     Analysis(3), return_existing=True)
    self.assertEqual(new.id, 2)
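# Taken together, these tests pin down the duplicate handling that Job.create
# exercises here: by default, creating a job identical to one already stored
# raises QiitaDBDuplicateError, while return_existing=True hands back the
# stored Job instead. A side-by-side sketch; the fixture state (the duplicate
# already attached to Analysis(1)) is assumed:
opts = {"--otu_table_fp": 1, "--mapping_fp": 1}
try:
    Job.create("18S", "Beta Diversity", opts, Analysis(1))
except QiitaDBDuplicateError:
    pass  # default behavior: duplicates are an error
same_job = Job.create("18S", "Beta Diversity", opts, Analysis(1),
                      return_existing=True)  # duplicates are returned instead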
def _construct_job_graph(self, analysis, commands, comm_opts=None,
                         rarefaction_depth=None,
                         merge_duplicated_sample_ids=False):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    analysis : Analysis object
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value, ...}, ...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for analysis' biom tables. Default None.
    merge_duplicated_sample_ids : bool, optional
        Whether duplicated sample ids in the selected studies should be
        merged or prepended with the artifact ids. False (default)
        prepends the artifact id.
    """
    self._logger = stderr
    self.analysis = analysis

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}

    for data_type, command in commands:
        # get opts set by user, else make it an empty dict
        opts = comm_opts.get(command, {})
        # Add commands to analysis as jobs
        # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
        if command in ("Beta Diversity", "Alpha Rarefaction"):
            if data_type in {'16S', '18S'}:
                opts["--tree_fp"] = join(get_db_files_base_dir(),
                                         "reference",
                                         "gg_97_otus_4feb2011.tre")
            else:
                opts["--parameter_fp"] = join(
                    get_db_files_base_dir(), "reference",
                    "params_qiime.txt")
        if command == "Alpha Rarefaction":
            opts["-n"] = 4
        Job.create(data_type, command, opts, analysis,
                   return_existing=True)

    # Create the files for the jobs
    files_node_name = "%d_ANALYSISFILES" % analysis.id
    self._job_graph.add_node(files_node_name,
                             func=_build_analysis_files,
                             args=(analysis, rarefaction_depth,
                                   merge_duplicated_sample_ids),
                             job_name='Build analysis',
                             requires_deps=False)

    # Add the jobs
    job_nodes = []
    for job in analysis.jobs:
        node_name = "%d_JOB_%d" % (analysis.id, job.id)
        job_nodes.append(node_name)
        job_name = "%s: %s" % (job.datatype, job.command[0])
        self._job_graph.add_node(node_name,
                                 func=system_call_from_job,
                                 args=(job.id,),
                                 job_name=job_name,
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        self._job_graph.add_edge(files_node_name, node_name)

    # Finalize the analysis.
    node_name = "FINISH_ANALYSIS_%d" % analysis.id
    self._job_graph.add_node(node_name,
                             func=_finish_analysis,
                             args=(analysis,),
                             job_name='Finalize analysis',
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, node_name)
def _construct_job_graph(self, analysis, commands, comm_opts=None,
                         rarefaction_depth=None,
                         merge_duplicated_sample_ids=False):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    analysis : Analysis object
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value, ...}, ...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for analysis' biom tables. Default None.
    merge_duplicated_sample_ids : bool, optional
        Whether duplicated sample ids in the selected studies should be
        merged or prepended with the artifact ids. False (default)
        prepends the artifact id.
    """
    self._logger = stderr
    self.analysis = analysis
    analysis_id = analysis.id

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}

    analysis.status = "running"
    # creating bioms at this point because all of this section runs on a
    # worker node, currently an ipython job
    analysis.build_files(rarefaction_depth, merge_duplicated_sample_ids)
    mapping_file = analysis.mapping_file
    tree_commands = ["Beta Diversity", "Alpha Rarefaction"]
    for data_type, biom_fp in viewitems(analysis.biom_tables):
        biom_table = load_table(biom_fp)
        # getting reference_id and software_command_id from the first
        # sample of the biom. This decision was discussed on the qiita
        # meeting on 02/24/16
        metadata = biom_table.metadata(biom_table.ids()[0])
        rid = metadata['reference_id']
        sci = metadata['command_id']
        if rid != 'na':
            reference = Reference(rid)
            tree = reference.tree_fp
        else:
            reference = None
            tree = ''
        cmd = Command(sci) if sci != 'na' else None

        for cmd_data_type, command in commands:
            if data_type != cmd_data_type:
                continue
            # get opts set by user, else make it an empty dict
            opts = comm_opts.get(command, {})
            opts["--otu_table_fp"] = biom_fp
            opts["--mapping_fp"] = mapping_file
            if command in tree_commands:
                if tree != '':
                    opts["--tree_fp"] = tree
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")
            if command == "Alpha Rarefaction":
                opts["-n"] = 4
            Job.create(data_type, command, opts, analysis, reference,
                       cmd, return_existing=True)

    # Add the jobs
    job_nodes = []
    for job in analysis.jobs:
        node_name = "%d_JOB_%d" % (analysis_id, job.id)
        job_nodes.append(node_name)
        job_name = "%s: %s" % (job.datatype, job.command[0])
        self._job_graph.add_node(node_name,
                                 func=system_call_from_job,
                                 args=(job.id,),
                                 job_name=job_name,
                                 requires_deps=False)

    # tgz-ing the analysis results
    tgz_node_name = "TGZ_ANALYSIS_%d" % analysis_id
    job_name = "tgz_analysis_%d" % analysis_id
    self._job_graph.add_node(tgz_node_name,
                             func=_generate_analysis_tgz,
                             args=(analysis,),
                             job_name=job_name,
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, tgz_node_name)

    # Finalize the analysis.
    node_name = "FINISH_ANALYSIS_%d" % analysis_id
    self._job_graph.add_node(node_name,
                             func=_finish_analysis,
                             args=(analysis,),
                             job_name='Finalize analysis',
                             requires_deps=False)
    self._job_graph.add_edge(tgz_node_name, node_name)
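# How a graph built this way could be executed; a sketch only, assuming
# self._job_graph is a networkx.DiGraph (not shown in these snippets) and that
# each node carries the func/args attributes set via add_node above.
import networkx as nx

def run_job_graph(job_graph):
    # topological order guarantees every dependency runs before its
    # dependents (here: the per-command jobs, then tgz, then finalize)
    for node in nx.topological_sort(job_graph):
        attrs = job_graph.nodes[node]
        attrs["func"](*attrs["args"])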
def test_retrieve_results_empty(self):
    new = Job.create("18S", "Beta Diversity", {"opt1": 4}, Analysis(1))
    self.assertEqual(new.results, [])
def _construct_job_graph(self, analysis, commands, comm_opts=None,
                         rarefaction_depth=None):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    analysis : Analysis object
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value, ...}, ...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for analysis' biom tables. Default None.
    """
    self._logger = stderr
    self.analysis = analysis

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}

    for data_type, command in commands:
        # get opts set by user, else make it an empty dict
        opts = comm_opts.get(command, {})
        # Add commands to analysis as jobs
        # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
        if command in ("Beta Diversity", "Alpha Rarefaction"):
            if data_type in {'16S', '18S'}:
                opts["--tree_fp"] = join(get_db_files_base_dir(),
                                         "reference",
                                         "gg_97_otus_4feb2011.tre")
            else:
                opts["--parameter_fp"] = join(
                    get_db_files_base_dir(), "reference",
                    "params_qiime.txt")
        if command == "Alpha Rarefaction":
            opts["-n"] = 4
        Job.create(data_type, command, opts, analysis,
                   return_existing=True)

    # Create the files for the jobs
    files_node_name = "%d_ANALYSISFILES" % analysis.id
    self._job_graph.add_node(files_node_name,
                             func=_build_analysis_files,
                             args=(analysis, rarefaction_depth),
                             job_name='Build analysis',
                             requires_deps=False)

    # Add the jobs
    job_nodes = []
    for job in analysis.jobs:
        node_name = "%d_JOB_%d" % (analysis.id, job.id)
        job_nodes.append(node_name)
        job_name = "%s: %s" % (job.datatype, job.command[0])
        self._job_graph.add_node(node_name,
                                 func=system_call_from_job,
                                 args=(job.id,),
                                 job_name=job_name,
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        self._job_graph.add_edge(files_node_name, node_name)

    # Finalize the analysis.
    node_name = "FINISH_ANALYSIS_%d" % analysis.id
    self._job_graph.add_node(node_name,
                             func=_finish_analysis,
                             args=(analysis,),
                             job_name='Finalize analysis',
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, node_name)
def _construct_job_graph(self, user, analysis, commands, comm_opts=None,
                         rarefaction_depth=None):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    user : str
        User running this analysis.
    analysis : Analysis object
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value, ...}, ...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for analysis' biom tables. Default None.
    """
    self._logger = stderr

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}

    for data_type, command in commands:
        # get opts set by user, else make it an empty dict
        opts = comm_opts.get(command, {})
        # Add commands to analysis as jobs
        # HARD CODED HACKY THING FOR DEMO, FIX Issue #164
        if command in ("Beta Diversity", "Alpha Rarefaction"):
            if data_type in {'16S', '18S'}:
                opts["--tree_fp"] = join(get_db_files_base_dir(),
                                         "reference",
                                         "gg_97_otus_4feb2011.tre")
            else:
                opts["--parameter_fp"] = join(
                    get_db_files_base_dir(), "reference",
                    "params_qiime.txt")
        if command == "Alpha Rarefaction":
            opts["-n"] = 4
        Job.create(data_type, command, opts, analysis,
                   return_existing=True)

    # Create the files for the jobs
    files_node_name = "%d_ANALYSISFILES" % analysis.id
    self._job_graph.add_node(files_node_name,
                             job=(_build_analysis_files, analysis,
                                  rarefaction_depth),
                             requires_deps=False)

    # Add the jobs
    job_nodes = []
    for job_id in analysis.jobs:
        job = Job(job_id)
        node_name = "%d_JOB_%d" % (analysis.id, job.id)
        job_nodes.append(node_name)
        self._job_graph.add_node(node_name,
                                 job=(_job_comm_wrapper, user,
                                      analysis.id, job),
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        self._job_graph.add_edge(files_node_name, node_name)

    # Finalize the analysis
    node_name = "FINISH_ANALYSIS_%d" % analysis.id
    self._job_graph.add_node(node_name,
                             job=(_finish_analysis, user, analysis),
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, node_name)