def get(self):
    # Format sel_data to get study IDs for the processed data
    sel_data = defaultdict(dict)
    proc_data_info = {}
    sel_samps = self.current_user.default_analysis.samples
    for pid, samps in viewitems(sel_samps):
        proc_data = Artifact(pid)
        sel_data[proc_data.study][pid] = samps
        # Also get processed data info
        parameters = proc_data.processing_parameters
        reference = Reference(parameters.values['reference'])

        proc_data_info[pid] = {
            'processed_date': str(proc_data.timestamp),
            'algorithm': parameters.command.name,
            'reference_name': reference.name,
            'reference_version': reference.version,
            'sequence_filepath': reference.sequence_fp,
            'taxonomy_filepath': reference.taxonomy_fp,
            'tree_filepath': reference.tree_fp,
            'data_type': proc_data.data_type}

    self.render("analysis_selected.html",
                sel_data=sel_data, proc_info=proc_data_info)
def get(self):
    # Format sel_data to get study IDs for the processed data
    sel_data = defaultdict(dict)
    proc_data_info = {}
    sel_samps = self.current_user.default_analysis.samples
    for aid, samples in viewitems(sel_samps):
        a = Artifact(aid)
        sel_data[a.study][aid] = samples
        # Also get processed data info
        processing_parameters = a.processing_parameters
        if processing_parameters is None:
            params = None
            algorithm = None
        else:
            cmd = processing_parameters.command
            params = processing_parameters.values
            if 'reference' in params:
                ref = Reference(params['reference'])
                del params['reference']
                params['reference_name'] = ref.name
                params['reference_version'] = ref.version
            algorithm = '%s (%s)' % (cmd.software.name, cmd.name)

        proc_data_info[aid] = {
            'processed_date': str(a.timestamp),
            'algorithm': algorithm,
            'data_type': a.data_type,
            'params': params}

    self.render("analysis_selected.html",
                sel_data=sel_data, proc_info=proc_data_info)
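# For illustration only: the proc_info structure handed to
# analysis_selected.html ends up shaped roughly like the sketch below
# (the artifact id, date, names and parameter values are all made up;
# 'params' is None when the artifact carries no processing parameters).
_example_proc_info = {
    42: {'processed_date': '2016-02-24 10:00:00',
         'algorithm': 'QIIME (Pick closed-reference OTUs)',
         'data_type': '16S',
         'params': {'reference_name': 'Greengenes',
                    'reference_version': '13_8',
                    'similarity': 0.97}}}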
def generate_param_str(param):
    """Generate an html string with the parameter values

    Parameters
    ----------
    param : BaseParameters
        The parameter to generate the str

    Returns
    -------
    str
        The html string with the parameter set values
    """
    values = param.values
    ref = Reference(values['reference'])
    result = ["<b>Reference:</b> %s %s" % (ref.name, ref.version)]
    result.extend("<b>%s:</b> %s" % (name, value)
                  for name, value in viewitems(values)
                  if name != 'reference')
    return "<br/>".join(result)
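# A minimal usage sketch: generate_param_str only needs an object exposing a
# ``values`` dict (as BaseParameters does) plus a populated Reference table.
# The stand-in class below is purely illustrative and assumes a live
# database where Reference(1) resolves to, e.g., Greengenes 13_8.
class _FakeParams(object):
    values = {'reference': 1, 'similarity': 0.97}

# Would render something along the lines of:
# "<b>Reference:</b> Greengenes 13_8<br/><b>similarity:</b> 0.97"
print(generate_param_str(_FakeParams()))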
def _build_single_proc_data_info(proc_data_id, data_type, samples):
    """Build the proc data info list for the child row in datatable

    Parameters
    ----------
    proc_data_id : int
        The id of the processed data artifact to build the info for
    data_type : str
        Data type of the processed data
    samples : list of str
        The samples available in the processed data

    Returns
    -------
    dict
        The information for the processed data, in the form
        {info: value, ...}
    """
    proc_data = Artifact(proc_data_id)
    proc_info = {'pid': proc_data_id,
                 'data_type': data_type,
                 'processed_date': str(proc_data.timestamp)}
    params = proc_data.processing_parameters.values
    del params['input_data']
    ref = Reference(params.pop('reference'))
    proc_info['reference_name'] = ref.name
    proc_info['reference_version'] = ref.version
    proc_info['sequence_filepath'] = basename(ref.sequence_fp)
    proc_info['taxonomy_filepath'] = basename(ref.taxonomy_fp)
    proc_info['tree_filepath'] = basename(ref.tree_fp)
    proc_info['algorithm'] = 'sortmerna'
    proc_info['samples'] = sorted(samples)
    proc_info.update(params)

    return proc_info
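# Illustrative call and return shape for _build_single_proc_data_info; every
# value below is made up, and the trailing keys are whatever remains in the
# artifact's processing parameters:
#
#   _build_single_proc_data_info(1, '18S', ['1.SKB1.640202', '1.SKB2.640194'])
#   => {'pid': 1, 'data_type': '18S',
#       'processed_date': '2012-10-02 17:30:00',
#       'algorithm': 'sortmerna',
#       'reference_name': 'Greengenes', 'reference_version': '13_8',
#       'sequence_filepath': 'GreenGenes_13_8_97_otus.fasta',
#       'taxonomy_filepath': 'GreenGenes_13_8_97_otu_taxonomy.txt',
#       'tree_filepath': 'GreenGenes_13_8_97_otus.tree',
#       'samples': ['1.SKB1.640202', '1.SKB2.640194'],
#       'similarity': 0.97}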
def test_tree_fp(self):
    ref = Reference(1)
    exp = join(self.db_dir, "GreenGenes_13_8_97_otus.tree")
    self.assertEqual(ref.tree_fp, exp)
def test_taxonomy_fp(self):
    ref = Reference(1)
    exp = join(self.db_dir, "GreenGenes_13_8_97_otu_taxonomy.txt")
    self.assertEqual(ref.taxonomy_fp, exp)
def test_sequence_fp(self):
    ref = Reference(1)
    exp = join(self.db_dir, "GreenGenes_13_8_97_otus.fasta")
    self.assertEqual(ref.sequence_fp, exp)
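# A possible setUp shared by the three tests above. How self.db_dir is
# derived is an assumption here -- it only has to point at the directory
# holding the files of Reference(1); get_mountpoint is used below as one
# way to look that directory up.
from os.path import join
from unittest import TestCase

from qiita_db.reference import Reference
from qiita_db.util import get_mountpoint


class ReferenceTests(TestCase):
    def setUp(self):
        # get_mountpoint returns (id, path) tuples for the requested
        # mountpoint; 'reference' is assumed to be where the reference
        # files are stored
        _, self.db_dir = get_mountpoint('reference')[0]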
def _construct_job_graph(self, analysis, commands, comm_opts=None,
                         rarefaction_depth=None,
                         merge_duplicated_sample_ids=False):
    """Builds the job graph for running an analysis

    Parameters
    ----------
    analysis : Analysis
        Analysis to finalize.
    commands : list of tuples
        Commands to add as jobs in the analysis.
        Format [(data_type, command name), ...]
    comm_opts : dict of dicts, optional
        Options for commands. Format {command name: {opt1: value, ...}, ...}
        Default None (use default options).
    rarefaction_depth : int, optional
        Rarefaction depth for the analysis' biom tables. Default None.
    merge_duplicated_sample_ids : bool, optional
        Whether duplicated sample ids in the selected studies should be
        merged or prepended with the artifact ids. False (default)
        prepends the artifact id.
    """
    self._logger = stderr
    self.analysis = analysis
    analysis_id = analysis.id

    # Add jobs to analysis
    if comm_opts is None:
        comm_opts = {}

    analysis.status = "running"
    # Creating the bioms at this point because all of this section runs on
    # a worker node, currently an ipython job
    analysis.build_files(rarefaction_depth, merge_duplicated_sample_ids)
    mapping_file = analysis.mapping_file
    tree_commands = ["Beta Diversity", "Alpha Rarefaction"]
    for data_type, biom_fp in viewitems(analysis.biom_tables):
        biom_table = load_table(biom_fp)
        # Getting reference_id and software_command_id from the first
        # sample of the biom. This decision was discussed on the qiita
        # meeting on 02/24/16
        metadata = biom_table.metadata(biom_table.ids()[0])
        rid = metadata['reference_id']
        sci = metadata['command_id']

        if rid != 'na':
            reference = Reference(rid)
            tree = reference.tree_fp
        else:
            reference = None
            tree = ''

        cmd = Command(sci) if sci != 'na' else None

        for cmd_data_type, command in commands:
            if data_type != cmd_data_type:
                continue

            # Get the options set by the user, otherwise use an empty dict
            opts = comm_opts.get(command, {})
            opts["--otu_table_fp"] = biom_fp
            opts["--mapping_fp"] = mapping_file

            if command in tree_commands:
                if tree != '':
                    opts["--tree_fp"] = tree
                else:
                    opts["--parameter_fp"] = join(
                        get_db_files_base_dir(), "reference",
                        "params_qiime.txt")

            if command == "Alpha Rarefaction":
                opts["-n"] = 4

            Job.create(data_type, command, opts, analysis, reference, cmd,
                       return_existing=True)

    # Add the jobs
    job_nodes = []
    for job in analysis.jobs:
        node_name = "%d_JOB_%d" % (analysis_id, job.id)
        job_nodes.append(node_name)
        job_name = "%s: %s" % (job.datatype, job.command[0])
        self._job_graph.add_node(node_name,
                                 func=system_call_from_job,
                                 args=(job.id,),
                                 job_name=job_name,
                                 requires_deps=False)

    # tgz-ing the analysis results
    tgz_node_name = "TGZ_ANALYSIS_%d" % analysis_id
    job_name = "tgz_analysis_%d" % analysis_id
    self._job_graph.add_node(tgz_node_name,
                             func=_generate_analysis_tgz,
                             args=(analysis,),
                             job_name=job_name,
                             requires_deps=False)
    # Adding the dependency edges to the graph
    for job_node_name in job_nodes:
        self._job_graph.add_edge(job_node_name, tgz_node_name)

    # Finalize the analysis
    node_name = "FINISH_ANALYSIS_%d" % analysis_id
    self._job_graph.add_node(node_name,
                             func=_finish_analysis,
                             args=(analysis,),
                             job_name='Finalize analysis',
                             requires_deps=False)
    self._job_graph.add_edge(tgz_node_name, node_name)
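# A minimal sketch of the dependency structure this builds, assuming
# _job_graph behaves like a networkx.DiGraph (which the add_node/add_edge
# calls suggest): every per-datatype JOB node feeds the TGZ node, which in
# turn feeds the FINISH node, so a topological sort yields a valid
# execution order for the workers. Node names below are illustrative.
import networkx as nx

g = nx.DiGraph()
for job_node in ("1_JOB_1", "1_JOB_2"):
    # add_edge creates the nodes implicitly
    g.add_edge(job_node, "TGZ_ANALYSIS_1")
g.add_edge("TGZ_ANALYSIS_1", "FINISH_ANALYSIS_1")

print(list(nx.topological_sort(g)))
# e.g. ['1_JOB_1', '1_JOB_2', 'TGZ_ANALYSIS_1', 'FINISH_ANALYSIS_1']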
def _get_process_target_gene_cmd(preprocessed_data, params):
    """Generates the pick_closed_reference_otus.py command

    Parameters
    ----------
    preprocessed_data : PreprocessedData
        The preprocessed_data to process
    params : ProcessedSortmernaParams
        The parameters to use for the processing

    Returns
    -------
    tuple (str, str)
        A 2-tuple of strings. The first string is the command to be
        executed. The second string is the path to the command's output
        directory

    Raises
    ------
    ValueError
        If no sequence file is found on the preprocessed data
    """
    # Get the filepaths from the preprocessed data object
    seqs_fp = None
    for fpid, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == "preprocessed_fasta":
            seqs_fp = fp
            break

    if not seqs_fp:
        raise ValueError("No sequence file found on the preprocessed "
                         "data %s" % preprocessed_data.id)

    # Create a temporary directory to store the pick otus results
    output_dir = mkdtemp(dir=qiita_config.working_dir,
                         prefix='pick_otus_otu_%s_' % preprocessed_data.id)
    # mkdtemp creates the directory, so we remove it here so the script
    # can safely run
    rmdir(output_dir)

    # We need to generate a parameters file with the parameters for
    # pick_otus.py
    fd, param_fp = mkstemp(dir=qiita_config.working_dir,
                           prefix='params_%s_' % preprocessed_data.id,
                           suffix='.txt')
    close(fd)

    with open(param_fp, 'w') as f:
        params.to_file(f)

    ref = Reference(params.reference)
    reference_fp = ref.sequence_fp
    taxonomy_fp = ref.taxonomy_fp

    if taxonomy_fp:
        params_str = "-t %s" % taxonomy_fp
    else:
        params_str = ""

    # Build the pick_closed_reference_otus.py command
    cmd = str("pick_closed_reference_otus.py -i %s -r %s -o %s -p %s %s"
              % (seqs_fp, reference_fp, output_dir, param_fp, params_str))

    return (cmd, output_dir)
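# Purely illustrative: for preprocessed data id 5 and a reference that ships
# a taxonomy file, the returned command looks along these lines (all paths
# below are made up):
#
#   pick_closed_reference_otus.py -i /wd/seqs_5.fna \
#       -r /ref/GreenGenes_13_8_97_otus.fasta \
#       -o /wd/pick_otus_otu_5_ab12cd -p /wd/params_5_xy34.txt \
#       -t /ref/GreenGenes_13_8_97_otu_taxonomy.txt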