def append_samples_to_analysis(self):
    """Append an additional data file to a finished analysis.

    Temporarily moves the analysis from 'done' back to 'running', copies
    the uploaded file next to the analysis, runs the type-specific and
    common '_append' hooks, then restores the 'done' state.
    """
    analysis_id = self.request_dict['analysis_id']
    uploaded_path = self.request_dict['data_file_path']

    # the analysis leaves the 'done' pool while it is being updated
    self.serverstate.done_analyses.remove(analysis_id)
    self.serverstate.running_analyses.append(analysis_id)

    meta = Meta(analysis_id)

    appended_path = os.path.join(meta.dirs.analysis_dir, "additional_data_file")
    debug("Copying additional data file", meta.files.log_file)
    shutil.copy(uploaded_path, appended_path)
    self.request_dict['additional_data_file_path'] = appended_path

    # unless the client asked for a late response, acknowledge right away
    if not self.request_dict.get('return_when_done'):
        self.write_socket({'response': 'OK'})

    ################################################################
    # call server modules for append..
    ################################################################
    # analysis specific stuff.
    server_modules_dict[meta.type]._append(meta, self.request_dict)
    # common analysis routines..
    server_modules_dict['commons']._append(meta, self.request_dict)
    ################################################################

    os.remove(appended_path)

    # analysis is done again
    self.serverstate.running_analyses.remove(analysis_id)
    self.serverstate.done_analyses.append(analysis_id)
    debug("Done.", meta.files.log_file)

    # late response, if one was requested
    if self.request_dict.get('return_when_done'):
        self.write_socket({'response': 'OK'})
def sequences_bar_image(p):
    """Render the per-sample 'number of sequences' bar chart."""
    counts = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Generating number of sequences bar image", p.files.log_file)
    ordered = framework.tools.helper_functions.sorted_copy(counts.keys())
    # one (sample name, total reads) pair per sample, in sorted order
    pairs = [(name, counts[name]["tr"]) for name in ordered]
    framework.tools.bar.generate(pairs, p.images.samples_sequences_bar_path)
def t_test_values_and_probabilities_dict(p):
    """Compute per-OTU t-test / p-value tuples for the current sample map
    and serialize the result."""
    filtered = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    library = DeserializeFromFile(p.files.otu_library_file_path)
    debug("Generating t-test values and probabilities dict", p.files.log_file)
    result = framework.tools.taxons.get_t_p_values_dict_for_subset(
        filtered, library, p.files.sample_map_file_path,
        ranks=GetCopy(c.ranks[p.type]))
    SerializeToFile(result, p.files.sample_map_otu_t_p_tuples_dict_file_path)
def refresh_analysis_files(self):
    """Handle a 'refresh analysis files' request.

    Two modes, selected by the request:

    * 'get_refresh_options' present: reply with the sorted list of
      available refresh task ids plus their descriptions.
    * otherwise: run the refresh tasks named in 'refresh_requests'.

    Replies {'response': 'OK'} immediately, or after the tasks finish
    when 'return_when_done' was requested.
    """
    analysis_id = self.request_dict['analysis_id']
    p = Meta(analysis_id)

    functions_module_dict = server_modules_dict[p.type]._module_functions(p, self.request_dict)
    functions_common_dict = server_modules_dict['commons']._module_functions(p, self.request_dict)

    # 'in' instead of has_key(); sorted() instead of keys()+sort() -- both
    # work on Python 2 and 3.  The stray debug 'print' that used to dump
    # refresh_options to stdout was removed.
    if 'get_refresh_options' in self.request_dict:
        # module-specific options first, common options second, each sorted
        refresh_options = sorted(functions_module_dict) + sorted(functions_common_dict)

        # the function dicts carry actual memory references to functions
        # exported by the protocol specific and common modules.  those
        # references are meaningful only inside the server and must not be
        # sent to clients, so build a combined dict with each item copied
        # and its 'func' entry stripped.  (copying the item dicts also
        # avoids mutating the originals, which the old code did.)
        functions_dict = {}
        for source in (functions_module_dict, functions_common_dict):
            for name, info in source.items():
                entry = dict(info)
                entry['func'] = None
                functions_dict[name] = entry

        self.write_socket({'response': 'OK',
                           'refresh_options': refresh_options,
                           'functions_dict': functions_dict})
    else:
        refresh_requests = self.request_dict['refresh_requests']

        # if a late response wasn't requested, acknowledge immediately
        if 'return_when_done' not in self.request_dict:
            self.write_socket({'response': 'OK'})

        # run every requested task; module-specific tasks take precedence
        # over common ones when ids collide.
        for request in refresh_requests:
            if request in functions_module_dict:
                functions_module_dict[request]['func'](p)
            elif request in functions_common_dict:
                functions_common_dict[request]['func'](p)

        # send the late response if it was requested
        if 'return_when_done' in self.request_dict:
            self.write_socket({'response': 'OK'})

        debug("Done with refresh tasks.", p.files.log_file)
def simpsons_diversity_dot_plot(p):
    """Generate Simpson's diversity index images for the current sample map."""
    filtered = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    debug("Generating diversity index images...", p.files.log_file)
    framework.tools.diversityindex.generate_for_sample_map(
        filtered,
        p.files.sample_map_file_path,
        save_dir=p.dirs.sample_map_instance_dir,
        type=p.type,
        method="simpsons")
def shannon_diversity_bar_image(p):
    """Generate the Shannon diversity index image and its data file."""
    sdict = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Generating shannons diversity index image", p.files.log_file)
    framework.tools.diversityindex.generate(
        sdict,
        p.images.shannon_diversity_index_img_path,
        p.files.shannon_diversity_index_data_path,
        p.type,
        method="shannons")
def pie_charts(p):
    """Generate taxon pie charts using the serialized taxa color dict."""
    sdict = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Generating piecharts", p.files.log_file)
    color_dict = DeserializeFromFile(p.files.taxa_color_dict_file_path)
    framework.tools.piechart.main(
        sdict,
        color_dict,
        ranks=GetCopy(c.ranks[p.type]),
        pie_chart_file_prefix=c.pie_chart_file_prefix,
        save_dir=p.dirs.pie_charts_dir)
def _exec(p, request_dict):
    """Run a pristine 'env' analysis.

    Records the unique sample names found in the ENV data file, then
    builds the samples dictionary and the OTU library.

    Parameters:
        p            -- Meta object for the analysis
        request_dict -- client request (unused here beyond the hook signature)
    """
    p.set_analysis_type("env")

    # extracting sample names from env file
    samples = framework.tools.helper_functions.sorted_copy(
        framework.tools.env.extract_sample_names(p.files.data_file_path))
    # 'with' closes the handle; the old open(...).write(...) leaked it
    with open(p.files.all_unique_samples_file_path, 'w') as f:
        f.write('\n'.join(samples) + '\n')
    debug("%d unique samples from ENV stored in samples file" % len(samples), p.files.log_file)

    samples_dictionary(p)
    otu_library(p)
def separate_low_confidence(p):
    """Write sequences whose RDP confidence falls below p.threshold to the
    low-confidence sequences file.

    All file handles are managed with 'with' so they are closed even on
    error (the old code leaked every one of them).
    """
    debug("Separating low confidence sequences", p.files.log_file)
    with open(p.files.seperator_file_path) as f:
        separator = f.read()
    with open(p.files.data_file_path) as data_file, \
         open(p.files.rdp_output_file_path) as rdp_output:
        lo_seqs = framework.tools.rdp.low_confidence_seqs(
            data_file, rdp_output, p.threshold, separator)
        # write while the inputs are still open, in case lo_seqs is lazy
        with open(p.files.low_confidence_seqs_path, 'w') as out:
            for s in lo_seqs:
                out.write(s)
def samples_dictionary(p):
    """Build the samples dictionary from the RDP output and serialize it.

    Also rewrites the unique-samples file, since the RDP output may have
    been updated (e.g. by an append) since it was last written.
    """
    debug("Computing samples dictionary", p.files.log_file)
    # 'with' closes the handles the old open(...).read() chain leaked
    with open(p.files.seperator_file_path) as f:
        seperator = f.read()
    thresh = p.threshold if hasattr(p, 'threshold') else None
    # read samples from RDP since it may have been updated.
    # iterate the file directly instead of materializing readlines().
    with open(p.files.rdp_output_file_path) as f:
        samples = list(set(line.split(seperator)[0] for line in f))
    with open(p.files.all_unique_samples_file_path, 'w') as f:
        f.write('\n'.join(samples) + '\n')
    samples_dict = framework.tools.rdp.create_samples_dictionary(
        p.files.rdp_output_file_path, seperator, samples, threshold=thresh)
    debug("Serializing samples dictionary object", p.files.log_file)
    SerializeToFile(samples_dict, p.files.samples_serialized_file_path)
def rarefaction_curves(p):
    """Recompute the rarefaction dict and regenerate every rarefaction
    curve image (combined figure plus one per sample)."""
    sdict = DeserializeFromFile(p.files.samples_serialized_file_path)
    folds = c.number_of_folds_for_rarefaction_curves

    debug("Re-generating rarefaction curves: computing rarefraction dict (%d folds)" % folds, p.files.log_file)
    rdict = framework.tools.rarefaction.generate_dict(sdict, number_of_folds=folds)

    debug("Re-generating rarefaction curves: saving rarefaction dict", p.files.log_file)
    SerializeToFile(rdict, p.files.rarefaction_dict_serialized_file_path)

    debug("Re-generating rarefaction curves: generating all samples image", p.files.log_file)
    framework.tools.rarefaction.generate_all_samples_figure(
        rdict, save_path=p.files.rarefaction_curves_all_samples_file_path)

    debug("Re-generating rarefaction curves: generating individual images", p.files.log_file)
    framework.tools.rarefaction.generate_individual_figures(
        rdict,
        save_path=p.dirs.rarefaction_curves_dir,
        prefix=c.rarefaction_curve_file_prefix)

    debug("Re-generating rarefaction curves: Done", p.files.log_file)
def _exec(p, request_dict):
    """Run a pristine 'vamps' analysis.

    Records the unique sample names found in the VAMPS data table, then
    builds the samples dictionary and the OTU library.

    Parameters:
        p            -- Meta object for the analysis
        request_dict -- client request (unused here beyond the hook signature)
    """
    p.set_analysis_type("vamps")

    # extracting sample names from the VAMPS table
    samples = framework.tools.helper_functions.sorted_copy(
        framework.tools.vamps.extract_sample_names(p.files.data_file_path))
    # 'with' closes the handle; the old open(...).write(...) leaked it
    with open(p.files.all_unique_samples_file_path, "w") as f:
        f.write("\n".join(samples) + "\n")
    debug("%d unique samples from VAMPS table stored in samples file" % len(samples), p.files.log_file)

    samples_dictionary(p)
    otu_library(p)
def dot_plots(p):
    """Generate taxon dot plots for every configured rank of the sample map."""
    sdict = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    t_p_dict = DeserializeFromFile(p.files.sample_map_otu_t_p_tuples_dict_file_path)
    for rank in c.ranks[p.type]:  # taxon charts
        # domain level is skipped for RDP analyses
        if p.type == "rdp" and rank == "domain":
            debug("Skipping domain level taxon charts.", p.files.log_file)
            continue
        debug("Generating dot plots for '%s' level" % rank, p.files.log_file)
        framework.tools.taxons.generate(
            sdict, t_p_dict,
            p.files.sample_map_file_path,
            rank,
            p.dirs.sample_map_taxon_charts_dir)
def pie_chart_dendrograms(p):
    """Generate dendrograms decorated with pie charts, one per rank."""
    sdict = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Generating piechart dendrograms", p.files.log_file)
    ranks = GetCopy(c.ranks[p.type])
    if p.type == "rdp":
        ranks.remove("domain")  # domain level is skipped for RDP analyses
    library = DeserializeFromFile(p.files.otu_library_file_path)
    framework.tools.hcluster.generate(
        sdict,
        library,
        pie_charts_dir=p.dirs.pie_charts_dir,
        dendrogram_prefix=c.pie_chart_dendrogram_file_prefix,
        ranks=ranks)
def _exec(p, request_dict):
    """Execute a pristine RDP analysis.

    Stores the seperator and the optional confidence threshold, extracts
    QA comment lines from the data file, runs the RDP classifier and then
    generates every derived file and figure.
    """
    p.set_analysis_type("rdp")
    p.threshold = request_dict.get('threshold')

    seperator = request_dict['seperator']
    debug("storing seperator: '%s'" % seperator, p.files.log_file)
    # 'with' closes the handle; the old open(...).write(...) leaked it
    with open(p.files.seperator_file_path, 'w') as f:
        f.write(seperator)

    if p.threshold:
        debug("storing confidence threshold", p.files.log_file)
        with open(p.files.threshold_path, 'w') as f:
            f.write(str(p.threshold))

    # ';'-prefixed lines in the data file carry the QA info
    debug("Extracting QA info", p.files.log_file)
    with open(p.files.data_comment_file_path, 'w') as cmt, \
         open(p.files.data_file_path) as data:
        for line in data:
            if line.startswith(';'):
                cmt.write(line)

    # running rdp on data -- assumes two lines per record, hence the
    # halving (TODO confirm for multi-line FASTA).  '//' keeps the count
    # an int on both Python 2 and 3 (plain '/' yields a float on 3).
    number_of_sequences = helper_functions.get_number_of_lines(p.files.data_file_path) // 2
    debug("running rdp on %d sequences" % number_of_sequences, p.files.log_file)
    framework.tools.rdp.run_classifier(c.rdp_running_path,
                                       p.files.data_file_path,
                                       p.files.rdp_output_file_path,
                                       p.files.rdp_error_log_file_path)

    samples_dictionary(p)
    rdp_general_confidence_image(p)
    rdp_otu_confidence_analysis(p)
    rdp_samples_confidence_image(p)
    otu_library(p)
    rarefaction_curves(p)

    # p.threshold is always assigned above, so the old hasattr(p,
    # 'threshold') test was always true and this ran even with no
    # threshold.  Test the value instead (matches _append's behavior).
    if p.threshold:
        separate_low_confidence(p)
def sample_dendrograms(p):
    """Generate dendrograms for the current sample map, using the map for
    sample grouping/coloring."""
    sdict = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    debug("Generating dendrograms for sample map...", p.files.log_file)
    ranks = GetCopy(c.ranks[p.type])
    if p.type == "rdp":
        ranks.remove("domain")  # domain level is skipped for RDP analyses
    library = DeserializeFromFile(p.files.otu_library_file_path)
    framework.tools.hcluster.generate(
        sdict,
        library,
        pie_charts_dir=p.dirs.pie_charts_dir,
        dendrogram_prefix=c.pie_chart_dendrogram_file_prefix,
        save_dir=p.dirs.sample_map_dendrograms_dir,
        map=helper_functions.get_sample_map_dict(p),
        ranks=ranks)
def heatmaps(p):
    """Create percent-abundance files and abundance heatmaps for each rank."""
    sdict = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    # p.files exposes per-rank paths as attributes; look them up by name
    file_paths = vars(p.files)
    for rank in c.ranks[p.type]:
        if p.type == "rdp" and rank == "domain":
            debug("Skipping domain level heatmap.", p.files.log_file)
            continue

        debug("Creating percent abundance for '%s' level" % rank, p.files.log_file)
        abundance_path = file_paths[c.percent_abundance_file_prefix + rank + "_file_path"]
        framework.tools.helper_functions.create_percent_abundance_file(
            sdict, abundance_path, rank=rank)

        # heatmaps
        options = copy.deepcopy(HeatmapOptions())
        options.abundance_file = RelativePath(abundance_path)
        options.sample_color_map_file = RelativePath(p.files.sample_map_file_path)
        options.output_file = RelativePath(
            file_paths[c.abundance_heatmap_file_prefix + rank + "_file_path"])

        debug("Creating percent abundance heatmap for '%s' level" % rank, p.files.log_file)
        SerializeToFile(options, file_paths[c.heatmap_options_file_prefix + rank + "_file_path"])
        framework.tools.heatmap.main(options, c.analyses_dir)
def generate_or_refresh_sample_map(self):
    """Create a new sample map instance or refresh an existing one.

    When the request carries an 'instance' id, that existing sample map
    instance is re-selected and its files regenerated; otherwise a new
    instance is created from 'sample_map_dict'.  In both cases every
    registered sample-map function (common first, then type-specific) is
    run for the instance.  Uses 'in' instead of has_key() and sorted()
    instead of keys()+sort() so it works on Python 2 and 3; write handles
    are managed with 'with' instead of being leaked.
    """
    analysis_id = self.request_dict['analysis_id']
    p = Meta(analysis_id)

    # update server state
    debug("Server state is being updated, running analyses.APPEND(this)", p.files.log_file)
    self.serverstate.done_analyses.remove(analysis_id)
    self.serverstate.running_analyses.append(analysis_id)

    # read the original samples dict and otu library
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    otu_library = DeserializeFromFile(p.files.otu_library_file_path)

    if 'instance' in self.request_dict:
        # sample map exists and needs to be refreshed
        instance = self.request_dict['instance']

        # if late response wasn't requested send response immediately
        if 'return_when_done' not in self.request_dict:
            self.write_socket({'response': 'OK'})

        p.dirs.change_current_sample_map_instance(p.files, instance)
        filtered_samples_dict = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    else:
        # this is a new sample map request.
        sample_map_name = self.request_dict['sample_map_dict']['sample_map_name']
        sample_map_list = self.request_dict['sample_map_dict']['sample_map_list']
        self.write_socket({'response': 'OK'})

        # get a new instance id for the analysis and create sample map directories
        debug("Creating new sample map directory for '%s'" % sample_map_name, p.files.log_file)
        p.dirs.create_new_sample_map_instance(p.files)
        debug("Current sample map directory is '%s'" % p.dirs.sample_map_instance_dir, p.files.log_file)

        # store sample map name
        with open(p.files.sample_map_name_file_path, 'w') as f:
            f.write(sample_map_name + '\n')
        debug("Sample map name has been stored", p.files.log_file)

        # store sample map, one tab-separated line per sample
        with open(p.files.sample_map_file_path, 'w') as f:
            for sample in sample_map_list:
                f.write('%(sample)s\t%(group)s\t%(color)s\n' % sample)
        debug("Sample map has been stored", p.files.log_file)

        # get filtered samples dict and store it for sample map
        debug("Filtered samples dict are being generated and stored", p.files.log_file)
        filtered_samples_dict = framework.tools.helper_functions.filter_dict(
            samples_dict, keep_only=[s['sample'] for s in sample_map_list])
        SerializeToFile(filtered_samples_dict, p.files.sample_map_filtered_samples_dict_file_path)

    # run every registered sample-map function: common ones first, then
    # module (analysis type) specific ones, each group in sorted id order.
    # ('function_id' -- the old loop variable 'id' shadowed the builtin.)
    sample_map_functions_common_dict = server_modules_dict['commons']._sample_map_functions(p, self.request_dict)
    for function_id in sorted(sample_map_functions_common_dict):
        sample_map_functions_common_dict[function_id]['func'](p)

    sample_map_functions_module_dict = server_modules_dict[p.type]._sample_map_functions(p, self.request_dict)
    for function_id in sorted(sample_map_functions_module_dict):
        sample_map_functions_module_dict[function_id]['func'](p)

    # update server state
    debug("Server state is being updated, running analyses.REMOVE(this), done analyses.APPEND(this)", p.files.log_file)
    self.serverstate.running_analyses.remove(analysis_id)
    self.serverstate.done_analyses.append(analysis_id)
    debug("All files for sample map has been generated", p.files.log_file)

    # if late response was requested, send it now
    if 'return_when_done' in self.request_dict:
        self.write_socket({'response': 'OK'})
def exec_analysis(self):
    """Start a brand new analysis from an uploaded data file.

    Validates the request, registers the analysis as running, copies (or
    preprocesses) the data file into the analysis directory and runs the
    type-specific and common '_exec' hooks.  Replies with
    {'response': 'OK', 'process_id': ...} immediately, or after the
    analysis finishes when 'return_when_done' was requested.
    """
    analysis_id = self.request_dict['data_file_sha1sum']
    data_file_temp_path = self.request_dict['data_file_path']
    job_description = self.request_dict['job_description']
    analysis_type = self.request_dict['analysis_type']

    # validate the analysis type BEFORE indexing server_modules_dict --
    # the old code looked the module up first, so an unknown type raised
    # a KeyError instead of producing the error response below.
    if analysis_type not in server_modules_dict:  # this is where we could check filetype
        self.write_socket({'response': 'error', 'content': 'Wrong type of analysis.'})
        return  # fixes _framework.testServerError
    analysis_module = server_modules_dict[analysis_type]

    # True only when the client explicitly asked for a late response
    late_response_request = self.request_dict.get('return_when_done') == True

    if (analysis_id in self.serverstate.running_analyses or
            analysis_id in self.serverstate.done_analyses):
        self.write_socket({'response': 'error', 'content': 'Analysis is already running'})
        return  # fixes _framework.testRepeatAnalysis

    p = Meta(analysis_id)

    debug("Server state is being updated, running processes.APPEND(this)", p.files.log_file)
    self.serverstate.running_analyses.append(analysis_id)

    if late_response_request is False:
        debug("Response is being sent", p.files.log_file)
        self.write_socket({'response': 'OK', 'process_id': analysis_id})

    debug("Filling job description: '%s'" % job_description, p.files.log_file)
    with open(p.files.job_file, 'w') as f:
        f.write(job_description + '\n')

    ########################################################################
    # in addition to copying the file, the module can perform any pre-
    # processing at this stage e.g. stripping barcodes/primers, chimera
    # checking, etc.
    ########################################################################
    try:
        if hasattr(analysis_module, "_preprocess"):
            data_file = analysis_module._preprocess(p, self.request_dict)
        else:
            data_file = open(data_file_temp_path)
        debug("Copying data file", p.files.log_file)
        with open(os.path.join(p.dirs.analysis_dir, c.data_file_name), 'w') as data_file_dest:
            for line in data_file:
                data_file_dest.write(line)
        data_file.close()
    except:
        # roll back the half-created analysis, then propagate the error
        self.request_dict["analysis_id"] = analysis_id
        self._remove_analysis()
        raise

    ################################################################
    # call server modules..
    ################################################################
    # analysis specific stuff.
    server_modules_dict[analysis_type]._exec(p, self.request_dict)
    # common analysis routines..
    server_modules_dict['commons']._exec(p, self.request_dict)
    ################################################################

    # update server state so the info page is browsable.
    debug("Server state is being updated, running analyses.REMOVE(this), done analyses.APPEND(this)", p.files.log_file)
    self.serverstate.running_analyses.remove(analysis_id)
    self.serverstate.done_analyses.append(analysis_id)
    debug("Info page is ready for this study.", p.files.log_file)

    if late_response_request is True:
        debug("Response is being sent", p.files.log_file)
        self.write_socket({'response': 'OK', 'process_id': analysis_id})
def rdp_otu_confidence_analysis(p):
    """Generate the per-OTU RDP confidence figures."""
    debug("Generating RDP confidence per otu figures", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    # 'with' closes the handle the old open(...).read() leaked
    with open(p.files.seperator_file_path) as f:
        seperator = f.read()
    framework.tools.rdp.otu_confidence_analysis(p.files.rdp_output_file_path,
                                                p.dirs.type_specific_data_dir,
                                                seperator, samples)
def rdp_samples_confidence_image(p):
    """Refresh the per-sample RDP confidence figure."""
    debug("Refreshing RDP Confidence per sample figure", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    # 'with' closes the handle the old open(...).read() leaked
    with open(p.files.seperator_file_path) as f:
        seperator = f.read()
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    framework.tools.rdp.sample_confidence_analysis(p.files.rdp_output_file_path,
                                                   p.dirs.analysis_dir,
                                                   seperator, samples)
def random_taxon_colors_dict(p):
    """Generate a random color assignment for taxa and serialize it."""
    sdict = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Generating random taxon color dicts", p.files.log_file)
    colors = framework.tools.helper_functions.get_random_taxa_color_dict(p, sdict, cm)
    SerializeToFile(colors, p.files.taxa_color_dict_file_path)
def otu_library(p):
    """Regenerate the OTU library from the ENV data file and serialize it."""
    debug("Regeneration OTU Library", p.files.log_file)
    # local renamed so it no longer shadows this function's own name
    library = framework.tools.env.get_otu_library(p.files.data_file_path)
    SerializeToFile(library, p.files.otu_library_file_path)
def simpsons_diversity_bar_image(p):
    """Generate the Simpson's diversity index image and its data file."""
    sdict = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Generating Simpson's diversity index image", p.files.log_file)
    framework.tools.diversityindex.generate(
        sdict,
        p.images.simpsons_diversity_index_img_path,
        p.files.simpsons_diversity_index_data_path,
        p.type)
def _append(p, request_dict):
    """Append additional FASTA data to an existing RDP analysis.

    Runs the classifier on the additional sequences, optionally separates
    low-confidence sequences, merges the new results into the original
    RDP output and regenerates the derived files and figures.  File
    handles are managed with 'with' so they are closed even on error
    (the old code leaked most of them).
    """
    # TODO: there should be one function in RDP that takes care of all these in one step.
    # actually that one step solution should exist in every module that creates samples_dict eventually.
    # so the mess here for new analyses and additional samples could be carried into their own modules.
    # without putting everything together nicely in modules with standard hooks in them, there is no way to
    # fix this mess.
    p.threshold = request_dict.get('threshold')
    if p.threshold:
        debug("storing confidence threshold", p.files.log_file)
        with open(p.files.threshold_path, 'w') as f:
            f.write(str(p.threshold))

    debug("Extracting unique sample names from additional FASTA file", p.files.log_file)
    additional_data_file_path = request_dict['additional_data_file_path']
    with open(p.files.seperator_file_path) as f:
        seperator = f.read()
    additional_samples = framework.tools.helper_functions.sorted_copy(
        framework.tools.rdp.extract_sample_names(additional_data_file_path, seperator))
    with open(p.files.all_unique_samples_file_path) as f:
        original_samples = framework.tools.helper_functions.sorted_copy(
            [sample.strip() for sample in f])

    # one '>' header per FASTA record
    with open(additional_data_file_path) as f:
        number_of_sequences = sum(1 for l in f if l.startswith('>'))

    additional_rdp_output_path = os.path.join(p.dirs.analysis_dir, "additional_rdp_output")
    debug("Running rdp on %d additional sequences" % number_of_sequences, p.files.log_file)
    framework.tools.rdp.run_classifier(c.rdp_running_path,
                                       additional_data_file_path,
                                       additional_rdp_output_path,
                                       p.files.rdp_error_log_file_path)

    if p.threshold:
        debug("Separating low confidence sequences", p.files.log_file)
        with open(additional_data_file_path) as data_file, \
             open(additional_rdp_output_path) as rdp_output:
            lo_seqs = framework.tools.rdp.low_confidence_seqs(
                data_file, rdp_output, p.threshold, seperator)
            # append to the existing low-confidence file
            with open(p.files.low_confidence_seqs_path, 'a') as o:
                for s in lo_seqs:
                    o.write(s)

    debug("Merging additional data with the original RDP results", p.files.log_file)
    framework.tools.rdp.merge(p.files.samples_serialized_file_path,
                              additional_samples, original_samples,
                              additional_rdp_output_path,
                              p.files.rdp_output_file_path, seperator)

    debug("Reading updated samples dict", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)

    debug("Unique samples in samples dict being stored in samples file", p.files.log_file)
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    with open(p.files.all_unique_samples_file_path, 'w') as f:
        f.write('\n'.join(samples) + '\n')

    rdp_general_confidence_image(p)
    rdp_otu_confidence_analysis(p)
    rdp_samples_confidence_image(p)
    otu_library(p)

    os.remove(additional_rdp_output_path)
def real_dot_plots(p):
    """Generate dot plots using real (absolute) abundance values."""
    sdict = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    real_t_p = DeserializeFromFile(p.files.sample_map_otu_t_p_tuples_dict_real_file_path)
    debug("Generating dot plots w/ real values for sample map...", p.files.log_file)
    for rank in c.ranks[p.type]:
        framework.tools.taxons.generate(
            sdict, real_t_p,
            p.files.sample_map_file_path, rank,
            p.dirs.sample_map_taxon_charts_dir,
            real_abundance=True)
def rdp_general_confidence_image(p):
    """Generate the overall RDP confidence figure for the analysis."""
    debug("Generating RDP Confidence figure", p.files.log_file)
    framework.tools.rdp.general_confidence_analysis(p.files.rdp_output_file_path,
                                                    p.dirs.analysis_dir)
def otu_library(p):
    """Build the OTU library from the RDP output and serialize it."""
    debug("Generating OTU Library", p.files.log_file)
    # local renamed so it no longer shadows this function's own name
    library = framework.tools.rdp.get_otu_library(p.files.rdp_output_file_path)
    SerializeToFile(library, p.files.otu_library_file_path)
def real_t_test_values_and_probabilities_dict(p):
    """Compute per-OTU t-test / p-value tuples using real (absolute)
    abundance values and serialize the result."""
    sdict = DeserializeFromFile(p.files.sample_map_filtered_samples_dict_file_path)
    library = DeserializeFromFile(p.files.otu_library_file_path)
    debug("Computing t-test and p values w/ real values", p.files.log_file)
    result = framework.tools.taxons.get_t_p_values_dict_for_subset(
        sdict, library, p.files.sample_map_file_path,
        ranks=GetCopy(c.ranks[p.type]), real_abundance=True)
    SerializeToFile(result, p.files.sample_map_otu_t_p_tuples_dict_real_file_path)
def samples_dictionary(p):
    """Build the samples dictionary from the ENV data file and serialize it."""
    debug("Computing sample dictionary", p.files.log_file)
    sdict = framework.tools.env.create_samples_dictionary(p.files.data_file_path)
    debug("Serializing sample dictionary object", p.files.log_file)
    helper_functions.SerializeToFile(sdict, p.files.samples_serialized_file_path)