def sequences_bar_image(p):
    """Regenerate the number-of-sequences-per-sample bar figure.

    Reads the serialized samples dict, orders sample names with the
    project's sorted_copy helper, and hands (sample, total-reads) pairs
    to the bar-image generator.
    """
    debug("Generating number of sequences bar image", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    ordered_samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    # "tr" is the per-sample count plotted on the bar chart
    bars = []
    for sample in ordered_samples:
        bars.append((sample, samples_dict[sample]["tr"]))
    framework.tools.bar.generate(bars, p.images.samples_sequences_bar_path)
def _append(p, request_dict):
    """Append an additional FASTA data set to an existing RDP analysis.

    Steps: optionally persist a confidence threshold, extract the new
    sample names, run the RDP classifier on the additional sequences,
    split off low-confidence sequences when a threshold is set, merge
    the new results into the serialized samples dict, rewrite the
    unique-samples file, and refresh all dependent figures/outputs.

    request_dict keys used: 'threshold' (optional),
    'additional_data_file_path' (required).
    """
    # TODO: there should be one function in RDP that takes care of all these in one step.
    # actually that one step solution should exist in every module that creates samples_dict eventually.
    # so the mess here for new analyses and additional samples could be carried into their own modules.
    # without putting everything together nicely in modules with standard hooks in them, there is no way to
    # fix this mess.
    p.threshold = request_dict.get('threshold')
    if p.threshold:
        debug("storing confidence threshold", p.files.log_file)
        with open(p.files.threshold_path, 'w') as f:
            f.write(str(p.threshold))

    debug("Extracting unique sample names from additional FASTA file", p.files.log_file)
    additional_data_file_path = request_dict['additional_data_file_path']
    # close file handles deterministically instead of leaking them via open(...).read()
    with open(p.files.seperator_file_path) as f:
        seperator = f.read()
    additional_samples = framework.tools.helper_functions.sorted_copy(
        framework.tools.rdp.extract_sample_names(additional_data_file_path, seperator))
    with open(p.files.all_unique_samples_file_path) as f:
        original_samples = framework.tools.helper_functions.sorted_copy(
            [sample.strip() for sample in f.readlines()])
    # FASTA headers start with '>', so this counts sequences in the new file
    with open(additional_data_file_path) as f:
        number_of_sequences = sum(1 for l in f if l.startswith('>'))

    additional_rdp_output_path = os.path.join(p.dirs.analysis_dir, "additional_rdp_output")
    debug("Running rdp on %d additional sequences" % number_of_sequences, p.files.log_file)
    framework.tools.rdp.run_classifier(c.rdp_running_path, additional_data_file_path,
                                       additional_rdp_output_path,
                                       p.files.rdp_error_log_file_path)

    if p.threshold:
        debug("Separating low confidence sequences", p.files.log_file)
        # keep the source handles open while low_confidence_seqs is consumed,
        # in case it yields lazily from them
        with open(additional_data_file_path) as fasta, \
                open(additional_rdp_output_path) as rdp_out:
            lo_seqs = framework.tools.rdp.low_confidence_seqs(fasta, rdp_out,
                                                              p.threshold, seperator)
            with open(p.files.low_confidence_seqs_path, 'a') as o:
                for s in lo_seqs:
                    o.write(s)

    debug("Merging additional data with the original RDP results", p.files.log_file)
    framework.tools.rdp.merge(p.files.samples_serialized_file_path, additional_samples,
                              original_samples, additional_rdp_output_path,
                              p.files.rdp_output_file_path, seperator)

    debug("Reading updated samples dict", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Unique samples in samples dict being stored in samples file", p.files.log_file)
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    with open(p.files.all_unique_samples_file_path, 'w') as f:
        f.write('\n'.join(samples) + '\n')

    # refresh every artifact derived from the (now larger) samples dict
    rdp_general_confidence_image(p)
    rdp_otu_confidence_analysis(p)
    rdp_samples_confidence_image(p)
    otu_library(p)

    # the intermediate classifier output is merged in; drop the temp file
    os.remove(additional_rdp_output_path)
def rdp_samples_confidence_image(p):
    """Refresh the RDP confidence-per-sample figure.

    Reads the serialized samples dict and the stored sample-name
    separator, then delegates to rdp.sample_confidence_analysis.
    """
    debug("Refreshing RDP Confidence per sample figure", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    # use a context manager so the separator file handle is not leaked
    with open(p.files.seperator_file_path) as f:
        seperator = f.read()
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    framework.tools.rdp.sample_confidence_analysis(p.files.rdp_output_file_path,
                                                   p.dirs.analysis_dir,
                                                   seperator, samples)
def rdp_otu_confidence_analysis(p):
    """Generate the RDP confidence-per-OTU figures.

    Reads the serialized samples dict and the stored sample-name
    separator, then delegates to rdp.otu_confidence_analysis.
    """
    debug("Generating RDP confidence per otu figures", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    # use a context manager so the separator file handle is not leaked
    with open(p.files.seperator_file_path) as f:
        seperator = f.read()
    framework.tools.rdp.otu_confidence_analysis(p.files.rdp_output_file_path,
                                                p.dirs.type_specific_data_dir,
                                                seperator, samples)