예제 #1
0
def sequences_bar_image(p):
    """Render the per-sample bar image from the serialized samples dict.

    Loads the samples dict from ``p.files.samples_serialized_file_path``,
    pairs every sample name (in the order produced by ``sorted_copy``) with
    that sample's ``"tr"`` entry, and hands the pairs to
    ``framework.tools.bar.generate`` together with
    ``p.images.samples_sequences_bar_path``.
    """
    sample_table = DeserializeFromFile(p.files.samples_serialized_file_path)
    debug("Generating number of sequences bar image", p.files.log_file)

    ordered_names = framework.tools.helper_functions.sorted_copy(sample_table.keys())

    # One (sample name, "tr" value) pair per bar.
    # NOTE(review): "tr" presumably holds the sample's total read count -- confirm.
    bar_data = []
    for name in ordered_names:
        bar_data.append((name, sample_table[name]["tr"]))

    framework.tools.bar.generate(bar_data, p.images.samples_sequences_bar_path)
예제 #2
0
파일: rdp.py 프로젝트: ShannonCeb/viamics
def _append(p, request_dict):
    """Merge the samples of an additional FASTA file into an existing RDP analysis.

    Steps, in order:
      1. Store the optional confidence threshold on ``p`` and, when set,
         write it to ``p.files.threshold_path``.
      2. Run the RDP classifier on the additional FASTA file.
      3. When a threshold is set, append the low-confidence sequences to
         ``p.files.low_confidence_seqs_path``.
      4. Merge the new classifier output into the existing RDP results and
         rewrite the unique-samples file from the updated samples dict.
      5. Refresh the dependent figures / OTU library and delete the
         temporary classifier output.

    Parameters:
        p: analysis object exposing ``files``, ``dirs`` and ``threshold``.
        request_dict: mapping that must contain ``'additional_data_file_path'``
            and may contain ``'threshold'``.

    Raises:
        KeyError: if ``'additional_data_file_path'`` is missing from
            ``request_dict``.
    """
    # TODO: there should be one function in RDP that takes care of all these in one step.
    # actually that one step solution should exist in every module that creates samples_dict eventually.
    # so the mess here for new analyses and additional samples could be carried into their own modules.
    # without putting everything together nicely in modules with standard hooks in them, there is no way to
    # fix this mess.

    p.threshold = request_dict.get('threshold')

    if p.threshold:
        debug("storing confidence threshold", p.files.log_file)
        with open(p.files.threshold_path,'w') as f:
            f.write(str(p.threshold))

    debug("Extracting unique sample names from additional FASTA file", p.files.log_file)
    additional_data_file_path = request_dict['additional_data_file_path']
    with open(p.files.seperator_file_path) as seperator_file:
        seperator = seperator_file.read()

    additional_samples = framework.tools.helper_functions.sorted_copy(framework.tools.rdp.extract_sample_names(additional_data_file_path, seperator))
    with open(p.files.all_unique_samples_file_path) as samples_file:
        original_samples = framework.tools.helper_functions.sorted_copy([sample.strip() for sample in samples_file])

    # Every FASTA record starts with a '>' header line, so counting those
    # lines counts the sequences.
    with open(additional_data_file_path) as fasta_file:
        number_of_sequences = sum(1 for l in fasta_file if l.startswith('>'))

    additional_rdp_output_path = os.path.join(p.dirs.analysis_dir, "additional_rdp_output")
    debug("Running rdp on %d additional sequences" % number_of_sequences, p.files.log_file)
    framework.tools.rdp.run_classifier(c.rdp_running_path, additional_data_file_path, additional_rdp_output_path, p.files.rdp_error_log_file_path)

    if p.threshold:
        debug("Separating low confidence sequences", p.files.log_file)
        # The input handles stay open until every sequence has been written:
        # low_confidence_seqs may read them lazily while being iterated.
        with open(additional_data_file_path) as fasta_file:
            with open(additional_rdp_output_path) as rdp_output_file:
                lo_seqs = framework.tools.rdp.low_confidence_seqs(fasta_file,
                                                                  rdp_output_file,
                                                                  p.threshold,
                                                                  seperator)
                # Append mode: sequences from earlier runs are kept.
                with open(p.files.low_confidence_seqs_path,'a') as o:
                    for s in lo_seqs:
                        o.write(s)

    debug("Merging additional data with the original RDP results", p.files.log_file)
    framework.tools.rdp.merge(p.files.samples_serialized_file_path, additional_samples, original_samples, additional_rdp_output_path, p.files.rdp_output_file_path, seperator)

    debug("Reading updated samples dict", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)

    debug("Unique samples in samples dict being stored in samples file", p.files.log_file)
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    # Close explicitly so the sample list is flushed to disk before the
    # refresh steps below run.
    with open(p.files.all_unique_samples_file_path, 'w') as samples_file:
        samples_file.write('\n'.join(samples) + '\n')

    rdp_general_confidence_image(p)
    rdp_otu_confidence_analysis(p)
    rdp_samples_confidence_image(p)
    otu_library(p)
    os.remove(additional_rdp_output_path)
예제 #3
0
파일: rdp.py 프로젝트: ShannonCeb/viamics
def rdp_samples_confidence_image(p):
    """Regenerate the per-sample RDP confidence figure.

    Loads the serialized samples dict, reads the separator string from
    ``p.files.seperator_file_path`` and passes the RDP output path, the
    analysis directory, the separator and the ordered sample names to
    ``framework.tools.rdp.sample_confidence_analysis``.
    """
    debug("Refreshing RDP Confidence per sample figure", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    # Read through a context manager so the handle is closed deterministically.
    with open(p.files.seperator_file_path) as seperator_file:
        seperator = seperator_file.read()
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    framework.tools.rdp.sample_confidence_analysis(p.files.rdp_output_file_path, p.dirs.analysis_dir, seperator, samples)
예제 #4
0
파일: rdp.py 프로젝트: ShannonCeb/viamics
def rdp_otu_confidence_analysis(p):
    """Regenerate the per-OTU RDP confidence figures.

    Loads the serialized samples dict, reads the separator string from
    ``p.files.seperator_file_path`` and passes the RDP output path, the
    type-specific data directory, the separator and the ordered sample names
    to ``framework.tools.rdp.otu_confidence_analysis``.
    """
    debug("Generating RDP confidence per otu figures", p.files.log_file)
    samples_dict = DeserializeFromFile(p.files.samples_serialized_file_path)
    samples = framework.tools.helper_functions.sorted_copy(samples_dict.keys())
    # Read through a context manager so the handle is closed deterministically.
    with open(p.files.seperator_file_path) as seperator_file:
        seperator = seperator_file.read()
    framework.tools.rdp.otu_confidence_analysis(p.files.rdp_output_file_path, p.dirs.type_specific_data_dir, seperator, samples)