def main(environment_file,
         sample_mapping_file=None,
         unit_mapping_file=None,
         min_abundance=0,
         min_sum_normalized_percent=1):
    """Write a GEXF network file for the given environment file.

    The output is written next to ``environment_file``, with the file
    extension replaced by ``.gexf``.

    Args:
        environment_file: Path of the environment file to read samples from.
        sample_mapping_file: Optional path of a sample mapping file; when
            given, the loaded mapping is passed on to the GEXF generator.
        unit_mapping_file: Optional path of a unit mapping file.  It is
            loaded with the same ``utils.get_sample_mapping_dict`` reader as
            the sample mapping.
        min_abundance: Forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: Accepted for interface compatibility; it
            is not used by this function.
    """
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(
        environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict,
                                                  min_abundance=min_abundance)
    # Only the percents are needed below; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    # Always bind both mappings so that the call below needs no guards.
    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only looks at the last path component, so a file without an
    # extension keeps its full name instead of collapsing to '.gexf', and a
    # dot in a directory name does not truncate the path.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(oligos,
                                     samples_dict,
                                     unit_percents,
                                     output_file,
                                     sample_mapping_dict=sample_mapping,
                                     unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Write a GEXF network file for the given environment file.

    The output is written next to ``environment_file``, with the file
    extension replaced by ``.gexf``.

    Args:
        environment_file: Path of the environment file to read samples from.
        sample_mapping_file: Optional path of a sample mapping file; when
            given, the loaded mapping is passed on to the GEXF generator.
        unit_mapping_file: Optional path of a unit mapping file.  It is
            loaded with the same ``utils.get_sample_mapping_dict`` reader as
            the sample mapping.
        min_abundance: Forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: Accepted for interface compatibility; it
            is not used by this function.
    """
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)
    # Only the percents are needed below; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    # Always bind both mappings so that the call below needs no guards.
    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only looks at the last path component, so a file without an
    # extension keeps its full name instead of collapsing to ".gexf", and a
    # dot in a directory name does not truncate the path.
    output_file = os.path.splitext(environment_file)[0] + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
def generate_html_output(run_info_dict, html_output_directory=None):
    """Generate the static HTML report for a decomposition run.

    Static assets and the run's output files are copied into
    ``html_output_directory``; ``index_for_decomposition.tmpl`` is then
    rendered and written there as ``index.html``.

    Args:
        run_info_dict: Run information mapping.  It is only read here: a deep
            copy of it is what gets updated and handed to the templates, with
            file paths replaced by the names of the copies placed in the
            output directory.
        html_output_directory: Destination directory.  Defaults to
            ``HTML-OUTPUT`` under ``run_info_dict['output_directory']``; it
            is created when it does not exist.

    Returns:
        Path of the generated ``index.html``.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'],
                                             'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    missing_image = os.path.join(absolute, 'static/missing_image.png')

    # Static assets are mandatory: a failure here propagates to the caller.
    static_assets = (
        ('static/style.css', 'style.css'),
        ('static/header_2.png', 'header.png'),
        ('static/missing_image.png', 'missing.png'),
        ('static/colorbar.png', 'colorbar.png'),
    )
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset),
                     os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name):
        """Best-effort copy of ``source``; return the destination base name.

        When the copy fails and ``source`` ends with 'png', the 'missing
        image' placeholder is copied to the destination instead.
        """
        dest = os.path.join(html_output_directory, dest_name)
        try:
            shutil.copy2(source, dest)
        except EnvironmentError:
            # Only file-system failures are tolerated; KeyboardInterrupt and
            # programming errors are no longer silently swallowed.
            if source.endswith('png'):
                shutil.copy2(missing_image, dest)

        return os.path.basename(dest)

    run_files = (
        ('matrix_count_file_path', 'matrix_counts.txt'),
        ('matrix_percent_file_path', 'matrix_percents.txt'),
        ('environment_file_path', 'environment.txt'),
        ('read_distribution_table_path', 'read_distribution.txt'),
    )
    for key, file_dest_name in run_files:
        html_dict[key] = copy_as(run_info_dict[key], file_dest_name)

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures to the output.

        Returns None when ``html_dict`` has no '<prefix>_file_path' entry.
        Each leaf of the returned dict is either the extension-less name of
        the copied figure or None when neither the PDF nor the PNG exists.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE(security): unpickling can execute arbitrary code; this path
        # must only ever point at a file written by the pipeline itself.
        with open(html_dict[html_dict_key]) as pickled_figures:
            figures_dict = cPickle.load(pickled_figures)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_path = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_path + '.pdf') or os.path.exists(
                            figure_path + '.png'):
                        figure_name = '-'.join([_map, _func, _op])
                        copy_as(figure_path + '.png', '%s.png' % figure_name)
                        copied = copy_as(figure_path + '.pdf',
                                         '%s.pdf' % figure_name)
                        # Templates get the name without its extension.
                        figures_dict[_map][_func][_op] = '.'.join(
                            copied.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None
        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict(
        'exclusive_figures_dict')

    if 'node_representatives_file_path' in html_dict:
        html_dict['node_representatives_file_path'] = copy_as(
            run_info_dict['node_representatives_file_path'],
            'node-representatives.fa.txt')
    else:
        html_dict['node_representatives_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(
            run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'],
                                                 'reference_db.fa')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'],
                                              'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(
            run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['topology_gexf']:
        html_dict['topology_gexf'] = copy_as(run_info_dict['topology_gexf'],
                                             'topology.gexf')

    html_dict['samples_dict'] = get_samples_dict_from_environment_file(
        run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # include pretty names
    html_dict['pretty_names'] = pretty_names

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl',
                                                  html_dict)

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_decomposition.tmpl', html_dict)

    # Close the file explicitly so the page is flushed before returning.
    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
# any later version.
#
# Please read the COPYING file.

import os
import sys
import cPickle

from Oligotyping.lib.decomposer import Decomposer
from Oligotyping.utils.utils import get_samples_dict_from_environment_file

# Regenerates the exclusive figures of a finished decomposition run.
#
# Command line: <runinfo-pickle> <sample-mapping>
if len(sys.argv) < 3:
    sys.stderr.write('Usage: %s <runinfo-pickle> <sample-mapping>\n' % sys.argv[0])
    sys.exit(1)

# NOTE(security): unpickling can execute arbitrary code; only pass a run-info
# file that was written by the pipeline itself.
with open(sys.argv[1]) as runinfo_file:
    runinfo = cPickle.load(runinfo_file)
sample_mapping = sys.argv[2]

# Point a fresh Decomposer at the files and directories of the stored run.
decomposer = Decomposer()
decomposer.matrix_percent_file_path = runinfo['matrix_percent_file_path']
decomposer.matrix_count_file_path = runinfo['matrix_count_file_path']
decomposer.tmp_directory = runinfo['tmp_directory']
decomposer.output_directory = runinfo['output_directory']
# Figures go to a 'FIGURES' directory beside the stored figures directory.
decomposer.figures_directory = os.path.join(os.path.dirname(runinfo['figures_directory']), 'FIGURES')

if not os.path.exists(decomposer.tmp_directory):
    os.makedirs(decomposer.tmp_directory)

decomposer.sample_mapping = sample_mapping
decomposer._init_logger('exclusive_figures.log')
decomposer.samples_dict = get_samples_dict_from_environment_file(runinfo['environment_file_path'])
decomposer.samples = sorted(decomposer.samples_dict.keys())

decomposer._generate_exclusive_figures()
def generate_html_output(run_info_dict, html_output_directory=None):
    """Generate the static HTML report for a decomposition run.

    Static assets and the run's output files are copied into
    ``html_output_directory``; ``index_for_decomposition.tmpl`` is then
    rendered and written there as ``index.html``.

    Args:
        run_info_dict: Run information mapping.  It is only read here: a deep
            copy of it is what gets updated and handed to the templates, with
            file paths replaced by the names of the copies placed in the
            output directory.
        html_output_directory: Destination directory.  Defaults to
            ``HTML-OUTPUT`` under ``run_info_dict["output_directory"]``; it
            is created when it does not exist.

    Returns:
        Path of the generated ``index.html``.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict["output_directory"], "HTML-OUTPUT")

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    missing_image = os.path.join(absolute, "static/missing_image.png")

    # Static assets are mandatory: a failure here propagates to the caller.
    static_assets = (
        ("static/style.css", "style.css"),
        ("static/header_2.png", "header.png"),
        ("static/missing_image.png", "missing.png"),
        ("static/colorbar.png", "colorbar.png"),
    )
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset), os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name):
        """Best-effort copy of ``source``; return the destination base name.

        When the copy fails and ``source`` ends with "png", the "missing
        image" placeholder is copied to the destination instead.
        """
        dest = os.path.join(html_output_directory, dest_name)
        try:
            shutil.copy2(source, dest)
        except EnvironmentError:
            # Only file-system failures are tolerated; KeyboardInterrupt and
            # programming errors are no longer silently swallowed.
            if source.endswith("png"):
                shutil.copy2(missing_image, dest)

        return os.path.basename(dest)

    run_files = (
        ("matrix_count_file_path", "matrix_counts.txt"),
        ("matrix_percent_file_path", "matrix_percents.txt"),
        ("environment_file_path", "environment.txt"),
        ("read_distribution_table_path", "read_distribution.txt"),
    )
    for key, file_dest_name in run_files:
        html_dict[key] = copy_as(run_info_dict[key], file_dest_name)

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures to the output.

        Returns None when ``html_dict`` has no "<prefix>_file_path" entry.
        Each leaf of the returned dict is either the extension-less name of
        the copied figure or None when neither the PDF nor the PNG exists.
        """
        html_dict_key = "%s_file_path" % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE(security): unpickling can execute arbitrary code; this path
        # must only ever point at a file written by the pipeline itself.
        with open(html_dict[html_dict_key]) as pickled_figures:
            figures_dict = cPickle.load(pickled_figures)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_path = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_path + ".pdf") or os.path.exists(figure_path + ".png"):
                        figure_name = "-".join([_map, _func, _op])
                        copy_as(figure_path + ".png", "%s.png" % figure_name)
                        copied = copy_as(figure_path + ".pdf", "%s.pdf" % figure_name)
                        # Templates get the name without its extension.
                        figures_dict[_map][_func][_op] = ".".join(copied.split(".")[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None
        return figures_dict

    html_dict["figures_dict"] = get_figures_dict("figures_dict")
    html_dict["exclusive_figures_dict"] = get_figures_dict("exclusive_figures_dict")

    if "node_representatives_file_path" in html_dict:
        html_dict["node_representatives_file_path"] = copy_as(
            run_info_dict["node_representatives_file_path"], "node-representatives.fa.txt"
        )
    else:
        html_dict["node_representatives_file_path"] = None

    if "blast_ref_db" in run_info_dict and os.path.exists(run_info_dict["blast_ref_db"]):
        html_dict["blast_ref_db_path"] = copy_as(run_info_dict["blast_ref_db"], "reference_db.fa")

    if run_info_dict["sample_mapping"]:
        html_dict["sample_mapping"] = copy_as(run_info_dict["sample_mapping"], "sample_mapping.txt")
    else:
        html_dict["sample_mapping"] = None

    if run_info_dict["gexf_network_file_path"]:
        html_dict["gexf_network_file_path"] = copy_as(run_info_dict["gexf_network_file_path"], "network.gexf")

    if run_info_dict["topology_gexf"]:
        html_dict["topology_gexf"] = copy_as(run_info_dict["topology_gexf"], "topology.gexf")

    html_dict["samples_dict"] = get_samples_dict_from_environment_file(run_info_dict["environment_file_path"])
    html_dict["samples"] = sorted(html_dict["samples_dict"].keys())
    html_dict["blast_results_found"] = False

    # include pretty names
    html_dict["pretty_names"] = pretty_names

    # get javascript code for sample pie-charts
    html_dict["pie_charts_js"] = render_to_string("pie_charts_js.tmpl", html_dict)

    # generate index
    index_page = os.path.join(html_output_directory, "index.html")
    rendered = render_to_string("index_for_decomposition.tmpl", html_dict)

    # Close the file explicitly so the page is flushed before returning.
    with open(index_page, "w") as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
Example #6
0
def generate_html_output(run_info_dict, html_output_directory=None, entropy_figure=None):
    """Generate the static HTML report for an oligotyping run.

    Static assets, scripts and the run's output files are copied into
    ``html_output_directory``.  One page per oligotype is rendered when
    ``run_info_dict`` has an 'output_directory_for_reps' entry, and
    ``index_for_oligo.tmpl`` is rendered and written as ``index.html``.

    Args:
        run_info_dict: Run information mapping.  It is only read here: a deep
            copy of it is what gets updated and handed to the templates.
        html_output_directory: Destination directory.  Defaults to
            ``HTML-OUTPUT`` under ``run_info_dict['output_directory']``; it
            is created when it does not exist.
        entropy_figure: Optional path of the entropy figure, with or without
            a '.png'/'.pdf' extension.  When omitted,
            ``run_info_dict['entropy']`` is used.

    Returns:
        Path of the generated ``index.html``.

    Raises:
        EnvironmentError: When an essential file cannot be copied.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # Static assets are mandatory: a failure here propagates to the caller.
    static_assets = (
        ('static/style.css', 'style.css'),
        ('static/header_1.png', 'header.png'),
        ('static/missing_image.png', 'missing.png'),
        ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
        ('scripts/popup.js', 'popup.js'),
        ('scripts/g.pie.js', 'g.pie.js'),
        ('scripts/g.raphael.js', 'g.raphael.js'),
        ('scripts/raphael.js', 'raphael.js'),
        ('scripts/morris.js', 'morris.js'),
    )
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset), os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name, essential=True):
        """Copy ``source`` into the output directory; return its base name.

        A failed copy of an essential file raises; a failed copy of a
        non-essential file only prints a warning to stderr.
        """
        dest = os.path.join(html_output_directory, dest_name)
        try:
            shutil.copy2(source, dest)
        except EnvironmentError:
            if essential:
                raise
            # it is ok if you fail to copy files that are not essential..
            sys.stderr.write('\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n' % (source, dest))

        return os.path.basename(dest)

    def copy_entropy_figure(path_without_ext, ext):
        """Copy the entropy figure of type ``ext``; only the PNG is essential."""
        return copy_as(path_without_ext + '.' + ext, 'entropy.%s' % ext, essential=(ext == 'png'))

    # embarrassingly ad-hoc:
    if entropy_figure and entropy_figure.endswith(('.pdf', '.png')):
        entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        figure_key = 'entropy_figure_%s' % ext
        if entropy_figure:
            html_dict[figure_key] = copy_entropy_figure(entropy_figure, ext)
        else:
            # NOTE(review): this fallback can only trigger for the essential
            # PNG copy, because a failed non-essential copy does not raise.
            try:
                html_dict[figure_key] = copy_entropy_figure(run_info_dict['entropy'], ext)
            except EnvironmentError:
                # Retry with the last four characters removed (presumably an
                # extension stored as part of the path).
                html_dict[figure_key] = copy_entropy_figure(run_info_dict['entropy'][:-4], ext)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    run_files = (
        ('matrix_count_file_path', 'matrix_counts.txt'),
        ('matrix_percent_file_path', 'matrix_percents.txt'),
        ('read_distribution_table_path', 'read_distribution.txt'),
        ('environment_file_path', 'environment.txt'),
        ('oligos_fasta_file_path', 'oligos.fa.txt'),
        ('oligos_nexus_file_path', 'oligos.nex.txt'),
    )
    for key, file_dest_name in run_files:
        html_dict[key] = copy_as(run_info_dict[key], file_dest_name)

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures to the output.

        Returns None when ``html_dict`` has no '<prefix>_file_path' entry.
        Each leaf of the returned dict is either the extension-less name of
        the copied figure or None unless both the PDF and the PNG exist.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE(security): unpickling can execute arbitrary code; this path
        # must only ever point at a file written by the pipeline itself.
        with open(html_dict[html_dict_key]) as pickled_figures:
            figures_dict = cPickle.load(pickled_figures)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_path = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_path + '.pdf') and os.path.exists(figure_path + '.png'):
                        figure_name = '-'.join([_map, _func, _op])
                        copy_as(figure_path + '.pdf', '%s.pdf' % figure_name)
                        copied = copy_as(figure_path + '.png', '%s.png' % figure_name)
                        # Templates get the name without its extension.
                        figures_dict[_map][_func][_op] = '.'.join(copied.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None
        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    if html_dict['generate_sets']:
        # (key in html_dict, key in run_info_dict, destination file name)
        set_files = (
            ('across_samples_MN_file_path', 'across_samples_MN_file_path', 'across_samples_max_normalized.txt'),
            ('across_samples_SN_file_path', 'across_samples_SN_file_path', 'across_samples_sum_normalized.txt'),
            ('oligo_sets_stackbar_figure', 'stack_bar_with_agglomerated_oligos_file_path', 'stackbar_with_oligo_sets.png'),
            ('oligos_across_samples_figure', 'oligos_across_samples_file_path', 'oligos_across_samples.png'),
            ('oligotype_sets_figure', 'oligotype_sets_across_samples_figure_path', 'oligotype_sets.png'),
            ('matrix_count_oligo_sets_file_path', 'matrix_count_oligo_sets_file_path', 'matrix_counts_oligo_sets.txt'),
            ('matrix_percent_oligo_sets_file_path', 'matrix_percent_oligo_sets_file_path', 'matrix_percents_oligo_sets.txt'),
            ('oligotype_sets_file', 'oligotype_sets_file_path', 'oligotype_sets.txt'),
        )
        for html_key, run_key, file_dest_name in set_files:
            html_dict[html_key] = copy_as(run_info_dict[run_key], file_dest_name)

        # The second tab-separated column holds a comma-separated oligo list.
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [line.strip().split('\t')[1].split(',') for line in sets_file]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None
    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')
    html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names
    # get colors dict
    html_dict['color_dict'] = get_colors_dict(run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies: the sum over every sample that has the oligo
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(
            sum(d[oligo] for d in html_dict['samples_dict'].values() if oligo in d))
    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory)
        # '\\#' yields a literal backslash followed by '#', exactly what the
        # previously used (invalid) '\#' escape produced.
        html_dict['component_reference'] = ''.join(
            ['<a onmouseover="popup(\'\\#%d\', 50)" href="">|</a>' % i for i in range(html_dict['alignment_length'])])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        total = len(oligos)
        for i, oligo in enumerate(oligos):
            # A shallow copy is sufficient: only top-level keys are set on
            # the per-page dict, so html_dict stays untouched while the large
            # nested structures are shared instead of duplicated per page.
            tmp_dict = copy.copy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(oligo, html_dict)
            oligo_page = os.path.join(html_output_directory, 'oligo_%s.html' % oligo)

            tmp_dict['index'] = i + 1
            tmp_dict['total'] = total
            tmp_dict['prev'] = ('oligo_%s.html' % oligos[i - 1]) if i > 0 else None
            tmp_dict['next'] = ('oligo_%s.html' % oligos[i + 1]) if i < total - 1 else None

            rendered = render_to_string('single_oligo.tmpl', tmp_dict)

            with open(oligo_page, 'w') as oligo_file:
                oligo_file.write(rendered.encode("utf-8"))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_oligo.tmpl', html_dict)

    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
Example #7
0
def generate_html_output(run_info_dict,
                         html_output_directory=None,
                         entropy_figure=None):
    """Generate the static HTML report for an oligotyping run.

    Static assets, scripts and the run's output files are copied into
    ``html_output_directory``.  One page per oligotype is rendered when
    ``run_info_dict`` has an 'output_directory_for_reps' entry, and
    ``index_for_oligo.tmpl`` is rendered and written as ``index.html``.

    Args:
        run_info_dict: Run information mapping.  It is only read here: a deep
            copy of it is what gets updated and handed to the templates.
        html_output_directory: Destination directory.  Defaults to
            ``HTML-OUTPUT`` under ``run_info_dict['output_directory']``; it
            is created when it does not exist.
        entropy_figure: Optional path of the entropy figure, with or without
            a '.png'/'.pdf' extension.  When omitted,
            ``run_info_dict['entropy']`` is used.

    Returns:
        Path of the generated ``index.html``.

    Raises:
        EnvironmentError: When an essential file cannot be copied.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'],
                                             'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # Static assets are mandatory: a failure here propagates to the caller.
    static_assets = (
        ('static/style.css', 'style.css'),
        ('static/header_1.png', 'header.png'),
        ('static/missing_image.png', 'missing.png'),
        ('static/colorbar.png', 'colorbar.png'),
        ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
        ('scripts/popup.js', 'popup.js'),
        ('scripts/g.pie.js', 'g.pie.js'),
        ('scripts/g.raphael.js', 'g.raphael.js'),
        ('scripts/raphael.js', 'raphael.js'),
        ('scripts/morris.js', 'morris.js'),
    )
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset),
                     os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name, essential=True):
        """Copy ``source`` into the output directory; return its base name.

        A failed copy of an essential file raises; a failed copy of a
        non-essential file only prints a warning to stderr.
        """
        dest = os.path.join(html_output_directory, dest_name)
        try:
            shutil.copy2(source, dest)
        except EnvironmentError:
            if essential:
                raise
            # it is ok if you fail to copy files that are not essential..
            sys.stderr.write(
                '\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n'
                % (source, dest))

        return os.path.basename(dest)

    def copy_entropy_figure(path_without_ext, ext):
        """Copy the entropy figure of type ``ext``; only the PNG is essential."""
        return copy_as(path_without_ext + '.' + ext,
                       'entropy.%s' % ext,
                       essential=(ext == 'png'))

    # embarrassingly ad-hoc:
    if entropy_figure and entropy_figure.endswith(('.pdf', '.png')):
        entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        figure_key = 'entropy_figure_%s' % ext
        if entropy_figure:
            html_dict[figure_key] = copy_entropy_figure(entropy_figure, ext)
        else:
            # NOTE(review): this fallback can only trigger for the essential
            # PNG copy, because a failed non-essential copy does not raise.
            try:
                html_dict[figure_key] = copy_entropy_figure(
                    run_info_dict['entropy'], ext)
            except EnvironmentError:
                # Retry with the last four characters removed (presumably an
                # extension stored as part of the path).
                html_dict[figure_key] = copy_entropy_figure(
                    run_info_dict['entropy'][:-4], ext)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(
            run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'],
                                              'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    run_files = (
        ('matrix_count_file_path', 'matrix_counts.txt'),
        ('matrix_percent_file_path', 'matrix_percents.txt'),
        ('read_distribution_table_path', 'read_distribution.txt'),
        ('environment_file_path', 'environment.txt'),
        ('oligos_fasta_file_path', 'oligos.fa.txt'),
        ('oligos_nexus_file_path', 'oligos.nex.txt'),
    )
    for key, file_dest_name in run_files:
        html_dict[key] = copy_as(run_info_dict[key], file_dest_name)

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures to the output.

        Returns None when ``html_dict`` has no '<prefix>_file_path' entry.
        Each leaf of the returned dict is either the extension-less name of
        the copied figure or None unless both the PDF and the PNG exist.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE(security): unpickling can execute arbitrary code; this path
        # must only ever point at a file written by the pipeline itself.
        with open(html_dict[html_dict_key]) as pickled_figures:
            figures_dict = cPickle.load(pickled_figures)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_path = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_path + '.pdf') and os.path.exists(
                            figure_path + '.png'):
                        figure_name = '-'.join([_map, _func, _op])
                        copy_as(figure_path + '.pdf', '%s.pdf' % figure_name)
                        copied = copy_as(figure_path + '.png',
                                         '%s.png' % figure_name)
                        # Templates get the name without its extension.
                        figures_dict[_map][_func][_op] = '.'.join(
                            copied.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None
        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict(
        'exclusive_figures_dict')

    if html_dict['generate_sets']:
        # (key in html_dict, key in run_info_dict, destination file name)
        set_files = (
            ('across_samples_MN_file_path', 'across_samples_MN_file_path',
             'across_samples_max_normalized.txt'),
            ('across_samples_SN_file_path', 'across_samples_SN_file_path',
             'across_samples_sum_normalized.txt'),
            ('oligo_sets_stackbar_figure',
             'stack_bar_with_agglomerated_oligos_file_path',
             'stackbar_with_oligo_sets.png'),
            ('oligos_across_samples_figure', 'oligos_across_samples_file_path',
             'oligos_across_samples.png'),
            ('oligotype_sets_figure',
             'oligotype_sets_across_samples_figure_path',
             'oligotype_sets.png'),
            ('matrix_count_oligo_sets_file_path',
             'matrix_count_oligo_sets_file_path',
             'matrix_counts_oligo_sets.txt'),
            ('matrix_percent_oligo_sets_file_path',
             'matrix_percent_oligo_sets_file_path',
             'matrix_percents_oligo_sets.txt'),
            ('oligotype_sets_file', 'oligotype_sets_file_path',
             'oligotype_sets.txt'),
        )
        for html_key, run_key, file_dest_name in set_files:
            html_dict[html_key] = copy_as(run_info_dict[run_key],
                                          file_dest_name)

        # The second tab-separated column holds a comma-separated oligo list.
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [
                line.strip().split('\t')[1].split(',') for line in sets_file
            ]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(
            run_info_dict['representative_seqs_fasta_file_path'],
            'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None
    if 'blast_ref_db' in run_info_dict and os.path.exists(
            run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'],
                                                 'reference_db.fa')
    html_dict['entropy_components'] = [
        int(x) for x in html_dict['bases_of_interest_locs'].split(',')
    ]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(
        run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(
        run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names
    # get purity score colors dict: each score is scaled by 25 and truncated
    # to pick one of the 26 gradient colors
    html_dict['score_color_dict'] = {}
    gradient = get_list_of_colors(26, colormap='RdYlGn')
    purity_scores = run_info_dict['final_purity_score_dict']
    for oligo in purity_scores:
        html_dict['score_color_dict'][oligo] = gradient[int(
            purity_scores[oligo] * 25)]
    # get total purity score color dict
    html_dict['total_score_color'] = gradient[int(
        float(run_info_dict['total_purity_score_dict']) * 25)]
    # get colors dict
    html_dict['color_dict'] = get_colors_dict(
        run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(
        run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies: the sum over every sample that has the oligo
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(
            sum(d[oligo] for d in html_dict['samples_dict'].values()
                if oligo in d))
    # get purity score
    html_dict['purity_score'] = run_info_dict['final_purity_score_dict']
    # get total purity score
    html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict']
    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict[
            'rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(
            html_dict, html_output_directory)
        # '\\#' yields a literal backslash followed by '#', exactly what the
        # previously used (invalid) '\#' escape produced.
        html_dict['component_reference'] = ''.join([
            '<a onmouseover="popup(\'\\#%d\', 50)" href="">|</a>' % i
            for i in range(html_dict['alignment_length'])
        ])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl',
                                                  html_dict)

    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        total = len(oligos)
        for i, oligo in enumerate(oligos):
            # A shallow copy is sufficient: only top-level keys are set on
            # the per-page dict, so html_dict stays untouched while the large
            # nested structures are shared instead of duplicated per page.
            tmp_dict = copy.copy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(
                oligo, html_dict)
            oligo_page = os.path.join(html_output_directory,
                                      'oligo_%s.html' % oligo)

            tmp_dict['index'] = i + 1
            tmp_dict['total'] = total
            tmp_dict['prev'] = ('oligo_%s.html' %
                                oligos[i - 1]) if i > 0 else None
            tmp_dict['next'] = ('oligo_%s.html' %
                                oligos[i + 1]) if i < total - 1 else None

            rendered = render_to_string('single_oligo.tmpl', tmp_dict)

            with open(oligo_page, 'w') as oligo_file:
                oligo_file.write(rendered.encode("utf-8"))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_oligo.tmpl', html_dict)

    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
Example #8
0
import sys
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.cosine_similarity import get_oligotype_sets
from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy
from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
from Oligotyping.utils.utils import generate_ENVIRONMENT_file

# Command line: <environment file> <cosine similarity threshold>
input_file_path = sys.argv[1]
cosine_similarity_value = float(sys.argv[2])

# Both output names are the input path plus a suffix that embeds the
# threshold; the ENVIRON name extends the SETS name.
sets_output_file_name = '%s-cos-%s-SETS' % (input_file_path,
                                            cosine_similarity_value)
environ_output_file_name = sets_output_file_name + '-ENVIRON'

samples_dict = get_samples_dict_from_environment_file(input_file_path)
oligos = get_oligos_sorted_by_abundance(samples_dict)
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = samples_dict.keys()

# Only the sum-normalized dict is handed to the greedy set builder below.
normalized_dicts = get_units_across_samples_dicts(oligos,
                                                  samples_dict.keys(),
                                                  unit_percents)
across_samples_sum_normalized, across_samples_max_normalized = normalized_dicts

oligotype_sets = get_oligotype_sets_greedy(
    oligos,
    across_samples_sum_normalized,
    cosine_similarity_value,
    sets_output_file_name)

# A single parenthesized argument prints the same text under the
# Python 2 print statement.
print('%d sets from %d units' % (len(oligotype_sets), len(oligos)))

samples_dict_with_agglomerated_oligos = {}
Example #9
0
    parser.add_argument('--output-file', default = None, metavar = 'OUTPUT_FILE',\
                        help = 'File name for the figure to be stored. File name\
                                must end with "png", "jpg", or "tiff".'                                                                       )
    parser.add_argument('--legend',
                        action='store_true',
                        default=False,
                        help='Turn on legend')
    parser.add_argument('--colors-export',
                        metavar='COLORS_LIST_FILE',
                        help='Store the color list into a file')
    parser.add_argument('--project-title', default = None, metavar = 'PROJECT_TITLE',\
                        help = 'Project name for the samples.')

    args = parser.parse_args()

    samples_dict = get_samples_dict_from_environment_file(
        args.environment_file)

    if args.colors_file:
        colors_dict = {}
        for oligotype, color in [
                line.strip().split('\t')
                for line in open(args.colors_file).readlines()
        ]:
            colors_dict[oligotype] = color
    elif args.color_list_file:
        colors_dict = {}
        colors = [
            line.strip() for line in open(args.color_list_file).readlines()
        ]
        oligos = get_oligos_sorted_by_abundance(samples_dict, None)
        oligos.reverse()
# -*- coding: utf-8 -*-
# takes an environment file and generates matching percent and count matrices.

import sys
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files

# The only command-line argument is the environment file; the two matrix
# file names are derived from it by appending a suffix.
environment_file_path = sys.argv[1]
count_matrix_path = environment_file_path + '-MATRIX-COUNT'
percent_matrix_path = environment_file_path + '-MATRIX-PERCENT'

samples_dict = get_samples_dict_from_environment_file(environment_file_path)

# Reverse the order returned by get_oligos_sorted_by_abundance before the
# per-unit counts and percents are computed.
oligos = get_oligos_sorted_by_abundance(samples_dict)
oligos.reverse()
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)

# Sample names are passed on in sorted order.
samples = sorted(samples_dict.keys())

generate_MATRIX_files(oligos,
                      samples,
                      unit_counts,
                      unit_percents,
                      count_matrix_path,
                      percent_matrix_path)
Example #11
0
# -*- coding: utf-8 -*-
# takes an environment file and generates matching percent and count matrices.

import sys
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files

# The environment file path is the sole command-line argument; the count
# and percent matrix names are that path plus a fixed suffix.
environment_file_path = sys.argv[1]
matrix_count_path = environment_file_path + '-MATRIX-COUNT'
matrix_percent_path = environment_file_path + '-MATRIX-PERCENT'

samples_dict = get_samples_dict_from_environment_file(environment_file_path)

# The abundance-sorted list is reversed in place before it is used.
oligos = get_oligos_sorted_by_abundance(samples_dict)
oligos.reverse()
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)

# Sample names go to the matrix writer in sorted order.
samples = sorted(samples_dict.keys())

generate_MATRIX_files(
    oligos, samples, unit_counts, unit_percents,
    matrix_count_path, matrix_percent_path)
# Please read the COPYING file.

import os
import sys
import cPickle

from Oligotyping.lib.decomposer import Decomposer
from Oligotyping.utils.utils import get_samples_dict_from_environment_file

# Command line: <pickled run-info file> <sample mapping file>
#
# NOTE(review): unpickling can execute arbitrary code; only run this against
# run-info files that come from a trusted source.
with open(sys.argv[1]) as runinfo_file:
    # Close the file as soon as the run-info dict has been loaded instead of
    # leaving the handle open for the rest of the script.
    runinfo = cPickle.load(runinfo_file)
sample_mapping = sys.argv[2]

# Rebuild just enough Decomposer state from the stored run info to
# regenerate the exclusive figures.
decomposer = Decomposer()
decomposer.matrix_percent_file_path = runinfo['matrix_percent_file_path']
decomposer.matrix_count_file_path = runinfo['matrix_count_file_path']
decomposer.tmp_directory = runinfo['tmp_directory']
decomposer.output_directory = runinfo['output_directory']
# Figures go to a 'FIGURES' directory under the parent of the recorded
# figures directory path.
decomposer.figures_directory = os.path.join(
    os.path.dirname(runinfo['figures_directory']), 'FIGURES')

# The temporary directory recorded in the run info may no longer exist.
if not os.path.exists(decomposer.tmp_directory):
    os.makedirs(decomposer.tmp_directory)

decomposer.sample_mapping = sample_mapping
decomposer._init_logger('exclusive_figures.log')
decomposer.samples_dict = get_samples_dict_from_environment_file(
    runinfo['environment_file_path'])
decomposer.samples = sorted(decomposer.samples_dict.keys())

decomposer._generate_exclusive_figures()
    parser.add_argument('--color-list-file', metavar = 'COLORS_FILE', default = None,\
                        help = 'Single column file that contains a list of colors')
    parser.add_argument('--output-file', default = None, metavar = 'OUTPUT_FILE',\
                        help = 'File name for the figure to be stored. File name\
                                must end with "png", "jpg", or "tiff".')
    parser.add_argument('--legend', action = 'store_true', default = False,
                        help = 'Turn on legend')
    parser.add_argument('--colors-export', metavar = 'COLORS_LIST_FILE',
                        help = 'Store the color list into a file')
    parser.add_argument('--project-title', default = None, metavar = 'PROJECT_TITLE',\
                        help = 'Project name for the samples.')


    args = parser.parse_args()

    samples_dict = get_samples_dict_from_environment_file(args.environment_file)

    if args.colors_file:
        colors_dict = {}
        for oligotype, color in [line.strip().split('\t') for line in open(args.colors_file).readlines()]:
            colors_dict[oligotype] = color
    elif args.color_list_file:
        colors_dict = {}
        colors = [line.strip() for line in open(args.color_list_file).readlines()]
        oligos = get_oligos_sorted_by_abundance(samples_dict, None)
        oligos.reverse()
        if len(oligos) > len(colors):
            sys.stderr.write('Error: number of colors in file is less than number of oligos. Quiting.\n')
            sys.exit()
        for oligo in oligos:
            colors_dict[oligo] = colors[oligos.index(oligo)]
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.cosine_similarity import get_oligotype_sets
from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy
from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
from Oligotyping.utils.utils import generate_ENVIRONMENT_file 


# Command line: <environment file> <cosine similarity threshold>
input_file_path = sys.argv[1]
cosine_similarity_value = float(sys.argv[2])

# Output names are the input path followed by a suffix embedding the
# threshold; the ENVIRON name is the SETS name with '-ENVIRON' appended.
sets_output_file_name = '%s-cos-%s-SETS' % (input_file_path,
                                            cosine_similarity_value)
environ_output_file_name = sets_output_file_name + '-ENVIRON'

samples_dict = get_samples_dict_from_environment_file(input_file_path)
oligos = get_oligos_sorted_by_abundance(samples_dict)
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = samples_dict.keys()

# Of the two normalized dicts, only the sum-normalized one feeds the
# greedy set builder.
normalized_dicts = get_units_across_samples_dicts(oligos,
                                                  samples_dict.keys(),
                                                  unit_percents)
across_samples_sum_normalized, across_samples_max_normalized = normalized_dicts

oligotype_sets = get_oligotype_sets_greedy(
    oligos,
    across_samples_sum_normalized,
    cosine_similarity_value,
    sets_output_file_name)

# A single parenthesized argument prints the same text under the
# Python 2 print statement.
print('%d sets from %d units' % (len(oligotype_sets), len(oligos)))

samples_dict_with_agglomerated_oligos = {}

for sample in samples:
Example #15
0
def generate_html_output(run_info_dict, html_output_directory = None):
    """Copy run artifacts into an HTML output directory and render index.html.

    Parameters:
        run_info_dict: dict describing a finished run. It is deep-copied and
            never modified. Keys read unconditionally:
            'matrix_count_file_path', 'matrix_percent_file_path',
            'environment_file_path', 'read_distribution_table_path',
            'sample_mapping', 'gexf_network_file_path', 'topology_gexf', and
            'output_directory' (only when html_output_directory is not
            given). Keys used only when present: 'figures_dict_file_path',
            'exclusive_figures_dict_file_path',
            'node_representatives_file_path', 'blast_ref_db'.
        html_output_directory: destination directory; defaults to
            '<output_directory>/HTML-OUTPUT'. Created if it does not exist.

    Returns:
        Path of the written 'index.html' inside html_output_directory.

    Raises:
        KeyError: if one of the unconditionally read keys is missing.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    # Work on a copy: paths in html_dict are rewritten to the basenames of
    # the copies placed in the HTML directory.
    html_dict = copy.deepcopy(run_info_dict)

    # Static assets shipped with the package: (source under `absolute`,
    # name in the HTML directory).
    static_assets = (('static/style.css', 'style.css'),
                     ('static/header_2.png', 'header.png'),
                     ('static/missing_image.png', 'missing.png'),
                     ('static/colorbar.png', 'colorbar.png'))
    for static_path, asset_name in static_assets:
        shutil.copy2(os.path.join(absolute, static_path),
                     os.path.join(html_output_directory, asset_name))

    def copy_as(source, dest_name):
        """Best-effort copy of `source` into the HTML directory as `dest_name`.

        A failed copy is tolerated; for a source ending in 'png' the
        placeholder image is copied in its place. Returns the basename of
        the destination whether or not anything was copied.
        """
        dest = os.path.join(html_output_directory, dest_name)
        try:
            shutil.copy2(source, dest)
        except Exception:
            # Deliberately lenient, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare `except:` did.
            if source.endswith('png'):
                shutil.copy2(os.path.join(absolute, 'static/missing_image.png'), dest)

        return os.path.basename(dest)

    html_dict['matrix_count_file_path'] = copy_as(run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['environment_file_path'] = copy_as(run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['read_distribution_table_path'] = copy_as(run_info_dict['read_distribution_table_path'], 'read_distribution.txt')

    def get_figures_dict(html_dict_prefix):
        """Load the pickled figures dict stored under '<prefix>_file_path'.

        Each leaf holds a figure path without extension. When a '.pdf' or
        '.png' exists for it, both variants are copied (through copy_as) and
        the leaf becomes the extension-less name of the copy; otherwise the
        leaf becomes None. Returns None when the key is absent from
        html_dict.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE(review): unpickling can execute arbitrary code; the pickle is
        # assumed to come from this pipeline's own run.
        with open(html_dict[html_dict_key]) as pickle_file:
            figures_dict = cPickle.load(pickle_file)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                figures_for_func = figures_dict[_map][_func]
                for _op in figures_for_func:
                    source_prefix = figures_for_func[_op]
                    if os.path.exists(source_prefix + '.pdf') or os.path.exists(source_prefix + '.png'):
                        dest_stem = '-'.join([_map, _func, _op])
                        copy_as(source_prefix + '.png', '%s.png' % dest_stem)
                        copied_name = copy_as(source_prefix + '.pdf', '%s.pdf' % dest_stem)
                        # Store the copied name without its extension.
                        figures_for_func[_op] = '.'.join(copied_name.split('.')[:-1])
                    else:
                        figures_for_func[_op] = None
        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    if 'node_representatives_file_path' in html_dict:
        html_dict['node_representatives_file_path'] = copy_as(run_info_dict['node_representatives_file_path'], 'node-representatives.fa.txt')
    else:
        html_dict['node_representatives_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['topology_gexf']:
        html_dict['topology_gexf'] = copy_as(run_info_dict['topology_gexf'], 'topology.gexf')

    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # include pretty names
    html_dict['pretty_names'] = pretty_names

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_decomposition.tmpl', html_dict)

    # Close the handle explicitly rather than relying on garbage collection.
    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page