def main(environment_file,
         sample_mapping_file=None,
         unit_mapping_file=None,
         min_abundance=0,
         min_sum_normalized_percent=1):
    """Generate a GEXF network file from an environment file.

    The output is written next to the input, using the input path with its
    extension replaced by ``.gexf``.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path of a sample mapping file; when
            given, the parsed mapping is forwarded to the GEXF generator.
        unit_mapping_file: optional path of a unit mapping file; it is parsed
            with the same reader as the sample mapping file.
        min_abundance: forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: accepted for interface compatibility;
            this function does not use it.
    """
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(
        environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict,
                                                  min_abundance=min_abundance)
    # Only the percents are needed for the network file.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips an extension from the final path component, so a
    # file without an extension (or a directory name containing a dot) still
    # yields a sensible output path.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Generate a GEXF network file from an environment file.

    The output is written next to the input, using the input path with its
    extension replaced by ``.gexf``.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path of a sample mapping file; when
            given, the parsed mapping is forwarded to the GEXF generator.
        unit_mapping_file: optional path of a unit mapping file; it is parsed
            with the same reader as the sample mapping file.
        min_abundance: forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: accepted for interface compatibility;
            this function does not use it.
    """
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)
    # Only the percents are needed for the network file.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file) if sample_mapping_file else None
    unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file) if unit_mapping_file else None

    # splitext only strips an extension from the final path component, so a
    # file without an extension (or a directory name containing a dot) still
    # yields a sensible output path.
    output_file = os.path.splitext(environment_file)[0] + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
Esempio n. 3
0
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.cosine_similarity import get_oligotype_sets
from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy
from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
from Oligotyping.utils.utils import generate_ENVIRONMENT_file

# Command-line arguments: the environment file path and the cosine similarity
# value (parsed as a float).
input_file_path = sys.argv[1]
cosine_similarity_value = float(sys.argv[2])
# Both output names are derived from the input path and the similarity value.
sets_output_file_name = input_file_path + '-cos-%s-SETS' % cosine_similarity_value
environ_output_file_name = input_file_path + '-cos-%s-SETS-ENVIRON' % cosine_similarity_value

# Load the per-sample data and derive the oligo list and per-unit counts/percents.
samples_dict = get_samples_dict_from_environment_file(input_file_path)
oligos = get_oligos_sorted_by_abundance(samples_dict)
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = samples_dict.keys()

# Only the sum-normalized result is passed on below; the max-normalized one is
# not used in the visible part of this script.
across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts(
    oligos, samples_dict.keys(), unit_percents)
# NOTE(review): presumably groups oligos whose sum-normalized profiles meet the
# cosine similarity value and writes the sets to sets_output_file_name --
# confirm against get_oligotype_sets_greedy.
oligotype_sets = get_oligotype_sets_greedy(oligos,
                                           across_samples_sum_normalized,
                                           cosine_similarity_value,
                                           sets_output_file_name)

# Report how many sets were produced from how many oligos.
print '%d sets from %d units' % (len(oligotype_sets), len(oligos))

# NOTE(review): presumably filled per sample by the loop that follows; the
# loop body is not visible here.
samples_dict_with_agglomerated_oligos = {}

for sample in samples:
Esempio n. 4
0
def oligotype_distribution_stack_bar(samples_dict, colors_dict, output_file = None, legend = False,\
                                     colors_export = None, project_title = None, display = True, oligos = None):
    """Draw a stacked bar chart of the percent abundance of oligos per sample.

    Args:
        samples_dict: mapping of sample name to a mapping of oligo to count.
        colors_dict: mapping of oligo to an HTML color string; when None,
            colors are obtained from ``random_colors``.
        output_file: when set, the figure is saved to this path.
        legend: when true, a legend listing the oligos is drawn.
        colors_export: when set, a tab-separated "oligo<TAB>color" file is
            written to this path, one line per oligo, using the colors that
            were actually drawn.
        project_title: optional title suffix.
        display: when true, ``plt.show()`` is attempted.
        oligos: optional list of oligos to plot; it is plotted in reverse
            order. The caller's list is not modified. When None, the list
            comes from ``get_oligos_sorted_by_abundance``.
    """
    samples = sorted(samples_dict.keys())

    if oligos is None:
        oligos = get_oligos_sorted_by_abundance(samples_dict, oligos)
    else:
        # Reverse a copy so the caller's list keeps its order.
        oligos = list(reversed(oligos))

    if colors_dict is None:
        colors_dict = random_colors(copy.deepcopy(oligos))

    # Per-sample percent vectors, aligned with the order of `oligos`.
    percent_vectors = {}
    for sample in samples:
        sample_counts = samples_dict[sample]
        counts = [sample_counts[oligo] if oligo in sample_counts else 0
                  for oligo in oligos]
        total = sum(counts)
        if total:
            percent_vectors[sample] = [count * 100.0 / total
                                       for count in counts]
        else:
            # A sample with none of the plotted oligos gets an empty bar
            # instead of raising ZeroDivisionError.
            percent_vectors[sample] = [0.0] * len(counts)

    # figure..
    plt.figure(figsize=(20, 10))
    plt.subplots_adjust(left=0.03, bottom=0.15, top=0.97,
                        right=0.90 if legend else 0.99)

    ind = np.arange(len(samples))
    width = 0.75

    bars = []
    colors_list = []
    # Running height of each sample's stack; replaces re-summing the prefix
    # of every vector for every oligo.
    bottom = [0] * len(samples)

    for i, oligo in enumerate(oligos):
        values = [percent_vectors[sample][i] for sample in samples]
        try:
            color = HTMLColorToRGB(colors_dict[oligo])
            colors_list.append(colors_dict[oligo])
        except Exception:
            # Missing or unparsable color: fall back to black.
            color = 'black'
            colors_list.append('#000000')

        bars.append(plt.bar(ind, values, width, bottom=bottom, color=color))
        bottom = [base + value for base, value in zip(bottom, values)]

    if colors_export:
        # Export the colors that were drawn (including the black fallback),
        # so an oligo missing from colors_dict cannot raise KeyError here.
        with open(colors_export, 'w') as colors_file:
            for oligo, html_color in zip(oligos, colors_list):
                colors_file.write('%s\t%s\n' % (oligo, html_color))

    plt.ylabel('Oligotype Distribution', size='large')
    plt.title('Stacked Bar Charts of Oligotype Distribution %s' \
                 % (('for "%s"' % project_title) if project_title else ''))

    plt.xticks(ind + width / 2., samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax=100)
    plt.xlim(xmin=-(width) / 2, xmax=len(samples))

    if legend:
        plt.legend([b[0] for b in bars][::-1],
                   oligos[::-1],
                   bbox_to_anchor=(1.01, 1),
                   loc=2,
                   borderaxespad=0.0,
                   shadow=True,
                   fancybox=True)

        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        llines = leg.get_lines()
        frame = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)
    if display:
        try:
            plt.show()
        except Exception:
            # Showing the figure is best-effort; a display failure must not
            # abort the caller.
            pass
# -*- coding: utf-8 -*-
# takes an environment file and generates matching percent and count matrices.

import sys
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files

# The environment file path is the first command-line argument; both matrix
# files are written next to it.
environment_path = sys.argv[1]

samples_dict = get_samples_dict_from_environment_file(environment_path)

# The matrices list the oligos in the reverse of the abundance-sorted order.
oligos = list(reversed(get_oligos_sorted_by_abundance(samples_dict)))

unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = sorted(samples_dict.keys())

generate_MATRIX_files(
    oligos,
    samples,
    unit_counts,
    unit_percents,
    environment_path + '-MATRIX-COUNT',
    environment_path + '-MATRIX-PERCENT',
)
def _scale_to_percent(values, denominator):
    """Return `values` rescaled so that `denominator` maps to 100; all zeros if it is 0."""
    if not denominator:
        return [0.0 for _ in values]
    return [value * 100.0 / denominator for value in values]


def _plot_oligo_lines(vectors, oligos, colors_dict):
    """Plot one line per oligo on the current axes and return the main line handles."""
    # With fewer than 50 oligos, two wider translucent passes are drawn under
    # each main line.
    emphasize = len(oligos) < 50
    lines = []

    for i, oligo in enumerate(oligos):
        try:
            color = HTMLColorToRGB(colors_dict[oligo])
        except Exception:
            # Missing or unparsable color: fall back to black.
            color = 'black'

        if emphasize:
            plt.plot(vectors[oligo], color=color, linewidth=3, alpha=0.3, zorder=i)
            plt.plot(vectors[oligo], color=color, linewidth=5, alpha=0.2, zorder=i)
        lines.append(plt.plot(vectors[oligo], color=color, linewidth=1, alpha=0.9, zorder=i))

    return lines


def oligotype_distribution_across_samples(samples_dict, colors_dict, output_file = None, legend = False, project_title = None, display = True, oligos = None):
    """Plot normalized oligo distributions across samples in two subplots.

    The upper subplot shows each oligo's per-sample percents scaled by that
    oligo's maximum; the lower subplot shows them scaled by that oligo's sum.

    Args:
        samples_dict: mapping of sample name to a mapping of oligo to count.
        colors_dict: mapping of oligo to an HTML color string; when None,
            colors are obtained from ``random_colors``.
        output_file: when set, the figure is saved to this path.
        legend: when true, a legend listing the oligos is drawn on the upper
            subplot.
        project_title: optional title suffix.
        display: when true, ``plt.show()`` is attempted.
        oligos: optional list of oligos to plot; it is plotted in reverse
            order. The caller's list is not modified. When None, the list
            comes from ``get_oligos_sorted_by_abundance``.
    """
    samples = sorted(samples_dict.keys())

    if oligos is None:
        oligos = get_oligos_sorted_by_abundance(samples_dict, oligos)
    else:
        # Reverse a copy so the caller's list keeps its order.
        oligos = list(reversed(oligos))

    if colors_dict is None:
        colors_dict = random_colors(copy.deepcopy(oligos))

    # Each sample's total is needed once per oligo; compute it a single time.
    sample_totals = {}
    for sample in samples:
        sample_totals[sample] = sum(samples_dict[sample].values())

    oligo_percents = {}
    for oligo in oligos:
        percents = []
        for sample in samples:
            total = sample_totals[sample]
            if oligo in samples_dict[sample] and total:
                percents.append(samples_dict[sample][oligo] * 100.0 / total)
            else:
                percents.append(0.0)
        oligo_percents[oligo] = percents

    max_normalized_across_samples_vectors = {}
    sum_normalized_across_samples_vectors = {}
    for oligo in oligos:
        percents = oligo_percents[oligo]
        # An oligo absent from every sample (or an empty sample list) yields
        # an all-zero vector instead of a division by zero.
        max_normalized_across_samples_vectors[oligo] = _scale_to_percent(
            percents, max(percents) if percents else 0)
        sum_normalized_across_samples_vectors[oligo] = _scale_to_percent(
            percents, sum(percents))

    # figure..
    plt.figure(figsize=(20, 10))
    plt.subplots_adjust(left=0.03, bottom=0.15, top=0.97,
                        right=0.80 if legend else 0.99)

    plt.rcParams.update({'axes.linewidth' : 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)

    ind = np.arange(len(samples))
    width = 0.75

    # Upper subplot: MAX normalized. The grid is enabled only once the
    # subplot exists; calling plt.grid() before any axes exists would
    # implicitly create an extra one.
    plt.subplot(2, 1, 1)
    plt.grid(True)

    lines = _plot_oligo_lines(max_normalized_across_samples_vectors, oligos, colors_dict)

    plt.ylabel('MAX Normalized', size='large')
    plt.title('Normalized Oligotype Distributions Across Samples %s' \
                 % (('for "%s"' % project_title) if project_title else ''))

    # Tick marks without labels; the sample names go on the lower subplot.
    plt.xticks(ind, ['' for d in samples], rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax = 100)
    plt.xlim(xmin = -(width) / 2, xmax = len(samples) - 0.5)

    if legend:
        plt.legend([b[0] for b in lines][::-1], oligos[::-1], bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext  = leg.get_texts()
        llines = leg.get_lines()
        frame  = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    # Lower subplot: SUM normalized.
    plt.subplot(2, 1, 2)
    plt.grid(True)

    _plot_oligo_lines(sum_normalized_across_samples_vectors, oligos, colors_dict)

    plt.ylabel('SUM Normalized', size='large')

    plt.xticks(ind, samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax = 100)
    plt.xlim(xmin = -(width) / 2, xmax = len(samples) - 0.5)

    if output_file:
        plt.savefig(output_file)
    if display:
        try:
            plt.show()
        except Exception:
            # Showing the figure is best-effort; a display failure must not
            # abort the caller.
            pass
def oligotype_distribution_stack_bar(samples_dict, colors_dict, output_file = None, legend = False,\
                                     colors_export = None, project_title = None, display = True, oligos = None):
    """Draw a stacked bar chart of the percent abundance of oligos per sample.

    Args:
        samples_dict: mapping of sample name to a mapping of oligo to count.
        colors_dict: mapping of oligo to an HTML color string; when None,
            colors are obtained from ``random_colors``.
        output_file: when set, the figure is saved to this path.
        legend: when true, a legend listing the oligos is drawn.
        colors_export: when set, a tab-separated "oligo<TAB>color" file is
            written to this path, one line per oligo, using the colors that
            were actually drawn.
        project_title: optional title suffix.
        display: when true, ``plt.show()`` is attempted.
        oligos: optional list of oligos to plot; it is plotted in reverse
            order. The caller's list is not modified. When None, the list
            comes from ``get_oligos_sorted_by_abundance``.
    """
    samples = sorted(samples_dict.keys())

    if oligos is None:
        oligos = get_oligos_sorted_by_abundance(samples_dict, oligos)
    else:
        # Reverse a copy so the caller's list keeps its order.
        oligos = list(reversed(oligos))

    if colors_dict is None:
        colors_dict = random_colors(copy.deepcopy(oligos))

    # Per-sample percent vectors, aligned with the order of `oligos`.
    samples_oligo_vectors_percent_normalized = {}
    for sample in samples:
        sample_counts = samples_dict[sample]
        vector = [sample_counts[oligo] if oligo in sample_counts else 0 for oligo in oligos]
        total_oligos_in_sample = sum(vector)
        if total_oligos_in_sample:
            normalized = [abundance * 100.0 / total_oligos_in_sample for abundance in vector]
        else:
            # A sample with none of the plotted oligos gets an empty bar
            # instead of raising ZeroDivisionError.
            normalized = [0.0] * len(vector)
        samples_oligo_vectors_percent_normalized[sample] = normalized

    # figure..
    plt.figure(figsize=(20, 10))
    plt.subplots_adjust(left=0.03, bottom=0.15, top=0.97, right=0.90 if legend else 0.99)

    ind = np.arange(len(samples))
    width = 0.75

    bars = []
    colors_list = []
    # Running height of each sample's stack; replaces re-summing the prefix
    # of every vector for every oligo.
    bottom = [0] * len(samples)

    for i, oligo in enumerate(oligos):
        values = [samples_oligo_vectors_percent_normalized[sample][i] for sample in samples]
        try:
            color = HTMLColorToRGB(colors_dict[oligo])
            colors_list.append(colors_dict[oligo])
        except Exception:
            # Missing or unparsable color: fall back to black.
            color = 'black'
            colors_list.append('#000000')

        p = plt.bar(ind, values, width, bottom=bottom, color=color)
        bars.append(p)
        bottom = [base + value for base, value in zip(bottom, values)]

    if colors_export:
        # Export the colors that were drawn (including the black fallback),
        # so an oligo missing from colors_dict cannot raise KeyError here.
        with open(colors_export, 'w') as colors_file:
            for oligo, html_color in zip(oligos, colors_list):
                colors_file.write('%s\t%s\n' % (oligo, html_color))

    plt.ylabel('Oligotype Distribution', size='large')
    plt.title('Stacked Bar Charts of Oligotype Distribution %s' \
                 % (('for "%s"' % project_title) if project_title else ''))

    plt.xticks(ind+width/2., samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax = 100)
    plt.xlim(xmin = -(width) / 2, xmax = len(samples))

    if legend:
        plt.legend([b[0] for b in bars][::-1], oligos[::-1], bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext  = leg.get_texts()
        llines = leg.get_lines()
        frame  = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)
    if display:
        try:
            plt.show()
        except Exception:
            # Showing the figure is best-effort; a display failure must not
            # abort the caller.
            pass