def main(environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1):
    """Generate a GEXF network file next to an environment file.

    Reads the samples dictionary from `environment_file`, computes per-unit
    percents and hands everything to `utils.generate_gexf_network_file`.
    The output path is `environment_file` with its extension replaced by
    `.gexf`.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path; when given it is loaded with
            `utils.get_sample_mapping_dict` and passed as `sample_mapping_dict`.
        unit_mapping_file: optional path; when given it is loaded with
            `utils.get_sample_mapping_dict` and passed as `unit_mapping_dict`.
        min_abundance: forwarded to `utils.get_oligos_sorted_by_abundance`.
        min_sum_normalized_percent: accepted for interface compatibility;
            not used by this function.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)

    # Only the percents are needed for the network; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips a real extension of the final path component, so a
    # file without an extension no longer yields a bare ".gexf" and a dot in
    # a directory name no longer truncates the path.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(oligos,
                                     samples_dict,
                                     unit_percents,
                                     output_file,
                                     sample_mapping_dict=sample_mapping,
                                     unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Write a `.gexf` network file derived from an environment file.

    The samples dictionary is loaded from `environment_file`, oligos are
    sorted by abundance (honouring `min_abundance`), unit percents are
    computed, and the result is written through
    `utils.generate_gexf_network_file` to a path that is `environment_file`
    with its extension swapped for `.gexf`.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path loaded via
            `utils.get_sample_mapping_dict`; `None` means no sample mapping.
        unit_mapping_file: optional path loaded via
            `utils.get_sample_mapping_dict`; `None` means no unit mapping.
        min_abundance: forwarded to `utils.get_oligos_sorted_by_abundance`.
        min_sum_normalized_percent: kept for interface compatibility; this
            function does not read it.
    """
    # Imported locally so this block brings its own dependency into scope.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)
    unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)[1]

    sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file) if sample_mapping_file else None
    unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file) if unit_mapping_file else None

    # Splitting on every "." breaks for extensionless names (result was just
    # ".gexf") and for dots in directory names; splitext handles both.
    base_path, _extension = os.path.splitext(environment_file)
    output_file = base_path + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
from Oligotyping.utils.utils import get_samples_dict_from_environment_file from Oligotyping.utils.utils import get_oligos_sorted_by_abundance from Oligotyping.utils.utils import get_units_across_samples_dicts from Oligotyping.utils.utils import get_unit_counts_and_percents from Oligotyping.utils.cosine_similarity import get_oligotype_sets from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar from Oligotyping.utils.utils import generate_ENVIRONMENT_file input_file_path = sys.argv[1] cosine_similarity_value = float(sys.argv[2]) sets_output_file_name = input_file_path + '-cos-%s-SETS' % cosine_similarity_value environ_output_file_name = input_file_path + '-cos-%s-SETS-ENVIRON' % cosine_similarity_value samples_dict = get_samples_dict_from_environment_file(input_file_path) oligos = get_oligos_sorted_by_abundance(samples_dict) unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict) samples = samples_dict.keys() across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts( oligos, samples_dict.keys(), unit_percents) oligotype_sets = get_oligotype_sets_greedy(oligos, across_samples_sum_normalized, cosine_similarity_value, sets_output_file_name) print '%d sets from %d units' % (len(oligotype_sets), len(oligos)) samples_dict_with_agglomerated_oligos = {} for sample in samples:
def _sample_percent_vectors(samples_dict, samples, oligos):
    """Return {sample: [percent of each oligo within the sample]}, in `oligos` order."""
    percent_vectors = {}
    for sample in samples:
        counts = [samples_dict[sample].get(oligo, 0) for oligo in oligos]
        total = sum(counts)
        if total:
            percent_vectors[sample] = [count * 100.0 / total for count in counts]
        else:
            # A sample holding none of the requested oligos would otherwise
            # raise ZeroDivisionError; draw it as an empty column instead.
            percent_vectors[sample] = [0.0] * len(counts)
    return percent_vectors


def oligotype_distribution_stack_bar(samples_dict, colors_dict, output_file = None, legend = False,
                                     colors_export = None, project_title = None, display = True, oligos = None):
    """Draw a stacked bar chart of per-sample oligotype percentages.

    Args:
        samples_dict: {sample: {oligo: count}}.
        colors_dict: {oligo: color string accepted by HTMLColorToRGB}; when
            None, colors are obtained from `random_colors`.
        output_file: if truthy, the figure is saved to this path.
        legend: if truthy, a legend listing the oligos is drawn to the right.
        colors_export: if truthy, path of a TAB-delimited "oligo<TAB>color"
            file to write.
        project_title: optional title suffix.
        display: if truthy, `plt.show()` is attempted (failures are ignored).
        oligos: optional list of oligos to plot; it is plotted in reversed
            order. When None, `get_oligos_sorted_by_abundance` decides.
    """
    samples = sorted(samples_dict.keys())

    if oligos is None:
        oligos = get_oligos_sorted_by_abundance(samples_dict, oligos)
    else:
        # Reverse a copy: the caller's list must not be modified in place.
        oligos = list(reversed(oligos))

    if colors_dict is None:
        colors_dict = random_colors(copy.deepcopy(oligos))

    percent_vectors = _sample_percent_vectors(samples_dict, samples, oligos)

    # figure..
    plt.figure(figsize=(20, 10))

    if legend:
        plt.subplots_adjust(left=0.03, bottom=0.15, top=0.97, right=0.90)
    else:
        plt.subplots_adjust(left=0.03, bottom=0.15, top=0.97, right=0.99)

    ind = np.arange(len(samples))
    width = 0.75
    bars = []

    # Running stack height per sample; replaces re-summing the vector prefix
    # for every oligo while accumulating in the same order.
    bottoms = [0.0] * len(samples)

    for i, oligo in enumerate(oligos):
        values = [percent_vectors[sample][i] for sample in samples]

        try:
            color = HTMLColorToRGB(colors_dict[oligo])
        except Exception:
            # Missing or unparsable color: fall back to black.
            color = 'black'

        bars.append(plt.bar(ind, values, width, bottom=bottoms, color=color))
        bottoms = [bottom + value for bottom, value in zip(bottoms, values)]

    if colors_export:
        # `with` guarantees the file is closed even if a write fails.
        with open(colors_export, 'w') as colors_file:
            for oligo in oligos:
                # Oligos without an entry are exported as the black they were drawn with.
                colors_file.write('%s\t%s\n' % (oligo, colors_dict.get(oligo, '#000000')))

    plt.ylabel('Oligotype Distribution', size='large')
    plt.title('Stacked Bar Charts of Oligotype Distribution %s'
              % (('for "%s"' % project_title) if project_title else ''))

    plt.xticks(ind + width / 2., samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax=100)
    plt.xlim(xmin=-(width) / 2, xmax=len(samples))

    if legend:
        plt.legend([b[0] for b in bars][::-1], oligos[::-1], bbox_to_anchor=(1.01, 1),
                   loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        llines = leg.get_lines()
        frame = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)

    if display:
        try:
            plt.show()
        except Exception:
            # Showing the figure is best-effort; failures are deliberately ignored.
            pass
# -*- coding: utf-8 -*-

# Takes an environment file and generates matching percent and count matrices.
#
# Usage: <script> <environment_file>
#
# Two files are written next to the input: '<environment_file>-MATRIX-COUNT'
# and '<environment_file>-MATRIX-PERCENT'.

import sys

from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files

# Fail with a readable message instead of an IndexError traceback when the
# environment file argument is missing.
if len(sys.argv) < 2:
    sys.stderr.write('Usage: %s <environment_file>\n' % sys.argv[0])
    sys.exit(1)

environment_file_path = sys.argv[1]

samples_dict = get_samples_dict_from_environment_file(environment_file_path)

# The matrices list the oligos in the reverse of the order returned by
# get_oligos_sorted_by_abundance.
oligos = get_oligos_sorted_by_abundance(samples_dict)
oligos.reverse()

unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = sorted(samples_dict.keys())

generate_MATRIX_files(oligos,
                      samples,
                      unit_counts,
                      unit_percents,
                      environment_file_path + '-MATRIX-COUNT',
                      environment_file_path + '-MATRIX-PERCENT')
def _plot_oligo_lines(vectors, oligos, colors_dict):
    """Plot one line per oligo on the current axes; return the `plt.plot` results in order."""
    # NOTE(review): the original indentation was lost; this assumes only the
    # two wide translucent lines are conditional on the oligo count, while
    # the thin line is always drawn (the legend needs one handle per oligo).
    draw_glow = len(oligos) < 50

    plotted = []
    for i, oligo in enumerate(oligos):
        try:
            color = HTMLColorToRGB(colors_dict[oligo])
        except Exception:
            # Missing or unparsable color: fall back to black.
            color = 'black'

        if draw_glow:
            plt.plot(vectors[oligo], color=color, linewidth = 3, alpha = 0.3, zorder = i)
            plt.plot(vectors[oligo], color=color, linewidth = 5, alpha = 0.2, zorder = i)

        plotted.append(plt.plot(vectors[oligo], color=color, linewidth = 1, alpha = 0.9, zorder = i))

    return plotted


def oligotype_distribution_across_samples(samples_dict, colors_dict, output_file = None, legend = False, project_title = None, display = True, oligos = None):
    """Plot each oligotype's normalized distribution across samples.

    Two stacked panels are drawn: the per-sample percents of every oligo
    scaled so that the oligo's maximum is 100 ("MAX Normalized"), and scaled
    so that the oligo's values sum to 100 ("SUM Normalized").

    Args:
        samples_dict: {sample: {oligo: count}}.
        colors_dict: {oligo: color string accepted by HTMLColorToRGB}; when
            None, colors are obtained from `random_colors`.
        output_file: if truthy, the figure is saved to this path.
        legend: if truthy, a legend listing the oligos is drawn on the top panel.
        project_title: optional title suffix.
        display: if truthy, `plt.show()` is attempted (failures are ignored).
        oligos: optional list of oligos to plot; it is plotted in reversed
            order. When None, `get_oligos_sorted_by_abundance` decides.
    """
    samples = sorted(samples_dict.keys())

    if oligos is None:
        oligos = get_oligos_sorted_by_abundance(samples_dict, oligos)
    else:
        # Reverse a copy: the caller's list must not be modified in place.
        oligos = list(reversed(oligos))

    if colors_dict is None:
        colors_dict = random_colors(copy.deepcopy(oligos))

    # Per-sample totals are loop-invariant; compute them once instead of once
    # per (oligo, sample) pair.
    sample_totals = dict((sample, sum(samples_dict[sample].values())) for sample in samples)

    max_normalized_across_samples_vectors = {}
    sum_normalized_across_samples_vectors = {}

    for oligo in oligos:
        percents = []
        for sample in samples:
            total = sample_totals[sample]
            count = samples_dict[sample].get(oligo, 0)
            # A zero total means the sample is empty; report 0% rather than dividing by zero.
            percents.append(count * 100.0 / total if total else 0.0)

        # An oligo absent from every sample (or an empty sample list) has no
        # meaningful scale; emit zeros instead of raising.
        peak = max(percents) if percents else 0.0
        overall = sum(percents)
        max_normalized_across_samples_vectors[oligo] = [p * 100.0 / peak if peak else 0.0 for p in percents]
        sum_normalized_across_samples_vectors[oligo] = [p * 100.0 / overall if overall else 0.0 for p in percents]

    # figure..
    plt.figure(figsize=(20, 10))

    if legend:
        plt.subplots_adjust(left=0.03, bottom = 0.15, top = 0.97, right = 0.80)
    else:
        plt.subplots_adjust(left=0.03, bottom = 0.15, top = 0.97, right = 0.99)

    plt.rcParams.update({'axes.linewidth' : 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    # top panel: MAX normalized
    plt.subplot(2, 1, 1)
    plt.grid(True)

    ind = np.arange(len(samples))
    width = 0.75

    lines = _plot_oligo_lines(max_normalized_across_samples_vectors, oligos, colors_dict)

    plt.ylabel('MAX Normalized', size='large')
    plt.title('Normalized Oligotype Distributions Across Samples %s'
              % (('for "%s"' % project_title) if project_title else ''))

    # Sample names are shown on the bottom panel only.
    plt.xticks(ind, ['' for d in samples], rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax = 100)
    plt.xlim(xmin = -(width) / 2, xmax = len(samples) - 0.5)

    if legend:
        plt.legend([b[0] for b in lines][::-1], oligos[::-1], bbox_to_anchor=(1.01, 1),
                   loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        llines = leg.get_lines()
        frame = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    # bottom panel: SUM normalized
    plt.subplot(2, 1, 2)

    if legend:
        plt.subplots_adjust(left=0.03, bottom = 0.15, top = 0.97, right = 0.80)
    else:
        plt.subplots_adjust(left=0.03, bottom = 0.15, top = 0.97, right = 0.99)

    plt.rcParams.update({'axes.linewidth' : 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    _plot_oligo_lines(sum_normalized_across_samples_vectors, oligos, colors_dict)

    plt.ylabel('SUM Normalized', size='large')
    plt.xticks(ind, samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax = 100)
    plt.xlim(xmin = -(width) / 2, xmax = len(samples) - 0.5)

    if output_file:
        plt.savefig(output_file)

    if display:
        try:
            plt.show()
        except Exception:
            # Showing the figure is best-effort; failures are deliberately ignored.
            pass
def oligotype_distribution_stack_bar(samples_dict, colors_dict, output_file = None, legend = False,
                                     colors_export = None, project_title = None, display = True, oligos = None):
    """Render per-sample oligotype percentages as stacked bars.

    Args:
        samples_dict: {sample: {oligo: count}}.
        colors_dict: {oligo: color string accepted by HTMLColorToRGB}; when
            None, colors are obtained from `random_colors`.
        output_file: if truthy, the figure is saved to this path.
        legend: if truthy, a legend listing the oligos is drawn to the right.
        colors_export: if truthy, path of a TAB-delimited "oligo<TAB>color"
            file to write.
        project_title: optional title suffix.
        display: if truthy, `plt.show()` is attempted (failures are ignored).
        oligos: optional list of oligos to plot; it is plotted in reversed
            order. When None, `get_oligos_sorted_by_abundance` decides.
    """
    samples = sorted(samples_dict.keys())

    if oligos is None:
        oligos = get_oligos_sorted_by_abundance(samples_dict, oligos)
    else:
        # Work on a reversed copy so the caller's list keeps its order.
        oligos = oligos[::-1]

    if colors_dict is None:
        colors_dict = random_colors(copy.deepcopy(oligos))

    # Percent of each oligo within each sample, in `oligos` order.
    samples_oligo_vectors_percent_normalized = {}
    for sample in samples:
        abundances = [samples_dict[sample].get(oligo, 0) for oligo in oligos]
        total_oligos_in_sample = sum(abundances)
        if total_oligos_in_sample:
            vector = [abundance * 100.0 / total_oligos_in_sample for abundance in abundances]
        else:
            # Nothing to normalize by; avoid ZeroDivisionError and draw an empty column.
            vector = [0.0 for _ in abundances]
        samples_oligo_vectors_percent_normalized[sample] = vector

    # figure..
    plt.figure(figsize=(20, 10))

    if legend:
        plt.subplots_adjust(left=0.03, bottom = 0.15, top = 0.97, right = 0.90)
    else:
        plt.subplots_adjust(left=0.03, bottom = 0.15, top = 0.97, right = 0.99)

    N = len(samples)
    ind = np.arange(N)
    width = 0.75
    bars = []

    # Height already stacked for each sample; kept as a running total so the
    # prefix of every vector is not re-summed for every oligo.
    stacked = [0.0] * N

    for i in range(len(oligos)):
        values = [samples_oligo_vectors_percent_normalized[sample][i] for sample in samples]

        try:
            color = HTMLColorToRGB(colors_dict[oligos[i]])
        except Exception:
            # Missing or unparsable color: fall back to black.
            color = 'black'

        p = plt.bar(ind, values, width, bottom=stacked, color=color)
        bars.append(p)

        stacked = [height + value for height, value in zip(stacked, values)]

    if colors_export:
        # The context manager closes the file even when a write raises.
        with open(colors_export, 'w') as colors_file:
            for oligo in oligos:
                # Oligos without an entry are exported as the black they were drawn with.
                colors_file.write('%s\t%s\n' % (oligo, colors_dict.get(oligo, '#000000')))

    plt.ylabel('Oligotype Distribution', size='large')
    plt.title('Stacked Bar Charts of Oligotype Distribution %s'
              % (('for "%s"' % project_title) if project_title else ''))

    plt.xticks(ind+width/2., samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax = 100)
    plt.xlim(xmin = -(width) / 2, xmax = len(samples))

    if legend:
        plt.legend([b[0] for b in bars][::-1], oligos[::-1], bbox_to_anchor=(1.01, 1),
                   loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        llines = leg.get_lines()
        frame = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)

    if display:
        try:
            plt.show()
        except Exception:
            # Showing the figure is best-effort; failures are deliberately ignored.
            pass