def generate_dict(samples_dict, number_of_folds = 15000):
    """Build per-sample, per-rank rarefaction statistics by repeated shuffling.

    For every sample (visited in the order returned by
    helper_functions.sorted_copy) and for each rank in a fixed list, the
    counts stored in ``samples_dict[sample][rank]`` are expanded into a flat
    list that holds one integer label per counted unit.  That list is
    shuffled in place ``number_of_folds`` times and passed to
    get_rarefaction_values() after every shuffle.  The values found at the
    same position of every run are then summarised as a
    ``(mean, standard deviation)`` tuple.

    Arguments:
        samples_dict    -- mapping indexed as samples_dict[sample][rank][otu],
                           whose innermost values are used as repeat counts
        number_of_folds -- how many shuffles are performed per sample and rank

    Returns:
        dict indexed as result[sample][rank], each entry being a list of
        (mean, stdev) tuples with one tuple per rarefaction position.

    Progress is reported on stdout.
    """
    ranks = ['genus', 'family', 'order', 'class', 'phylum']
    ordered_samples = helper_functions.sorted_copy(samples_dict.keys())
    curves = {}

    for sample in ordered_samples:
        print("%s %s" % ("sample name: ", sample))
        curves[sample] = {}

        for rank in ranks:
            counts = samples_dict[sample][rank]

            # Expand the counts: the label is the position of the OTU in the
            # key listing, repeated as many times as that OTU was counted.
            pool = []
            for label, otu in enumerate(counts.keys()):
                pool.extend([label] * counts[otu])

            print("%s %s %s %s %s %s" % ("level: ", rank, "sequences: ", len(pool), "folds: ", number_of_folds))

            # Every fold re-shuffles the same list in place before measuring it.
            runs = []
            for _ in range(number_of_folds):
                random.shuffle(pool)
                runs.append(get_rarefaction_values(pool))

            # Summarise position by position across all runs; the first run
            # decides how many positions there are.
            summary = []
            for position in range(len(runs[0])):
                column = [run[position] for run in runs]
                summary.append((scipy.mean(column), scipy.std(column)))

            curves[sample][rank] = summary

    return curves
def generate(samples_dict, img_save_path = None, data_save_path = None, type = None, method='simpsons'):
    """Draw a bar chart with one diversity-index bar per sample.

    The index of a sample is computed from ``samples_dict[sample][level]``,
    where ``level`` is the last entry of ``const.ranks[type]``.  Samples are
    plotted in the order returned by helper_functions.sorted_copy.

    Arguments:
        samples_dict   -- mapping indexed as samples_dict[sample][level]
        img_save_path  -- when truthy the figure is saved to this path,
                          otherwise it is shown interactively
        data_save_path -- when truthy the indices are also written to this
                          path, one "sample<TAB>index" line per sample
        type           -- key into const.ranks (the name shadows the builtin,
                          but it is part of the public signature)
        method         -- 'simpsons' or 'shannons'

    Returns:
        None.  An unrecognised ``method`` returns before anything is written
        or drawn.
    """
    taxonomic_level = const.ranks[type][-1]
    samples = helper_functions.sorted_copy(samples_dict.keys())

    if method == 'simpsons':
        index_function = get_simpsons_diversity_index
        lbl = "Simpson's Sample Diversity Index"
        bar_color = '#ADADEF'
    elif method == 'shannons':
        index_function = get_shannons_diversity_index
        lbl = "Shannon Diversity Index"
        bar_color = '#66AA66'
    else:
        return None

    samples_diversity_index_list = [index_function(samples_dict[sample][taxonomic_level]) for sample in samples]

    # store diversity indices in a text file as well; the context manager
    # closes the handle even if a write fails half way through.
    if data_save_path:
        with open(data_save_path, 'w') as f:
            for sample, value in zip(samples, samples_diversity_index_list):
                f.write("%s\t%f\n" % (sample, value))

    # Figure width grows with the number of samples but is kept within [5, 15].
    width = min(max(len(samples) / 5, 5), 15)

    pylab.figure(figsize=(width, 4))

    pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
    pylab.rc('grid', color='0.80', linestyle='-', linewidth=0.1)
    pylab.grid(True)

    pos = pylab.arange(len(samples))+.5
    pylab.bar(pos, samples_diversity_index_list, align='center', color=bar_color, linewidth=0.1)
    # Invisible marker at (0, 1) -- presumably there so the axis limits always
    # cover that point; TODO confirm the intent.
    pylab.plot([0], [1], '^', visible = False)

    pylab.xticks(pos, samples, rotation=90, size='xx-small')
    pylab.xlim(xmax=len(samples))
    pylab.yticks(size='xx-small')
    pylab.ylabel(lbl, fontsize = 12)

    if img_save_path:
        pylab.savefig(img_save_path)
    else:
        pylab.show()
def generate(samples_dict, otu_t_p_tuples_dict, sample_map_file_path, rank = "genus", save_dir = None, is_transparent = False, real_abundance = False):
    """Plot every OTU listed for `rank` as per-group scatter + box plots.

    One figure is produced per OTU, the OTUs being the first element of each
    tuple in ``otu_t_p_tuples_dict[rank]``.  Groups and their colors come from
    helper_functions.get_groups_colors_from_sample_map_file().  Each group
    gets a horizontally jittered scatter of its samples and a narrow box plot
    next to it.

    Arguments:
        samples_dict         -- mapping indexed as samples_dict[sample][rank][otu];
                                samples_dict[sample]['tr'] is used as the
                                denominator for percentages (presumably the
                                total read count of the sample)
        otu_t_p_tuples_dict  -- mapping rank -> sequence of tuples whose first
                                element is the OTU name
        sample_map_file_path -- passed to the sample-map helper
        rank                 -- rank to plot
        save_dir             -- when falsy each figure is shown; otherwise a
                                PNG and a tab-separated TXT are written there
        is_transparent       -- forwarded to savefig(transparent=...)
        real_abundance       -- plot raw values on a log axis instead of
                                percentages on a 0-100 axis

    Side effects:
        Prints one "otu,presence,..." line per OTU, where presence is the
        percentage of samples in each group whose value exceeds 0.01.
        Closes all pylab figures after every OTU.
    """
    sample_groups, group_colors = helper_functions.get_groups_colors_from_sample_map_file(sample_map_file_path)

    if real_abundance:
        # The log axis is capped at the smallest power of 10 that is strictly
        # larger than the largest abundance found in any sample.
        max_abundance = helper_functions.get_largest_abundance_number_in_all_samples(samples_dict)
        max_y = 1
        while max_y <= max_abundance:
            max_y *= 10

    for otu in [t[0] for t in otu_t_p_tuples_dict[rank]]:
        txt_lines = []
        plot_dict = {}

        for group in sample_groups.keys():
            plot_dict[group] = []
            for sample in sample_groups[group]:
                counts = samples_dict[sample][rank]

                # `value` stays None when there is nothing to report: the OTU
                # is absent from the sample, or the percentage denominator is 0.
                value = None
                if otu in counts:
                    if real_abundance:
                        value = counts[otu]
                    elif samples_dict[sample]['tr'] != 0:
                        value = counts[otu] * 100.0 / samples_dict[sample]['tr']

                if value is None:
                    # The zero-denominator case used to append to an undefined
                    # name (`otu_vectors`) and crash; it is now recorded as 0.0
                    # exactly like an absent OTU.
                    plot_dict[group].append([0.0, sample])
                    txt_lines.append('%s\t%s\t0.0\n' % (group, sample))
                else:
                    plot_dict[group].append([value, sample])
                    txt_lines.append('%s\t%s\t%f\n' % (group, sample, value))

        txt_output = ''.join(txt_lines)

        fig = pylab.figure(figsize=(3, 6))
        if real_abundance:
            ax = pylab.axes()

        pylab.rcParams['axes.titlesize'] = 12.0
        pylab.rcParams['font.size'] = 8.0
        pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
        pylab.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
        pylab.grid(True)
        pylab.title(otu)

        keys = helper_functions.sorted_copy(plot_dict.keys())
        presence = []

        for i, key in enumerate(keys):
            values = [t[0] for t in plot_dict[key]]

            # Presence must be measured on the untouched values: once zeros are
            # lifted to 1.0 for the log axis every sample would count as present.
            presence.append('%.3f' % (len([v for v in values if v > 0.01]) * 100.0 / len(values)))

            if real_abundance:
                # anything below 1 is drawn at 1 so it looks better on log scale
                values = [1.0 if v < 1 else v for v in values]

            # scattering the samples on the X axis, so it is easier to see them
            # when a bunch of them share the same spot. instead of this,
            # [i] * len(values) could be used to plot them.
            x_positions = [(1 - (r.gauss(100, 3) / 100)) + i for _ in values]

            pylab.plot(x_positions, values, 'o', color = group_colors[key], ms = 10, mew = 0.6, alpha = .5)

            b = pylab.boxplot(values, positions=[i + 0.35], sym=',', widths=0.2)
            pylab.setp(b['medians'], color=group_colors[key])
            for part in ('whiskers', 'boxes', 'fliers', 'caps'):
                pylab.setp(b[part], color='black', alpha=0.3)

        if real_abundance:
            ax.set_yscale('log')
            formatter = pylab.FuncFormatter(log_10_fix)
            ax.yaxis.set_major_formatter(formatter)
            pylab.xlim(xmin=-0.75, xmax=len(plot_dict) - 0.15)
            pylab.xticks(pylab.arange(len(plot_dict)), keys, rotation=90)
            pylab.ylim(ymin=1e-1, ymax=max_y)
        else:
            pylab.ylim(ymin=-5, ymax=105)
            pylab.xlim(xmin=-0.75, xmax=len(plot_dict) - 0.15)
            pylab.xticks(pylab.arange(len(plot_dict)), keys, rotation=90)
            pylab.yticks(pylab.arange(0, 101, 10))

        print('%s,%s' % (otu, ','.join(presence)))

        if not save_dir:
            pylab.show()
        else:
            suffix = '_real_abundance' if real_abundance else ''
            base_path = os.path.join(save_dir, rank + "_" + helper_functions.get_fs_compatible_name(otu) + suffix)
            pylab.savefig(base_path + '.png', transparent = is_transparent)
            # `with` guarantees the text file is flushed and closed.
            with open(base_path + '.txt', 'w') as txt_file:
                txt_file.write(txt_output)

        # clean memory; clearing the figure is best effort and must not stop
        # the remaining OTUs from being plotted.
        try:
            fig.clf()
        except Exception:
            pass
        pylab.close('all')
def generate_for_sample_map(samples_dict, sample_map_file_path, save_dir = None, type = None, method='simpsons'):
    """Plot a diversity index per sample, grouped by the groups of a sample map.

    Every group returned by
    helper_functions.get_groups_colors_from_sample_map_file() gets a
    horizontally jittered scatter of its samples' indices plus a narrow box
    plot next to it.  The index of a sample is computed from
    ``samples_dict[sample][level]``, where ``level`` is the last entry of
    ``const.ranks[type]``.

    Arguments:
        samples_dict         -- mapping indexed as samples_dict[sample][level]
        sample_map_file_path -- passed to the sample-map helper
        save_dir             -- when falsy the figure is shown; otherwise it is
                                saved as "<method>.png" inside this directory
        type                 -- key into const.ranks (the name shadows the
                                builtin, but it is part of the public signature)
        method               -- 'simpsons' or 'shannons'

    Raises:
        ValueError -- if ``method`` is not one of the two supported names, or
                      (from max()) if there are no groups or a group is empty.
    """
    # Validate first: an unknown method used to leave every group empty and
    # fail later with an unrelated "max() arg is an empty sequence" error.
    if method == 'simpsons':
        plot_title = "Simpson's Diversity Index"
    elif method == 'shannons':
        plot_title = "Shannon Diversity Index"
    else:
        raise ValueError("unknown diversity method %r (expected 'simpsons' or 'shannons')" % (method,))

    taxonomic_level = const.ranks[type][-1]
    sample_groups, group_colors = helper_functions.get_groups_colors_from_sample_map_file(sample_map_file_path)

    if method == 'simpsons':
        index_function = get_simpsons_diversity_index
    else:
        index_function = get_shannons_diversity_index

    plot_dict = {}
    for group in sample_groups:
        plot_dict[group] = [index_function(samples_dict[sample][taxonomic_level]) for sample in sample_groups[group]]

    # Leave 10% headroom above the largest index (and the same amount below 0).
    max_val = max([max(l) for l in plot_dict.values()])
    max_val = max_val + max_val * 10 / 100

    fig = pylab.figure(figsize=(3.5, 6.5))

    pylab.rcParams['axes.titlesize'] = 12.0
    pylab.rcParams['font.size'] = 8.0
    pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
    pylab.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
    pylab.grid(True)
    pylab.title(plot_title)

    keys = helper_functions.sorted_copy(plot_dict.keys())

    for i, key in enumerate(keys):
        values = plot_dict[key]

        # scattering the samples on the X axis, so it is easier to see them
        # when a bunch of them share the same spot. instead of this,
        # [i] * len(values) could be used to plot them.
        x_positions = [(1 - (r.gauss(100, 3) / 100)) + i for _ in values]

        pylab.plot(x_positions, values, 'o', color = group_colors[key], ms = 10, mew = 0.6, alpha = .5)

        b = pylab.boxplot(values, positions=[i + 0.35], sym=',', widths=0.2)
        pylab.setp(b['medians'], color=group_colors[key])
        for part in ('whiskers', 'boxes', 'fliers', 'caps'):
            pylab.setp(b[part], color='black', alpha=0.3)

    pylab.ylim(ymin=-max_val * 10 / 100, ymax=max_val)
    pylab.xlim(xmin=-0.75, xmax=len(plot_dict) - 0.15)
    pylab.xticks(pylab.arange(len(plot_dict)), keys, rotation=90)

    if not save_dir:
        pylab.show()
    else:
        pylab.savefig(os.path.join(save_dir, method + ".png"))

    # clean memory; clearing the figure is best effort.
    try:
        fig.clf()
    except Exception:
        pass
    pylab.close('all')