Exemple #1
0
def generate_dict(samples_dict, number_of_folds = 15000):
    """Build mean / standard deviation rarefaction curves for every sample.

    For each sample and each taxonomic level the per-OTU counts found in
    ``samples_dict[sample][level]`` are expanded into a list holding one
    entry per sequence. That list is shuffled ``number_of_folds`` times and
    ``get_rarefaction_values`` is evaluated on every shuffle; the resulting
    lists are then summarised position by position.

    Args:
        samples_dict: mapping ``sample -> taxonomic level -> {OTU: count}``.
            Counts are used as list repetition factors, so they have to be
            integers.
        number_of_folds: number of shuffles performed per sample and level.

    Returns:
        dict: ``result[sample][level]`` is a list of ``(mean, stdev)`` tuples.
        Its length is the length of the list ``get_rarefaction_values``
        returned for the first shuffle; it is empty when no shuffle was
        performed (``number_of_folds`` <= 0).
    """
    taxonomic_levels = ['genus', 'family', 'order', 'class', 'phylum']
    samples = helper_functions.sorted_copy(samples_dict.keys())
    rarefaction_dict = {}
    for sample in samples:
        # single-argument print() produces the same output on Python 2 and 3
        print("sample name:  %s" % (sample,))
        rarefaction_dict[sample] = {}
        for taxonomic_level in taxonomic_levels:
            otu_counts = samples_dict[sample][taxonomic_level]

            # one entry per sequence; each OTU is represented by its position
            # in the dictionary, the actual OTU name is irrelevant here
            species_in_the_sample = []
            for otu_index, count in enumerate(otu_counts.values()):
                species_in_the_sample += [otu_index] * count

            print("level:  %s sequences:  %s folds:  %s" % (taxonomic_level, len(species_in_the_sample), number_of_folds))

            rarefaction_values_lists = []
            for _ in range(number_of_folds):
                random.shuffle(species_in_the_sample)
                rarefaction_values_lists.append(get_rarefaction_values(species_in_the_sample))

            # the first fold decides how many positions are summarised;
            # without any fold there is nothing to summarise
            if rarefaction_values_lists:
                number_of_positions = len(rarefaction_values_lists[0])
            else:
                number_of_positions = 0

            means_and_stdev = []
            for position in range(number_of_positions):
                # gather the column once and reuse it for both statistics
                values_at_position = [values[position] for values in rarefaction_values_lists]
                means_and_stdev.append((scipy.mean(values_at_position), scipy.std(values_at_position)))
            rarefaction_dict[sample][taxonomic_level] = means_and_stdev
    return rarefaction_dict
Exemple #2
0
def generate(samples_dict, img_save_path = None, data_save_path = None, type = None, method='simpsons'):
    """Draw a bar chart showing one diversity index value per sample.

    Args:
        samples_dict: mapping ``sample -> taxonomic level -> data``; the
            per-level value is handed to the diversity index function as is.
        img_save_path: file the figure is saved to. When it is not given the
            figure is displayed with ``pylab.show()`` instead.
        data_save_path: optional path of a text file that receives one
            ``sample<TAB>index`` line per sample.
        type: key into ``const.ranks``; the last rank listed for that key is
            the taxonomic level used. (The name shadows the builtin but is
            kept because callers may pass it by keyword.)
        method: ``'simpsons'`` or ``'shannons'``.

    Returns:
        None. An unrecognised ``method`` returns before anything is written
        or plotted.
    """
    taxonomic_level = const.ranks[type][-1]

    samples = helper_functions.sorted_copy(samples_dict.keys())

    if method == 'simpsons':
        diversity_index_function = get_simpsons_diversity_index
        lbl = "Simpson's Sample Diversity Index"
        c = '#ADADEF'
    elif method == 'shannons':
        diversity_index_function = get_shannons_diversity_index
        lbl = "Shannon Diversity Index"
        c = '#66AA66'
    else:
        return None

    samples_diversity_index_list = [diversity_index_function(samples_dict[sample][taxonomic_level]) for sample in samples]

    # store diversity indices in a text file as well
    if data_save_path:
        # the context manager closes the file even if a write fails
        with open(data_save_path, 'w') as f:
            for sample, diversity_index in zip(samples, samples_diversity_index_list):
                f.write("%s\t%f\n" % (sample, diversity_index))

    # figure width grows with the number of samples, kept between 5 and 15
    width = min(max(len(samples) / 5, 5), 15)

    pylab.figure(figsize=(width, 4))

    pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
    pylab.rc('grid', color='0.80', linestyle='-', linewidth=0.1)
    pylab.grid(True)

    pos = pylab.arange(len(samples))+.5
    pylab.bar(pos, samples_diversity_index_list, align='center', color=c, linewidth=0.1)
    # invisible marker at (0, 1); it still takes part in axis autoscaling
    pylab.plot([0], [1], '^', visible = False)
    pylab.xticks(pos, samples, rotation=90, size='xx-small')
    pylab.xlim(xmax=len(samples))
    pylab.yticks(size='xx-small')

    pylab.ylabel(lbl, fontsize = 12)

    if img_save_path:
        pylab.savefig(img_save_path)
    else:
        pylab.show()
Exemple #3
0
def generate(samples_dict, otu_t_p_tuples_dict, sample_map_file_path, rank = "genus", save_dir = None, is_transparent = False, real_abundance = False):
    """Plot, for every OTU listed for ``rank``, its abundance per sample group.

    One figure is produced per OTU: every sample is drawn as a dot (jittered
    along the x axis) in the column of its group, next to a box plot of the
    group. For every OTU a line ``otu,presence_1,presence_2,...`` is printed,
    giving per group (in sorted group order) the percentage of samples whose
    value is greater than 0.01.

    Args:
        samples_dict: mapping ``sample -> rank -> {OTU: abundance}``. In
            percentage mode ``samples_dict[sample]['tr']`` is used as the
            denominator (presumably the total read count of the sample --
            confirm against the code that builds this dictionary).
        otu_t_p_tuples_dict: mapping ``rank -> sequence of tuples``; only the
            first element of every tuple (the OTU) is used here.
        sample_map_file_path: sample map file; groups and group colors are
            obtained from it through ``helper_functions``.
        rank: taxonomic rank to plot.
        save_dir: directory receiving a ``.png`` and a ``.txt`` file per OTU.
            When it is not given every figure is shown with ``pylab.show()``.
        is_transparent: passed to ``pylab.savefig`` as ``transparent``.
        real_abundance: plot raw abundances on a log scale instead of
            percentages.

    Returns:
        None.
    """
    sample_groups, group_colors = helper_functions.get_groups_colors_from_sample_map_file(sample_map_file_path)

    if real_abundance:
        # when working with real abundance the upper limit of the y axis is
        # the smallest power of 10 that is larger than the largest abundance
        max_abundance = helper_functions.get_largest_abundance_number_in_all_samples(samples_dict)
        max_y = 1
        while max_y <= max_abundance:
            max_y *= 10

    for otu in [t[0] for t in otu_t_p_tuples_dict[rank]]:
        txt_lines = []
        plot_dict = {}
        for group in sample_groups:
            plot_dict[group] = []
            for sample in sample_groups[group]:
                otus_in_sample = samples_dict[sample][rank]

                # value stays None when there is nothing to report for the sample
                value = None
                if otu in otus_in_sample:
                    if real_abundance:
                        value = otus_in_sample[otu]
                    elif samples_dict[sample]['tr'] != 0:
                        value = otus_in_sample[otu] * 100.0 / samples_dict[sample]['tr']
                    # a zero denominator cannot yield a percentage; the sample
                    # is recorded as 0.0, exactly like a sample lacking the OTU

                if value is None:
                    plot_dict[group].append([0.0, sample])
                    txt_lines.append('%s\t%s\t0.0\n' % (group, sample))
                else:
                    plot_dict[group].append([value, sample])
                    txt_lines.append('%s\t%s\t%f\n' % (group, sample, value))
        txt_output = ''.join(txt_lines)

        fig = pylab.figure(figsize=(3, 6))
        if real_abundance:
            ax = pylab.axes()

        pylab.rcParams['axes.titlesize'] = 12.0
        pylab.rcParams['font.size'] = 8.0

        pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
        pylab.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
        pylab.grid(True)
        pylab.title(otu)

        keys = helper_functions.sorted_copy(plot_dict.keys())

        presence = []

        for i, key in enumerate(keys):
            points = plot_dict[key]

            if not points:
                # a group without samples has no presence and nothing to draw
                presence.append('%.3f' % 0.0)
                continue

            # presence has to be measured before zeros are lifted to 1.0 for
            # the log scale, otherwise every sample would count as present
            present_count = len([point for point in points if point[0] > 0.01])
            presence.append('%.3f' % (present_count * 100.0 / len(points)))

            if real_abundance:
                # if abundance is below 1, make it 1 so it looks better on log scale
                for point in points:
                    if point[0] < 1:
                        point[0] = 1.0

            abundances = [point[0] for point in points]

            # scattering the samples in X axis, so it would be easier to see them when there are a bunch of them
            # at the same spot. instead of this, [i] * len(points) could be used to plot them.
            x_positions = [((1 - (r.gauss(100, 3) / 100)) + i) for _ in points]

            pylab.plot(x_positions, abundances, 'o', color = group_colors[key], ms = 10, mew = 0.6, alpha = .5)

            b = pylab.boxplot(abundances, positions=[i + 0.35], sym=',', widths=0.2)
            pylab.setp(b['medians'], color=group_colors[key])
            for part in ('whiskers', 'boxes', 'fliers', 'caps'):
                pylab.setp(b[part], color='black', alpha=0.3)

        if real_abundance:
            ax.set_yscale('log')
            formatter = pylab.FuncFormatter(log_10_fix)
            ax.yaxis.set_major_formatter(formatter)
            pylab.ylim(ymin=1e-1, ymax=max_y)
        else:
            pylab.ylim(ymin=-5, ymax=105)
            pylab.yticks(pylab.arange(0, 101, 10))

        pylab.xlim(xmin=-0.75, xmax=len(plot_dict) - 0.15)
        pylab.xticks(pylab.arange(len(plot_dict)), keys, rotation=90)

        print('%s,%s' % (otu, ','.join(presence)))

        if not save_dir:
            pylab.show()
        else:
            suffix = '_real_abundance' if real_abundance else ''
            base_path = os.path.join(save_dir, rank + "_" + helper_functions.get_fs_compatible_name(otu) + suffix)
            pylab.savefig(base_path + '.png', transparent = is_transparent)
            # the context manager makes sure the text file is flushed and closed
            with open(base_path + '.txt', 'w') as txt_file:
                txt_file.write(txt_output)

        # clean memory; clearing the figure is best effort only
        try:
            fig.clf()
        except Exception:
            pass
        pylab.close('all')
Exemple #4
0
def generate_for_sample_map(samples_dict, sample_map_file_path, save_dir = None, type = None, method='simpsons'):
    """Plot the diversity index of every sample, arranged by sample group.

    Every sample is drawn as a dot (jittered along the x axis) in the column
    of its group, next to a box plot of the group.

    Args:
        samples_dict: mapping ``sample -> taxonomic level -> data``; the
            per-level value is handed to the diversity index function as is.
        sample_map_file_path: sample map file; groups and group colors are
            obtained from it through ``helper_functions``.
        save_dir: directory in which ``<method>.png`` is saved. When it is
            not given the figure is shown with ``pylab.show()``.
        type: key into ``const.ranks``; the last rank listed for that key is
            the taxonomic level used. (The name shadows the builtin but is
            kept because callers may pass it by keyword.)
        method: ``'simpsons'`` or ``'shannons'``.

    Returns:
        None.

    Raises:
        ValueError: if ``method`` is not one of the supported names, or if
            there is no value to plot for some group.
    """
    taxonomic_level = const.ranks[type][-1]

    if method == 'simpsons':
        diversity_index_function = get_simpsons_diversity_index
        title = "Simpson's Diversity Index"
    elif method == 'shannons':
        diversity_index_function = get_shannons_diversity_index
        title = "Shannon Diversity Index"
    else:
        # without this check the failure would surface later as an opaque
        # "max() arg is an empty sequence" ValueError
        raise ValueError("unknown diversity index method: %r" % (method,))

    sample_groups, group_colors = helper_functions.get_groups_colors_from_sample_map_file(sample_map_file_path)

    plot_dict = {}
    for group in sample_groups:
        plot_dict[group] = [diversity_index_function(samples_dict[sample][taxonomic_level]) for sample in sample_groups[group]]

    # leave 10% of head room above the largest value
    max_val = max([max(l) for l in plot_dict.values()])
    max_val = max_val + max_val * 10 / 100

    fig = pylab.figure(figsize=(3.5, 6.5))

    pylab.rcParams['axes.titlesize'] = 12.0
    pylab.rcParams['font.size'] = 8.0

    pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
    pylab.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
    pylab.grid(True)

    pylab.title(title)

    keys = helper_functions.sorted_copy(plot_dict.keys())

    for i, key in enumerate(keys):
        values = plot_dict[key]

        # scattering the samples in X axis, so it would be easier to see them when there are a bunch of them
        # at the same spot. instead of this, [i] * len(values) could be used to plot them.
        x_positions = [((1 - (r.gauss(100, 3) / 100)) + i) for _ in values]

        pylab.plot(x_positions, values, 'o', color = group_colors[key], ms = 10, mew = 0.6, alpha = .5)

        b = pylab.boxplot(values, positions=[i + 0.35], sym=',', widths=0.2)
        pylab.setp(b['medians'], color=group_colors[key])
        for part in ('whiskers', 'boxes', 'fliers', 'caps'):
            pylab.setp(b[part], color='black', alpha=0.3)

    # axis limits and ticks do not depend on the group, so they are set once
    # after the last box plot has been drawn
    pylab.ylim(ymin=-max_val * 10 / 100, ymax=max_val)
    pylab.xlim(xmin=-0.75, xmax=len(plot_dict) - 0.15)
    pylab.xticks(pylab.arange(len(plot_dict)), keys, rotation=90)

    if not save_dir:
        pylab.show()
    else:
        pylab.savefig(os.path.join(save_dir, method + ".png"))

    # clean memory; clearing the figure is best effort only
    try:
        fig.clf()
    except Exception:
        pass
    pylab.close('all')