예제 #1
0
    def get_sample_map_instances(self):
        """Send every sample map instance of an analysis along with its metadata.

        Reads ``analysis_id`` from ``self.request_dict`` and answers through
        ``self.write_socket`` with a dict holding:

        * ``response``: always ``'OK'``
        * ``instances``: the instances of the analysis, as ordered by
          ``sorted_copy``
        * ``meta``: for each instance its ``name`` (``"Noname"`` when the name
          file is empty or whitespace only), ``sample_groups`` and
          ``group_colors``
        """
        analysis_id = self.request_dict['analysis_id']

        p = Meta(analysis_id)

        instances = framework.tools.helper_functions.sorted_copy(p.dirs.get_sample_map_instances())

        meta = {}

        for instance in instances:
            # presumably repoints the paths in p.files to this instance; the
            # paths read below are taken from p.files right after this call
            p.dirs.change_current_sample_map_instance(p.files, instance)

            # read the name inside a context manager so the handle is closed
            # right away instead of leaking one open file per instance
            with open(p.files.sample_map_name_file_path) as name_file:
                name = name_file.read().strip()

            sample_groups, group_colors = helper_functions.get_groups_colors_from_sample_map_file(p.files.sample_map_file_path)
            meta[instance] = {'name': name or "Noname",
                              'sample_groups': sample_groups,
                              'group_colors': group_colors}

        self.write_socket({'response': 'OK', 'instances': instances, 'meta': meta })
예제 #2
0
    def get_otu_t_p_tuples(self):
        """Send the stored OTU t/p tuples of one sample map instance.

        Reads ``analysis_id`` and ``instance`` (both required) and ``rank``
        (optional) from ``self.request_dict``, then deserializes the tuples
        dict stored for that instance.

        When a truthy ``rank`` is given, only ``otu_t_p_tuple_dict[rank]`` is
        sent, under the key ``otu_t_p_tuple_list``; a rank that is not in the
        stored dict raises ``KeyError``. Otherwise the whole dict is sent under
        the key ``otu_t_p_tuple_dict``.
        """
        analysis_id = self.request_dict['analysis_id']
        instance = self.request_dict['instance']
        # .get() replaces the deprecated has_key() check; a missing rank is None
        rank = self.request_dict.get('rank')

        p = Meta(analysis_id)

        p.dirs.change_current_sample_map_instance(p.files, instance)

        # the sample map file itself is not parsed here: nothing in the
        # response depends on sample groups or group colors
        otu_t_p_tuple_dict = DeserializeFromFile(p.files.sample_map_otu_t_p_tuples_dict_file_path)

        if rank:
            self.write_socket({'response': 'OK', 'otu_t_p_tuple_list': otu_t_p_tuple_dict[rank] })
        else:
            self.write_socket({'response': 'OK', 'otu_t_p_tuple_dict': otu_t_p_tuple_dict })
예제 #3
0
def generate(samples_dict, otu_t_p_tuples_dict, sample_map_file_path, rank = "genus", save_dir = None, is_transparent = False, real_abundance = False):
    """Draw one dot plot (with box plots) per OTU, comparing the sample groups.

    A figure is produced for every OTU listed in ``otu_t_p_tuples_dict[rank]``.
    Each sample group of the sample map gets one column holding a jittered dot
    per sample and a box plot next to the dots. For every OTU a line of the form
    ``<otu>,<presence of group 1>,<presence of group 2>,...`` is printed, where
    presence is the percentage of samples in the group whose value is above
    0.01 (groups are in the order given by ``sorted_copy``).

    Parameters:
        samples_dict: ``samples_dict[sample][rank]`` maps an OTU to its
            abundance in that sample; ``samples_dict[sample]['tr']`` is the
            denominator used for percent abundance (presumably the total
            number of reads -- confirm against the caller).
        otu_t_p_tuples_dict: ``otu_t_p_tuples_dict[rank]`` is a sequence of
            tuples whose first item is the OTU name.
        sample_map_file_path: sample map file the groups and group colors are
            read from.
        rank: key selecting the taxonomic level to plot.
        save_dir: when falsy the figures are shown with ``pylab.show()``;
            otherwise a ``.png`` and a tab separated ``.txt`` file are written
            into this directory for every OTU.
        is_transparent: passed to ``pylab.savefig`` as ``transparent``.
        real_abundance: plot raw abundance on a log scaled y axis instead of
            percent abundance on a 0-100 axis. Output files get a
            ``_real_abundance`` suffix in this mode.
    """
    sample_groups, group_colors = helper_functions.get_groups_colors_from_sample_map_file(sample_map_file_path)

    if real_abundance:
        # the y axis ends at the smallest power of 10 that is larger than the
        # largest abundance found in any sample
        max_abundance = helper_functions.get_largest_abundance_number_in_all_samples(samples_dict)
        max_y = 1
        while max_y <= max_abundance:
            max_y *= 10

    file_suffix = '_real_abundance' if real_abundance else ''

    for otu in [t[0] for t in otu_t_p_tuples_dict[rank]]:
        txt_lines = []
        plot_dict = {}
        for group in sample_groups.keys():
            plot_dict[group] = []
            for sample in sample_groups[group]:
                abundances = samples_dict[sample][rank]

                # value stays None when there is nothing to report for the
                # sample: the OTU is absent, or the sample has a zero
                # denominator in percent mode (that case used to crash by
                # appending to a list that does not exist in this function)
                if otu not in abundances:
                    value = None
                elif real_abundance:
                    value = abundances[otu]
                elif samples_dict[sample]['tr'] == 0:
                    value = None
                else:
                    value = abundances[otu] * 100.0 / samples_dict[sample]['tr']

                if value is None:
                    plot_dict[group].append([0.0, sample])
                    txt_lines.append('%s\t%s\t0.0\n' % (group, sample))
                else:
                    plot_dict[group].append([value, sample])
                    txt_lines.append('%s\t%s\t%f\n' % (group, sample, value))

        fig = pylab.figure(figsize=(3, 6))
        if real_abundance:
            ax = pylab.axes()

        pylab.rcParams['axes.titlesize'] = 12.0
        pylab.rcParams['font.size'] = 8.0

        pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
        pylab.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
        pylab.grid(True)
        pylab.title(otu)

        keys = helper_functions.sorted_copy(plot_dict.keys())

        presence = []

        for i, key in enumerate(keys):
            points = plot_dict[key]

            if not points:
                # a group without samples has nothing to draw, and computing
                # its presence would divide by zero
                presence.append('%.3f' % 0.0)
                continue

            # presence has to be computed before the log scale adjustment
            # below, otherwise every sample would count as present
            present_count = len([t for t in points if t[0] > 0.01])
            presence.append('%.3f' % (present_count * 100.0 / len(points)))

            if real_abundance:
                # values below 1 are raised to 1 so they look better on the
                # log scale
                for point in points:
                    if point[0] < 1:
                        point[0] = 1.0

            values = [t[0] for t in points]

            # scattering the samples on the x axis, so it is easier to see
            # them when a bunch of them are at the same spot. instead of this,
            # [i] * len(values) could be used to plot them on a single line.
            x_positions = [((1 - (r.gauss(100, 3) / 100)) + i) for _ in values]

            pylab.plot(x_positions, values, 'o', color = group_colors[key], ms = 10, mew = 0.6, alpha = .5)

            b = pylab.boxplot(values, positions=[i + 0.35], sym=',', widths=0.2)
            pylab.setp(b['medians'], color=group_colors[key])
            pylab.setp(b['whiskers'], color='black', alpha=0.3)
            pylab.setp(b['boxes'], color='black', alpha=0.3)
            pylab.setp(b['fliers'], color='black', alpha=0.3)
            pylab.setp(b['caps'], color='black', alpha=0.3)

        if real_abundance:
            ax.set_yscale('log')
            ax.yaxis.set_major_formatter(pylab.FuncFormatter(log_10_fix))
            pylab.ylim(ymin=1e-1, ymax=max_y)
        else:
            pylab.ylim(ymin=-5, ymax=105)
            pylab.yticks(pylab.arange(0, 101, 10))

        # x axis set up is the same for both modes; it is done after the
        # plotting loop so that it is the last thing touching the x axis
        pylab.xlim(xmin=-0.75, xmax=len(plot_dict) - 0.15)
        pylab.xticks(pylab.arange(len(plot_dict)), keys, rotation=90)

        # single parenthesized argument: same output as a print statement on
        # python 2, and still valid syntax on python 3
        print('%s,%s' % (otu, ','.join(presence)))

        if not save_dir:
            pylab.show()
        else:
            base_path = os.path.join(save_dir, rank + "_" + helper_functions.get_fs_compatible_name(otu) + file_suffix)
            pylab.savefig(base_path + '.png', transparent = is_transparent)
            # the context manager makes sure the text output is flushed and
            # the handle is closed before moving on to the next OTU
            with open(base_path + '.txt', 'w') as txt_file:
                txt_file.write(''.join(txt_lines))

        # clean memory; clearing the figure is best effort, closing all
        # figures right after is what actually releases them
        try:
            fig.clf()
        except Exception:
            pass
        pylab.close('all')
예제 #4
0
def get_t_p_values_dict_for_subset(samples_dict, otu_library, sample_map_file_path, ranks = None, real_abundance = False):
    """Return, for every rank, the OTUs of a sample subset ordered for display.

    For each rank the OTUs found in `otu_library` are ordered by the largest
    per-group mean abundance, from the highest to the lowest. Storing this
    order helps to decide in what order dot plots should be shown to the
    researcher.

    OTUs that are present in the otu library but have no abundance within the
    given subset of samples are discarded for that subset.

    When the sample map has exactly two groups (and enough samples, see
    below), a t-test is run on the two groups for every OTU and its results
    are stored with the OTU.

    Parameters:
        samples_dict: ``samples_dict[sample][rank]`` maps an OTU to its
            abundance in that sample; ``samples_dict[sample]['tr']`` is the
            denominator used for percent abundance (presumably the total
            number of reads -- confirm against the caller).
        otu_library: sequence of entries indexable by rank position, phylum
            first and genus last (according to the note in the code below).
        sample_map_file_path: sample map file the sample groups are read from.
        ranks: rank names, in the opposite order of the columns of
            `otu_library`. The list is not modified. ``None`` or an empty
            list yields an empty result.
        real_abundance: compare raw abundances instead of percent abundances.

    Returns:
        dict mapping each rank to a list of
        ``(otu, fs_compatible_otu_name, t, p)`` tuples, where ``t`` and ``p``
        are strings formatted with two decimals, or ``None`` when no t-test
        result is available.
    """
    otu_t_p_tuples_dict = {}

    if not ranks:
        # nothing to do; this also covers the ranks=None default, which used
        # to fail on ranks.reverse()
        return otu_t_p_tuples_dict

    # because the way we create otu_library, phylum is the first and genus is
    # the last. opposite of what we have in constants. a reversed copy is used
    # so the list owned by the caller keeps its order.
    ranks = list(reversed(ranks))

    # the sample map does not depend on the rank, reading it once is enough
    sample_groups, _group_colors = helper_functions.get_groups_colors_from_sample_map_file(sample_map_file_path)

    group_sizes = [len(sample_groups[group]) for group in sample_groups.keys()]

    # has more than one sample in at least one group (if every group has only
    # one sample t-test would fail). every vector built below has exactly one
    # value per sample, so group sizes are enough to decide this.
    has_enough_samples = sum([size - 1 for size in group_sizes]) > 0

    # t-test is only done for exactly two groups; a group without any samples
    # cannot be tested either
    use_t_test = len(sample_groups) == 2 and has_enough_samples and min(group_sizes) > 0

    for rank_index, rank in enumerate(ranks):
        scored_otus = []

        for otu in set(o[rank_index] for o in otu_library):
            otu_vectors = {}
            for group in sample_groups.keys():
                vector = []
                for sample in sample_groups[group]:
                    abundances = samples_dict[sample][rank]
                    if otu not in abundances:
                        vector.append(0.0)
                    elif real_abundance:
                        vector.append(abundances[otu])
                    elif samples_dict[sample]['tr'] == 0:
                        vector.append(0.0)
                    else:
                        vector.append(abundances[otu] * 100.0 / samples_dict[sample]['tr'])
                otu_vectors[group] = vector

            # skip OTUs that are not present in any group of this subset
            if not sum([sum(v) for v in otu_vectors.values()]) > 0.0:
                continue

            sorting_assist = max([numpy.mean(v) for v in otu_vectors.values()])

            if use_t_test:
                vectors = list(otu_vectors.values())

                # t-test fails when there is no variance. ex,
                # t_test([5.0,5.0], [0.0,0.0]) is None, None. adding a very
                # very small number to the first value of each vector seemed
                # okay.
                for vector in vectors:
                    vector[0] += r.random() * 1e-6

                t, p = t_test(vectors[0], vectors[1])
                scored_otus.append((abs(sorting_assist), otu, t, p))
            else:
                # not a two-group comparison, just return nothing for now.
                # ANOVA could be used for the rest
                scored_otus.append((sorting_assist, otu, None, None))

        # sorting the list based on the temporary sorting assist value
        scored_otus.sort(reverse = True)

        otu_t_p_tuples_dict[rank] = []

        for _assist, otu, t, p in scored_otus:
            otu_fs = helper_functions.get_fs_compatible_name(otu)
            if use_t_test and t is not None and p is not None:
                otu_t_p_tuples_dict[rank].append((otu, otu_fs, "%.2f" % t, "%.2f" % p))
            else:
                # also reached when t_test could not come up with a result
                otu_t_p_tuples_dict[rank].append((otu, otu_fs, None, None))

    return otu_t_p_tuples_dict
예제 #5
0
def generate_for_sample_map(samples_dict, sample_map_file_path, save_dir = None, type = None, method='simpsons'):
    """Plot the diversity index of every sample, grouped by sample group.

    Each sample group of the sample map gets one column holding a jittered dot
    per sample and a box plot next to the dots.

    Parameters:
        samples_dict: ``samples_dict[sample][level]`` is what the diversity
            index is computed from, where ``level`` is the last item of
            ``const.ranks[type]``.
        sample_map_file_path: sample map file the groups and group colors are
            read from.
        save_dir: when falsy the figure is shown with ``pylab.show()``;
            otherwise it is saved as ``<method>.png`` in this directory.
        type: key into ``const.ranks``.
        method: ``'simpsons'`` or ``'shannons'``.

    Raises:
        ValueError: if `method` is not a known one, or if there is no sample
            to plot.
    """
    titles = {'simpsons': "Simpson's Diversity Index",
              'shannons': "Shannon Diversity Index"}

    # reject unknown methods right away; otherwise nothing would be computed
    # and the function would fail later on with an unrelated error
    if method not in titles:
        raise ValueError("unknown diversity index method %r (expected one of: %s)"
                         % (method, ', '.join(sorted(titles))))

    taxonomic_level = const.ranks[type][-1]

    sample_groups, group_colors = helper_functions.get_groups_colors_from_sample_map_file(sample_map_file_path)

    if method == 'simpsons':
        diversity_index = get_simpsons_diversity_index
    else:
        diversity_index = get_shannons_diversity_index

    plot_dict = {}

    for group in sample_groups:
        plot_dict[group] = [diversity_index(samples_dict[sample][taxonomic_level])
                            for sample in sample_groups[group]]

    # values of all groups together, so that a group without samples does not
    # break the search for the maximum
    all_values = [value for values in plot_dict.values() for value in values]
    if not all_values:
        raise ValueError("there are no samples to plot in %s" % sample_map_file_path)

    # leave 10% head room above the largest value; dividing by a float keeps
    # python 2 from truncating when the values happen to be integers
    max_val = max(all_values)
    max_val = max_val + max_val * 10 / 100.0

    fig = pylab.figure(figsize=(3.5, 6.5))

    pylab.rcParams['axes.titlesize'] = 12.0
    pylab.rcParams['font.size'] = 8.0

    pylab.rcParams.update({'axes.linewidth' : 0, 'axes.axisbelow': False})
    pylab.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
    pylab.grid(True)

    pylab.title(titles[method])

    keys = helper_functions.sorted_copy(plot_dict.keys())

    for i, key in enumerate(keys):
        values = plot_dict[key]

        if not values:
            # nothing to draw for a group without samples; its column and
            # tick label are kept
            continue

        # scattering the samples on the x axis, so it is easier to see them
        # when a bunch of them are at the same spot. instead of this,
        # [i] * len(values) could be used to plot them on a single line.
        x_positions = [((1 - (r.gauss(100, 3) / 100)) + i) for _ in values]

        pylab.plot(x_positions, values, 'o', color = group_colors[key], ms = 10, mew = 0.6, alpha = .5)

        b = pylab.boxplot(values, positions=[i + 0.35], sym=',', widths=0.2)
        pylab.setp(b['medians'], color=group_colors[key])
        pylab.setp(b['whiskers'], color='black', alpha=0.3)
        pylab.setp(b['boxes'], color='black', alpha=0.3)
        pylab.setp(b['fliers'], color='black', alpha=0.3)
        pylab.setp(b['caps'], color='black', alpha=0.3)

    # axis limits and ticks do not depend on the group, so they are set once,
    # after the last box plot has been drawn
    pylab.ylim(ymin=-max_val * 10 / 100.0, ymax=max_val)
    pylab.xlim(xmin=-0.75, xmax=len(plot_dict) - 0.15)
    pylab.xticks(pylab.arange(len(plot_dict)), keys, rotation=90)

    if not save_dir:
        pylab.show()
    else:
        pylab.savefig(os.path.join(save_dir, method + ".png"))

    # clean memory; clearing the figure is best effort, closing all figures
    # right after is what actually releases them
    try:
        fig.clf()
    except Exception:
        pass
    pylab.close('all')