def run_single_correlation(OTU, category_info, otu_sample_info):
    """Runs a Pearson correlation between one OTU's abundances and a category.

    Arguments:
        - OTU: identifier of the OTU to test
        - category_info: dict mapping sample id -> category value; values
          must be convertible to float
        - otu_sample_info: dict mapping OTU id -> {sample id: count}

    Returns the (r, prob) tuple produced by correlation().
    Raises ValueError if any category value is not numeric.
    """
    OTU_abundance_values = []
    category_values = []
    sample_info = otu_sample_info[OTU]
    for sample in category_info:
        # even if this OTU is not observed in a sample, we can use count=0
        count = sample_info.get(sample, 0)
        # keep the try narrow: only the category conversion should trigger
        # the "must be numeric" error
        try:
            cat_val = float(category_info[sample])
        except ValueError:
            raise ValueError(
                "The category values must be numeric to use the correlation option")
        category_values.append(cat_val)
        OTU_abundance_values.append(float(count))
    r, prob = correlation(Numbers(OTU_abundance_values),
                          Numbers(category_values))
    return r, prob
def getPairwiseParam(self, param, summary_function="mean"):
    """Return the pairwise statistic estimates as a dictionary keyed by
    (seq1, seq2)

    Arguments:
        - param: name of a parameter in est_params or 'length'
        - summary_function: a string naming the function used for
          estimating param from threeway distances. Valid values are 'mean'
          (default) and 'median'."""
    stat_name = summary_function.capitalize()
    pairwise_stats = {}
    assert param in self.__est_params + ['length'], \
        "unrecognised param %s" % param
    if self.__threeway and param == 'length':
        # pool the branch lengths from every triad containing the pair,
        # then summarise them with the requested statistic
        for seq1, seq2 in self.__make_pairwise_comparison_sets():
            pooled = Numbers()
            for comp_names, param_vals in self.__param_ests.items():
                if seq1 in comp_names and seq2 in comp_names:
                    pooled.append(param_vals[param][seq1] +
                                  param_vals[param][seq2])
            pairwise_stats[(seq1, seq2)] = getattr(pooled, stat_name)
    else:
        # estimates are already pairwise; copy them straight across
        for comp_names, param_vals in self.__param_ests.items():
            pairwise_stats[comp_names] = param_vals[param]
    return pairwise_stats
def getPairwiseParam(self, param, summary_function="mean"):
    """Return the pairwise statistic estimates as a dictionary keyed by
    (seq1, seq2)

    Arguments:
        - param: name of a parameter in est_params or 'length'
        - summary_function: a string naming the function used for
          estimating param from threeway distances. Valid values are 'mean'
          (default) and 'median'."""
    func_name = summary_function.capitalize()
    result = {}
    assert param in self.__est_params + ['length'], \
        "unrecognised param %s" % param
    if not (self.__threeway and param == 'length'):
        # no additional processing of the distances is required
        for names, ests in self.__param_ests.items():
            result[names] = ests[param]
        return result
    # threeway lengths: gather every distance involving each pair and
    # reduce with the chosen summary statistic
    for first, second in self.__make_pairwise_comparison_sets():
        collected = Numbers()
        for names, ests in self.__param_ests.items():
            if first in names and second in names:
                collected.append(ests[param][first] + ests[param][second])
        result[(first, second)] = getattr(collected, func_name)
    return result
def test_Numbers(self):
    """quantiles should be correct"""
    nums = Numbers(range(1, 11))
    # quantiles below the median
    for fraction, expect in [(.1, 1.9), (.2, 2.8), (.25, 3.25)]:
        self.assertFloatEqual(nums.quantile(fraction), expect)
    self.assertFloatEqual(nums.Median, 5.5)
    # quantiles above the median
    for fraction, expect in [(.75, 7.75), (.77, 7.93)]:
        self.assertFloatEqual(nums.quantile(fraction), expect)
def test_Numbers(self):
    """quantiles should be correct"""
    series = Numbers(range(1, 11))
    lower = zip((.1, .2, .25), (1.9, 2.8, 3.25))
    for q, expected in lower:
        self.assertFloatEqual(series.quantile(q), expected)
    # Median is exposed as a property rather than via quantile()
    self.assertFloatEqual(series.Median, 5.5)
    upper = zip((.75, .77), (7.75, 7.93))
    for q, expected in upper:
        self.assertFloatEqual(series.quantile(q), expected)
def get_pairwise_distance_from_triad(data, summary_function="mean"):
    """returns pairwise distances from lengths estimated from triads

    Arguments:
        - data: a dict keyed as {(a,b,c): {'length': 'a': val1, 'b', ...}}
        - summary_function: a string naming the function used for
          estimating param from threeway distances. Valid values are 'mean'
          (default) and 'median'.
    """
    stat_name = summary_function.capitalize()
    collected = {}
    # accumulate, for each pair, the pair distance implied by every triad
    # that contains both members
    for triad, ests in data.items():
        first, second, third = triad
        branch_lengths = ests['length']
        for x, y in ((first, second), (first, third), (second, third)):
            distance = branch_lengths[x] + branch_lengths[y]
            collected.setdefault((x, y), []).append(distance)
    # reduce each pair's pooled distances with the chosen statistic
    pairwise_stats = {}
    for pair, distances in collected.items():
        pairwise_stats[pair] = getattr(Numbers(distances), stat_name)
    return pairwise_stats
def getParamValues(self, param, **kwargs):
    """Returns a Numbers object with all estimated values of param.

    Arguments:
        - param: name of a parameter in est_params or 'length'
        - **kwargs: arguments passed to getPairwiseParam"""
    return Numbers(self.getPairwiseParam(param, **kwargs).values())
def test_ANOVA_one_way(self):
    """ANOVA one way returns same values as ANOVA on a stats package"""
    groups = [
        Numbers([10.0, 11.0, 10.0, 5.0, 6.0]),
        Numbers([1.0, 2.0, 3.0, 4.0, 1.0, 2.0]),
        Numbers([6.0, 7.0, 5.0, 6.0, 7.0]),
    ]
    dfn, dfd, F, between_MS, within_MS, group_means, prob = \
        ANOVA_one_way(groups)
    self.assertEqual(dfn, 2)
    self.assertEqual(dfd, 13)
    self.assertFloatEqual(F, 18.565450643776831)
    self.assertFloatEqual(between_MS, 55.458333333333343)
    self.assertFloatEqual(within_MS, 2.9871794871794868)
    self.assertFloatEqual(
        group_means,
        [8.4000000000000004, 2.1666666666666665, 6.2000000000000002])
    self.assertFloatEqual(prob, 0.00015486238993089464)
def codons(self, genetic_code=SGC, codon_usage=_equal_codons):
    """Predicts most likely set of codon frequencies.

    Optionally uses genetic_code (to figure out which codons belong with
    each amino acid), and codon_usage (to get most likely codons for each
    amino acid). Defaults are the standard genetic code and unbiased codon
    frequencies.
    """
    aa_freqs = Freqs(self)
    aa_freqs.normalize()
    predicted = {}
    for aa, aa_freq in list(aa_freqs.items()):
        # RNA alphabet: map T -> U in the codons for this amino acid
        synonymous = [c.upper().replace('T','U') for c in genetic_code[aa]]
        if not synonymous:
            continue    #code might be missing some amino acids?
        usage = Numbers([codon_usage[c] for c in synonymous])
        usage.normalize()
        # spread this amino acid's frequency over its codons by usage
        for codon, rel_freq in zip(synonymous, usage):
            predicted[codon] = rel_freq * aa_freq
    return CodonUsage(predicted, self.info, genetic_code)
def codons(self, genetic_code=SGC, codon_usage=_equal_codons):
    """Predicts most likely set of codon frequencies.

    Optionally uses genetic_code (to figure out which codons belong with
    each amino acid), and codon_usage (to get most likely codons for each
    amino acid). Defaults are the standard genetic code and unbiased codon
    frequencies.
    """
    freqs = Freqs(self)
    freqs.normalize()
    result = {}
    for aa, aa_freq in freqs.items():
        # codons for this amino acid, converted to the RNA alphabet
        curr = [codon.upper().replace('T','U')
                for codon in genetic_code[aa]]
        if not curr:
            continue    #code might be missing some amino acids?
        weights = Numbers([codon_usage[codon] for codon in curr])
        weights.normalize()
        # each codon gets its usage-weighted share of the aa frequency
        for codon, weight in zip(curr, weights):
            result[codon] = weight * aa_freq
    return CodonUsage(result, self.info, genetic_code)
def run_single_ANOVA(OTU, category_info, otu_sample_info, category_values):
    """Runs a one-way ANOVA on the designated OTU across categories.

    Arguments:
        - OTU: identifier of the OTU to test
        - category_info: dict mapping sample id -> category value
        - otu_sample_info: dict mapping OTU id -> {sample id: count}
        - category_values: list of the possible category values; the order
          determines the order of the returned group means

    Returns (group_means, prob) from ANOVA_one_way.
    """
    # one Numbers container of counts per category, in category_values order
    values = [Numbers([]) for _ in category_values]
    sample_info = otu_sample_info[OTU]
    for sample in category_info:
        # an OTU not observed in a sample contributes a zero count
        count = sample_info.get(sample, 0)
        category = category_info[sample]
        values[category_values.index(category)].append(count)
    dfn, dfd, F, between_MS, within_MS, group_means, prob = ANOVA_one_way(
        values)
    return group_means, prob
def run_single_ANOVA(OTU, category_info, otu_table, category_values):
    """Runs a one-way ANOVA on the designated OTU across categories.

    Arguments:
        - OTU: identifier of an observation in otu_table
        - category_info: dict mapping sample id -> category value
        - otu_table: table providing observationData(OTU) and SampleIds
        - category_values: list of the possible category values; the order
          determines the order of the returned group means

    Returns (group_means, prob). When ANOVA_one_way raises ValueError
    (within-group variances are ~0), prob is set to 0.0 if the group means
    differ and 1.0 if they are all equal.
    """
    # one Numbers container of counts per category, in category_values order
    values = [Numbers([]) for _ in category_values]
    sample_data = otu_table.observationData(OTU)
    # hoist the per-sample O(n) list searches: map sample id -> column index
    column = dict((s, i) for i, s in enumerate(otu_table.SampleIds))
    for sample in category_info:
        if sample in column:
            count = sample_data[column[sample]]
            category = category_info[sample]
            values[category_values.index(category)].append(count)
        # NOTE: samples in the category mapping but not in the OTU table
        # are silently skipped
    try:
        dfn, dfd, F, between_MS, within_MS, group_means, prob = ANOVA_one_way(
            values)
        return group_means, prob
    except ValueError:
        #set the p-value to 0 if the variances are 0.0 (within rounding
        #error) and the means are not all the same. If the means are all
        #the same and the variances are 0.0, set the p-value to 1
        group_means = [group.Mean for group in values]
        group_variances = [group.Variance for group in values]
        if sum(group_variances) < 1e-21 and len(set(group_means)) > 1:
            prob = 0.0
        else:
            prob = 1.0
        # return the means as a list (not a set) so the return type is
        # consistent with the successful ANOVA branch
        return group_means, prob
def run_single_correlation(OTU_abundance_values, category_values):
    """runs pearson correlation on the designated OTU"""
    categories = Numbers(category_values)
    abundances = Numbers(OTU_abundance_values)
    return correlation(categories, abundances)