コード例 #1
0
def run_single_correlation(OTU, category_info, otu_sample_info):
    """Run a Pearson correlation between one OTU's counts and a category.

    Arguments:
        - OTU: key into otu_sample_info selecting the OTU to test
        - category_info: mapping of sample id -> category value; every
          value must be convertible to float
        - otu_sample_info: mapping of OTU -> {sample id: count}

    A sample listed in category_info but missing from the OTU's counts is
    given a count of 0, so every sample contributes one data point.

    Returns the (r, prob) pair produced by correlation().
    Raises ValueError if a category value or a count is not numeric.
    """
    counts_by_sample = otu_sample_info[OTU]
    abundances = []
    categories = []
    for sample_id in category_info:
        # an OTU that was never observed in a sample still counts, as zero
        raw_count = (counts_by_sample[sample_id]
                     if sample_id in counts_by_sample else 0)
        try:
            categories.append(float(category_info[sample_id]))
            abundances.append(float(raw_count))
        except ValueError:
            raise ValueError(
                "The category values must be numeric to use the correlation option"
            )
    r, prob = correlation(Numbers(abundances), Numbers(categories))
    return r, prob
コード例 #2
0
    def getPairwiseParam(self, param, summary_function="mean"):
        """Return the estimates of param as a dictionary keyed by names.

        Arguments:
            - param: name of a parameter in est_params or 'length'
            - summary_function: a string naming the statistic used to
              collapse the several threeway estimates available for one
              pair into a single value. Valid values are 'mean' (default)
              and 'median'.

        For threeway estimates of 'length' each (seq1, seq2) key maps to
        the summary of the summed lengths of the two sequences, taken over
        every comparison that contains both of them. In every other case
        the stored estimates are returned keyed by their comparison names.
        """
        # the statistic is looked up as a capitalised attribute, e.g. 'Mean'
        stat_name = summary_function.capitalize()
        assert param in self.__est_params + ['length'], \
                "unrecognised param %s" % param

        if not self.__threeway or param != 'length':
            # no additional processing of the estimates is required
            return dict((names, estimates[param])
                        for names, estimates in self.__param_ests.items())

        summaries = {}
        for seq_a, seq_b in self.__make_pairwise_comparison_sets():
            # gather the distance from every comparison involving this pair
            distances = Numbers()
            for names, estimates in self.__param_ests.items():
                if seq_a in names and seq_b in names:
                    lengths = estimates[param]
                    distances.append(lengths[seq_a] + lengths[seq_b])
            summaries[(seq_a, seq_b)] = getattr(distances, stat_name)
        return summaries
コード例 #3
0
ファイル: distance.py プロジェクト: Skylersun/pycogent
 def getPairwiseParam(self, param, summary_function="mean"):
     """Return the estimates of param as a dictionary keyed by names.

     Arguments:
         - param: name of a parameter in est_params or 'length'
         - summary_function: a string naming the statistic used to
           collapse the several threeway estimates available for one
           pair into a single value. Valid values are 'mean' (default)
           and 'median'.

     For threeway estimates of 'length' each (seq1, seq2) key maps to
     the summary of the summed lengths of the two sequences, taken over
     every comparison that contains both of them. In every other case
     the stored estimates are returned keyed by their comparison names.
     """
     # the statistic is looked up as a capitalised attribute, e.g. 'Mean'
     stat_attr = summary_function.capitalize()
     assert param in self.__est_params + ['length'], \
             "unrecognised param %s" % param

     if not self.__threeway or param != 'length':
         # the stored estimates are already in their final form
         return dict((comp, ests[param])
                     for comp, ests in self.__param_ests.items())

     by_pair = {}
     for first, second in self.__make_pairwise_comparison_sets():
         # one summed length per comparison that contains both sequences
         sums = Numbers()
         for comp, ests in self.__param_ests.items():
             if first in comp and second in comp:
                 per_seq = ests[param]
                 sums.append(per_seq[first] + per_seq[second])
         by_pair[(first, second)] = getattr(sums, stat_attr)
     return by_pair
コード例 #4
0
ファイル: test_special.py プロジェクト: carze/clovr-base
 def test_Numbers(self):
     """Numbers quantiles and Median are correct for the values 1..10"""
     values = Numbers(range(1,11))
     # checks run in ascending order of the fraction; Median sits at 0.5
     for fraction, expected in [(.1, 1.9), (.2, 2.8), (.25, 3.25)]:
         self.assertFloatEqual(values.quantile(fraction), expected)
     self.assertFloatEqual(values.Median, 5.5)
     for fraction, expected in [(.75, 7.75), (.77, 7.93)]:
         self.assertFloatEqual(values.quantile(fraction), expected)
コード例 #5
0
 def test_Numbers(self):
     """Numbers quantiles and Median are correct for the values 1..10"""
     one_to_ten = Numbers(range(1, 11))
     lower_quantiles = ((.1, 1.9), (.2, 2.8), (.25, 3.25))
     upper_quantiles = ((.75, 7.75), (.77, 7.93))
     for q, want in lower_quantiles:
         self.assertFloatEqual(one_to_ten.quantile(q), want)
     # the median is checked between the lower and upper quantiles
     self.assertFloatEqual(one_to_ten.Median, 5.5)
     for q, want in upper_quantiles:
         self.assertFloatEqual(one_to_ten.quantile(q), want)
コード例 #6
0
ファイル: distance.py プロジェクト: miklou/pycogent
def get_pairwise_distance_from_triad(data, summary_function="mean"):
    """Return pairwise distances summarised from lengths estimated on triads.

    Arguments:
        - data: a dict keyed as
          {(a, b, c): {'length': {a: val1, b: val2, c: val3}}}
        - summary_function: a string naming the statistic used to collapse
          the distances collected for one pair into a single value. Valid
          values are 'mean' (default) and 'median'.

    The distance between two members of a triad is the sum of their two
    lengths. Every triad contributes one distance to each of its three
    pairs, and the distances of a pair are then summarised.

    Returns a dict mapping (x, y) tuples to the summarised distance.
    """
    # the statistic is looked up as a capitalised attribute, e.g. 'Mean'
    stat_name = summary_function.capitalize()

    distances = {}
    for triad, estimates in data.items():
        first, second, third = triad
        for pair in ((first, second), (first, third), (second, third)):
            left, right = pair
            lengths = estimates['length']
            distance = lengths[left] + lengths[right]
            distances.setdefault(pair, []).append(distance)

    # collapse the distances gathered for each pair into one statistic
    return dict((pair, getattr(Numbers(found), stat_name))
                for pair, found in distances.items())
コード例 #7
0
 def getParamValues(self, param, **kwargs):
     """Return a Numbers object holding every estimated value of param.

     Arguments:
         - param: name of a parameter in est_params or 'length'
         - **kwargs: passed through unchanged to getPairwiseParam

     The values are those of the dictionary returned by getPairwiseParam;
     its keys are discarded."""
     return Numbers(self.getPairwiseParam(param, **kwargs).values())
コード例 #8
0
ファイル: test_test.py プロジェクト: carze/clovr-base
 def test_ANOVA_one_way(self):
     """ANOVA_one_way reproduces the values given by a stats package"""
     groups = [
         Numbers([10.0, 11.0, 10.0, 5.0, 6.0]),
         Numbers([1.0, 2.0, 3.0, 4.0, 1.0, 2.0]),
         Numbers([6.0, 7.0, 5.0, 6.0, 7.0]),
     ]
     outcome = ANOVA_one_way(groups)
     dfn, dfd, F, between_MS, within_MS, group_means, prob = outcome

     # degrees of freedom are compared exactly, the rest as floats
     self.assertEqual(dfn, 2)
     self.assertEqual(dfd, 13)
     self.assertFloatEqual(F, 18.565450643776831)
     self.assertFloatEqual(between_MS, 55.458333333333343)
     self.assertFloatEqual(within_MS, 2.9871794871794868)
     expected_means = [
         8.4000000000000004, 2.1666666666666665, 6.2000000000000002]
     self.assertFloatEqual(group_means, expected_means)
     self.assertFloatEqual(prob, 0.00015486238993089464)
コード例 #9
0
    def codons(self, genetic_code=SGC, codon_usage=_equal_codons):
        """Predicts the most likely set of codon frequencies.

        genetic_code determines which codons belong to each amino acid and
        codon_usage supplies the relative preference among those codons.
        Defaults are the standard genetic code and unbiased codon
        frequencies.

        Each amino acid's normalized frequency is divided among its codons
        in proportion to their normalized usage. Amino acids for which
        genetic_code lists no codons are skipped. Returns a CodonUsage
        built from the predicted frequencies, self.info and genetic_code.
        """
        aa_freqs = Freqs(self)
        aa_freqs.normalize()
        predicted = {}
        for amino_acid, aa_share in list(aa_freqs.items()):
            # codons are keyed in upper-case RNA form (U instead of T)
            rna_codons = [c.upper().replace('T','U')
                          for c in genetic_code[amino_acid]]
            if rna_codons:
                usage = Numbers([codon_usage[c] for c in rna_codons])
                usage.normalize()
                for triplet, codon_share in zip(rna_codons, usage):
                    predicted[triplet] = codon_share * aa_share
            # otherwise the code has no codons for this amino acid
        return CodonUsage(predicted, self.info, genetic_code)
コード例 #10
0
ファイル: usage.py プロジェクト: GavinHuttley/pycogent
    def codons(self, genetic_code=SGC, codon_usage=_equal_codons):
        """Predicts the most likely set of codon frequencies.

        genetic_code determines which codons belong to each amino acid and
        codon_usage supplies the relative preference among those codons.
        Defaults are the standard genetic code and unbiased codon
        frequencies.

        Each amino acid's normalized frequency is divided among its codons
        in proportion to their normalized usage. Amino acids for which
        genetic_code lists no codons are skipped. Returns a CodonUsage
        built from the predicted frequencies, self.info and genetic_code.
        """
        residue_freqs = Freqs(self)
        residue_freqs.normalize()
        codon_freqs = {}
        for residue, residue_freq in residue_freqs.items():
            # codons are keyed in upper-case RNA form (U instead of T)
            synonyms = [c.upper().replace('T','U')
                        for c in genetic_code[residue]]
            if synonyms:
                weights = Numbers([codon_usage[c] for c in synonyms])
                weights.normalize()
                for synonym, weight in zip(synonyms, weights):
                    codon_freqs[synonym] = weight * residue_freq
            # otherwise the code has no codons for this amino acid
        return CodonUsage(codon_freqs, self.info, genetic_code)
コード例 #11
0
def run_single_ANOVA(OTU, category_info, otu_sample_info, category_values):
    """Run a one-way ANOVA on one OTU's counts grouped by category.

    Arguments:
        - OTU: key into otu_sample_info selecting the OTU to test
        - category_info: mapping of sample id -> category value
        - otu_sample_info: mapping of OTU -> {sample id: count}
        - category_values: list of the categories; the position of a
          category selects the group its samples are placed in

    A sample listed in category_info but missing from the OTU's counts is
    given a count of 0.

    Returns the (group_means, prob) items of the ANOVA_one_way result.
    Raises ValueError if a sample's category is not in category_values.
    """
    # one initially empty group of counts per category
    groups = [Numbers([]) for _ in category_values]
    counts_by_sample = otu_sample_info[OTU]
    for sample_id in category_info:
        count = (counts_by_sample[sample_id]
                 if sample_id in counts_by_sample else 0)
        group_idx = category_values.index(category_info[sample_id])
        groups[group_idx].append(count)
    dfn, dfd, F, between_MS, within_MS, group_means, prob = ANOVA_one_way(
        groups)
    return group_means, prob
コード例 #12
0
def run_single_ANOVA(OTU, category_info, otu_table, category_values):
    """Run a one-way ANOVA on one OTU's counts grouped by category.

    Arguments:
        - OTU: observation id passed to otu_table.observationData
        - category_info: mapping of sample id -> category value
        - otu_table: object providing observationData(OTU) and SampleIds
        - category_values: list of the categories; the position of a
          category selects the group its samples are placed in

    Samples present in category_info but absent from otu_table.SampleIds
    are skipped.

    Returns (group_means, prob). If ANOVA_one_way raises ValueError the
    result is computed here instead: group_means is the list of each
    group's Mean, in category_values order, and prob is 0.0 when the
    summed variances are effectively zero while the means differ, else 1.0.

    Raises ValueError if a sample's category is not in category_values.
    """
    # one initially empty group of counts per category
    groups = [Numbers([]) for _ in category_values]
    sample_data = otu_table.observationData(OTU)
    sample_ids = otu_table.SampleIds
    for sample in category_info:
        try:
            sample_index = sample_ids.index(sample)
        except ValueError:
            # sample is in the category mapping but not in the OTU table
            continue
        group_idx = category_values.index(category_info[sample])
        groups[group_idx].append(sample_data[sample_index])

    try:
        dfn, dfd, F, between_MS, within_MS, group_means, prob = ANOVA_one_way(
            groups)
    except ValueError:
        # Set the p-value to 0.0 if the variances are 0.0 (within rounding
        # error) and the means are not all the same. If the means are all
        # the same and the variances are 0.0, set the p-value to 1.0.
        # The means stay in a list so they remain aligned with
        # category_values; a set is used only to count distinct values.
        group_means = [group.Mean for group in groups]
        total_variance = sum(group.Variance for group in groups)
        if total_variance < 1e-21 and len(set(group_means)) > 1:
            prob = 0.0
        else:
            prob = 1.0
    return group_means, prob
コード例 #13
0
def run_single_correlation(OTU_abundance_values, category_values):
    """Run a Pearson correlation between category values and OTU abundances.

    Both arguments are wrapped in Numbers before being handed to
    correlation(), with the category values as the first argument.
    Returns whatever correlation() returns.
    """
    categories = Numbers(category_values)
    abundances = Numbers(OTU_abundance_values)
    return correlation(categories, abundances)