def testCorr(self):
    """Check correlation.Corr against three small hand-picked samples.

    Uses assertAlmostEqual rather than the assertAlmostEquals alias: the
    alias was deprecated for years and no longer exists in Python 3.12+.
    """
    # Evenly spaced samples offset by a constant: expected coefficient 1.0.
    xs = [1, 2, 3]
    ys = [3, 4, 5]
    cor = correlation.Corr(xs, ys)
    self.assertAlmostEqual(cor, 1.0)

    # Same ys, but xs now contains one very large value; the expected
    # coefficient drops below 1.
    xs = [1, 2, 100]
    ys = [3, 4, 5]
    cor = correlation.Corr(xs, ys)
    self.assertAlmostEqual(cor, 0.8703878312633373)

    # A sequence paired with itself: expected coefficient 1.0.
    cor = correlation.Corr(xs, xs)
    self.assertAlmostEqual(cor, 1.0)
def SpearmanCorr(xs, ys):
    """Spearman rank correlation coefficient.

    Both sequences are converted with ToRanks and the results are passed to
    correlation.Corr. Per the original note, this measure is insensitive to
    outliers and to asymmetry in the variables' distributions.
    """
    x_ranks = ToRanks(xs)
    y_ranks = ToRanks(ys)
    rank_corr = correlation.Corr(x_ranks, y_ranks)
    return rank_corr
def main():
    """Load respondent records and compare two correlation implementations.

    Reads the records from the 'res' data directory, computes the
    height/weight correlation with both `Corr` and `correlation.Corr`,
    and prints the two results side by side.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    hs, ws = respondents.GetHeightAndWeight()

    # Method 1: the unqualified Corr (presumably defined in this file --
    # confirm).
    own_result = Corr(hs, ws)
    # Method 2: the implementation from the correlation module.
    module_result = correlation.Corr(hs, ws)
    print(own_result, "vs", module_result)

    # Sanity check: heights against themselves, labelled as expected 1.
    print("E = 1 vs", Corr(hs, hs))
def ComputeCorrelations():
    """Print correlation and least-squares statistics for height vs. weight.

    Reads the respondent records, then reports in order: the record count,
    the Pearson correlation with raw and with log-transformed weights, the
    Spearman correlation, the least-squares intercept and slope against
    log weights, the coefficient of determination and its square root.
    """
    respondents = brfss_scatter.Respondents()
    respondents.ReadRecords()
    print('Number of records:', len(respondents.records))

    hs, ws = respondents.GetHeightWeight()
    print('Pearson correlation (weights):', correlation.Corr(hs, ws))

    # Repeat the Pearson computation on the Log-transformed weights.
    log_ws = Log(ws)
    print('Pearson correlation (log weights):', correlation.Corr(hs, log_ws))

    print('Spearman correlation (weights):', correlation.SpearmanCorr(hs, ws))

    # Fit heights against log weights and report the fit parameters.
    intercept, gradient = correlation.LeastSquares(hs, log_ws)
    print('Least squares inter, slope (log weights):', intercept, gradient)

    residuals = correlation.Residuals(hs, log_ws, intercept, gradient)
    r_squared = correlation.CoefDetermination(log_ws, residuals)
    print('Coefficient of determination:', r_squared)
    print('sqrt(R^2):', math.sqrt(r_squared))
def sim_pearson(perfs, p1, p2):
    """Pearson correlation coefficient between the ratings of p1 and p2.

    r = cov(X, Y) / sqrt(var(X) * var(Y)), with the covariance and both
    variances divided by n, the number of items found under both
    perfs[p1] and perfs[p2].

    Returns 0 when there are no shared items or when either variance is 0.
    """
    # Items rated by both people; the dict serves as an ordered set.
    common = {item: 1 for item in perfs[p1] if item in perfs[p2]}
    n = len(common)
    if n == 0:
        return 0

    # Ratings of the shared items, in matching order for both people.
    scores_1 = [perfs[p1][item] for item in common]
    scores_2 = [perfs[p2][item] for item in common]

    # Mean rating of each person over the shared items.
    mean_1 = sum(scores_1) / n
    mean_2 = sum(scores_2) / n

    # Variance of each person's ratings.
    var_1 = sum([pow(score - mean_1, 2) for score in scores_1]) / n
    var_2 = sum([pow(score - mean_2, 2) for score in scores_2]) / n
    if var_1 == 0 or var_2 == 0:
        return 0

    # Covariance of the two rating lists.
    cov = sum([(a - mean_1) * (b - mean_2)
               for a, b in zip(scores_1, scores_2)]) / n

    # Pearson correlation coefficient.
    r = cov / sqrt(var_1 * var_2)

    # Cross-check against the thinkstats implementation and plot the data.
    # NOTE(review): `show` is neither a parameter nor a local here;
    # presumably a module-level flag -- confirm it is defined.
    if show:
        reference_r = correlation.Corr(scores_1, scores_2)
        print(r, reference_r)
        thinkplot.Clf()
        thinkplot.Scatter(scores_1, scores_2)
        thinkplot.Show()

    return r
def ComputeLeastSquares(ages, weights):
    """Computes least squares fit for ages and weights.

    Prints summary statistics: the Pearson and Spearman correlations, the
    fitted intercept and slope, and R^2, followed by a blank line.

    Args:
        ages: sequence of ages, passed as the first argument to the
            correlation helpers.
        weights: sequence of weights, passed as the second argument.

    Returns:
        Tuple (inter, slope, R2): the two values returned by
        correlation.LeastSquares and the result of
        correlation.CoefDetermination.
    """
    # Each print takes a single pre-formatted string, so the statement
    # parses and prints the same text under both Python 2 and Python 3
    # (the former `print a, b` statements are a SyntaxError on Python 3).

    # compute the correlation between age and weight
    print('Pearson correlation %s' % (correlation.Corr(ages, weights),))
    print('Spearman correlation %s'
          % (correlation.SpearmanCorr(ages, weights),))

    # compute least squares fit
    inter, slope = correlation.LeastSquares(ages, weights)
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(ages, weights, inter, slope)
    R2 = correlation.CoefDetermination(weights, res)

    print('R^2 %s' % (R2,))
    # Blank separator line (was a bare `print` statement).
    print('')
    return inter, slope, R2
def Correlation(self):
    """Computes the correlation between log volumes and rdts.

    self.initial_rdt is unzipped into a sequence of volumes and a sequence
    of rdts; the natural log of each volume is correlated with the rdts
    via correlation.Corr.
    """
    volumes, rdts = zip(*self.initial_rdt)
    log_volumes = list(map(math.log, volumes))
    return correlation.Corr(log_volumes, rdts)
def peasson_for_distance(xs, ys):
    """Pearson-based distance between xs and ys.

    Computes the Pearson correlation coefficient r with correlation.Corr
    and returns 1 - r, so the more similar the inputs, the shorter the
    distance.
    """
    return 1 - correlation.Corr(xs, ys)