Beispiel #1
0
    def testCorr(self):
        """Check correlation.Corr on small hand-picked samples."""
        # ys is xs shifted by a constant, so the correlation is expected to be 1.
        xs = [1, 2, 3]
        ys = [3, 4, 5]
        cor = correlation.Corr(xs, ys)
        # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
        # which no longer exists on unittest.TestCase in Python 3.12+.
        self.assertAlmostEqual(cor, 1.0)

        # One extreme x value; the expected figure is a pinned regression value.
        xs = [1, 2, 100]
        ys = [3, 4, 5]
        cor = correlation.Corr(xs, ys)
        self.assertAlmostEqual(cor, 0.8703878312633373)

        # A sequence correlated with itself.
        cor = correlation.Corr(xs, xs)
        self.assertAlmostEqual(cor, 1.0)
Beispiel #2
0
def SpearmanCorr(xs, ys):
    """
    Spearman rank correlation coefficient.

    Both inputs are converted with ToRanks and the results are passed to
    correlation.Corr. Working on ranks makes the measure insensitive to
    outliers and to asymmetric variable distributions.
    """
    return correlation.Corr(ToRanks(xs), ToRanks(ys))
Beispiel #3
0
def main():
    """Load respondent records from 'res' and print two height/weight correlations."""
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights, weights = respondents.GetHeightAndWeight()

    # Method 1: the bare Corr in scope here; method 2: correlation.Corr.
    first_r = Corr(heights, weights)
    second_r = correlation.Corr(heights, weights)
    print(first_r, "vs", second_r)

    # Heights against themselves, printed next to the expected value of 1.
    self_r = Corr(heights, heights)
    print("E = 1 vs", self_r)
Beispiel #4
0
def ComputeCorrelations():
    """Print correlation and least-squares statistics for height vs. weight.

    Reads respondent records, then prints in order: the record count, the
    Pearson correlation for weights and for Log(weights), the Spearman
    correlation for weights, the least-squares intercept and slope for
    Log(weights), the coefficient of determination and its square root.
    """
    respondents = brfss_scatter.Respondents()
    respondents.ReadRecords()
    print('Number of records:', len(respondents.records))

    heights, weights = respondents.GetHeightWeight()
    print('Pearson correlation (weights):',
          correlation.Corr(heights, weights))

    log_weights = Log(weights)
    print('Pearson correlation (log weights):',
          correlation.Corr(heights, log_weights))

    print('Spearman correlation (weights):',
          correlation.SpearmanCorr(heights, weights))

    # The fit is done against the log-transformed weights.
    intercept, gradient = correlation.LeastSquares(heights, log_weights)
    print('Least squares inter, slope (log weights):', intercept, gradient)

    residuals = correlation.Residuals(heights, log_weights, intercept, gradient)
    r_squared = correlation.CoefDetermination(log_weights, residuals)
    print('Coefficient of determination:', r_squared)
    print('sqrt(R^2):', math.sqrt(r_squared))
def sim_pearson(perfs, p1, p2, show=False):
    """
    Pearson correlation coefficient between the scores of p1 and p2.

    r = cov(X, Y) / (sigma_X * sigma_Y), computed only over the items that
    appear under both perfs[p1] and perfs[p2], using population
    (divide-by-n) variance and covariance.

    Args:
        perfs: mapping of key -> mapping of item -> numeric score.
        p1: key of the first entry in perfs.
        p2: key of the second entry in perfs.
        show: when true, also print r next to correlation.Corr's result and
            draw a scatter plot with thinkplot. Defaults to False.

    Returns:
        The coefficient r, or 0 when there are no shared items or when
        either side has zero variance over the shared items.
    """
    # Items scored by both p1 and p2, in perfs[p1]'s iteration order.
    shared_items = [item for item in perfs[p1] if item in perfs[p2]]

    n = len(shared_items)
    if n == 0:
        return 0

    # Scores of p1 and p2 restricted to the shared items.
    data_p1 = [perfs[p1][it] for it in shared_items]
    data_p2 = [perfs[p2][it] for it in shared_items]

    # Mean score of each side.
    mu_p1 = sum(data_p1) / n
    mu_p2 = sum(data_p2) / n

    # Population variance of each side.
    var_p1 = sum((it - mu_p1) ** 2 for it in data_p1) / n
    var_p2 = sum((it - mu_p2) ** 2 for it in data_p2) / n

    # A constant series has no defined correlation; report 0 rather than
    # dividing by zero below.
    if var_p1 == 0 or var_p2 == 0:
        return 0

    # Population covariance.
    cov = sum((x - mu_p1) * (y - mu_p2)
              for x, y in zip(data_p1, data_p2)) / n

    # Pearson correlation coefficient.
    r = cov / sqrt(var_p1 * var_p2)

    # Optional cross-check against correlation.Corr, plus a scatter plot.
    # `show` is an explicit opt-in parameter so the function does not depend
    # on a name defined outside it.
    if show:
        rr = correlation.Corr(data_p1, data_p2)
        print(r, rr)
        thinkplot.Clf()
        thinkplot.Scatter(data_p1, data_p2)
        thinkplot.Show()

    return r
Beispiel #6
0
def ComputeLeastSquares(ages, weights):
    """Computes least squares fit for ages and weights.

    Prints summary statistics: the Pearson and Spearman correlations, the
    fitted (inter, slope) pair and R^2, followed by a blank line.

    Args:
        ages: values passed as the first (x) argument to the correlation
            helpers.
        weights: values passed as the second (y) argument.

    Returns:
        Tuple (inter, slope, R2) from correlation.LeastSquares and
        correlation.CoefDetermination.
    """
    # Every print call takes exactly one pre-formatted string, so the output
    # is the same whether print is a statement (Python 2) or a function
    # (Python 3).

    # compute the correlation between age and weight
    print('Pearson correlation %s' % (correlation.Corr(ages, weights),))
    print('Spearman correlation %s'
          % (correlation.SpearmanCorr(ages, weights),))

    # compute least squares fit
    inter, slope = correlation.LeastSquares(ages, weights)
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(ages, weights, inter, slope)
    R2 = correlation.CoefDetermination(weights, res)

    print('R^2 %s' % (R2,))
    print('')  # blank separator line
    return inter, slope, R2
Beispiel #7
0
 def Correlation(self):
     """Return the correlation between log volumes and rdts.

     The pairs in self.initial_rdt are split into volumes and rdts, the
     natural log of each volume is taken, and both series are handed to
     correlation.Corr.
     """
     volumes, rdts = zip(*self.initial_rdt)
     log_volumes = list(map(math.log, volumes))
     return correlation.Corr(log_volumes, rdts)
Beispiel #8
0
def peasson_for_distance(xs, ys):
    """Return 1 - r, where r is correlation.Corr(xs, ys).

    Used as a distance: the more strongly correlated the two series are,
    the smaller the returned value.
    """
    return 1 - correlation.Corr(xs, ys)