예제 #1
0
def calc():
    """Print an F test comparing the columns of the CSV data at DATA_PATH.

    Every column of the table returned by ``readcsv(DATA_PATH)`` is treated
    as one group.  The between-group mean square is divided by the
    within-group mean square, and the ratio is compared with the critical
    values returned by ``F_distribution``.  Nothing is returned; the results
    are written with ``print``.

    NOTE(review): relies on ``Stat`` exposing ``mean``, ``size`` and
    ``sum_of_squares``; ``sum_of_squares`` is presumably the sum of squared
    deviations from the sample mean -- confirm against ``Stat``.
    """
    def line(*fields):
        # Space-join the str() of each field, which is how a comma-separated
        # print statement lays its items out.
        return ' '.join([str(field) for field in fields])

    # read data
    table = readcsv(DATA_PATH)

    # one group per column; the column count is taken from the first row
    groups = [[row[col] for row in table] for col in range(len(table[0]))]
    group_stats = [Stat(values) for values in groups]

    # statistics over every observation, flattened into a single list
    pooled = Stat(list(itertools.chain.from_iterable(groups)))

    # sums of squares and degrees of freedom, ordered (between, within, total)
    ss_within = sum(group.sum_of_squares for group in group_stats)
    ss_between = sum(
        ((group.mean - pooled.mean) ** 2) * group.size
        for group in group_stats)
    sums_of_squares = (ss_between, ss_within, pooled.sum_of_squares)
    dof = (len(group_stats) - 1,
           sum(group.size - 1 for group in group_stats),
           pooled.size - 1)

    # only the between and within entries feed the F ratio
    ms_between, ms_within = [
        squares / freedom
        for squares, freedom in zip(sums_of_squares[:2], dof[:2])]
    f_value = ms_between / ms_within

    # The first critical value is printed under the 0.05 label; it uses the
    # default alpha of F_distribution, which is not visible from here.
    critical_05 = F_distribution(dof[0], dof[1])
    critical_01 = F_distribution(dof[0], dof[1], alpha=0.01)

    # output
    print('[Pure Python]')
    print(line('F value:', f_value))
    print(line('F dist(0.05):', critical_05, abs(f_value) > critical_05))
    print(line('F dist(0.01):', critical_01, abs(f_value) > critical_01))
    print('')
def calc():
    """Print a two-sample t test on columns 1 and 2 of the CSV at DATA_PATH.

    The two columns are summarised with ``Stat``, a pooled variance estimate
    and the standard error of the mean difference are derived from them, and
    the resulting t value is compared with the critical values returned by
    ``t_distribution`` at alpha 0.05 and 0.01.  Nothing is returned; the
    results are written with ``print``.

    NOTE(review): the pooled estimate multiplies ``Stat.variance`` by
    ``Stat.size``, which presumably assumes ``variance`` is the population
    (divide-by-n) variance -- confirm against ``Stat``.
    """
    def line(*fields):
        # Space-join the str() of each field, which is how a comma-separated
        # print statement lays its items out.
        return ' '.join([str(field) for field in fields])

    # read data
    rows = readcsv(DATA_PATH)
    sample_a, sample_b = [[row[col] for row in rows] for col in (1, 2)]

    # calculation
    stat_a = Stat(sample_a)
    stat_b = Stat(sample_b)

    weighted_variance = (stat_a.variance * stat_a.size
                         + stat_b.variance * stat_b.size)
    pooled_variance = weighted_variance / (
        (stat_a.size - 1) + (stat_b.size - 1))
    inverse_sizes = (1.0 / stat_a.size) + (1.0 / stat_b.size)
    std_error = math.sqrt(pooled_variance * inverse_sizes)

    freedom = (stat_a.size - 1) + (stat_b.size - 1)
    critical_05 = t_distribution(freedom, alpha=0.05)
    critical_01 = t_distribution(freedom, alpha=0.01)

    mean_gap = stat_a.mean - stat_b.mean
    # NOTE: the interval is computed but neither printed nor returned.
    confidence_interval = (mean_gap - std_error * critical_05,
                           mean_gap + std_error * critical_05)
    t_stat = mean_gap / std_error

    # output
    print('[Pure Python]')
    print(line('t value:', t_stat))
    print(line('t dist(0.05):', critical_05, abs(t_stat) > critical_05))
    print(line('t dist(0.01):', critical_01, abs(t_stat) > critical_01))
    print('')
def calc():
    """Fit columns 1 and 2 of the CSV at DATA_PATH and print the fitted line.

    Column 1 supplies the x values and column 2 the y values.  They are
    passed to ``linear_regression``, whose three results are printed in the
    form ``Y = a + bX (r=cor)``.

    Returns:
        The tuple ``(x, y, a, b, cor)``: both value lists followed by the
        three regression results in the order they appear in the printed
        line.
    """
    # read data
    records = readcsv(DATA_PATH)

    def column(index):
        # Collect one field from every record, in record order.
        return [record[index] for record in records]

    # pre-calculation
    xs = column(1)
    ys = column(2)

    # calculation
    fit = linear_regression(xs, ys)
    correlation, intercept, slope = fit

    # output
    summary = 'Y = %f + %fX (r=%0.2f)' % (intercept, slope, correlation)
    print('[Pure Python]')
    print(summary)
    print('')
    return xs, ys, intercept, slope, correlation
예제 #4
0
def calc():
    """Print a paired t test on columns 1 and 2 of the CSV at DATA_PATH.

    The per-row difference (column 1 minus column 2) is summarised with
    ``Stat``.  Its mean divided by its standard error gives the t value,
    which is compared with the critical values returned by ``t_distribution``
    at alpha 0.05 and 0.01.  Nothing is returned; the results are written
    with ``print``.
    """
    def line(*fields):
        # Space-join the str() of each field, which is how a comma-separated
        # print statement lays its items out.
        return ' '.join([str(field) for field in fields])

    # read data
    rows = readcsv(DATA_PATH)
    differences = []
    for row in rows:
        differences.append(row[1] - row[2])

    # calculation
    summary = Stat(differences)
    freedom = summary.size - 1
    critical_05 = t_distribution(freedom, alpha=0.05)
    critical_01 = t_distribution(freedom, alpha=0.01)
    t_stat = summary.mean / summary.standard_error

    # output
    print('[Pure Python]')
    print(line('t value:', t_stat))
    print(line('t dist(0.05):', critical_05, abs(t_stat) > critical_05))
    print(line('t dist(0.01):', critical_01, abs(t_stat) > critical_01))
    print('')