def correlation(x, y):
    """Return covariance(x, y) divided by the standard deviations of x and y.

    The result is 0 whenever either standard deviation is not strictly
    positive, so the division is only attempted when both inputs vary.
    """
    spread_x = standard_deviation(x)
    spread_y = standard_deviation(y)
    if not (spread_x > 0 and spread_y > 0):
        return 0
    return covariance(x, y) / spread_x / spread_y
Esempio n. 2
0
def correlation(x, y):
    """Return covariance(x, y) divided by the standard deviations of x and y.

    Returns 0 when either standard deviation equals zero, because the
    ratio is undefined when one of the inputs does not vary.
    """
    standard_dev_x = standard_deviation(x)
    standard_dev_y = standard_deviation(y)
    # Compare by value, not identity: a float 0.0 is never `is 0`, so an
    # identity test would let a zero spread through to the division below.
    if standard_dev_x == 0 or standard_dev_y == 0:
        return 0
    # Reuse the values computed above and divide one at a time; multiplying
    # two very small spreads first could underflow to 0.0.
    return covariance(x, y) / standard_dev_x / standard_dev_y
def correlation(x, y):
    """
    Return the covariance of x and y divided by both standard deviations.

    Raw covariance values can be hard to interpret, which is why
    correlation is the more commonly reported measure: it is unitless and
    ranges from -1 (perfect anti-correlation) to 1 (perfect correlation).
    Like covariance, it is sensitive to outliers.

    When either input has no variation the result is 0.
    """
    sigma_x, sigma_y = standard_deviation(x), standard_deviation(y)
    both_vary = sigma_x > 0 and sigma_y > 0
    # Only evaluate the ratio when both divisors are strictly positive.
    return covariance(x, y) / sigma_x / sigma_y if both_vary else 0
def correlation(x, y):
    """
    Normalise covariance(x, y) by the standard deviations of x and y.

    Covariance on its own is awkward to interpret, so correlation is the
    measure more often used: it carries no units and runs from -1
    (perfect anti-correlation) to 1 (perfect correlation). It remains
    sensitive to outliers.

    Returns 0 if either input shows no variation.
    """
    sd_x = standard_deviation(x)
    sd_y = standard_deviation(y)
    if sd_x > 0 and sd_y > 0:
        scaled_by_x = covariance(x, y) / sd_x
        return scaled_by_x / sd_y
    return 0  # at least one input has no variation
Esempio n. 5
0
def scale(data_matrix):
    """Return the per-column means and standard deviations of data_matrix.

    Example input: [[63, 67, 70], [160, 170.2, 177.8], [150, 160, 171]].

    Returns a pair (means, stdevs), each a list with one entry per column
    as reported by shape(data_matrix).
    """
    _, column_count = shape(data_matrix)
    column_indices = range(column_count)
    means = [mean(get_column(data_matrix, col)) for col in column_indices]
    stdevs = [
        standard_deviation(get_column(data_matrix, col))
        for col in column_indices
    ]
    return means, stdevs
Esempio n. 6
0
# Repeatedly take a bootstrap sample and re-estimate the coefficients on it.
# If the coefficient of one of the independent vars doesn't vary much across
# samples, then we can be confident that our estimate is relatively tight.
# If the coefficient varies greatly across samples, then we can't be at all
# confident in our estimate.
# Seed the RNG with a fixed value before sampling.
random.seed(0)
# Pair each stats row with its badge count and pass the pairs, the estimator
# and the literal 10 to bootstrap_statistic.
# NOTE(review): 10 is presumably the number of bootstrap samples -- confirm
# against bootstrap_statistic's signature.
bootstrap_betas = bootstrap_statistic(
    list(zip(trainer_party_stats, trainer_badge_counts)), estimate_sample_beta,
    10)

print('bootstrap betas:')
for beta in bootstrap_betas:
    print('beta = %s' % beta)

# Standard deviation of each coefficient position (indexes 0, 1 and 2) taken
# across all bootstrap betas; assumes every beta has at least three entries.
bootstrap_standard_errors = [
    standard_deviation([beta[index] for beta in bootstrap_betas])
    for index in range(3)
]

print('standard errors: %s' % bootstrap_standard_errors)


# We can then evaluate the meaningfulness of the betas
# with the following calculations
def p_value(beta_hat_j, sigma_hat_j):
    """Return twice a tail value of normal_cdf at beta_hat_j / sigma_hat_j.

    For a positive coefficient the upper tail (1 - normal_cdf) is doubled;
    otherwise the value of normal_cdf itself is doubled.

    NOTE(review): presumably normal_cdf is the standard normal CDF, which
    would make this a two-sided p-value -- confirm against its definition.
    """
    ratio = beta_hat_j / sigma_hat_j
    lower_tail = normal_cdf(ratio)
    if beta_hat_j > 0:
        return 2 * (1 - lower_tail)
    return 2 * lower_tail

def least_squares_fit(xs, ys):
    """Return (alpha, beta) for the line ys ~ alpha + beta * xs.

    beta is correlation(xs, ys) times standard_deviation(ys) divided by
    standard_deviation(xs); alpha is mean(ys) - beta * mean(xs).
    """
    strength = correlation(xs, ys)
    spread_ys = standard_deviation(ys)
    spread_xs = standard_deviation(xs)
    # Multiply before dividing, matching left-to-right evaluation.
    slope = strength * spread_ys / spread_xs
    intercept = mean(ys) - slope * mean(xs)
    return intercept, slope