def scale(data_matrix):
    """Return the per-column means and standard deviations of data_matrix.

    The column count is the second value returned by shape(data_matrix).
    Each column is fetched with get_column(data_matrix, j).

    Returns:
        A (means, stdevs) tuple of two lists, each with one entry per
        column, in column order.
    """
    # Only the column count is needed; the row count is discarded.
    _, column_count = shape(data_matrix)
    column_indices = range(column_count)

    means = [mean(get_column(data_matrix, col)) for col in column_indices]
    stdevs = [
        standard_deviation(get_column(data_matrix, col))
        for col in column_indices
    ]
    return means, stdevs
Ejemplo n.º 2
0
def least_squares_fit(x, y):
    """Return (alpha, beta) for the line y = alpha + beta * x fitted to x, y.

    beta is correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    and alpha is mean(y) - beta * mean(x).

    NOTE(review): there is no guard against standard_deviation(x) being
    zero; what happens then depends on the helpers -- confirm with callers.
    """
    r_xy = correlation(x, y)
    spread_y = standard_deviation(y)
    spread_x = standard_deviation(x)

    # Same left-to-right evaluation as a single expression: (r * sy) / sx.
    slope = r_xy * spread_y / spread_x
    intercept = mean(y) - slope * mean(x)
    return intercept, slope
def random_distances(dim, num_pairs):
    """Return a list of num_pairs distances between pairs of random points.

    Each entry is distance(p, q), where p and q are two separate
    random_point(dim) results generated for that entry.
    """
    results = []
    for _ in range(num_pairs):
        first_point = random_point(dim)
        second_point = random_point(dim)
        results.append(distance(first_point, second_point))
    return results


# For each dimension from 1 to 100, sample 10,000 random point pairs, record
# the average and minimum pairwise distance, and plot both against dimension.
dimensions = range(1, 101)

avg_distances = []
min_distances = []

# Seed the global RNG so runs are repeatable.
# NOTE(review): presumably random_point draws from `random` -- confirm.
random.seed(0)
for dim in dimensions:
    distances = random_distances(dim, 10000)  # 10,000 random pairs

    # Compute each statistic once per dimension instead of re-scanning the
    # 10,000-element list for every use below.
    avg_distance = mean(distances)
    min_distance = min(distances)

    avg_distances.append(avg_distance)  # track the average
    min_distances.append(min_distance)  # track the minimum
    print(dim, min_distance, avg_distance,
          min_distance / avg_distance)
    print("\n")

# Ratio of closest distance to average distance, one entry per dimension.
min_avg_ratio = [
    min_dist / avg_dist
    for min_dist, avg_dist in zip(min_distances, avg_distances)
]

plt.plot(dimensions, avg_distances, color='green', linestyle='solid')
plt.plot(dimensions, min_distances, color='blue', linestyle='solid')
plt.title("10000 Random Distances")
plt.xlabel("# of dimensions")
plt.show()
def de_mean(x):
    """Return a new list holding each element of x minus bso.mean(x).

    Subtracting the mean recentres the data so the result has mean 0.
    """
    center = bso.mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted
        18, 16, 13, 12, 19, 19, 21, 10, 5, 10, 11, 13, 14, 15, 11, 10, 16, 9,
        10, 10, 10, 10, 7, 7, 5, 7, 4, 9, 10, 11, 14, 14, 13, 10, 9, 7, 4, 4,
        7, 9, 1, 1, 10, 10, 11, 4, 5, 7, 1, 1
    ]
    total_balls_faced = sum(balls_per_innings)
    total_runs_scored = sum(bso.runs_scored)

    print("Total balls faced: ", total_balls_faced)
    print("Total runs: ", total_runs_scored)

    print("Career Strike rate: ",
          strike_rate(total_runs_scored, total_balls_faced))

    outlier = bso.runs_scored.index(219)  # index of outlier

    num_friends_good = [
        x for i, x in enumerate(bso.runs_scored) if i != outlier
    ]

    daily_minutes_good = [
        x for i, x in enumerate(balls_per_innings) if i != outlier
    ]

    #Covariance measures how two variables vary in tandems from their means
    print("Covariation: ", covariance(bso.runs_scored, balls_per_innings))

    print("Correlation: ", correlation(bso.runs_scored, balls_per_innings))

    x = [3, 4, 2, 1, 5]
    print("m:", bso.mean(x))
    print("dm: ", de_mean(x))