def run_single_demension_data_process():
    """Plot histograms of one uniform sample and one normal-shaped sample.

    Seeds ``random`` with 0, draws 10000 values from
    ``200 * random.random() - 100`` and 10000 values from
    ``57 * inverse_normal_cdf(random.random())``, then hands each sample to
    ``plot_histogram`` with a bucket size of 10.
    """
    sample_size = 10000
    random.seed(0)

    # Every uniform draw is taken before the first normal draw, so the
    # seeded random stream is consumed in a fixed order.
    uniform = []
    for _ in range(sample_size):
        uniform.append(200 * random.random() - 100)

    normal = []
    for _ in range(sample_size):
        normal.append(57 * inverse_normal_cdf(random.random()))

    for sample, caption in ((uniform, "Uniform values"), (normal, "Normal values")):
        plot_histogram(sample, 10, caption)
def compare_two_distributions():
    """Plot histograms of a uniform integer sample and a normal-shaped sample.

    Seeds ``random`` with 0, draws 200 integers with
    ``random.randrange(-100, 101)`` and 200 values from
    ``57 * inverse_normal_cdf(random.random())``, then plots each sample
    with ``plot_histogram`` using a bucket size of 10.
    """
    random.seed(0)
    uniform = [random.randrange(-100, 101) for _ in range(200)]
    normal = [57 * inverse_normal_cdf(random.random()) for _ in range(200)]

    # The helper is spelled ``plot_histogram``; the previous
    # ``plot_histrogram`` spelling does not match it.
    plot_histogram(uniform, 10, "Uniform Histogram")
    plot_histogram(normal, 10, "Normal Histogram")
def random_normal(*dims: int, mean: float = 0.0, variance: float = 1.0) -> Tensor:
    """Build a nested list of random draws whose shape is given by ``dims``.

    Each leaf value is ``mean + variance * inverse_normal_cdf(random.random())``.
    Note that ``variance`` is applied as a plain multiplier on the draw.
    With more than one dimension, the function recurses on the trailing
    dimensions once per index of the first dimension.
    """
    if len(dims) != 1:
        # One sub-tensor of shape dims[1:] for each index along dims[0].
        return [
            random_normal(*dims[1:], mean=mean, variance=variance)
            for _ in range(dims[0])
        ]

    row = []
    for _ in range(dims[0]):
        draw = inverse_normal_cdf(random.random())
        row.append(mean + variance * draw)
    return row
def compare_two_distributions():
    """Draw a uniform and a normal-shaped sample and plot both histograms.

    After seeding ``random`` with 0, 200 integers come from
    ``random.randrange(-100, 101)`` and 200 values come from
    ``57 * inverse_normal_cdf(random.random())``. Both samples are plotted
    through ``plot_histogram`` with a bucket size of 10.
    """
    n_samples = 200
    random.seed(0)

    uniform = [random.randrange(-100, 101) for _ in range(n_samples)]
    normal = [57 * inverse_normal_cdf(random.random()) for _ in range(n_samples)]

    charts = (
        (uniform, "Uniform Histogram"),
        (normal, "Normal Histogram"),
    )
    for values, title in charts:
        plot_histogram(values, 10, title)
def compare_two_distributions():
    """Plot a fixed data set, then a uniform and a normal-shaped sample.

    ``random`` is seeded with 0 before the two 200-element samples are
    drawn. The hard-coded ``data`` list is plotted first with a bucket size
    of 2; the random samples follow with a bucket size of 10.
    """
    random.seed(0)
    uniform = [random.randrange(-100, 101) for _ in range(200)]
    normal = [57 * inverse_normal_cdf(random.random()) for _ in range(200)]

    # Hand-entered values; independent of the random stream above.
    data = [
        1, 2, 5, 8, 6, 98, 45, 4, 8, 2, 2, 5, 8, 2, 36, 9, 4,
        1, 2, 5, 8, 3, 2, 5, 99, 8, 45, 12, 3, 4, 84, 51, 2, 3,
    ]

    charts = (
        (data, 2, "Mydata"),
        (uniform, 10, "Uniform Histogram"),
        (normal, 10, "Normal Histogram"),
    )
    for values, bucket_size, title in charts:
        plot_histogram(values, bucket_size, title)
def compare_two_distributions():
    """Plot a uniform sample, a normal-shaped sample and a fixed data set.

    ``random`` is seeded with 0 before the two 200-element samples are
    drawn. The random samples are plotted with a bucket size of 10, the
    hard-coded ``data1`` list with a bucket size of 3, and ``plt.show()``
    is called once at the end.
    """
    # Hand-entered values; independent of the random stream below.
    data1 = [
        3, 4, 5, 9, 11, 13, 15, 17, 19, 35, 62, 48,
        25, 65, 95, 15, 88, 33, 78, 66, 99, 100, 101, 102,
    ]

    random.seed(0)
    uniform = [random.randrange(-100, 101) for _ in range(200)]
    normal = [57 * inverse_normal_cdf(random.random()) for _ in range(200)]

    charts = (
        (uniform, 10, "Uniform Histogram"),
        (normal, 10, "Normal Histogram"),
        (data1, 3, "My Data"),
    )
    for values, bucket_size, title in charts:
        plot_histogram(values, bucket_size, title)
    plt.show()
def normal_upper_bound(probability, mu=0, sigma=1):
    """Return the z for which P(Z <= z) = probability.

    Args:
        probability: Target cumulative probability.
        mu: Mean passed through to ``inverse_normal_cdf`` (default 0).
        sigma: Standard deviation passed through to ``inverse_normal_cdf``
            (default 1).

    Returns:
        Whatever ``inverse_normal_cdf(probability, mu, sigma)`` returns.
    """
    # Forward the caller's mu and sigma; hard-coding 0 and 1 here would
    # silently ignore non-default arguments.
    return inverse_normal_cdf(probability, mu, sigma)
def random_normal(mu: float = 0, sigma: float = 1) -> float:
    """Draw one random sample X ~ Normal(mu, sigma).

    A uniform draw from ``random.random()`` is mapped through
    ``inverse_normal_cdf`` with the given ``mu`` and ``sigma``.
    """
    uniform_draw = random.random()
    return inverse_normal_cdf(uniform_draw, mu, sigma)
def random_normal():
    """Return one random draw from a standard normal distribution.

    The draw is produced by feeding a ``random.random()`` value to
    ``inverse_normal_cdf``.
    """
    uniform_draw = random.random()
    return inverse_normal_cdf(uniform_draw)
def normal_lower_bound(probability, mu=0, sigma=1):
    """Return the value that has ``probability`` of the mass to its right.

    The right-hand mass is converted to the left-hand mass
    ``1 - probability``, which is then looked up with ``inverse_normal_cdf``
    for the given ``mu`` and ``sigma``.
    """
    left_mass = 1 - probability
    return inverse_normal_cdf(left_mass, mu, sigma)
def main():
    """Run this module's demo steps in order.

    Saves a uniform histogram, a normal histogram and a scatterplot matrix
    as PNG files under ``im/``, prints two correlations, parses
    ``comma_delimited_stock_prices.csv`` row by row, collects primes below
    100_000 while updating a tqdm description, and finally asserts bounds on
    the first principal component of ``pca_data``.

    Relies on names defined outside this function: ``plt``,
    ``plot_histogram``, ``xs``, ``ys1``, ``ys2``, ``random_normal``,
    ``StockPrice``, ``try_parse_row``, ``tqdm``, ``de_mean``, ``pca_data``
    and ``first_principal_component``.
    """

    # I don't know why this is necessary
    plt.gca().clear()
    plt.close()

    import random
    from probability import inverse_normal_cdf

    # Seed so the samples below are reproducible from run to run.
    random.seed(0)

    # uniform between -100 and 100
    uniform = [200 * random.random() - 100 for _ in range(10000)]

    # normal distribution with mean 0, standard deviation 57
    normal = [57 * inverse_normal_cdf(random.random()) for _ in range(10000)]

    plot_histogram(uniform, 10, "Uniform Histogram")

    # Save the current figure, then clear and close it before the next plot.
    plt.savefig('im/working_histogram_uniform.png')
    plt.gca().clear()
    plt.close()

    plot_histogram(normal, 10, "Normal Histogram")

    plt.savefig('im/working_histogram_normal.png')
    plt.gca().clear()

    from statistics import correlation

    # xs, ys1 and ys2 are not defined in this function; they come from the
    # enclosing module.
    print(correlation(xs, ys1))  # about 0.9
    print(correlation(xs, ys2))  # about -0.9

    from typing import List

    # Just some random data to show off correlation scatterplots
    num_points = 100

    def random_row() -> List[float]:
        """Return a 4-element row whose later entries depend on earlier ones."""
        row = [0.0, 0, 0, 0]
        row[0] = random_normal()
        row[1] = -5 * row[0] + random_normal()
        row[2] = row[0] + row[1] + 5 * random_normal()
        # Two-valued column: 6 when row[2] exceeds -2, otherwise 0.
        row[3] = 6 if row[2] > -2 else 0
        return row

    # Re-seed so the scatterplot data does not depend on the draws above.
    random.seed(0)
    # each row has 4 points, but really we want the columns
    corr_rows = [random_row() for _ in range(num_points)]

    # Transpose the rows into columns.
    corr_data = [list(col) for col in zip(*corr_rows)]

    # corr_data is a list of four 100-d vectors
    num_vectors = len(corr_data)
    fig, ax = plt.subplots(num_vectors, num_vectors)

    for i in range(num_vectors):
        for j in range(num_vectors):

            # Scatter column_j on the x-axis vs column_i on the y-axis,
            if i != j: ax[i][j].scatter(corr_data[j], corr_data[i])

            # unless i == j, in which case show the series name.
            else: ax[i][j].annotate("series " + str(i), (0.5, 0.5),
                                    xycoords='axes fraction',
                                    ha="center", va="center")

            # Then hide axis labels except left and bottom charts
            if i < num_vectors - 1: ax[i][j].xaxis.set_visible(False)
            if j > 0: ax[i][j].yaxis.set_visible(False)

    # Fix the bottom right and top left axis labels, which are wrong because
    # their charts only have text in them
    ax[-1][-1].set_xlim(ax[0][-1].get_xlim())
    ax[0][0].set_ylim(ax[0][1].get_ylim())

    # plt.show()

    plt.savefig('im/working_scatterplot_matrix.png')
    plt.gca().clear()
    plt.close()
    plt.clf()

    import csv

    # Rows that try_parse_row accepts are collected here.
    data: List[StockPrice] = []

    with open("comma_delimited_stock_prices.csv") as f:
        reader = csv.reader(f)
        for row in reader:
            maybe_stock = try_parse_row(row)
            # A None result marks a row that could not be parsed; report it
            # and move on instead of failing.
            if maybe_stock is None:
                print(f"skipping invalid row: {row}")
            else:
                data.append(maybe_stock)

    from typing import List

    def primes_up_to(n: int) -> List[int]:
        """Start from [2] and append each i in range(3, n) that no collected prime divides."""
        primes = [2]

        with tqdm.trange(3, n) as t:
            for i in t:
                # i is prime if no smaller prime divides it.
                i_is_prime = not any(i % p == 0 for p in primes)
                if i_is_prime:
                    primes.append(i)

                # Show the running count of primes found so far.
                t.set_description(f"{len(primes)} primes")

        return primes

    my_primes = primes_up_to(100_000)

    # Check the first principal component of the de-meaned pca_data against
    # narrow expected bounds.
    de_meaned = de_mean(pca_data)
    fpc = first_principal_component(de_meaned)
    assert 0.923 < fpc[0] < 0.925
    assert 0.382 < fpc[1] < 0.384
def normal_lower_bound(probability, mu=0, sigma=1):
    """Return the z for which P(Z >= z) = probability.

    The upper-tail probability is turned into the cumulative probability
    ``1 - probability`` and passed to ``inverse_normal_cdf`` together with
    ``mu`` and ``sigma``.
    """
    cumulative = 1 - probability
    return inverse_normal_cdf(cumulative, mu, sigma)
def normal_lower_bound(probability: float,
                       mu: float = 0,
                       sigma: float = 1) -> float:
    """Return the z such that P(Z >= z) equals ``probability``.

    Implemented as the inverse CDF evaluated at the complementary
    probability ``1 - probability``.
    """
    complement = 1 - probability
    lower_bound = inverse_normal_cdf(complement, mu, sigma)
    return lower_bound
def normal_upper_bound(probability, mu=0, sigma=1):
    """Return the z score for which P(Z <= z) = probability.

    This is a direct lookup of ``probability`` in ``inverse_normal_cdf``
    for the given ``mu`` and ``sigma``.
    """
    upper_bound = inverse_normal_cdf(probability, mu, sigma)
    return upper_bound
def plot_histogram(points, bucket_size, title=""):
    """Bucket ``points`` with ``make_histogram`` and show the result as a bar chart.

    The keys of the histogram are used as bar positions, the values as bar
    heights, and ``bucket_size`` as the bar width. The chart is displayed
    immediately with ``plt.show()``.
    """
    buckets = make_histogram(points, bucket_size)
    plt.bar(buckets.keys(), buckets.values(), width=bucket_size)
    plt.title(title)
    plt.show()


random.seed(0)

# Uniform between -100 and 100.
uniform = [200 * random.random() - 100 for _ in range(10000)]

# Normal distribution with mean 0, standard deviation 57.
normal = [57 * inverse_normal_cdf(random.random()) for _ in range(10000)]

plot_histogram(uniform, 10, 'Uniform Histogram')
plot_histogram(normal, 10, 'Normal Histogram')


# Two-dimensional data


def random_normal():
    """Return a random draw from a standard normal distribution."""
    uniform_draw = random.random()
    return inverse_normal_cdf(uniform_draw)


xs = [random_normal() for _ in range(1000)]

# Two noisy series built from xs: ys1 follows x, ys2 follows -x. All of
# ys1 is generated before ys2, which fixes the order of the random draws.
ys1 = [x + random_normal() / 2 for x in xs]
ys2 = [-x + random_normal() / 2 for x in xs]

data = [xs, ys1]
def random_normal():
    """Return ``inverse_normal_cdf`` applied to one ``random.random()`` draw.

    ``inverse_normal_cdf`` is called with its default distribution
    parameters (presumably the standard normal; confirm against its
    definition).
    """
    uniform_draw = random.random()
    return inverse_normal_cdf(uniform_draw)
def normal_lower_bound(probability, mu=0, sigma=1):
    """Return the z for which P(Z >= z) = probability.

    Since P(Z >= z) = 1 - P(Z <= z), the bound is the inverse CDF evaluated
    at ``1 - probability``.
    """
    below = 1 - probability
    return inverse_normal_cdf(below, mu, sigma)
def test_inverse_normal_cdf(self):
    """inverse_normal_cdf should map the Normal(2, 3) CDF value at x = 1 back to 1."""
    # Closed-form normal CDF at x = 1 for mu = 2, sigma = 3:
    # (1 + erf((x - mu) / (sigma * sqrt(2)))) / 2 with (x - mu) / sigma = -1 / 3.
    cdf_at_one = (1 + math.erf(-1 / (3 * math.sqrt(2)))) / 2

    recovered = probability.inverse_normal_cdf(p=cdf_at_one, mu=2, sigma=3)

    self.assertAlmostEqual(1, recovered, places=4)