def generate_sample(draw=False):
    """Draw a sample from a three-component 2-D Gaussian mixture.

    500 component labels are drawn with ``Bootstrapper.sample_discrete``
    using weights (.2, .5, .3). For each of the labels 0, 1 and 2, as many
    points as that label occurred are drawn from a bivariate normal with
    identity covariance and mean (0, 0), (3, 0) and (0, 3) respectively.

    Args:
        draw: When truthy, the sample is passed to ``draw_sample`` before
            it is returned.

    Returns:
        A ``pandas.DataFrame`` with one row per generated point. Rows are
        grouped by component (all of component 0 first, then 1, then 2);
        they are not shuffled.
    """
    weights = pd.Series([.2, .5, .3])
    total_points = 500

    # NOTE(review): presumably sample_discrete returns labels taken from the
    # index of ``weights`` (0, 1, 2) -- confirm against Bootstrapper.
    labels = pd.Series(Bootstrapper().sample_discrete(weights, total_points))

    identity_cov = [[1, 0], [0, 1]]
    component_means = ([0, 0], [3, 0], [0, 3])

    # Count every component up front, before any normal draws happen.
    component_sizes = [
        labels[labels == label].count()
        for label in range(len(component_means))
    ]

    points = []
    for mean, size in zip(component_means, component_sizes):
        points.extend(
            np.random.multivariate_normal(mean=mean, cov=identity_cov, size=size)
        )

    sample = pd.DataFrame(points)
    if draw:
        draw_sample(sample)
    return sample
def part1():
    """Sample a fixed discrete distribution at several sizes and plot them.

    The distribution assigns probabilities 0.1, 0.2, 0.3 and 0.4 to the
    values 1, 2, 3 and 4. For each sample size in 100, 200, ..., 500 a
    sample is drawn with ``Bootstrapper.sample_discrete``; the resulting
    ``{size: sample}`` mapping is handed to ``visualizer.plot_params_hist``.
    """
    probabilities = pd.Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    sample_sizes = (100, 200, 300, 400, 500)
    sampler = Bootstrapper()

    # Keyed by sample size, in increasing order of size.
    samples_by_size = {}
    for size in sample_sizes:
        samples_by_size[size] = sampler.sample_discrete(probabilities, size)

    visualizer.plot_params_hist(samples_by_size)