def RunEstimate(update_func, num_points=31, median_flag=False):
    """Estimates height parameters per group and plots the results.

    For every group returned by ReadHeights, prints a summary, jitters the
    data, builds a Height suite over the ranges chosen by FindPriorRanges,
    updates it with update_func, and reports the marginals of the result.
    Finally, all suites are handed to PlotCoefVariation.

    update_func: which of the update functions to use; called as
                 update_func(suite, xs)
    num_points: number of points in the Suite (in each dimension)
    median_flag: passed through to FindPriorRanges as median_flag
    """
    heights_by_code = ReadHeights(nrows=None)
    # Group codes found in the data map to these names; any other code
    # raises KeyError in the loop below.
    code_to_name = {1: 'male', 2: 'female'}

    # Disabled diagnostic: PlotCdfs(heights_by_code, code_to_name)

    posteriors = {}
    for code, heights in heights_by_code.items():
        name = code_to_name[code]
        print(name, len(heights))
        Summarize(heights)

        # Summary statistics above use the raw values; everything below
        # uses the jittered ones.
        jittered = thinkbayes2.Jitter(heights, 1.3)

        mus, sigmas = FindPriorRanges(
            jittered, num_points, median_flag=median_flag)
        posterior = Height(mus, sigmas, name)
        posteriors[name] = posterior

        update_func(posterior, jittered)
        print('MLE', posterior.MaximumLikelihood())

        PlotPosterior(posterior)

        # Index 0 is reported as mu and index 1 as sigma.
        mu_marginal, sigma_marginal = (
            posterior.Marginal(axis) for axis in (0, 1))
        print('marginal mu', mu_marginal.Mean(), mu_marginal.Var())
        print('marginal sigma', sigma_marginal.Mean(), sigma_marginal.Var())

        # Disabled diagnostic: PlotMarginals(posterior)

    PlotCoefVariation(posteriors)
def RunEstimate(update_func, num_points=31, median_flag=False):
    """Builds and updates one Height suite per group of height data.

    Each group read by ReadHeights is summarized, jittered, and used to
    update a Height suite whose parameter ranges come from FindPriorRanges.

    update_func: which of the update functions to use; called as
                 update_func(suite, xs)
    num_points: number of points in the Suite (in each dimension)
    median_flag: passed through to FindPriorRanges as median_flag
    """
    data = ReadHeights(nrows=None)
    names = {1: 'male', 2: 'female'}

    by_name = {}
    for code, sample in data.items():
        name = names[code]
        print(name, len(sample))
        Summarize(sample)

        # Only the fitting step sees the jittered values.
        noisy = thinkbayes2.Jitter(sample, 1.3)
        mus, sigmas = FindPriorRanges(
            noisy, num_points, median_flag=median_flag)

        by_name[name] = fitted = Height(mus, sigmas, name)
        update_func(fitted, noisy)
        print('MAP', fitted.MaximumLikelihood())

    # Joint distributions of mu and sigma for men and women.  Nothing more
    # is done with them in this version, but the lookups raise KeyError if
    # either group is missing from the data.
    male_suite, female_suite = by_name['male'], by_name['female']
def PlotCdfs(d, labels):
    """Draws one mean-centered CDF per entry of d, then shows the figure.

    For each sequence the mean is computed from the values as given, the
    values are jittered, and that mean is subtracted before the CDF is made.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.Clf()

    for group, values in d.items():
        # The mean is taken before jittering, not after.
        center = thinkbayes2.Mean(values)
        jittered = thinkbayes2.Jitter(values, 1.3)
        centered = [value - center for value in jittered]

        thinkplot.Cdf(thinkbayes2.MakeCdfFromList(centered),
                      label=labels[group])

    thinkplot.Show()
def RunEstimate(update_func, num_points=31, median_flag=False):
    """Runs the whole analysis.

    Fits a Height suite to each group of height data, then plots the CDF
    of the difference between the male and female marginal means, divided
    by the average of the two marginal sigmas.

    update_func: which of the update functions to use; called as
                 update_func(suite, xs)
    num_points: number of points in the Suite (in each dimension)
    median_flag: passed through to FindPriorRanges as median_flag

    Raises KeyError if the data contains a group code other than 1 or 2,
    or if either the 'male' or the 'female' group is missing.
    """
    d = ReadHeights(nrows=None)
    labels = {1: 'male', 2: 'female'}

    suites = {}
    for key, xs in d.items():
        label = labels[key]
        print(label, len(xs))
        Summarize(xs)

        # Summarize sees the raw values; the suite is fitted to the
        # jittered ones.
        xs = thinkbayes2.Jitter(xs, 1.3)

        mus, sigmas = FindPriorRanges(xs, num_points, median_flag=median_flag)
        suite = Height(mus, sigmas, label)
        suites[label] = suite
        update_func(suite, xs)
        print('MAP', suite.MaximumLikelihood())

    suite1 = suites['male']
    suite2 = suites['female']

    # Marginal(0) is treated as mu and Marginal(1) as sigma.
    mu1 = suite1.Marginal(0)
    sigma1 = suite1.Marginal(1)

    mu2 = suite2.Marginal(0)
    sigma2 = suite2.Marginal(1)

    # NOTE(review): relies on the marginal objects supporting -, + and /
    # (presumably thinkbayes2 Pmf arithmetic on distributions) -- confirm.
    diff = mu1 - mu2
    sigma = (sigma1 + sigma2) / 2

    pmf_d = diff / sigma

    thinkplot.Cdf(pmf_d.MakeCdf())
    # The curve drawn above is a CDF, so the y-axis is labelled 'CDF'
    # (it was previously mislabelled 'PMF').
    thinkplot.Show(xlabel='# stddev between means', ylabel='CDF')