def RunEstimate(update_func, num_points=31, median_flag=False):
    """Loads the height data and computes prior ranges for each group.

    Heights are read from a CSV file rather than through
    DumpHeights/LoadHeights.  For each group the values are summarized,
    jittered, and passed to FindPriorRanges; the resulting ranges are not
    used any further within this function.

    update_func: which of the update functions to use (not called here)
    num_points: number of points in the Suite (in each dimension)
    median_flag: passed through to FindPriorRanges
    """
    import pandas as pd

    frame = pd.read_csv(r"../data/BRFSS.csv", sep=',', index_col=0)

    # keys follow the 1/2 coding used by the labels map
    heights = {
        1: frame.M.dropna().values,
        2: frame.F.dropna().values,
    }
    labels = {1: 'male', 2: 'female'}

    # PlotCdfs(heights, labels)

    suites = {}
    for code, values in heights.items():
        name = labels[code]
        print(name, len(values))
        Summarize(values)

        jittered = thinkstats.Jitter(values, 1.3)
        mus, sigmas = FindPriorRanges(
            jittered, num_points, median_flag=median_flag)
def PlotCdfs(d, labels):
    """Plot CDFs for each sequence in a dictionary.

    Jitters the data and subtracts away the mean.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.Clf()

    # items() works on both Python 2 and Python 3; iteritems() does not
    # exist on Python 3 dicts.
    for key, xs in d.items():
        # the mean is taken from the raw values, before jittering
        mu = thinkstats.Mean(xs)
        xs = thinkstats.Jitter(xs, 1.3)
        xs = [x - mu for x in xs]
        cdf = thinkbayes.MakeCdfFromList(xs)
        thinkplot.Cdf(cdf, label=labels[key])

    thinkplot.Show()
def RunEstimate(update_func, num_points=31, median_flag=False):
    """Runs the whole analysis.

    Dumps and reloads the height data, then for each group builds a Height
    suite over the prior ranges, updates it with the jittered data, prints
    the MLE and the marginal means/variances, and plots the posterior.
    Finally plots the coefficient of variation for all suites.

    update_func: which of the update functions to use; called as
                 update_func(suite, xs)
    num_points: number of points in the Suite (in each dimension)
    median_flag: passed through to FindPriorRanges
    """
    DumpHeights(n=10000000)
    d = LoadHeights()
    labels = {1: 'male', 2: 'female'}

    # PlotCdfs(d, labels)

    suites = {}
    # items() works on both Python 2 and Python 3; iteritems() does not
    # exist on Python 3 dicts.
    for key, xs in d.items():
        name = labels[key]
        print(name, len(xs))
        Summarize(xs)

        xs = thinkstats.Jitter(xs, 1.3)

        mus, sigmas = FindPriorRanges(xs, num_points, median_flag=median_flag)
        suite = Height(mus, sigmas, name)
        suites[name] = suite
        update_func(suite, xs)
        print('MLE', suite.MaximumLikelihood())

        PlotPosterior(suite)

        # marginal 0 is reported as mu, marginal 1 as sigma
        pmf_m = suite.Marginal(0)
        pmf_s = suite.Marginal(1)
        print('marginal mu', pmf_m.Mean(), pmf_m.Var())
        print('marginal sigma', pmf_s.Mean(), pmf_s.Var())

        # PlotMarginals(suite)

    PlotCoefVariation(suites)