Exemple #1
0
def RunEstimate(update_func, num_points=31, median_flag=False):
    """Runs the whole analysis.

    update_func: which of the update functions to use
    num_points: number of points in the Suite (in each dimension)
    median_flag: forwarded unchanged to FindPriorRanges
    """

    # The height dump step is disabled in this variant.
    # DumpHeights(n=10000000)

    # --- begin injected data source ---
    # The LoadHeights() call is disabled; the data is read from a CSV file
    # instead.
    #d = LoadHeights()
    labels = {1: 'male', 2: 'female'}

    # Function-scope import: pandas is only needed for the CSV load below.
    import pandas as pd
    # The first CSV column is used as the row index.
    df = pd.read_csv(r"../data/BRFSS.csv", sep=',', index_col=0)
    # Keys 1 and 2 match the keys of `labels`; missing values are dropped
    # from the M and F columns before taking the underlying arrays.
    d = {1: df.M.dropna().values, 2: df.F.dropna().values}
    # --- end injected data source ---

    # PlotCdfs(d, labels)

    suites = {}
    for key, xs in d.items():
        name = labels[key]
        print(name, len(xs))
        Summarize(xs)

        # NOTE(review): presumably adds random noise of scale 1.3 to the
        # values -- confirm against thinkstats.Jitter.
        xs = thinkstats.Jitter(xs, 1.3)

        mus, sigmas = FindPriorRanges(xs, num_points, median_flag=median_flag)
        # NOTE(review): the visible code ends here; update_func, suites, mus
        # and sigmas are not used in the portion shown.
Exemple #2
0
def PlotCdfs(d, labels):
    """Plot CDFs for each sequence in a dictionary.

    Jitters the data and subtracts away the mean.

    d: map from key to sequence of values
    labels: map from key to string label (must contain every key of d)
    """
    thinkplot.Clf()
    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() exists only on Python 2.
    for key, xs in d.items():
        # The mean is computed on the raw values, before jitter is applied.
        mu = thinkstats.Mean(xs)
        jittered = thinkstats.Jitter(xs, 1.3)
        centered = [x - mu for x in jittered]
        cdf = thinkbayes.MakeCdfFromList(centered)
        thinkplot.Cdf(cdf, label=labels[key])
    thinkplot.Show()
Exemple #3
0
def RunEstimate(update_func, num_points=31, median_flag=False):
    """Runs the whole analysis.

    update_func: which of the update functions to use; it is called as
                 update_func(suite, xs)
    num_points: number of points in the Suite (in each dimension)
    median_flag: forwarded unchanged to FindPriorRanges
    """
    DumpHeights(n=10000000)
    d = LoadHeights()
    labels = {1: 'male', 2: 'female'}

    # PlotCdfs(d, labels)

    suites = {}
    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() exists only on Python 2.
    for key, xs in d.items():
        name = labels[key]
        print(name, len(xs))
        Summarize(xs)

        xs = thinkstats.Jitter(xs, 1.3)

        # Build the suite over the (mu, sigma) ranges and update it with
        # the jittered data using the caller-selected update function.
        mus, sigmas = FindPriorRanges(xs, num_points, median_flag=median_flag)
        suite = Height(mus, sigmas, name)
        suites[name] = suite
        update_func(suite, xs)
        print('MLE', suite.MaximumLikelihood())

        PlotPosterior(suite)

        # Marginal 0 is reported as mu and marginal 1 as sigma.
        pmf_m = suite.Marginal(0)
        pmf_s = suite.Marginal(1)
        print('marginal mu', pmf_m.Mean(), pmf_m.Var())
        print('marginal sigma', pmf_s.Mean(), pmf_s.Var())

        # PlotMarginals(suite)

    PlotCoefVariation(suites)