def MakeHists(live):
    """Plot and save one histogram per variable of the live-birth records.

    For each of birthwgt_lb, birthwgt_oz, agepreg and prglngth a Hist is
    built from the column of the same name, drawn, and saved under its own
    root name. agepreg values are passed through np.floor before counting.

    live: DataFrame of live births
    """
    # Each entry: (column, x-axis label, Save root, extra Save options,
    # whether to floor the values before building the Hist).
    plot_specs = [
        ("birthwgt_lb", "pounds", "first_wgt_lb_hist",
         {"axis": [-1, 14, 0, 3200]}, False),
        ("birthwgt_oz", "ounces", "first_wgt_oz_hist",
         {"axis": [-1, 16, 0, 1200]}, False),
        # No explicit axis limits are passed for the age plot.
        ("agepreg", "years", "first_agepreg_hist", {}, True),
        ("prglngth", "weeks", "first_prglngth_hist",
         {"axis": [-1, 53, 0, 5000]}, False),
    ]

    for column, xlabel, root, save_options, floor_values in plot_specs:
        # Columns are read one at a time, in plotting order.
        values = getattr(live, column)
        if floor_values:
            values = np.floor(values)

        hist = mystats.Hist(values, label=column)
        myplots.Hist(hist)
        myplots.Save(root=root, xlabel=xlabel, ylabel="frequency",
                     **save_options)
def RunModel(self):
    """Simulate one batch of draws from the values 1 through 6.

    The number of draws equals sum(self.data). Draws are made with
    replacement using np.random.choice, then tallied with mystats.Hist.

    returns: the result of Hist.Freqs for the values 1..6, in that order
    """
    faces = list(range(1, 7))
    num_rolls = sum(self.data)
    simulated = np.random.choice(faces, num_rolls, replace=True)
    return mystats.Hist(simulated).Freqs(faces)
def RunModel(self):
    """Simulate a sequence of "H"/"T" draws and count each outcome.

    self.data is unpacked as a (heads, tails) pair; the number of simulated
    draws is their sum. Each draw is one random.choice("HT") call.

    returns: pair of the Hist entries for "H" and "T", in that order
    """
    observed_heads, observed_tails = self.data
    num_flips = observed_heads + observed_tails

    flips = [random.choice("HT") for _ in range(num_flips)]
    counts = mystats.Hist(flips)

    # Look up "H" first, then "T", matching the order of self.data.
    return tuple(counts[side] for side in "HT")
def MakeComparison(firsts, others):
    """Draw pregnancy-length histograms for two groups on one figure.

    The "first" histogram is drawn right-aligned and the "other" histogram
    left-aligned, both with the same bar width, and the figure is saved
    under the root "first_nsfg_hist".

    firsts: DataFrame
    others: DataFrame
    """
    bar_width = 0.45

    # Both Hists are built before any plotting call is made.
    groups = ((firsts, "first", "right"), (others, "other", "left"))
    hists = [
        (mystats.Hist(frame.prglngth, label=label), align)
        for frame, label, align in groups
    ]

    myplots.PrePlot(2)
    for hist, align in hists:
        myplots.Hist(hist, align=align, width=bar_width)

    myplots.Save(
        root="first_nsfg_hist",
        title="Histogram",
        xlabel="weeks",
        ylabel="frequency",
        axis=[27, 46, 0, 2700],
    )
def PrintExtremes(live):
    """Plot the pregnancy-length histogram, then print its extremes.

    The histogram is saved under the root "first_nsfg_hist_live". After
    that, the pairs returned by hist.Smallest(10) and hist.Largest(10) are
    printed one per line, each group under its own heading.

    live: DataFrame of live births
    """
    hist = mystats.Hist(live.prglngth)
    myplots.Hist(hist, label="live births")
    myplots.Save(
        root="first_nsfg_hist_live",
        title="Histogram",
        xlabel="weeks",
        ylabel="frequency",
    )

    def _print_group(heading, select):
        # Print the heading first, then fetch and print the ten pairs.
        print(heading)
        for weeks, freq in select(10):
            print(weeks, freq)

    _print_group("Shortest lengths:", hist.Smallest)
    _print_group("Longest lengths:", hist.Largest)
def ChiSquared(self, lengths):
    """Compute a chi-squared statistic for a sample of lengths.

    Observed counts come from Hist.Freqs over self.values; expected counts
    are self.expected_probs scaled by the sample size. The statistic is the
    sum of (observed - expected)**2 / expected.

    lengths: sequence of values to tally; must support len()

    returns: the summed statistic
    """
    counts = mystats.Hist(lengths).Freqs(self.values)
    observed = np.array(counts)

    # NOTE(review): assumes self.expected_probs is array-like and supports
    # scalar multiplication (e.g. a NumPy array or Series) -- confirm.
    expected = self.expected_probs * len(lengths)

    deviations = observed - expected
    return sum(deviations**2 / expected)