def main():
    """Print skewness statistics for the interpolated sample and plot its CDF.

    Reads the table with hinc.ReadData, builds a sample with
    InterpolateSample using log_upper=6.0, prints the median, both skewness
    measures, the mean and the CDF value at the mean, then shows the CDF.
    """
    income_table = hinc.ReadData()
    log_sample = InterpolateSample(income_table, log_upper=6.0)
    log_cdf = thinkstats2.Cdf(log_sample)

    sample_median = thinkstats2.Median(log_sample)
    print("median", sample_median)

    pearson_skew = thinkstats2.PearsonMedianSkewness(log_sample)
    print("pearson's median skewness", pearson_skew)

    moment_skew = thinkstats2.Skewness(log_sample)
    print("skewness", moment_skew)

    # The mean is needed twice: once for display, once as the CDF argument.
    cdf_mean = log_cdf.Mean()
    print("mean", cdf_mean)

    print(
        "the higher our log_upper, the more right-skewed (according to g_1) "
        "or at least less left-skewed (according to g_p) things get"
    )
    print("the mean moves to the right a bit, too.")

    below_mean = log_cdf.Prob(cdf_mean)
    print("proportion of the population with income < mean", below_mean)
    print(
        "the higher the upper bound, the greater the proprtion below the mean."
    )

    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')
def Summarize(data):
    """Prints summary statistics and returns the mean and median.

    data: object with .mean() and .std() methods that thinkstats2 can also
          process (presumably a pandas Series or NumPy array -- confirm
          against callers)

    returns: tuple of (mean, median)
    """
    center = data.mean()
    spread = data.std()
    middle = thinkstats2.Median(data)

    # Report the three precomputed values in a fixed order.
    for label, value in (('mean', center), ('std', spread), ('median', middle)):
        print(label, value)

    skew = thinkstats2.Skewness(data)
    print('skewness', skew)

    pearson_skew = thinkstats2.PearsonMedianSkewness(data)
    print('pearson skewness', pearson_skew)

    return center, middle
def Summarize(data):
    """Prints the mean, std, median and two skewness measures of a sample.

    data: pandas Series

    returns: (mean, median) pair
    """
    # Dict insertion order fixes the order in which the values are printed.
    summary = {
        'mean': data.mean(),
        'std': data.std(),
        'median': thinkstats2.Median(data),
    }
    for name, value in summary.items():
        print(name, value)

    moment_skew = thinkstats2.Skewness(data)
    print('skewness', moment_skew)

    median_skew = thinkstats2.PearsonMedianSkewness(data)
    print('pearson skewness', median_skew)

    return summary['mean'], summary['median']
def describe_inc_dist(log_upper):
    """Prints summary statistics of the income sample for one upper bound.

    Builds a sample with hinc2.InterpolateSample from the module-level
    name ``df`` (not defined in this function), raises 10 to each sample
    value to obtain incomes, and prints the mean, median, skewness,
    Pearson median skewness and the CDF value at the mean.

    log_upper: value passed to InterpolateSample as ``log_upper`` and
               echoed at the top of the report

    returns: None
    """
    # Use the parameter itself. The previous body read a name ``j`` that is
    # not defined in this function, so the argument was silently ignored
    # (or a NameError was raised when no global ``j`` existed).
    log_sample = hinc2.InterpolateSample(df, log_upper=log_upper)
    incomes = np.power(10, log_sample)

    inc_mean = thinkstats2.Mean(incomes)
    inc_med = thinkstats2.Median(incomes)
    inc_skew = thinkstats2.Skewness(incomes)
    inc_pearskew = thinkstats2.PearsonMedianSkewness(incomes)

    print('log_upper = ', log_upper)
    print('Mean Income: ', inc_mean)
    print('Median Income: ', inc_med)
    print('Skewness: ', inc_skew)
    print('Pearson Median Skewness: ', inc_pearskew)

    cdf = thinkstats2.Cdf(incomes)
    # NOTE(review): Prob presumably returns a fraction in [0, 1] rather than
    # a percentage, despite the "Pct." label -- confirm before relabelling.
    inc_below_mean = cdf.Prob(inc_mean)
    print('Pct. below mean: ', inc_below_mean)
    print('\n')
def main():
    """Plots the CDF of the interpolated sample and prints income statistics.

    Reads the table with hinc.ReadData, builds a sample with
    InterpolateSample using log_upper=6.0, shows its CDF, then raises 10 to
    each sample value and prints the median, mean, both skewness measures
    and the CDF value at the mean. Finally draws an estimated PDF.
    """
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)
    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')

    sample = np.power(10, log_sample)
    mean = np.mean(sample)
    cdf = thinkstats2.Cdf(sample)

    # print() calls instead of Python 2 print statements: the statement form
    # is a SyntaxError on Python 3; the printed text is unchanged.
    print("Median:", np.median(sample))
    print("Mean:", mean)
    print("Skewness:", thinkstats2.Skewness(sample))
    print("Pearson's Skewness:", thinkstats2.PearsonMedianSkewness(sample))
    # NOTE(review): cdf[mean] presumably yields a fraction in [0, 1], not a
    # percentage, despite the label -- confirm before relabelling.
    print("Percent of people with incomes <= mean:", cdf[mean])

    pdf = thinkstats2.EstimatedPdf(sample)
    thinkplot.Pdf(pdf)
# NOTE(review): lams, n and lam are not defined in the visible code; they
# must be set by earlier statements -- confirm.
SampleDistrPLot(lams, n, lam)
thinkplot.Config(
    xlabel='L estimate',
    ylabel='CDF',
    title='Sampling distribution',
    xlim=[0, 4],
    legend=True,
)

# --- Chapter 6, Exercise 1
df = hinc.ReadData()
log_sample = hinc2.InterpolateSample(df, log_upper=6.0)
# Raise 10 to each interpolated value before summarizing.
sample = np.power(10, log_sample)

print('Mean = ', sample.mean())
print('Median =', thinkstats2.Median(sample))
print('Skewness =', thinkstats2.Skewness(sample))
print('Pearson Median Skweness =', thinkstats2.PearsonMedianSkewness(sample))

income_cdf = thinkstats2.Cdf(sample)
# CDF value at the mean, scaled by 100.
print(income_cdf.Prob(sample.mean()) * 100)


# --- Chapter 8, Exercise 3
def SimulateGame(lam):
    """Counts simulated goals within one unit of time.

    Gaps between goals are drawn with random.expovariate(lam); a goal is
    counted for every draw whose cumulative time does not exceed 1.

    lam: rate parameter passed to random.expovariate

    returns: int, number of goals counted
    """
    scored = 0
    elapsed = random.expovariate(lam)
    # Same exit test as "break when elapsed > 1"; each pass scores one goal
    # and then draws the waiting time to the next one.
    while not elapsed > 1:
        scored += 1
        elapsed += random.expovariate(lam)
    return scored