def test(zpoints, data, cdf, rvs, ddof = 1): obs_freq = fp.calculate_data_freq(zpoints, data) exp_prob = fp.calculate_exp_prob(zpoints, cdf) exp_freq = fp.convert_prob_2_freq(exp_prob, len(data)) print "Observations freq", obs_freq print "Expected freq", exp_freq print "Run Pearson test" chisq, p = stats.chisquare(obs_freq, exp_freq, ddof) print "p", p print "chisq", chisq chi2val = stats.chi2.ppf(0.95, len(obs_freq) - 1 - ddof) print "chi2 border value", chi2val print "H0 is accepted" if chisq < chi2val else "H0 is rejected " obs_data = data obs_data.sort() rand_data = rvs(len(obs_data)) rand_data = [x if x > 1 else 1 for x in rand_data] rand_data.sort() x_values = xrange(len(obs_data)) plt.ylabel("age") plt.plot(obs_data, 'b-') plt.plot(rand_data, 'ro') plt.show()
print "Autumn" low_filter = 180 zpoints = [216, 238, 261] workdays, holidays = fp.load_data() print "Workdays loaded:", len(workdays) print "Holidays loaded:", len(holidays) print s1, s2, s3, s4 = fp.get_season_data(workdays) #fp.plot_seasons_data(s1, s2, s3, s4) alldata = s4 data = [x for x in alldata if x > low_filter] obs_freq = fp.calculate_data_freq(zpoints, data) print "Observations freq", obs_freq nobs, (min, max), mean, variance, s, k = stats.describe(data) std = math.sqrt(variance) print "Nobs", nobs print "Mean", mean print "Variance", variance print exp_prob = fp.calculate_exp_prob(zpoints, lambda x: stats.norm.cdf(x, mean, std)) print "Expected prob", exp_prob exp_freq = fp.convert_prob_2_freq(exp_prob, nobs) print "Expected freq", exp_freq print