# vim:fileencoding=utf-8
"""Plot ECDFs of the 'freq' column and compare hour-change samples via a KS test.

The script loads one dataset export, plots the empirical cumulative
distribution function (ECDF) of

* all 'freq' values,
* the values recorded around the full hour (minute >= 58 or minute <= 5),
* the remaining values (minute between 6 and 57),

and finally runs a two-sample Kolmogorov-Smirnov test.
"""
import matplotlib.pyplot as plt
import numpy as np
import freqanalysis.ecdf as ecdf
import freqanalysis.datatools as datatool
from scipy.stats import ks_2samp

datasetfile = "datasets/20140904-export.txt"

# Single-argument print(...) behaves identically on Python 2 and Python 3.
# The double space reproduces the output of the former
# `print "loading ", datasetfile` statement exactly.
print("loading  %s" % datasetfile)
df = datatool.load_data_as_dataframe(datasetfile)

print("Calculating ECDF of all values")
all_series, yvals = ecdf.get_ecdf(df['freq'])
print("Plotting graph")
ecdf.plot_ecdf_curve(all_series, yvals, color="b", label="Alle Werte")

# Tag every row with the minute of its timestamp so the rows can be split
# into "around the hour change" and "within the hour".
df['minute'] = df.time.apply(lambda x: x.minute)

# Around the hour change: the last two and the first six minutes of an hour.
hour_df = df[(df.minute >= 58) | (df.minute <= 5)]
hour_series, yvals = ecdf.get_ecdf(hour_df['freq'])
ecdf.plot_ecdf_curve(hour_series, yvals, color="r", label="Stundenwechsel")

# Exact complement of the selection above.
not_hour_df = df[(df.minute < 58) & (df.minute > 5)]
not_hour_series, yvals = ecdf.get_ecdf(not_hour_df['freq'])
ecdf.plot_ecdf_curve(not_hour_series, yvals, color="y", linestyle="-",
                     label="unter der Stunde")

print("Null hypothesis: the two samples are drawn from the same continuous distribution.")
# NOTE(review): hour_series is derived from a subset of the rows behind
# all_series, so the two samples overlap. The two-sample KS test assumes
# independent samples -- comparing hour_series with not_hour_series may be
# what is intended. Left unchanged so the computed result stays the same;
# please confirm.
D, p_value = ks_2samp(all_series, hour_series)
# Finish the density figure, then plot and test the ECDFs of the power
# gradients ("momentum") of a typical Friday versus the solar-eclipse day.
# Relies on names created earlier in the script and not shown here:
# f, plt, np, stats, ecdf, friday_momentum_df, eclipse_momentum_df.

# Disabled plotting / axis-formatting code, kept for reference:
#plt.plot(eclipse_momentum_df.s_since_midnight.astype(int),
#         eclipse_momentum_df.momentum, 'r-', label="Momentum")
#hfmt = dates.DateFormatter('%H:%M')
#ax.xaxis.set_major_formatter(hfmt)
# y_formatter = mpl.ticker.ScalarFormatter(useOffset=False)
# ax.yaxis.set_major_formatter(y_formatter)
# ax.grid(True)

# Title and save the density figure, then clear it for the ECDF plot.
f.suptitle("Dichte der Leistungsgradienten")
f.autofmt_xdate()
plt.savefig("images/sonnenfinsternis-dichte-gradienten.png")  #, bbox_inches='tight')
plt.clf()

# ECDF curves of both samples.
friday_series, friday_vals = ecdf.get_ecdf(friday_momentum_df.momentum)
ecdf.plot_ecdf_curve(friday_series, friday_vals, color="b",
                     label="Typischer Freitag")
eclipse_series, eclipse_vals = ecdf.get_ecdf(eclipse_momentum_df.momentum)
ecdf.plot_ecdf_curve(eclipse_series, eclipse_vals, color="r",
                     label="Sonnenfinsternis")

# The reported statistic is the median, so the label says "Median"
# (it previously claimed "Mittelwert", i.e. the mean).
print("Median alle Freitage: %f" % np.median(friday_momentum_df.momentum))
print("Median Sonnenfinsternis: %f" % np.median(eclipse_momentum_df.momentum))

# http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.levene.html#scipy.stats.levene
# Levene's null hypothesis is that the samples have EQUAL variances, so a
# small p-value indicates unequal variances. The result is stored under its
# own names so the Fligner test below cannot overwrite it before the figure
# title is built.
W_levene, p_levene = stats.levene(friday_momentum_df.momentum,
                                  eclipse_momentum_df.momentum,
                                  center='median')
print("Levenes Test auf Gleichheit der Varianz: P=%s (ungleiche Varianz für p<=0.05)"
      % p_levene)

# W and p_val end up holding the Fligner result, exactly as before, in case
# later parts of the script read them.
W, p_val = stats.fligner(friday_momentum_df.momentum,
                         eclipse_momentum_df.momentum)
print("Fliegners Test auf Gleichheit der Varianz: P=%s" % p_val)

# The title names Levene's test, so it must show Levene's p-value
# (it previously showed the Fligner p-value that had replaced it).
f.suptitle("ECDF der Leistungsgradienten: Ungleiche Varianzen (Levene, p=%f)"
           % p_levene)
plt.savefig("images/sonnenfinsternis-ecdf-gradienten.png")  #, bbox_inches='tight')
#hfmt = dates.DateFormatter('%H:%M') #ax.xaxis.set_major_formatter(hfmt) # y_formatter = mpl.ticker.ScalarFormatter(useOffset=False) # ax.yaxis.set_major_formatter(y_formatter) # ax.grid(True) f.suptitle("Dichte der Leistungsgradienten") f.autofmt_xdate() plt.savefig("images/sonnenfinsternis-dichte-gradienten.png" ) #, bbox_inches='tight') plt.clf() friday_series, friday_vals = ecdf.get_ecdf(friday_momentum_df.momentum) ecdf.plot_ecdf_curve(friday_series, friday_vals, color="b", label="Typischer Freitag") eclipse_series, eclipse_vals = ecdf.get_ecdf(eclipse_momentum_df.momentum) ecdf.plot_ecdf_curve(eclipse_series, eclipse_vals, color="r", label="Sonnenfinsternis") print "Mittelwert alle Freitage: %f" % np.median( friday_momentum_df.momentum) print "Mittelwert Sonnenfinsternis: %f" % np.median( eclipse_momentum_df.momentum) # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.levene.html#scipy.stats.levene W, p_val = stats.levene(friday_momentum_df.momentum, eclipse_momentum_df.momentum, center='median') print(