def reject_outliers(data, m=2):
    """Return the elements of ``data`` lying within ``m`` standard deviations.

    An element is kept when its absolute distance from the mean of ``data``
    is strictly less than ``m * std(data)``.

    Note that the comparison is strict: if every value in ``data`` is
    identical the standard deviation is 0 and nothing is kept.

    :param data: indexable numeric container supporting boolean-mask
        indexing (e.g. a numpy array).
    :param m: number of standard deviations to tolerate.
    :return: ``data`` restricted to the non-outlier elements.
    """
    deviation = abs(data - np.mean(data))
    return data[deviation < m * np.std(data)]


# Overall tracking of best function
best_funcs_nature_tally = defaultdict(lambda: {
    "function_tally": list(np.zeros(len(functions))),
    "avg_mse": 0,
    "avg_mae": 0,
    "total_count": 0,
})
best_funcs_tally = {
    "function_tally": list(np.zeros(len(functions))),
    "avg_mse": 0,
    "avg_mae": 0,
    "total_count": 0,
}

roads = streets + motorways
total_roads = len(roads)
for i, (column, street) in enumerate(roads):
    # Progress line. Formatting everything into one string produces the same
    # output whether ``print`` is the Python 2 statement or the Python 3
    # function (the old ``print (...) % (...), street`` form fails on 3).
    print("%s / %s %s" % (i, total_roads, street))

    training_data = dM.get_data("traffic", "rainfall", [(column, street)],
                                natures, tuple(range(7)), tuple(range(24)))
    validation_data = dM.get_data("traffic_aug13", "rainfall_aug13",
                                  [(column, street)], natures,
                                  tuple(range(7)), tuple(range(24)))
    # Collapse doubled single quotes in the street name now that the
    # queries have been issued.
    street = street.replace("''", "'")

    # Nothing to fit or to validate against for this road.
    if not len(training_data) or not len(validation_data):
        continue

    training_data['dow'] = training_data['dow'].apply(days_to_binary)
    validation_data['dow'] = validation_data['dow'].apply(days_to_binary)

    # Group by the bare column name so the dict keys are plain nature values
    # on every pandas version (a one-element list yields 1-tuple keys on
    # recent pandas).
    training_grouped = {
        nature: nature_df
        for nature, nature_df in training_data.groupby('nature')
    }
    validation_grouped = {
        nature: nature_df
        for nature, nature_df in validation_data.groupby('nature')
    }

    street_function_count = list(np.zeros(len(functions)))
class GraphAnalyzer(Frame):
    """Tk frame for choosing roads, natures, days and hours and plotting them.

    The selection is passed to ``DataManager.get_data("traffic", "rainfall",
    ...)`` and the returned frame's ``speed`` column is scattered against its
    ``depth`` column, optionally with fitted curves of the form
    ``a * exp(-b * x) + c`` (overall and/or per ``nature``).
    """

    def __init__(self, root):
        """Create all widgets on ``root`` and lay them out.

        :param root: the Tk container the widgets are attached to.
        """
        Frame.__init__(self, root)
        self.__root = root
        self.__data_manager = DataManager()
        self.__check_button_type = namedtuple('CheckButtonType', 'widget var')
        self.__natures = [
            "Single Carriageway", "Traffic Island Link", "Dual Carriageway",
            "Roundabout", "Traffic Island Link At Junction", "Slip Road"
        ]
        self.__roads = [
            "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1",
            "HIGH STREET", "LONDON ROAD", "HIGH ROAD", "UXBRIDGE ROAD",
            "STATION ROAD", "BRIGHTON ROAD", "GREEN LANES", "FINCHLEY ROAD",
            "HARROW ROAD", "NORTH CIRCULAR ROAD", "KINGSTON ROAD",
            "PORTSMOUTH ROAD", "HERTFORD ROAD", "STAINES ROAD", "CROYDON ROAD",
            "MAIN ROAD", "CHURCH ROAD", "PARK ROAD"
        ]
        # Roads in this list are queried through the "classification" column,
        # every other road through the "street" column.
        self.__motorways = [
            "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1"
        ]
        self.__init_grid()
        self.__draw_grid()

    def __init_grid(self):
        """Instantiate every widget (list boxes, check boxes, button, text)."""
        # Road list
        self.__roads_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                        height=27, exportselection=0)
        for road in self.__roads:
            self.__roads_list_box.insert('end', road)

        # Nature list
        self.__natures_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                          height=6, width=22,
                                          exportselection=0)
        for nature in self.__natures:
            self.__natures_list_box.insert('end', nature)
        # Start with all natures selected
        self.__natures_list_box.select_set(0, END)

        # Days list
        self.__days_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                       height=8, width=22, exportselection=0)
        for day in ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday']:
            self.__days_list_box.insert('end', day)

        # Hours list
        self.__hours_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                        height=24, width=7, exportselection=0)
        for hour in range(24):
            self.__hours_list_box.insert('end', hour)

        # Check button draw overall
        self.__draw_overall_var = IntVar()
        self.__draw_overall_check_box = Checkbutton(
            self.__root, text="Draw Overall Curve?",
            variable=self.__draw_overall_var,
            onvalue=1, offvalue=0, height=2, width=20)

        # Check button draw nature
        self.__draw_nature_var = IntVar()
        self.__draw_nature_check_box = Checkbutton(
            self.__root, text="Draw Curve Per Nature?",
            variable=self.__draw_nature_var,
            onvalue=1, offvalue=0, height=2, width=20)

        # Check button show data (ticked by default)
        self.__show_data_var = IntVar()
        self.__show_data_var.set(1)
        self.__show_data_check_box = Checkbutton(
            self.__root, text="Show data?",
            variable=self.__show_data_var,
            onvalue=1, offvalue=0, height=2, width=20)

        # Go button
        self.__go_button = Button(self.__root, text='GO',
                                  command=self.__generate_graph)

        # Errors text box
        self.__error_text_box = Text(self.__root, height=28, width=18,
                                     fg="red")
        self.__error_text_box.tag_config('justified', justify=CENTER)

    def __draw_grid(self):
        """Place the widgets created by ``__init_grid`` on the grid."""
        # Roads label and list box
        Label(self.__root, text="Roads", justify=CENTER).grid(row=0, column=0)
        self.__roads_list_box.grid(row=1, column=0, rowspan=27)

        # Natures label and list box
        Label(self.__root, text="Natures",
              justify=CENTER).grid(row=0, column=1)
        self.__natures_list_box.grid(row=1, column=1, rowspan=6)

        # Days label and list box
        Label(self.__root, text="Days", justify=CENTER).grid(row=7, column=1)
        self.__days_list_box.grid(row=8, column=1, rowspan=8)

        # Hours label and list box
        Label(self.__root, text="Hours", justify=CENTER).grid(row=0, column=3)
        self.__hours_list_box.grid(row=1, column=3, rowspan=24)

        # Check boxes
        Label(self.__root, text="Drawing Options",
              justify=CENTER).grid(row=0, column=4)
        self.__draw_overall_check_box.grid(row=1, column=4, rowspan=2)
        self.__draw_nature_check_box.grid(row=3, column=4, rowspan=2)
        self.__show_data_check_box.grid(row=5, column=4, rowspan=2)

        # Go button
        self.__go_button.grid(row=10, column=4)

        # Error Column
        Label(self.__root, text="Error Report", height=1, width=18,
              justify=CENTER).grid(row=0, column=5)
        self.__error_text_box.grid(row=1, column=5, rowspan=28)

    def __generate_graph(self):
        """Callback of the GO button: validate the selection, then plot.

        Any problem is written to the error text box instead of plotting.
        """
        # Get parameters
        selected_roads = [self.__roads_list_box.get(index)
                          for index in self.__roads_list_box.curselection()]
        roads = [
            ("classification" if road in self.__motorways else "street", road)
            for road in selected_roads
        ]
        natures = tuple(self.__natures_list_box.get(index)
                        for index in self.__natures_list_box.curselection())
        days = self.__days_list_box.curselection()
        hours = self.__hours_list_box.curselection()

        # Always start from an empty report so messages from an earlier
        # attempt do not linger after a successful one.
        self.__error_text_box.delete("1.0", END)

        # Argument order must match the signature (hours before days),
        # otherwise the "No hours"/"No days" messages are swapped.
        errors = self.__error_check(roads, natures, hours, days)
        if errors:
            for message in errors:
                self.__error_text_box.insert(END, message + '\n', 'justified')
            return

        # NOTE(review): days are passed before hours to agree with the other
        # get_data call sites in this code base, which pass tuple(range(7))
        # followed by tuple(range(24)) - confirm against DataManager.get_data.
        data = self.__data_manager.get_data("traffic", "rainfall", roads,
                                            natures, days, hours)
        if not len(data):
            # Nothing came back for this selection; plotting would fail.
            self.__error_text_box.insert(END, "No data found" + '\n',
                                         'justified')
            return
        self.__plot_data(data)

    def __error_check(self, roads, natures, hours, days):
        """Return a list of human-readable problems with the selection.

        :param roads: selected roads.
        :param natures: selected natures.
        :param hours: selected hours.
        :param days: selected days.
        :return: list of error strings; empty when the selection is usable.
        """
        errors = []
        if not roads:
            errors.append("No roads selected")
        if not natures:
            errors.append("No natures selected")
        if not hours:
            errors.append("No hours selected")
        if not days:
            errors.append("No days selected")
        something_to_draw = (self.__show_data_var.get()
                             or self.__draw_nature_var.get()
                             or self.__draw_overall_var.get())
        if not something_to_draw:
            errors.append("Nothing to draw")
        return errors

    def __plot_data(self, data):
        """Scatter ``speed`` against ``depth``, coloured by ``nature``.

        Depending on the check boxes the plot contains the raw data, one
        overall fitted curve and/or one fitted curve per nature.

        :param data: DataFrame with ``depth``, ``speed`` and ``nature``
            columns.
        """
        max_depth = data.depth.max()
        max_speed = data.speed.max()

        dfs_to_plot = []
        if self.__show_data_var.get():
            dfs_to_plot.append(data)
        if self.__draw_overall_var.get():
            dfs_to_plot.append(
                self.__get_best_fit_curve(data, max_depth, max_speed,
                                          "Best fit curve"))
        if self.__draw_nature_var.get():
            # Group by the bare column name so ``nature`` is the plain value
            # (a one-element list yields 1-tuple keys on recent pandas).
            for nature, nature_df in data.groupby('nature'):
                dfs_to_plot.append(
                    self.__get_best_fit_curve(nature_df, max_depth,
                                              max_speed, nature))

        data = pd.concat(dfs_to_plot, ignore_index=True)
        # NOTE(review): recent seaborn releases renamed ``size`` to
        # ``height``; kept as-is for the seaborn version this file targets.
        fg = sns.FacetGrid(data=data, hue='nature', aspect=1.9,
                           legend_out=False, size=8)
        fg.map(plt.scatter, 'depth', 'speed', s=20).add_legend(None, "Legend")

        axes = fg.axes
        # Cap the y axis at 120 when the data contains speeds above 200.
        ylim = 120 if max_speed > 200 else max_speed
        xlim = 1.0 if max_depth < 1.0 else 2.0
        axes[0, 0].set_ylim(0, ylim)
        axes[0, 0].set_xlim(0, xlim)
        # Use pyplot directly; the ``sns.plt`` alias is not available in
        # newer seaborn versions.
        plt.show()

    def __get_best_fit_curve(self, data, max_depth, max_speed, nature_str):
        """Fit ``curve_func`` to ``data`` and sample the fitted curve.

        :param data: DataFrame with ``depth`` and ``speed`` columns to fit.
        :param max_depth: the curve is sampled on ``[0, max_depth)``.
        :param max_speed: sampled points with a larger speed are dropped.
        :param nature_str: value stored in the ``nature`` column of the
            result (used as the plot hue/legend entry).
        :return: DataFrame with ``depth``, ``speed``, ``nature`` and
            ``identifier`` columns; empty when no curve can be produced.
        """
        empty_curve = pd.DataFrame({'depth': [], 'speed': [], 'nature': [],
                                    'identifier': []})

        # A zero/NaN depth range would give np.arange a zero/NaN step.
        if not max_depth > 0:
            return empty_curve

        try:
            popt, _ = curve_fit(self.curve_func, data.depth, data.speed)
        except (RuntimeError, TypeError, ValueError):
            # RuntimeError: the optimiser did not converge.
            # TypeError: fewer data points than curve parameters.
            # ValueError: unusable input such as NaNs.
            # In every case just leave this curve out of the plot.
            return empty_curve
        a, b, c = popt

        depths = np.arange(0, max_depth, max_depth / 10000.0)
        speeds = self.curve_func(depths, a, b, c)

        # Drop points above max_speed with one shared mask so that every
        # remaining speed stays paired with the depth it was computed from.
        keep = speeds <= max_speed
        depths = depths[keep]
        speeds = speeds[keep]

        count = len(depths)
        return pd.DataFrame({'depth': depths,
                             'speed': speeds,
                             'nature': [nature_str] * count,
                             'identifier': [''] * count})

    def curve_func(self, x, a, b, c):
        """Exponential model ``a * exp(-b * x) + c`` used for curve fitting."""
        return a * np.exp(-b * x) + c
}) best_funcs_tally = { "function_tally": list(np.zeros(len(functions))), "avg_mse": 0, "avg_mae": 0, "total_count": 0 } roads = streets + motorways total_roads = len(roads) for i, (column, street) in enumerate(roads): print("%s / %s") % (i, total_roads), street training_data = dM.get_data("traffic", "rainfall", [(column, street)], natures, tuple(range(7)), tuple(range(24))) validation_data = dM.get_data("traffic_aug13", "rainfall_aug13", [(column, street)], natures, tuple(range(7)), tuple(range(24))) street = street.replace("''", "'") if not len(training_data) or not len(validation_data): continue training_data['dow'] = training_data['dow'].apply(days_to_binary) validation_data['dow'] = validation_data['dow'].apply(days_to_binary) training_grouped = { nature: nature_df for nature, nature_df in training_data.groupby(['nature'])
class GraphAnalyzer(Frame):
    """Tk frame for choosing roads, natures, days and hours and plotting them.

    The selection is passed to ``DataManager.get_data("traffic", "rainfall",
    ...)`` and the returned frame's ``speed`` column is scattered against its
    ``depth`` column, optionally with fitted curves of the form
    ``a * exp(-b * x) + c`` (overall and/or per ``nature``).
    """

    def __init__(self, root):
        """Create all widgets on ``root`` and lay them out.

        :param root: the Tk container the widgets are attached to.
        """
        Frame.__init__(self, root)
        self.__root = root
        self.__data_manager = DataManager()
        self.__check_button_type = namedtuple('CheckButtonType', 'widget var')
        self.__natures = [
            "Single Carriageway", "Traffic Island Link", "Dual Carriageway",
            "Roundabout", "Traffic Island Link At Junction", "Slip Road"
        ]
        self.__roads = [
            "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1",
            "HIGH STREET", "LONDON ROAD", "HIGH ROAD", "UXBRIDGE ROAD",
            "STATION ROAD", "BRIGHTON ROAD", "GREEN LANES", "FINCHLEY ROAD",
            "HARROW ROAD", "NORTH CIRCULAR ROAD", "KINGSTON ROAD",
            "PORTSMOUTH ROAD", "HERTFORD ROAD", "STAINES ROAD", "CROYDON ROAD",
            "MAIN ROAD", "CHURCH ROAD", "PARK ROAD"
        ]
        # Roads in this list are queried through the "classification" column,
        # every other road through the "street" column.
        self.__motorways = [
            "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1"
        ]
        self.__init_grid()
        self.__draw_grid()

    def __init_grid(self):
        """Instantiate every widget (list boxes, check boxes, button, text)."""
        # Road list
        self.__roads_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                        height=27, exportselection=0)
        for road in self.__roads:
            self.__roads_list_box.insert('end', road)

        # Nature list
        self.__natures_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                          height=6, width=22,
                                          exportselection=0)
        for nature in self.__natures:
            self.__natures_list_box.insert('end', nature)
        # Start with all natures selected
        self.__natures_list_box.select_set(0, END)

        # Days list
        self.__days_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                       height=8, width=22, exportselection=0)
        for day in ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday']:
            self.__days_list_box.insert('end', day)

        # Hours list
        self.__hours_list_box = Listbox(self.__root, selectmode=MULTIPLE,
                                        height=24, width=7, exportselection=0)
        for hour in range(24):
            self.__hours_list_box.insert('end', hour)

        # Check button draw overall
        self.__draw_overall_var = IntVar()
        self.__draw_overall_check_box = Checkbutton(
            self.__root, text="Draw Overall Curve?",
            variable=self.__draw_overall_var,
            onvalue=1, offvalue=0, height=2, width=20)

        # Check button draw nature
        self.__draw_nature_var = IntVar()
        self.__draw_nature_check_box = Checkbutton(
            self.__root, text="Draw Curve Per Nature?",
            variable=self.__draw_nature_var,
            onvalue=1, offvalue=0, height=2, width=20)

        # Check button show data (ticked by default)
        self.__show_data_var = IntVar()
        self.__show_data_var.set(1)
        self.__show_data_check_box = Checkbutton(
            self.__root, text="Show data?",
            variable=self.__show_data_var,
            onvalue=1, offvalue=0, height=2, width=20)

        # Go button
        self.__go_button = Button(self.__root, text='GO',
                                  command=self.__generate_graph)

        # Errors text box
        self.__error_text_box = Text(self.__root, height=28, width=18,
                                     fg="red")
        self.__error_text_box.tag_config('justified', justify=CENTER)

    def __draw_grid(self):
        """Place the widgets created by ``__init_grid`` on the grid."""
        # Roads label and list box
        Label(self.__root, text="Roads", justify=CENTER).grid(row=0, column=0)
        self.__roads_list_box.grid(row=1, column=0, rowspan=27)

        # Natures label and list box
        Label(self.__root, text="Natures",
              justify=CENTER).grid(row=0, column=1)
        self.__natures_list_box.grid(row=1, column=1, rowspan=6)

        # Days label and list box
        Label(self.__root, text="Days", justify=CENTER).grid(row=7, column=1)
        self.__days_list_box.grid(row=8, column=1, rowspan=8)

        # Hours label and list box
        Label(self.__root, text="Hours", justify=CENTER).grid(row=0, column=3)
        self.__hours_list_box.grid(row=1, column=3, rowspan=24)

        # Check boxes
        Label(self.__root, text="Drawing Options",
              justify=CENTER).grid(row=0, column=4)
        self.__draw_overall_check_box.grid(row=1, column=4, rowspan=2)
        self.__draw_nature_check_box.grid(row=3, column=4, rowspan=2)
        self.__show_data_check_box.grid(row=5, column=4, rowspan=2)

        # Go button
        self.__go_button.grid(row=10, column=4)

        # Error Column
        Label(self.__root, text="Error Report", height=1, width=18,
              justify=CENTER).grid(row=0, column=5)
        self.__error_text_box.grid(row=1, column=5, rowspan=28)

    def __generate_graph(self):
        """Callback of the GO button: validate the selection, then plot.

        Any problem is written to the error text box instead of plotting.
        """
        # Get parameters
        selected_roads = [self.__roads_list_box.get(index)
                          for index in self.__roads_list_box.curselection()]
        roads = [
            ("classification" if road in self.__motorways else "street", road)
            for road in selected_roads
        ]
        natures = tuple(self.__natures_list_box.get(index)
                        for index in self.__natures_list_box.curselection())
        days = self.__days_list_box.curselection()
        hours = self.__hours_list_box.curselection()

        # Always start from an empty report so messages from an earlier
        # attempt do not linger after a successful one.
        self.__error_text_box.delete("1.0", END)

        # Argument order must match the signature (hours before days),
        # otherwise the "No hours"/"No days" messages are swapped.
        errors = self.__error_check(roads, natures, hours, days)
        if errors:
            for message in errors:
                self.__error_text_box.insert(END, message + '\n', 'justified')
            return

        # NOTE(review): days are passed before hours to agree with the other
        # get_data call sites in this code base, which pass tuple(range(7))
        # followed by tuple(range(24)) - confirm against DataManager.get_data.
        data = self.__data_manager.get_data("traffic", "rainfall", roads,
                                            natures, days, hours)
        if not len(data):
            # Nothing came back for this selection; plotting would fail.
            self.__error_text_box.insert(END, "No data found" + '\n',
                                         'justified')
            return
        self.__plot_data(data)

    def __error_check(self, roads, natures, hours, days):
        """Return a list of human-readable problems with the selection.

        :param roads: selected roads.
        :param natures: selected natures.
        :param hours: selected hours.
        :param days: selected days.
        :return: list of error strings; empty when the selection is usable.
        """
        errors = []
        if not roads:
            errors.append("No roads selected")
        if not natures:
            errors.append("No natures selected")
        if not hours:
            errors.append("No hours selected")
        if not days:
            errors.append("No days selected")
        something_to_draw = (self.__show_data_var.get()
                             or self.__draw_nature_var.get()
                             or self.__draw_overall_var.get())
        if not something_to_draw:
            errors.append("Nothing to draw")
        return errors

    def __plot_data(self, data):
        """Scatter ``speed`` against ``depth``, coloured by ``nature``.

        Depending on the check boxes the plot contains the raw data, one
        overall fitted curve and/or one fitted curve per nature.

        :param data: DataFrame with ``depth``, ``speed`` and ``nature``
            columns.
        """
        max_depth = data.depth.max()
        max_speed = data.speed.max()

        dfs_to_plot = []
        if self.__show_data_var.get():
            dfs_to_plot.append(data)
        if self.__draw_overall_var.get():
            dfs_to_plot.append(
                self.__get_best_fit_curve(data, max_depth, max_speed,
                                          "Best fit curve"))
        if self.__draw_nature_var.get():
            # Group by the bare column name so ``nature`` is the plain value
            # (a one-element list yields 1-tuple keys on recent pandas).
            for nature, nature_df in data.groupby('nature'):
                dfs_to_plot.append(
                    self.__get_best_fit_curve(nature_df, max_depth,
                                              max_speed, nature))

        data = pd.concat(dfs_to_plot, ignore_index=True)
        # NOTE(review): recent seaborn releases renamed ``size`` to
        # ``height``; kept as-is for the seaborn version this file targets.
        fg = sns.FacetGrid(data=data, hue='nature', aspect=1.9,
                           legend_out=False, size=8)
        fg.map(plt.scatter, 'depth', 'speed', s=20).add_legend(None, "Legend")

        axes = fg.axes
        # Cap the y axis at 120 when the data contains speeds above 200.
        ylim = 120 if max_speed > 200 else max_speed
        xlim = 1.0 if max_depth < 1.0 else 2.0
        axes[0, 0].set_ylim(0, ylim)
        axes[0, 0].set_xlim(0, xlim)
        # Use pyplot directly; the ``sns.plt`` alias is not available in
        # newer seaborn versions.
        plt.show()

    def __get_best_fit_curve(self, data, max_depth, max_speed, nature_str):
        """Fit ``curve_func`` to ``data`` and sample the fitted curve.

        :param data: DataFrame with ``depth`` and ``speed`` columns to fit.
        :param max_depth: the curve is sampled on ``[0, max_depth)``.
        :param max_speed: sampled points with a larger speed are dropped.
        :param nature_str: value stored in the ``nature`` column of the
            result (used as the plot hue/legend entry).
        :return: DataFrame with ``depth``, ``speed``, ``nature`` and
            ``identifier`` columns; empty when no curve can be produced.
        """
        empty_curve = pd.DataFrame({'depth': [], 'speed': [], 'nature': [],
                                    'identifier': []})

        # A zero/NaN depth range would give np.arange a zero/NaN step.
        if not max_depth > 0:
            return empty_curve

        try:
            popt, _ = curve_fit(self.curve_func, data.depth, data.speed)
        except (RuntimeError, TypeError, ValueError):
            # RuntimeError: the optimiser did not converge.
            # TypeError: fewer data points than curve parameters.
            # ValueError: unusable input such as NaNs.
            # In every case just leave this curve out of the plot.
            return empty_curve
        a, b, c = popt

        depths = np.arange(0, max_depth, max_depth / 10000.0)
        speeds = self.curve_func(depths, a, b, c)

        # Drop points above max_speed with one shared mask so that every
        # remaining speed stays paired with the depth it was computed from.
        keep = speeds <= max_speed
        depths = depths[keep]
        speeds = speeds[keep]

        count = len(depths)
        return pd.DataFrame({'depth': depths,
                             'speed': speeds,
                             'nature': [nature_str] * count,
                             'identifier': [''] * count})

    def curve_func(self, x, a, b, c):
        """Exponential model ``a * exp(-b * x) + c`` used for curve fitting."""
        return a * np.exp(-b * x) + c