def back_test_performance(self):
    """Close a dangling long position, run the base report and chart the run.

    If the last simulated trade is a buy (``side == 1``), a matching sell
    order priced at the latest ``ma1`` value is appended and the resulting
    net value is recorded. The parent implementation is then invoked and
    two Bokeh figures are shown: prices/moving averages with trade markers,
    and the ``std`` bars with the ``avg_std`` line.
    """
    account = self.sim_account
    has_open_long = len(account.trades) > 0 and account.trades[-1].side == 1
    if has_open_long:
        closing_order = Order(price=self.tick_data["ma1"][-1],
                              side=-1,
                              symbol=self.trade_symbol,
                              quantity=self.position,
                              at=self.bolling.minute_q[-1].time_stamp)
        account.trades.append(closing_order)
        account.nets.append(
            account.balance + account.position * self.tick_data["ma1"][-1])
    super(Strategy, self).back_test_performance()

    def random_color():
        """Return a random colour as an upper-case ``#RRGGBB`` string."""
        import random
        value = int(random.random() * 256 * 256 * 256)
        return '#{:06X}'.format(value)

    from bokeh.plotting import figure, show, Column

    buy_orders = {"time": [], "price": []}
    sell_orders = {"time": [], "price": []}
    for trade in account.trades:
        assert isinstance(trade, Order)
        # Trade timestamps are shifted by eight hours before plotting.
        shifted = trade.at + datetime.timedelta(hours=8)
        bucket = buy_orders if trade.side > 0 else sell_orders
        bucket["time"].append(shifted)
        bucket["price"].append(trade.price)

    ticks = self.tick_data
    plot = figure(x_axis_type="datetime", width=1300)
    bar_plot = figure(x_axis_type="datetime", width=1300)

    plot.line(x=ticks["time"], y=ticks["ma1"], legend="ma10",
              color=random_color())
    plot.circle_x(x=buy_orders["time"], y=buy_orders["price"], color="red",
                  fill_color=None, size=20, line_width=5)
    plot.square_cross(x=sell_orders["time"], y=sell_orders["price"],
                      color="blue", fill_color=None, size=20, line_width=5)
    plot.line(x=ticks["time"], y=ticks["ma5"], legend="ma30",
              color=random_color())
    plot.line(x=ticks["time"], y=ticks["ma15"], legend="ma90",
              color=random_color())
    # Disabled band overlays kept for reference:
    # plot.line(x=self.tick_data["time"], y=self.tick_data["mean"], legend="mean", color='black')
    # plot.line(x=self.tick_data["time"], y=self.tick_data["upper"], legend="upper", color='black')
    # plot.line(x=self.tick_data["time"], y=self.tick_data["lower"], legend="lower", color='black')
    plot.scatter(x=ticks["time"], y=ticks["bid"], legend="bid",
                 color=random_color())
    plot.scatter(x=ticks["time"], y=ticks["ask"], legend="ask",
                 color=random_color())

    bar_plot.vbar(x=ticks["time"], top=ticks["std"], bottom=0, width=1)
    bar_plot.line(x=ticks["time"], y=ticks["avg_std"])

    show(Column(plot, bar_plot))
def generate_error_report(df, title='Error report', x_type='event_date', save_to='unnamed_error_report.html'):
    """Assemble the four-panel error report and either return or save it.

    The layout stacks the categorical plot (restricted to columns that have
    at least one non-null value), a row holding the histogram and the box
    plot, and the ACF plot.

    Returns the layout when ``save_to`` is None; otherwise writes it to the
    ``save_to`` HTML file and returns None.
    """
    # normal is already deltas
    populated = df.loc[:, df.count() > 0]
    series_fig = plot_cat_bokeh(populated, title=title, x_type=x_type)
    series_fig.title.text_font_size = '18pt'

    hist_fig = plot_hist_bokeh(df, bins=100, color_offset=0)
    box_fig = plot_boxplot_bokeh(df)
    acf_fig = plot_acf_bokeh(df)

    report = Column(series_fig, Row(hist_fig, box_fig), acf_fig)
    if save_to is not None:
        output_file(save_to, title=title)
        save(report)
        return None
    return report
def Hierarchical(doc):
    """Add an interactive drill-down wedge view of a cluster tree to *doc*.

    The ``file`` request argument names a ``;``-separated CSV under
    ``media/``. At most the first 50 rows/columns are used. Zero entries are
    treated as "no edge", all-pairs shortest paths are computed, and the
    result is passed to ``linkage(..., 'ward')`` / ``to_tree``. Each view
    shows the two children of the current tree node as wedges sized by their
    leaf counts; tapping a wedge descends into it (if it has more than two
    leaves) and the "Return" button goes back to the root.

    Parameters:
        doc: Bokeh document of the current session; the layout is added as
            a root and the request arguments are read from its session
            context.

    Raises:
        Exception: with message "File does not exist" when the CSV cannot be
            loaded or the requested name resolves outside ``media/``.
    """
    # NOTE(review): the current view is kept in module globals, so it appears
    # to be shared by every session served by this process - confirm.
    global source, nodes
    import os

    max_nodes = 50

    def getLevelInfo(tree):
        """Return cumulative leaf fractions, ids and the children of *tree*."""
        children = [tree.get_left(), tree.get_right()]
        total_desc = float(tree.get_count())
        percents = [0]
        names = []
        for child in children:
            fraction = float(child.get_count()) / total_desc
            percents.append(float(fraction + percents[-1]))
            names.append(child.get_id())
        return percents, names, children

    def getColours(Length):
        """Return *Length* colours, cycling through a fixed palette."""
        palette = ["red", "green", "blue", "orange", "yellow", "purple", "pink"]
        picked = [palette[i % len(palette)] for i in range(Length)]
        # The last wedge touches the first (red) one on the circle; keep them
        # distinguishable when the cycle would end on red again.
        if Length > 1 and picked[-1] == "red":
            picked[-1] = "orange"
        return picked

    def genDataSource(tree):
        """Build the wedge data source for the children of *tree*."""
        percents, names, children = getLevelInfo(tree)
        # define starts/ends for wedges from percentages of a circle
        starts = [p * 2 * pi for p in percents[:-1]]
        ends = [p * 2 * pi for p in percents[1:]]
        data = dict(start=starts,
                    end=ends,
                    name=names,
                    colour=getColours(len(starts)),
                    branchLength=[child.dist for child in children],
                    children=[child.get_count() for child in children])
        return ColumnDataSource(data=data), children

    def calcAngle(x, y):
        """Return the polar angle of (x, y) in [0, 2*pi).

        atan2 covers all four quadrants and the origin, so clicks in the
        lower-left quadrant map to the right wedge and (0, 0) cannot divide
        by zero.
        """
        return math.atan2(y, x) % (2 * pi)

    def update(event):
        """Descend into the wedge under the tap, if it can be expanded."""
        global source, nodes
        print('Click registered')
        angle = calcAngle(event.x, event.y)
        print(angle)
        clickedNode = None
        spans = zip(source.data['start'], source.data['end'])
        for i, (start, end) in enumerate(spans):
            if start < angle < end:
                clickedNode = i
                print(i)
        if clickedNode is None:
            # Tap landed exactly on a wedge boundary: nothing to expand.
            return
        if nodes[clickedNode].get_count() > 2:
            new_source, nodes = genDataSource(nodes[clickedNode])
            # Copy into a plain dict so the data is not shared with the
            # temporary source.
            source.data = dict(new_source.data)

    def returnVisualisation():
        """Reset the view to the root of the tree."""
        global source, nodes
        new_source, nodes = genDataSource(tree)
        source.data = dict(new_source.data)

    args = doc.session_context.request.arguments
    file = args.get('file')[0]
    file = str(file.decode('UTF-8'))

    # The file name comes from the HTTP request: refuse anything that
    # resolves outside the media directory.
    media_root = os.path.abspath("media")
    csv_path = os.path.abspath(os.path.join(media_root, file))
    if os.path.commonpath([media_root, csv_path]) != media_root:
        raise Exception("File does not exist")

    try:
        df = pd.read_csv("media/" + file, sep=';')
        print('Loaded data succesfully')
    except Exception as err:
        raise Exception("File does not exist") from err

    names = df.index.values
    counts = df.values

    # If data too large, keep the first max_nodes rows and columns. The last
    # column is kept as well because it is stripped unconditionally below.
    if len(names) > max_nodes:
        names = names[:max_nodes]
        keep_cols = list(range(max_nodes)) + [counts.shape[1] - 1]
        counts = counts[:max_nodes][:, keep_cols]
    # assumes the parsed CSV carries one extra trailing column (N x (N+1)
    # values) - TODO confirm against the input files
    counts = np.delete(counts, len(counts), axis=1)

    # Make a distance matrix: zero means "no edge" (infinite distance),
    # except on the diagonal.
    N = len(counts)
    distancematrix = np.zeros((N, N))
    distancematrix[:] = counts
    distancematrix[distancematrix == 0] = float("inf")
    np.fill_diagonal(distancematrix, 0)

    # Floyd-Warshall all-pairs shortest paths.
    for k in range(N):
        for i in range(N):
            for j in range(N):
                via_k = distancematrix[i][k] + distancematrix[k][j]
                if distancematrix[i][j] > via_k:
                    distancematrix[i][j] = via_k

    # NOTE(review): a 2-D array is handed to linkage() as-is; confirm that
    # this (rather than a condensed distance vector) is intended and that
    # unreachable pairs (inf) cannot occur in the input.
    X = distancematrix
    Z = linkage(X, 'ward')
    tree = to_tree(Z)

    ## Create the first data source for the root view
    source, nodes = genDataSource(tree)

    ## Create buttons and tools to interact with the visualisation
    returnButton = Button(label="Return")
    hover = HoverTool()
    hover.tooltips = [("Name", "@name"),
                      ("Lenght to parent", "@branchLength"),
                      ("Children", "@children")]
    hover.mode = 'mouse'
    tools = [hover, 'save']

    ## Create the canvas
    p = figure(x_range=(-1, 1), y_range=(-1, 1), tools=tools)

    ## Draw the wedges on the canvas according to the tree info
    p.wedge(x=0,
            y=0,
            radius=1,
            start_angle='start',
            end_angle='end',
            color='colour',
            alpha=0.6,
            source=source)

    ## Map actions to events for the interaction
    p.on_event(events.Tap, update)
    returnButton.on_click(returnVisualisation)

    ## Display the visualisation
    doc.add_root(Column(returnButton, p))
d5['end_datetime_str'].push( d6['end_datetime_str'][draw_index]); d5['start_datetime_dt'].push( d6['start_datetime_dt'][draw_index]); d5['end_datetime_dt'].push( d6['end_datetime_dt'][draw_index]); source_sx_draw_highlight.data = d5; source_sx_draw_highlight.change.emit(); """) silder_draw_index.js_on_change('value', callback) columns = [ TableColumn(field="start_datetime_dt", title="start_datetime_dt"), TableColumn(field="sx_value", title="sx_value") ] data_table = DataTable(source=source_sx_persistency, columns=columns, width=400, height=280) controles=Column(silder_draw_index, slider_sx, sizing_mode='stretch_width') p_sx_layout = Column(p_sx, controles, margin=( 8 , 8 , 8 , 8 )) toggles = Row(toggle1, toggle2) p_crd_layout = Column(p_crd, toggles, slider_alpha_Value, data_table, margin=( 8 , 8 , 8 , 8 )) #最後の部分 plots = Row(p_sx_layout, p_crd_layout) #output_file("MOGE.html") show(plots) curdoc().add_root(plots)
def showGUI(self, pth_to_img, y_form, pred):
    '''
    Method builds the bokeh GUI and shows it.

    The figure displays the ultrasound image with the true and predicted
    lines and lesion polygons, a table of the predicted data, toggles for
    both polygons, a download button and a read-out of both diameters and
    their RMSE that is recomputed in the browser on every mouse move.

    Parameters
    ----------
    pth_to_img: path to ultrasound image (appended to ``self.root_pth``)
    y_form: true form of the lesion
    pred: predicted form the lesion
    '''
    ##############
    #Set up a figure
    ##############
    p = figure(x_range=(0, self.DIMS[0]),
               y_range=(0, self.DIMS[1]),
               tools=self._tools_to_show,
               plot_width=self.DIMS[0],
               plot_height=self.DIMS[1],
               toolbar_location="above")

    #Add image as background
    # NOTE(review): position and size are hard-coded; presumably they match
    # self.DIMS == (862, 604) - confirm.
    p.image_url(url=[self.root_pth + pth_to_img],
                x=431,
                y=302,
                w=862,
                h=604,
                anchor="center")

    #Nicier plot
    self._makeShiny(plot=p)

    ##############
    #Add lines and plot them
    ##############
    src_true, src_pred = self._getData()
    self._plotLines(plot=p, src_true=src_true, src_pred=src_pred)

    ##############
    #Add table
    ##############
    table = self._addTable(src_pred=src_pred)

    ##############
    #Add polygons
    ##############
    true_pol, c_t = self._addLesionForm(form=y_form, color='red', plot=p)
    pred_pol, c_p = self._addLesionForm(form=pred, color='blue', plot=p)

    #Add toggles for polygons
    toggle_true = Toggle(label="Show true form",
                         button_type="primary",
                         active=True)
    toggle_true.js_link('active', true_pol, 'visible')
    toggle_true.js_link('active', c_t, 'visible')

    toggle_pred = Toggle(label="Show predicted form",
                         button_type="primary",
                         active=True)
    toggle_pred.js_link('active', pred_pol, 'visible')
    # Both renderers of the predicted form follow the "predicted" toggle
    # (c_p was previously wired to the "true" toggle by mistake).
    toggle_pred.js_link('active', c_p, 'visible')

    ##############
    #Add download button
    ##############
    # Read the callback script with a context manager so the handle is closed.
    with open(self.root_pth + "download.js") as js_file:
        download_code = js_file.read()
    button_csv = Button(label="Download", button_type="primary")
    button_csv.callback = CustomJS(args=dict(source=src_pred),
                                   code=download_code)

    ##############
    #Add title div
    ##############
    div_title = Div(text="""<div> <b>LESION ADJUSTER</b> </div>""",
                    align='center',
                    style={
                        'font-size': '150%',
                        'color': '#1f77b4'
                    })

    ##############
    #Add description to the buttons
    ##############
    div_desc = Div(text="""<div> <b>CONTROLS</b> </div>""",
                   align='center',
                   style={
                       'font-size': '110%',
                       'color': '#1f77b4'
                   })

    ##############
    #Add Div to show euclidean distance and button to recalculate it
    ##############
    # The initial text is a static placeholder; the JS callback below
    # overwrites it as soon as the mouse moves over the figure.
    div_euclid = Div(text=""" <b>Diameter of predicted form is:</b> 334.80 <br> <b>Diameter of true form is:</b> 368.64 <br> <b>RMSE is:</b> 34.13 """,
                     align='center',
                     style={'font-size': '100%'})

    p.js_on_event(
        events.MouseMove,
        CustomJS(args=dict(div=div_euclid,
                           source_data_pred=src_pred,
                           source_data_true=src_true),
                 code="""
            var data_p = source_data_pred.data;
            var data_t = source_data_true.data;

            var x_p = data_p['x']
            var y_p = data_p['y']

            var x_t = data_t['x']
            var y_t = data_t['y']

            var diam_p = 0
            var diam_t = 0
            var rmse = 0

            //Diameter of pred form
            diam_p = Math.sqrt(Math.pow((x_p[0]-x_p[1]),2) + Math.pow((y_p[0]-y_p[1]),2))

            //Diameter of true form
            diam_t = Math.sqrt(Math.pow((x_t[0]-x_t[1]),2) + Math.pow((y_t[0]-y_t[1]),2))

            //RMSE
            rmse = Math.sqrt(Math.pow(diam_p - diam_t,2)/1)

            //Result
            div.text = "<b>Diameter of predicted form is: </b>" + diam_p.toFixed(2) + "<br> <b>Diameter of true form is: </b>" + diam_t.toFixed(2) + " <br> <b>RMSE is: </b>" + rmse.toFixed(2);
            """))

    ##############
    #Show
    ##############
    show(
        Column(
            div_title,
            Row(
                Column(p, table),
                Column(div_desc, toggle_true, toggle_pred, button_csv,
                       div_euclid))))
def main():
    """Build the network-design page and attach it to the current document.

    The page holds an editable scatter of nodes (with a point-draw tool and
    a linked data table), two cost sliders, a "Solve Network" button and a
    text box. The button and the end of a pan gesture both trigger
    ``button_click_event``; a slider change triggers ``update_data``.
    """
    # set up main bokeh figure
    p = figure(x_range=(0, 10),
               y_range=(0, 10),
               tools=[],
               title='Draw points in the network')
    p.background_fill_color = 'lightgrey'

    # start off with sample points and their associated flows
    source = ColumnDataSource({
        'x': [2, 7, 5, 8],
        'y': [2, 2, 6, 1],
        'flow': ['-2', '-5', '8', '-1']
    })
    renderer = p.scatter(x='x', y='y', source=source, color='blue', size=10)

    columns = [
        TableColumn(field="x", title="x"),
        TableColumn(field="y", title="y"),
        TableColumn(field='flow', title='flow')
    ]
    table = DataTable(source=source,
                      columns=columns,
                      editable=True,
                      height=200)

    # New points drawn by the user get the string '1' in every non-coordinate
    # column (here: 'flow').
    draw_tool = PointDrawTool(renderers=[renderer], empty_value='1')
    p.add_tools(draw_tool)
    p.toolbar.active_tap = draw_tool

    titletextbox = Div(
        text=
        "<h2>Objective: minimize construction cost of network<p>Construction cost is based on number of pipes and distance between nodes.<br>Additional constraints imposed: flows in network must be balanced.<br></h2>",
        width=1100,
        height=150)
    textbox = Div(text="", width=200, height=100)

    # One solve callback shared by the button and the pan-end event.
    solve_network = partial(button_click_event,
                            source=source,
                            textbox=textbox,
                            figure=p)
    button = Button(label='Solve Network')
    button.on_event(ButtonClick, solve_network)
    p.on_event(PanEnd, solve_network)

    # set up sliders
    lumpSumCost = Slider(title="Fixed cost pipe",
                         value=0.0,
                         start=0.0,
                         end=500.0,
                         step=50)
    floatingCost = Slider(title="Floating cost pipe",
                          value=1.0,
                          start=0.0,
                          end=500.0,
                          step=10.)

    on_cost_change = partial(update_data,
                             source=source,
                             textbox=textbox,
                             figure=p,
                             lumpSumCost=lumpSumCost,
                             floatingCost=floatingCost)
    for w in [lumpSumCost, floatingCost]:
        w.on_change('value', on_cost_change)

    # create page layout
    curdoc().add_root(
        Column(
            titletextbox,
            Row(Column(p, table, width=800),
                Column(lumpSumCost, floatingCost, button, textbox,
                       width=300))))
    curdoc().title = "Network"
def choro_map():
    """Return a layout with three selects and a choropleth of rat sightings.

    The map counts sightings per postal code, neighborhood or borough,
    optionally restricted to one year and/or season, and is rebuilt whenever
    one of the selects changes.
    """
    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    # Per aggregation level: the columns merged back from the full frame and
    # the renames that map its outline columns onto "xs"/"ys".
    level_specs = {
        "postalCode": (["postalCode", "Neighborhood", "Borough", "xs", "ys"],
                       None),
        "Neighborhood": (["Neighborhood", "Borough", "nhood_xs", "nhood_ys"],
                         {"nhood_xs": "xs", "nhood_ys": "ys"}),
        "Borough": (["Borough", "boro_xs", "boro_ys"],
                    {"boro_xs": "xs", "boro_ys": "ys"}),
    }

    def make_map(var="postalCode", year="All", season="All"):
        """Build the map figure for one aggregation level and time filter."""
        # "All" leaves the corresponding dimension unfiltered.
        new_df = df
        if year != "All":
            new_df = new_df.query("Year == '%s'" % year)
        if season != "All":
            new_df = new_df.query("Season == '%s'" % season)

        # Unknown levels fall back to postal codes.
        if var not in level_specs:
            var = "postalCode"
        merge_cols, renames = level_specs[var]

        tallies = new_df.assign(n=0).groupby(var).n.count().reset_index()
        tallies = tallies.rename(columns={"n": "Rat_Sightings"})
        groupeddf = tallies.merge(df[merge_cols])
        groupeddf = groupeddf.drop_duplicates(subset=[var])
        groupeddf = groupeddf.reset_index(drop=True)
        if renames is not None:
            groupeddf = groupeddf.rename(columns=renames)

        # instantiate the color mapper
        color_mapper = LogColorMapper(palette=OrRd9[::-1])

        p = figure(x_range=(-8400000, -8100000),
                   y_range=(4950000, 5000000),
                   x_axis_type="mercator",
                   y_axis_type="mercator",
                   plot_height=1200,
                   plot_width=1000)
        p.axis.visible = False
        p.grid.grid_line_color = None
        p.add_tile(CARTODBPOSITRON)

        fill = {"field": "Rat_Sightings", "transform": color_mapper}
        p.patches("xs",
                  "ys",
                  source=ColumnDataSource(data=groupeddf),
                  fill_color=fill,
                  fill_alpha=0.6)

        tooltips = [("Number of Rat Sightings: ", "@Rat_Sightings"),
                    ("%s" % var, "@%s" % var)]
        p.add_tools(HoverTool(tooltips=tooltips))
        return p

    def time_options(column):
        """Distinct values of *column* followed by the catch-all entry."""
        return [*df[column].unique()] + ["All"]

    year_select = Select(value="All",
                         options=time_options("Year"),
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=time_options("Season"),
                           title="Select a season to view: ")
    agg_level_select = Select(
        value="postalCode",
        options=["postalCode", "Neighborhood", "Borough"],
        title="Select a value to view the data by: ")

    def update_plot(attr, old, new):
        """Swap in a map that reflects the current widget values."""
        layout.children[1] = make_map(year=year_select.value,
                                      season=season_select.value,
                                      var=agg_level_select.value)

    for widget in (year_select, season_select, agg_level_select):
        widget.on_change("value", update_plot)

    controls = Column(agg_level_select, year_select, season_select)
    layout = Column(controls, make_map())
    return layout
from bokeh.models import ColumnDataSource, DataTable, PointDrawTool, TableColumn
from bokeh.plotting import Column, figure, output_file, show

# Stand-alone demo: a scatter plot whose points can be added, moved and
# removed with the point-draw tool, with an editable table bound to the
# same data source. The result is written to tools_point_draw.html.
output_file("tools_point_draw.html")

p = figure(title='Point Draw Tool',
           tools=[],
           x_range=(0, 10),
           y_range=(0, 10))
p.background_fill_color = 'lightgrey'

source = ColumnDataSource(
    dict(x=[1, 5, 9], y=[1, 5, 9], color=['red', 'green', 'yellow']))

renderer = p.scatter(x='x', y='y', source=source, color='color', size=10)

# One table column per data column, titled with the column name.
columns = [TableColumn(field=name, title=name) for name in ('x', 'y', 'color')]
table = DataTable(source=source, columns=columns, editable=True, height=200)

# Newly drawn points get 'black' in the non-coordinate column.
draw_tool = PointDrawTool(renderers=[renderer], empty_value='black')
p.add_tools(draw_tool)
p.toolbar.active_tap = draw_tool

show(Column(p, table))
# print db.get(str(id)+"4"+"x") # print db.get(str(id)+"5"+"x") # print db.get(str(id)+"1"+"y") # print db.get(str(id)+"2"+"y") # print db.get(str(id)+"3"+"y") # print db.get(str(id)+"4"+"y") # print db.get(str(id)+"5"+"y") # print db.getall() #db.deldb() #db.dump() #db.deldb() #db.dump() layout = (Column(p, Row(button1, button))) tools = ["xpan,pan,xwheel_zoom,wheel_zoom,box_zoom,reset,previewsave"] q = figure(x_range=(0, 10), y_range=(0, 10), tools=tools, title='Analysis Peak') q.y_range.start = -10 q.y_range.end = 220 q.x_range.start = -10 q.x_range.end = 180 q.xaxis.axis_label = "Extension [nm]" q.xaxis.axis_label_text_font_size = "20pt" q.xaxis.axis_label_text_font_style = 'bold'
def new_scatter():
    """Return a layout comparing two user-chosen columns in a scatter plot.

    Four selects (two variables, year, season) drive a jittered scatter of
    sighting counts per (variable 1, variable 2) pair; the plot is rebuilt
    on every change.
    """
    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def var_var_scatter(df=df,
                        var1="Borough",
                        var2="Neighborhood",
                        year="All",
                        season="All"):
        """
        Allows the user to select whichever variables from the dataset for
        comparison in the form of a scatter plot.

        Returns the figure. The variable with fewer distinct values is put
        on the categorical y axis.
        """
        # we want var1 to be the least granular variable
        if df[var2].nunique() < df[var1].nunique():
            var1, var2 = var2, var1

        # "All" leaves the corresponding dimension unfiltered
        subset = df
        if year != "All":
            subset = subset.query("Year == '%s'" % year)
        if season != "All":
            subset = subset.query("Season == '%s'" % season)

        # count sightings per (var1, var2) pair
        groupeddf = (subset.assign(n=0).groupby(
            [var1, var2]).n.count().reset_index().rename(
                columns={"n": "rat_sightings"}))

        # categories shown on the y axis
        uniq_vals1 = [*groupeddf[var1].unique()]

        # add each pair's share of its var1 group to give more information
        # to the user: merge the per-group totals in, then divide
        sum_data = pd.DataFrame(
            groupeddf.groupby(var1)["rat_sightings"].sum())
        col_name = "total_%s_sights" % var1
        sum_data.columns = [col_name]
        groupeddf = groupeddf.merge(sum_data.reset_index())

        perc_name = "perc_sights"
        groupeddf[perc_name] = (groupeddf["rat_sightings"] /
                                groupeddf[col_name]) * 100

        # create color mapper
        mapper = LogColorMapper(palette=Blues9[::-1])

        # instantiate figure
        p = figure(plot_width=800, plot_height=600, y_range=uniq_vals1)
        p.circle(x="rat_sightings",
                 y=jitter(var1, width=0.6, range=p.y_range),
                 source=ColumnDataSource(data=groupeddf),
                 alpha=0.6,
                 size=30,
                 hover_alpha=0.9,
                 fill_color={
                     "field": "rat_sightings",
                     "transform": mapper
                 })

        # Column names are wrapped in braces so that names containing spaces
        # (the selects offer every column) resolve in the tooltip.
        p.add_tools(
            HoverTool(tooltips=[
                ("%s" % var1, "@{%s}" % var1),
                ("%s" % var2, "@{%s}" % var2),
                ("Num. of Rat Sightings", "@rat_sightings"),
                ("Percentage of Rat Sightings within the %ss" % var1,
                 "@perc_sights %")
            ]))
        return p

    # define variable selects
    var1_select = Select(value="Borough",
                         options=[*df.columns],
                         title="Select a variable: ")
    var2_select = Select(value="Neighborhood",
                         options=[*df.columns],
                         title="Select a variable: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")

    # define interactivity
    def update_plot(attr, old, new):
        """Replace the plot with one built from the current selections."""
        layout.children[1] = var_var_scatter(var1=var1_select.value,
                                             var2=var2_select.value,
                                             year=year_select.value,
                                             season=season_select.value)

    # inc interactivity
    var1_select.on_change("value", update_plot)
    var2_select.on_change("value", update_plot)
    year_select.on_change("value", update_plot)
    season_select.on_change("value", update_plot)

    layout = Column(
        Row(Column(var1_select, var2_select),
            Column(year_select, season_select)), var_var_scatter())
    return layout
def heatmap():
    """Return a layout with three selects and a location-type heatmap.

    Each cell is one (variable value, location type) pair coloured by its
    number of sightings; the figure is rebuilt whenever a select changes.
    """
    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def var_loc_heatmap(df=df,
                        var="Borough",
                        loc_var="Location Type",
                        year="All",
                        season="All"):
        """
        Creates a heatmap for each class in var (Borough, Neighborhood, or
        Zip Code) and determines which location types are the of the
        highest intensity for each. Essentially a recreation of the scatter
        plot but with a heatmap instead of circles.
        """
        # "All" means no restriction on that dimension
        selection = df
        if year != "All":
            selection = selection.query("Year == '%s'" % year)
        if season != "All":
            selection = selection.query("Season == '%s'" % season)

        # count sightings per (var, location type) pair
        tallies = selection.assign(n=0).groupby([var, loc_var]).n.count()
        groupeddf = tallies.reset_index().rename(columns={
            loc_var: "loc_type",
            "n": "rat_sightings"
        })

        # share of every cell in the overall total
        overall = groupeddf.rat_sightings.sum()
        groupeddf["total_perc"] = (groupeddf.rat_sightings / overall) * 100

        # axis categories
        loc_types = [*groupeddf["loc_type"].unique()]
        uniq_vals = [*groupeddf[var].unique()]

        # share of every cell within its own var group: merge the per-group
        # totals in, then divide
        col_name = "total_%s_sights" % var
        sum_data = pd.DataFrame(groupeddf.groupby(var).rat_sightings.sum())
        sum_data.columns = [col_name]
        groupeddf = groupeddf.merge(sum_data.reset_index())
        groupeddf["perc_sights"] = (groupeddf["rat_sightings"] /
                                    groupeddf[col_name]) * 100

        # instantiate color mapper
        mapper = LogColorMapper(palette=Blues9[::-1])

        # instantiate plot and strip its decorations
        p = figure(y_range=uniq_vals, x_range=loc_types)
        p.grid.grid_line_color = None
        p.axis.axis_line_color = None
        p.axis.major_tick_line_color = None
        p.axis.major_label_text_font_size = "7pt"
        p.axis.major_label_standoff = 0
        p.xaxis.major_label_orientation = pi / 3

        # fill plot with data
        cell_fill = {"field": "rat_sightings", "transform": mapper}
        p.rect(x="loc_type",
               y=var,
               width=1,
               height=1,
               source=ColumnDataSource(data=groupeddf),
               alpha=0.6,
               hover_alpha=0.9,
               fill_color=cell_fill)

        tooltips = [("%s" % var, "@%s" % var),
                    ("Location Type", "@loc_type"),
                    ("Num. of Rat Sightings", "@rat_sightings"),
                    ("Perc. of Rat Sightings Across %ss" % var,
                     "@perc_sights %")]
        p.add_tools(HoverTool(tooltips=tooltips,
                              point_policy="follow_mouse"))
        return p

    var_select = Select(
        value="Borough",
        options=["Borough", "Neighborhood", "postalCode", "Year", "Season"],
        title="Select the Variable to View: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")

    def update_plot(attr, old, new):
        """Swap in a heatmap that reflects the current widget values."""
        layout.children[1] = var_loc_heatmap(var=var_select.value,
                                             year=year_select.value,
                                             season=season_select.value)

    for widget in (var_select, year_select, season_select):
        widget.on_change("value", update_plot)

    controls = Column(var_select, year_select, season_select)
    layout = Column(controls, var_loc_heatmap())
    return layout
def bar():
    """Return a layout with four selects and a bar chart of rat sightings.

    The chart counts sightings per value of the chosen variable, optionally
    restricted to a year, season and/or month, and is rebuilt whenever one
    of the selects changes.
    """
    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def bar_chart(var="Borough", year="All", season="All", month="All"):
        """
        Generates a simple horizontal bar chart of sighting counts per value
        of ``var`` with hover capability and color mapping the most intense
        categories.

        ``year``, ``season`` and ``month`` each restrict the data to one
        value; "All" leaves that dimension unfiltered. Returns the figure.
        """
        # Apply every active filter independently, so that any combination
        # (including month-only and year+month) is honoured.
        subset = df
        for column, choice in (("Year", year), ("Season", season),
                               ("Month", month)):
            if choice != "All":
                subset = subset.query("%s == '%s'" % (column, choice))

        # group dataframe by indicated column name and count rows
        groupeddf = subset.assign(n=0).groupby(var).n.count().reset_index()

        # add in the percentages
        groupeddf["perc"] = (groupeddf["n"] / groupeddf["n"].sum()) * 100

        # assign names of columns
        groupeddf.columns = [var, var + "_rat_sightings", "perc"]
        # keep names as list
        names = [*groupeddf.columns]

        # keep unique values of col_name
        uniq_vals = [*groupeddf[var].unique()]

        # instantiate color mapper
        mapper = LogColorMapper(palette=Blues9[::-1])

        # instantiate figure
        p = figure(y_range=uniq_vals, title="Rat Sightings by %s" % var)

        # fill figure
        p.hbar(y=var,
               right=names[1],
               height=0.9,
               source=ColumnDataSource(data=groupeddf),
               alpha=0.6,
               hover_alpha=0.9,
               fill_color={
                   "field": names[1],
                   "transform": mapper
               })

        # add hover tool; column names go in braces so that names with
        # spaces (e.g. "Location Type") resolve
        p.add_tools(
            HoverTool(tooltips=[("%s" % var, "@{%s}" % var),
                                ("%s" % names[1], "@{%s}" % names[1]),
                                ("Percentage of Sightings", "@perc %")],
                      point_policy="follow_mouse"))
        return p

    # create a select for users
    var_select = Select(value="Borough",
                        options=[
                            "Borough", "postalCode", "Neighborhood",
                            "Location Type", "Year", "Season"
                        ],
                        title="Select the Variable to View: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")
    month_select = Select(value="All",
                          options=[*df.Month.unique()] + ["All"],
                          title="Select a month to view: ")

    # create interactivity component
    def update_plot(attr, old, new):
        """Replace the chart with one built from the current selections."""
        layout.children[1] = bar_chart(var=var_select.value,
                                       year=year_select.value,
                                       season=season_select.value,
                                       month=month_select.value)

    # add in interactivity component
    var_select.on_change("value", update_plot)
    year_select.on_change("value", update_plot)
    season_select.on_change("value", update_plot)
    month_select.on_change("value", update_plot)

    # define layout
    layout = Column(
        Column(var_select, year_select, season_select, month_select),
        bar_chart())
    return layout
def scatter():
    """Return a layout with three selects and a location-type scatter plot.

    Every circle is one (variable value, location type) pair positioned and
    coloured by its number of sightings; the figure is rebuilt whenever a
    select changes.
    """
    df = pd.read_pickle("./data/Rat_Sightings.pkl")

    def var_loc_scatter(var="Borough",
                        loc_var="Location Type",
                        year="All",
                        season="All"):
        """
        Groups the dataframe by the specified variable and location type to
        find the counts of various location types for each unique value of
        the variable, then generates a jittered scatter plot.

        ``year`` and ``season`` each restrict the data to one value; "All"
        leaves that dimension unfiltered. Returns the figure.
        """
        subset = df
        if year != "All":
            subset = subset.query("Year == '%s'" % year)
        if season != "All":
            subset = subset.query("Season == '%s'" % season)

        # count sightings per (var, location type) pair
        groupeddf = (subset.assign(n=0).groupby(
            [var, loc_var]).n.count().reset_index().rename(columns={
                loc_var: "loc_type",
                "n": "rat_sightings"
            }))

        # get total percentages (share of the overall count)
        groupeddf["total_perc"] = (groupeddf.rat_sightings /
                                   groupeddf.rat_sightings.sum()) * 100

        # get local percentages (share within the pair's own var group);
        # this is the figure the tooltip label below describes
        group_totals = groupeddf.groupby(var)["rat_sightings"].transform(
            "sum")
        groupeddf["perc_sights"] = (groupeddf["rat_sightings"] /
                                    group_totals) * 100

        # create a list of the unique values
        uniq_vals = [*groupeddf[var].unique()]

        # create color mapper
        mapper = LogColorMapper(palette=Blues9[::-1])

        # instantiate figure
        p = figure(y_range=uniq_vals)
        p.circle(x="rat_sightings",
                 y=jitter(var, width=0.6, range=p.y_range),
                 source=ColumnDataSource(data=groupeddf),
                 alpha=0.6,
                 size=30,
                 hover_alpha=0.9,
                 fill_color={
                     "field": "rat_sightings",
                     "transform": mapper
                 })
        p.add_tools(
            HoverTool(tooltips=[
                ("%s" % var, "@%s" % var),
                ("Location Type", "@loc_type"),
                ("Num. of Rat Sightings", "@rat_sightings"),
                ("Percentage of Rat Sightings within the %ss" % var,
                 "@perc_sights %")
            ]))
        return p

    # create a select for users
    var_select = Select(
        value="Borough",
        options=["Borough", "postalCode", "Neighborhood", "Year", "Season"],
        title="Select the Variable to View: ")
    year_select = Select(value="All",
                         options=[*df["Year"].unique()] + ["All"],
                         title="Select a year to view: ")
    season_select = Select(value="All",
                           options=[*df["Season"].unique()] + ["All"],
                           title="Select a season to view: ")

    # create interactivity components
    def update_plot(attr, old, new):
        """Replace the plot with one built from the current selections."""
        layout.children[1] = var_loc_scatter(var=var_select.value,
                                             year=year_select.value,
                                             season=season_select.value)

    # add in interactivity component
    var_select.on_change('value', update_plot)
    year_select.on_change("value", update_plot)
    season_select.on_change("value", update_plot)

    layout = Column(Column(var_select, year_select, season_select),
                    var_loc_scatter())
    return layout