def get_interactive_vec_plot(visual_df):
    """
    Build an interactive scatter plot for exploring text data.

    :param visual_df: pandas dataframe; the plot references its ``x``, ``y``,
        ``reviews`` and ``labels`` columns plus the index
    :return: bokeh plot instance
    """
    points = ColumnDataSource(visual_df)

    # One fixed colour per label value "0" / "1" / "2".
    label_colours = CategoricalColorMapper(factors=["0", "1", "2"],
                                           palette=["red", "purple", "green"])

    review_hover = HoverTool(tooltips=[("id", "@index"),
                                       ("text", "@reviews"),
                                       ("label", "@labels")])
    review_hover.attachment = 'right'

    plot = figure(title='2D-Vector Representation of Reviews',
                  plot_width=600,
                  plot_height=400,
                  tools=('pan, wheel_zoom, reset', 'box_zoom', 'undo'))
    plot.add_tools(review_hover)
    plot.circle('x',
                'y',
                source=points,
                color={"field": "labels", "transform": label_colours},
                legend="labels")
    return plot
def get_handle(source, figkwds=None):
    """
    Show a scatter plot of ``source`` in the notebook and return its handle.

    Points are coloured by the ``class`` column when the source has one,
    otherwise they are drawn in blue without a legend.

    :param source: object exposing a ``data`` mapping with ``x`` and ``y``
        columns (presumably a ColumnDataSource -- confirm against callers)
    :param figkwds: keyword arguments forwarded to ``figure``; a default set
        is used when ``None``
    :return: the value returned by ``show(..., notebook_handle=True)``
    """
    if figkwds is None:
        figkwds = dict(plot_width=500,
                       plot_height=300,
                       x_axis_label='x',
                       y_axis_label='y',
                       tools="pan,lasso_select,box_select,reset,help")

    if 'class' in source.data:
        # Sort so the label -> colour assignment is stable between runs
        # (plain set iteration order is not).
        class_labels = sorted(set(source.data['class']), key=str)
        sized_palettes = d3['Category10']
        # The palette dict has no entry below its smallest size, so take the
        # smallest available palette and trim it for 1 or 2 classes.
        n_classes = len(class_labels)
        palette = sized_palettes[max(n_classes, min(sized_palettes))][:n_classes]
        color_map = CategoricalColorMapper(factors=class_labels,
                                           palette=palette)
        color = {'field': 'class', 'transform': color_map}
        legend = 'class'
    else:
        color = 'blue'
        legend = None

    p1 = figure(active_drag="lasso_select", **figkwds)
    p1.scatter('x', 'y', source=source, alpha=0.8, color=color, legend=legend)
    handle = show(p1, notebook_handle=True)
    return handle
def run_clustering(*args):
    """
    Cluster the current projection and recolour the plot by cluster label.

    Only the ``dbscan`` method is handled; any other selection logs a warning
    and returns without touching the plot. Positional arguments are accepted
    and ignored. Relies on names from the enclosing scope (``self``, ``lg``,
    ``bplot``, ``bfigure``, ``bcolorbar``, ``blegend``, ``bhandle``).
    """
    method = self.w.clu_method.value
    _stats, trans = run_dr_2()

    if method != 'dbscan':
        lg.warning('Not implemented cluster method: {}'.format(method))
        return

    labels = run_dbscan(trans, self.w.clu_dbscan_eps.value)
    self.thelenota._CLUSTER_LABELS[self.dr_name()] = labels

    cluster_factors = list(labels.value_counts().index)
    color_mapper = CategoricalColorMapper(
        palette=bokeh.palettes.Category20[20], factors=cluster_factors)

    # The colour bar gets a degenerate (0..0) linear mapper.
    bcolorbar.color_mapper = LinearColorMapper(palette='Inferno256',
                                               low=0,
                                               high=0)

    legend_items = bfigure.legend[0].items
    if not legend_items:
        legend_items.append(blegend)

    bplot.data_source.data['score'] = labels
    glyph = bplot.glyph
    glyph.fill_color['transform'] = color_mapper
    glyph.line_width = 0
    glyph.line_alpha = 0
    bokeh_io.push_notebook(handle=bhandle)
def generate_bokeh_new_graph(self):
    """
    Render ``hate_crimes.csv`` as a scatter plot in ``bokeh_graph.html``.

    Each point is coloured by the ``won_state`` column (Trump red, Clinton
    blue) and the hover tool shows the state and both plotted shares.
    """
    # Output goes to a standalone HTML file.
    output_file("bokeh_graph.html")

    # Load the CSV and wrap it for Bokeh.
    csv_path = 'hate_crimes.csv'
    hate_crime_data = ColumnDataSource(pd.read_csv(csv_path))

    # Colour code for which candidate won each state.
    winner_colours = CategoricalColorMapper(factors=['Trump', 'Clinton'],
                                            palette=['red', 'blue'])

    # NOTE(review): the y axis label mentions hate crimes while the plotted
    # y column is 'share_voters_voted_trump' -- confirm which is intended.
    plot = figure(x_axis_label='Percentage of Non-white',
                  y_axis_label='Hate Crimes / 100,000',
                  plot_width=600,
                  plot_height=500,
                  tools='pan,wheel_zoom,box_zoom,reset,hover,save',
                  title='Percentage of Non-white Versus Voted Trump')
    plot.circle(x='share_non_white',
                y='share_voters_voted_trump',
                source=hate_crime_data,
                size=15,
                color={'field': 'won_state', 'transform': winner_colours})

    # The 'hover' entry in tools created a HoverTool; configure it here.
    plot.select_one(HoverTool).tooltips = [
        ('state', '@state'),
        ('Share of Non White', '@share_non_white'),
        ('Voted Trump', '@share_voters_voted_trump'),
    ]
    show(plot)
def make_plot_newc(newc, df, cities):
    """
    Draw per-province ``new`` values as crosses plus a loess trend line each.

    :param newc: figure to draw on
    :param df: dataframe with ``data``, ``new`` and
        ``denominazione_provincia`` columns
    :param cities: province names to include
    """
    palette = Category20[20]
    province = df.denominazione_provincia

    newc.cross(
        x='data',
        y='new',
        source=df.loc[province.isin(cities), :],
        color={
            'field': 'denominazione_provincia',
            'transform': CategoricalColorMapper(factors=cities,
                                                palette=palette),
        },
        alpha=0.99,
        size=4,
        legend_field='denominazione_provincia',
    )
    newc.legend.location = 'top_left'

    # The palette is cycled so more provinces than colours still get a colour.
    for name, color in zip(cities, itertools.cycle(palette)):
        rows = df.denominazione_provincia == name
        trend = loess(df.loc[rows, 'new'], alpha=0.9, poly_degree=1)
        newc.line(x=df.loc[rows, 'data'].to_numpy(),
                  # The first fitted value is skipped.
                  y=trend['g'].to_numpy()[1:],
                  color=color,
                  legend_label='Trend - ' + name)
def stabTrimPlot(dataframe):
    """
    Plot aircraft altitude against time as a single blue line.

    :param dataframe: data with ``DATETIME`` and ``ALTITUDE`` columns
    :return: the styled bokeh figure
    """
    hover = HoverTool(
        tooltips=[('altitude', '@ALTITUDE'),
                  ('time', '@DATETIME{%H:%M:%S}')],
        # Format time as H:M:S without the date (see DatetimeTickFormatter).
        formatters={'@DATETIME': 'datetime'},
        # Show a tooltip whenever the cursor is vertically in line with a glyph.
        mode='vline')

    # An unused colour mapper with iris species factors used to be built
    # here; nothing referenced it, so it has been removed.
    source = ColumnDataSource(dataframe)

    plot = figure(title='Aircraft Flight Envelope',
                  tools=[hover, 'pan', 'box_zoom', 'reset'],
                  x_axis_label='UTC Time (hh:mm:ss)',
                  y_axis_label='Aircraft Altitude (ft)',
                  x_axis_type='datetime')
    plot.line('DATETIME', 'ALTITUDE', source=source, line_width=2,
              color="blue")
    plot_styler(plot)
    return plot
def draw_plots_layouts(df):
    """
    Grid layout plots with Box and Lasso select tools.

    Four scatter panels share one data source, so a selection made in one
    panel is reflected in the others. The grid is written to
    ``gridplot_select_tools.html`` and shown.
    """
    source = ColumnDataSource(df)
    species_colours = CategoricalColorMapper(
        factors=['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'],
        palette=['red', 'blue', 'green'])

    # (x column, y column, legend location or None to keep the default)
    panel_specs = [
        ('sepal length', 'sepal width', None),
        ('petal length', 'petal width', 'bottom_right'),
        ('sepal length', 'petal length', 'bottom_right'),
        ('sepal width', 'petal width', 'center_right'),
    ]

    panels = []
    for x_col, y_col, legend_location in panel_specs:
        panel = figure(x_axis_label=x_col,
                       y_axis_label=y_col,
                       tools='box_select,lasso_select')
        panel.circle(x_col,
                     y_col,
                     size=8,
                     source=source,
                     legend='class',
                     color={'field': 'class', 'transform': species_colours})
        if legend_location is not None:
            panel.legend.location = legend_location
        panels.append(panel)

    # Grid layout: two rows of two panels.
    layout = gridplot([panels[:2], panels[2:]])

    # TODO: add the hover tool to the figures.
    output_file('gridplot_select_tools.html')
    show(layout)
def make_plot():
    """
    Build the map coloured by travel category for a selected country.

    Reads the module-level ``geosource`` and returns the figure.
    """
    categ = [
        'Visa Not Required', 'Visa On Arrival', 'ETA Required',
        'eVisa Required', 'Visa Required', 'Selected Country'
    ]
    # First five colours come from RdYlBu5; the selected country gets RdGy5[0].
    category_colours = [RdYlBu5[i] for i in range(5)] + [RdGy5[0]]
    color_mapper2 = CategoricalColorMapper(factors=categ,
                                           palette=category_colours)

    # Hover tool.
    hover = HoverTool(tooltips=[('Country/region', '@country'),
                                ('travel category', '@cat')])

    # Figure object.
    p11 = figure(title=('Where can a selected Country Travel to?'),
                 plot_height=600,
                 plot_width=950,
                 toolbar_location=None,
                 tools=[hover])
    p11.xgrid.grid_line_color = None
    p11.ygrid.grid_line_color = None

    # Patch renderer for the country shapes.
    p11.patches('xs',
                'ys',
                source=geosource,
                fill_color={'field': 'cat', 'transform': color_mapper2},
                line_color='black',
                line_width=0.25,
                fill_alpha=1)

    # Empty circle glyphs exist only to produce one legend entry per category.
    for factor, color in zip(color_mapper2.factors, color_mapper2.palette):
        p11.circle(x=[], y=[], fill_color=color, legend=factor)

    return p11
def render_hist(
    df: pd.DataFrame, x: str, meta: ColumnMetadata, plot_width: int, plot_height: int
) -> Figure:
    """
    Render a histogram as vertical bars coloured by the ``label`` column.

    For non-categorical metadata a ``repr`` column with the bin interval text
    is added to a copy of ``df`` and shown in the tooltip.
    """
    if is_categorical(meta["dtype"]):
        value_field, count_title = "@x", "Count"
    else:
        # Work on a copy so the caller's frame does not gain a column.
        df = df.copy()
        df["repr"] = [
            f"[{row.lower_bound:.0f}~{row.upper_bound:.0f})"
            for row in df.itertuples()
        ]
        value_field, count_title = "@repr", "Frequency"

    tooltips = [
        (x, value_field),
        (count_title, "@count"),
        ("Label", "@label"),
    ]

    cmapper = CategoricalColorMapper(palette=Category10[3], factors=LABELS)

    xs = df["x"]
    if is_categorical(xs.dtype):
        radius = 0.99
        x_range = FactorRange(*xs.unique())
    else:
        # Bar width is the distance between the first two x values.
        radius = df["x"][1] - df["x"][0]
        x_range = Range1d(xs.min() - radius, xs.max() + radius)

    y_range = Range1d(0, df["count"].max() * 1.05)

    fig = tweak_figure(
        Figure(
            x_range=x_range,
            y_range=y_range,
            plot_width=plot_width,
            plot_height=plot_height,
            tools="hover",
            toolbar_location=None,
            tooltips=tooltips,
        )
    )
    fig.vbar(
        x="x",
        width=radius,
        top="count",
        source=df,
        fill_alpha=0.3,
        color={"field": "label", "transform": cmapper},
        legend_field="label",
    )
    relocate_legend(fig, "right")
    return fig
def categorical_colormapper(self):
    """
    Return the colour mapper matching the current score.

    Falls back to ``self.linear_colormapper`` unless
    ``self.discrete_intrinsic`` is truthy; otherwise builds a categorical
    mapper whose factors are the values of ``self.score`` in
    ``value_counts()`` order.
    """
    if not self.discrete_intrinsic:
        return self.linear_colormapper

    factors = list(self.score.value_counts().index)
    return CategoricalColorMapper(palette=bokeh.palettes.Category20[20],
                                  factors=factors)
def plot_data(df):
    """
    Scatter book length against rating, one renderer per top genre.

    Writes ``scatter.html`` and shows it. Each genre gets its own renderer so
    clicking its legend entry hides it.

    :param df: dataframe with ``top_genre``, ``avg_rating_all_editions`` and
        ``num_pages`` columns
    """
    output_file("scatter.html")
    p = figure(plot_width=1000, plot_height=600)
    p.yaxis.axis_label = 'Length'
    p.xaxis.axis_label = 'Rating'
    print(df)

    palette = Set1[5]
    genres = df['top_genre'].unique()

    # One circle renderer per genre. Colours wrap around when there are more
    # genres than palette entries (direct indexing raised IndexError).
    for ind, genre in enumerate(genres):
        p.circle(source=df[df['top_genre'] == genre],
                 x='avg_rating_all_editions',
                 y='num_pages',
                 size=2,
                 color=palette[ind % len(palette)],
                 legend=genre,
                 alpha=0.5)

    p.legend.click_policy = "hide"

    # Show the results.
    show(p)
def plot_clustering(eval_folder, embeddings, embeddings_file, reverse_dictionary):
    """
    Evaluate embeddings by visualizing the clustering plot

    The embeddings are reduced with t-SNE when ``FLAGS.tsne`` is set and with
    UMAP otherwise; the reduced points are written to a ``.np`` file and
    plotted into an ``.html`` file inside ``<eval_folder>/clusterplot``.

    :param eval_folder: folder in which to write analogy results
    :param embeddings: embedding matrix to evaluate
    :param embeddings_file: file in which the embedding matrix is stored
    :param reverse_dictionary: [keys=statement index, values=statement]
    :return: 1 when ``FLAGS.taglist`` is not defined, otherwise None
    """
    print('\n--- Starting clustering plot')

    # Create folder in which to save plots (no check-then-create race).
    folder_clusterplot = os.path.join(eval_folder, "clusterplot")
    os.makedirs(folder_clusterplot, exist_ok=True)

    if FLAGS.taglist is None:
        print('Taglist must be defined')
        return 1

    print('Loading/creating labels')
    flags_file = FLAGS.taglist + '.new' if FLAGS.newtags else FLAGS.taglist

    if os.path.exists(flags_file):
        print('Loaded from tags file', flags_file)
        # NOTE: pickle executes arbitrary code on load; only use tag files
        # produced by this tool.
        with open(flags_file, 'rb') as tags_in:
            targets, labels = pickle.load(tags_in)
    else:
        print('Recomputing tags file')
        targets, labels = create_tags(reverse_dictionary, embeddings)
        with open(flags_file, 'wb') as tags_out:
            pickle.dump([targets, labels], tags_out)

    if FLAGS.tsne:
        prefix = 'tsne_'
        reducer = TSNE(metric=FLAGS.metric, verbose=FLAGS.verbose)
    else:
        prefix = 'umap_'
        reducer = umap.UMAP(metric=FLAGS.metric, verbose=FLAGS.verbose)
    embedding = reducer.fit_transform(embeddings)

    # The last two characters of the embeddings file name are dropped and
    # path separators flattened to build the output file stem.
    stem = prefix + embeddings_file[:-2].replace('/', '_')
    np_file = os.path.join(folder_clusterplot, stem + '.np')
    html_file = os.path.join(folder_clusterplot, stem + '.html')

    # Save plots to file
    embedding.tofile(np_file)
    output_file(html_file)

    print('Plotting')
    source = ColumnDataSource(
        dict(x=[e[0] for e in embedding],
             y=[e[1] for e in embedding],
             label=labels))
    # NOTE(review): Category20 is indexed by the number of targets; confirm
    # the tag list always falls inside the sizes that palette provides.
    cmap = CategoricalColorMapper(factors=targets,
                                  palette=Category20[len(targets)])
    p = figure(title="test umap")
    p.circle(x='x',
             y='y',
             source=source,
             color={"field": 'label', "transform": cmap},
             legend='label')
    show(p)
def _plot_state_data(self):
    """
    Show a county map filled by status category.

    County geometry, names, rates and categories are read from ``self``; the
    four status categories map onto the first four entries of ``colors[4]``.
    """
    status_levels = ['OK', 'Arriving', 'Warn', 'Severe']
    status_colours = [colors[4][i] for i in range(4)]
    color_mapper = CategoricalColorMapper(factors=status_levels,
                                          palette=status_colours)

    # Plain dict used directly as the glyph data source.
    viz_schema = {
        'x': self.county_xs,
        'y': self.county_ys,
        'name': self.county_names,
        'rate': self.county_rates,
        'cats': self.rate_categories,
    }

    hover_rows = [
        ("Name", "@name"),
        ("Status", "@cats"),
        ("Confirmed Cases", "@rate"),
        ("(Long, Lat)", "($x, $y)"),
    ]
    p = figure(title="WA State, Covid19 Cases",
               tools="pan,wheel_zoom,reset,hover,save",
               x_axis_location=None,
               y_axis_location=None,
               plot_width=800,
               plot_height=500,
               tooltips=hover_rows)
    p.grid.grid_line_color = None
    p.hover.point_policy = "follow_mouse"

    p.patches('x',
              'y',
              source=viz_schema,
              fill_color={'field': 'cats', 'transform': color_mapper},
              fill_alpha=0.7,
              line_color="white",
              line_width=0.5)
    show(p)
def make_color_map(palette, n, field, mode="linear"):
    """
    Build a ``{"field", "transform"}`` colour specification.

    Parameters
    ----------
    palette : bokeh palette
        Either a callable taking ``n`` or a mapping indexed by ``n``.
    n : int
        Palette size; also the upper bound of the linear/log mappers and the
        number of categorical factors (``"0"`` .. ``str(n - 1)``).
    field : str
    mode : 'linear', 'log' or 'categorical', default 'linear'

    Returns
    -------
    cmap : dict

    Raises
    ------
    ValueError
        If ``mode`` is not one of the recognised values.
    """
    colors = palette(n) if callable(palette) else palette[n]

    if mode == "categorical":
        factors = [str(i) for i in range(n)]
        mapper = CategoricalColorMapper(factors=factors, palette=colors)
    elif mode == "linear":
        mapper = LinearColorMapper(low=0, high=n, palette=colors)
    elif mode == "log":
        mapper = LogColorMapper(low=0, high=n, palette=colors)
    else:
        raise ValueError("Unrecognized mode.")

    return {"field": field, "transform": mapper}
def better_bokeh_chart(data, size_coeff, chart_title):
    '''
    Make a weight/consumption scatter chart from the supplied rows.

    Each row is indexed positionally: 0 weight, 1 consumption, 2 horsepower,
    3 origin, 4 brand, 5 model, 6 year. Marker size is horsepower times
    ``size_coeff``.

    Returns the ``(script, div, js_resources, css_resources)`` needed to
    embed the chart.
    '''
    origin_colours = CategoricalColorMapper(
        factors=['AMERICA', 'ASIA', 'EUROPE'],
        palette=['#aa0000', '#00aa00', '#0000aa'])

    # Data source: one list per column.
    columns = {
        'weight': [int(row[0]) for row in data],
        'consumption': [round(row[1], 1) for row in data],
        'origin': [row[3] for row in data],
        'hp': [int(row[2]) for row in data],
        'brand': [row[4] for row in data],
        'model': [row[5] for row in data],
        'year': [int(row[6]) for row in data],
        'size': [row[2] * size_coeff for row in data],
    }
    source = ColumnDataSource(data=columns)

    # Info shown when hovering over the points.
    hover = HoverTool(tooltips=[
        ('brand', '@brand'),
        ('model', '@model'),
        ('year', '@year'),
        ('origin', '@origin'),
        ('weight [kg]', '@weight'),
        ('fuel consumption [l/100km]', '@consumption'),
        ('engine power [HP]', '@hp'),
    ])

    # Scatter plot.
    fig = figure(title=chart_title,
                 tools=[hover, 'pan', 'wheel_zoom'],
                 x_axis_label='weight [kg]',
                 y_axis_label='fuel consumption [l/100 km]')
    fig.scatter('weight',
                'consumption',
                source=source,
                marker="circle",
                color={'field': 'origin', 'transform': origin_colours},
                size='size',
                alpha=0.5,
                legend='origin')
    fig.legend.location = 'top_left'

    # Static resources, then the embeddable pieces of the figure.
    js_resources = INLINE.render_js()
    css_resources = INLINE.render_css()
    script, div = components(fig)
    return script, div, js_resources, css_resources
def getCategoricalColorMapperObj(colorMapperData):
    """
    Turn a colour-mapper description into a Bokeh colour specification.

    :param colorMapperData: mapping with ``factors``, ``palette`` and a
        ``field`` mapping holding an ``index`` entry; may be falsy
    :return: ``dict(field=..., transform=...)`` or ``None`` for falsy input
    """
    if not colorMapperData:
        return None

    mapper = CategoricalColorMapper(factors=colorMapperData['factors'],
                                    palette=colorMapperData['palette'])
    return {'field': colorMapperData['field']['index'], 'transform': mapper}
def scatter_bokeh(self,
                  x,
                  y,
                  x_axis_label,
                  y_axis_label,
                  hue='continent',
                  palette_category='Category10',
                  tools='box_select,lasso_select,pan,wheel_zoom,box_zoom',
                  source=None,
                  x_range=None,
                  y_range=None,
                  maintime=None):
    """
    Build a scatter figure coloured by the ``hue`` column.

    :param x, y: column names plotted on each axis
    :param x_axis_label, y_axis_label: axis titles
    :param hue: column whose distinct values become colour factors
    :param palette_category: key into the ``d3`` palette collection
    :param tools: Bokeh tools string
    :param source: data source to plot; built from ``self.dataframe`` when
        ``None``
    :param x_range, y_range: explicit ranges; forwarded only when at least
        one of them is given
    :param maintime: when given, hues are taken from
        ``self.dataframe.loc[maintime]`` instead of the whole frame
    :return: the bokeh figure
    """
    # Identity checks: '== None' can be overridden by the compared object.
    if source is None:
        source = ColumnDataSource(data=self.dataframe)

    # Without a maintime the whole frame defines the hues (indexing with
    # .loc[None] used to fail when only a source was supplied).
    hue_frame = (self.dataframe
                 if maintime is None else self.dataframe.loc[maintime])
    list_of_hues = [str(k) for k in hue_frame[hue].unique()]

    sized_palettes = d3[palette_category]
    # The palette family has no entry below its smallest size, so take the
    # smallest palette and trim it when there are only one or two hues.
    n_hues = len(list_of_hues)
    pal = sized_palettes[max(n_hues, min(sized_palettes))][:n_hues]
    color_mapper = CategoricalColorMapper(factors=list_of_hues, palette=pal)

    range_kwargs = {}
    if x_range is not None or y_range is not None:
        range_kwargs = dict(x_range=x_range, y_range=y_range)
    fig = figure(x_axis_label=x_axis_label,
                 y_axis_label=y_axis_label,
                 tools=tools,
                 **range_kwargs)

    fig.circle(source=source,
               x=x,
               y=y,
               color={'field': hue, 'transform': color_mapper},
               legend=hue,
               selection_alpha=1,
               nonselection_alpha=0.1,
               hover_alpha=0.5,
               alpha=1)
    return fig
def render_scatter(
    itmdt: Intermediate, plot_width: int, plot_height: int, palette: Sequence[str]
) -> Figure:
    """
    Render a scatter plot with a regression line and, when a third column is
    present, highlight the most influential points it marks.

    The first two columns of ``itmdt["data"]`` are the x and y values; an
    optional third column holds the markers "=", "+" and "-".
    """
    # pylint: disable=too-many-locals
    df = itmdt["data"]
    xcol, ycol, *maybe_label = df.columns

    fig = Figure(
        plot_width=plot_width,
        plot_height=plot_height,
        toolbar_location=None,
        title=Title(text="Scatter Plot & Regression Line", align="center"),
        tools=[],
        x_axis_label=xcol,
        y_axis_label=ycol,
    )

    # Scatter
    scatter = fig.scatter(x=xcol, y=ycol, source=df)
    if maybe_label:
        assert len(maybe_label) == 1
        influence = {
            "field": maybe_label[0],
            "transform": CategoricalColorMapper(factors=["=", "+", "-"],
                                                palette=palette),
        }
        scatter.glyph.fill_color = dict(influence)
        scatter.glyph.line_color = dict(influence)

    # Regression line spanning the observed x range
    slope, intercept = itmdt["coeffs"]
    xs = df.iloc[:, 0]
    line_x = np.asarray([xs.min(), xs.max()])
    fig.line(x=line_x, y=slope * line_x + intercept, line_width=3)

    # The hover tool is attached only now so it applies to the scatter alone
    fig.add_tools(
        HoverTool(
            tooltips=[(xcol, f"@{{{xcol}}}"), (ycol, f"@{{{ycol}}}")],
            renderers=[scatter],
        )
    )

    # Legend: each entry points at the first row carrying its marker
    if maybe_label:
        marks = df[maybe_label[0]]
        nidx = df.index[marks == "-"][0]
        pidx = df.index[marks == "+"][0]
        entries = [
            LegendItem(label="Most Influential (-)", renderers=[scatter], index=nidx),
            LegendItem(label="Most Influential (+)", renderers=[scatter], index=pidx),
        ]
        fig.add_layout(Legend(items=entries, margin=0, padding=0), place="right")

    return fig
def _lisa_cluster_fig(geo_source,
                      moran_loc,
                      cluster_labels,
                      colors5,
                      bounds,
                      region_column='',
                      title=None,
                      plot_width=500,
                      plot_height=500,
                      tools=''):
    """
    Build the LISA cluster map figure.

    :param geo_source: data source with ``xs``, ``ys`` and ``labels_lisa``
    :param moran_loc: not used by this function
    :param cluster_labels: factors for the colour mapper and the legend
    :param colors5: palette matching ``cluster_labels``
    :param bounds: data bounds passed to ``calc_data_aspect``
    :param region_column: column shown as "Region" in the hover tooltip
    :param title: figure title
    :param plot_width, plot_height: figure size in pixels
    :param tools: Bokeh tools string; the hover tool is configured when it
        contains ``'hover'``
    :return: the bokeh figure
    """
    # Make the data aspect ratio match the figure aspect ratio to avoid map
    # distortion (1km=1km).
    x_min, x_max, y_min, y_max = calc_data_aspect(plot_height, plot_width,
                                                  bounds)

    fig = figure(title=title,
                 toolbar_location='right',
                 plot_width=plot_width,
                 plot_height=plot_height,
                 x_range=(x_min, x_max),
                 y_range=(y_min, y_max),
                 tools=tools)

    fill_color = {
        'field': 'labels_lisa',
        'transform': CategoricalColorMapper(palette=colors5,
                                            factors=cluster_labels)
    }
    fig.patches('xs',
                'ys',
                fill_color=fill_color,
                fill_alpha=0.8,
                nonselection_fill_alpha=0.2,
                nonselection_fill_color=fill_color,
                line_color='white',
                selection_line_color='firebrick',
                selection_fill_color=fill_color,
                line_width=0.5,
                source=geo_source)

    if 'hover' in tools:
        hover = fig.select_one(HoverTool)
        hover.point_policy = "follow_mouse"
        hover.tooltips = [("Region", "@" + region_column),
                          ("Significance", "@moranloc_psim{0.00}"),
                          ("Quadrant", "@moranloc_q{0}")]

    add_legend(fig, cluster_labels, colors5)

    # Layout: no grid lines, no axes.
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None
    # 'visible' is a boolean property; use False rather than None to hide.
    fig.axis.visible = False
    return fig
def map_test():
    """
    Render the county fire-risk map into the ``map.html`` template.

    County outlines come from the ``counties`` file, predictions from the
    module-level ``model`` applied to ``map_test.csv``; each county is filled
    red when its prediction is truthy and green otherwise.
    """
    # Import bokeh areas.
    # NOTE: dill, like pickle, executes code on load; 'counties' must be a
    # trusted file.
    with open('counties', 'rb') as in_strm:
        counties = dill.load(in_strm)

    # Generate current values
    df = pd.read_csv('map_test.csv')
    df.startDate = pd.to_datetime(df.startDate)

    # Inputs for the plot
    county = list(df['county'])
    predicted = model.predict(df)

    # Generate plot
    shapes = list(counties.values())
    county_xs = [c["lons"] for c in shapes]
    county_ys = [c["lats"] for c in shapes]
    county_cs = [c["name"] for c in shapes]

    # Categorical factors must be strings, so the prediction for each county
    # is stored as "True"/"False" and the mapper uses the same strings.
    risk = [str(bool(predicted[county.index(name)])) for name in county_cs]
    color_mapper = CategoricalColorMapper(palette=["red", "green"],
                                          factors=["True", "False"])

    source = ColumnDataSource(data=dict(
        x=county_xs,
        y=county_ys,
        name=county_cs,
        risk=risk,
    ))

    TOOLS = "pan,wheel_zoom,reset,hover,save"
    p = figure(title="Current risk of fire in California",
               tools=TOOLS,
               x_axis_location=None,
               y_axis_location=None)
    p.grid.grid_line_color = None
    p.patches('x',
              'y',
              source=source,
              fill_color={'field': 'risk', 'transform': color_mapper},
              fill_alpha=0.7,
              line_color="white",
              line_width=0.5)

    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [("County", "@name")]

    script, div = components(p)
    return render_template("map.html", script=script, div=div)
def make_figure(d):
    """
    Export one congestion heat map PNG per model ('fcnn', 'rf', 'en').

    Each figure draws a rectangle per (kp, dtime) cell filled by the model's
    ``<model>_status`` column. Uses the module-level ``highway``,
    ``direction`` and ``date`` for titles and file names.

    :param d: column data with ``kp``, ``dtime`` and the three
        ``*_status`` columns
    """
    mapper2 = CategoricalColorMapper(palette=['white', '#933b41'],
                                     factors=['free_flow', 'congestion'])

    # Axis extents are the same for every model: compute them once.
    kps = sorted(set(d['kp']))
    dtimes = sorted(set(d['dtime']))
    title = '{}_0{}_{}'.format(highway, direction, date)

    for k in ['fcnn', 'rf', 'en']:
        status_column = '{}_status'.format(k)
        source = ColumnDataSource(d)
        p = figure(
            title=title,
            x_range=list(kps),
            # Latest time at the start, earliest at the end of the y range.
            y_range=Range1d(start=dtimes[-1], end=dtimes[0]),
            y_axis_type='datetime',
            plot_height=288 * 3,
            x_axis_location='above',
            plot_width=20 * len(kps))
        p.rect('kp',
               'dtime',
               width=1,
               # Five minutes expressed in milliseconds.
               height=1000 * 60 * 5,
               source=source,
               fill_color={'field': status_column, 'transform': mapper2},
               fill_alpha=0.5,
               line_color=None,
               legend=status_column)
        p.xaxis.axis_label = 'KP(km)'
        p.yaxis.axis_label = 'Time'
        p.yaxis.formatter = DatetimeTickFormatter(hours=['%H:%M'],
                                                  days=['%H:%M'],
                                                  months=['%H:%M'],
                                                  years=['%H:%M'])
        p.ygrid[0].ticker.desired_num_ticks = 24
        p.xaxis.major_label_orientation = 45
        p.toolbar.logo = None
        p.toolbar_location = None

        # makedirs also creates missing parents and tolerates an existing
        # directory, so real failures are no longer silently swallowed.
        out_dir = '../../summary/result_/_pngfile/{}/{}_0{}'.format(
            k, highway, direction)
        os.makedirs(out_dir, exist_ok=True)
        export_png(p,
                   filename='{}/{}_0{}_{}.png'.format(out_dir, highway,
                                                      direction, date))
def initialize_data(self):
    """
    Load the RFM data, scale it, cluster it and train the segment tree.

    Reads ``rfm_data.csv``, adds a log-transformed total value, appends
    min-max scaled score columns, assigns a segment per row through
    ``self.generate_clusters`` and trains a tree through ``self.train_tree``.
    Also sets the column-name and configuration attributes used here.
    """
    self.df_rfm = pd.read_csv('rfm_data.csv')

    self.cID = 'CustID'
    self.cf1 = 'RecencyScore'
    self.cf2 = 'NrTransactions'
    self.cf3 = 'TotalValue'
    self.cf3_log = 'LogTotalValue'
    self.cfc = 'Segment'
    self.nr_cl = 4
    self.nr_tree_lvl = 3
    self.levels = ['AVERAGE', 'GOOD', 'VIP', 'LOW']
    self.origin_fields = [self.cf1, self.cf2, self.cf3]
    self.scale_fields = [self.cf1, self.cf2, self.cf3_log]
    self.cluster_fields = ['R_Score', 'F_Score', 'M_Score']

    # Log transform total value.
    self.df_rfm[self.cf3_log] = np.log(self.df_rfm[self.cf3])

    # MinMax scaling. (A standardization alternative,
    # preprocessing.scale, sat in an unreachable branch and was removed.)
    minmaxScaler = preprocessing.MinMaxScaler()
    np_arr = minmaxScaler.fit_transform(self.df_rfm[self.scale_fields])

    print("Initial object shape: {}".format(self.df_rfm.shape))
    self.df_rfm = pd.concat([
        self.df_rfm,
        pd.DataFrame(data=np_arr, columns=self.cluster_fields)
    ],
                            axis=1)
    print("Final object shape: {} \n columns:{}".format(
        self.df_rfm.shape, self.df_rfm.columns))

    self.df_rfm, _clf = self.generate_clusters(
        self.df_rfm,
        nr_clusters=self.nr_cl,
        predictors=self.cluster_fields,
        cluster_column=self.cfc)

    # The frame lives on self; the bare name 'df_rfm' was never defined
    # in this method and raised NameError.
    _clf_tree, _acc = self.train_tree(self.df_rfm,
                                      predictors=self.origin_fields,
                                      label_column=self.cfc,
                                      nr_lvl=self.nr_tree_lvl)

    # NOTE(review): 'pal' is not defined in this method (presumably a
    # module-level palette), and the factors are integers -- confirm both.
    self.color_mapper = CategoricalColorMapper(factors=list(range(8)),
                                               palette=pal)
def make_plot(store_now, store_future, ecom_zipcode_all):
    """
    Build the Google-map plot of stores and e-commerce transactions.

    Current stores are blue squares, future stores red squares, and zip codes
    are circles sized by their ``Size`` column and filled by ``inrange``.

    :param store_now: data source of current stores (``lng``, ``lat``)
    :param store_future: data source of future stores (``lng``, ``lat``)
    :param ecom_zipcode_all: data source of zip codes with ``lng``, ``lat``,
        ``Size`` and ``inrange``
    :return: the gmap plot
    """
    # Source: http://www.bigendiandata.com/2017-06-27-Mapping_in_Jupyter/
    import os

    from bokeh.models import GMapOptions, CategoricalColorMapper, HoverTool, LassoSelectTool
    from bokeh.palettes import RdBu3
    from bokeh.plotting import gmap

    # SECURITY NOTE: an API key is committed in source. The GOOGLE_API_KEY
    # environment variable takes precedence; the literal stays as a fallback
    # only so existing deployments keep working. Rotate it and drop it.
    api_key = os.environ.get('GOOGLE_API_KEY',
                             'AIzaSyCrnuAv4K0j80AZzUsYFS2NwyY49-yMXRI')

    map_options = GMapOptions(lat=42.37,
                              lng=-71.23,
                              map_type="roadmap",
                              zoom=10)
    plot = gmap(map_options=map_options,
                google_api_key=api_key,
                plot_width=780,
                plot_height=780,
                output_backend="webgl")
    plot.title.text = "PUMA Retail Store and Ecommerce Transactions"

    mapper = CategoricalColorMapper(
        factors=['Unreached', 'Reached', 'Future Reach'],
        palette=[RdBu3[1], RdBu3[0], RdBu3[2]])

    plot.square(x="lng", y="lat", size=20, color='blue', source=store_now)
    future_stores = plot.square(x="lng",
                                y="lat",
                                size=20,
                                color='red',
                                source=store_future)
    zip_codes = plot.circle(x="lng",
                            y="lat",
                            size='Size',
                            fill_color={
                                'field': 'inrange',
                                'transform': mapper
                            },
                            source=ecom_zipcode_all,
                            legend='inrange')

    # Separate hover tools so each renderer gets its own wording.
    zip_tooltips = [("Ship To ZipCode", "@store_zip"),
                    ("Ship To City", "@in_city"),
                    ('Ecom Transactions', '@Transactions')]
    plot.add_tools(HoverTool(tooltips=zip_tooltips, renderers=[zip_codes]))

    store_tooltips = [("Store ZipCode", "@store_zip"),
                      ("City located", "@in_city"),
                      ('Ecom Transactions in range', '@Transactions')]
    plot.add_tools(
        HoverTool(tooltips=store_tooltips, renderers=[future_stores]))

    plot.add_tools(LassoSelectTool())
    return plot
def update_value(attr, old, new):
    """
    Widget callback: refresh the map, histogram and pie sources and the title.

    Filters the module-level ``whole`` frame by the selected state, the
    search text and the selected day, then pushes the result into ``source``
    (map points), ``source1`` (star histogram) and ``pie_source1`` (share of
    ratings of 4.0 and above). The arguments are the standard Bokeh callback
    triple and are not used.
    """
    state_rows = whole[whole.state == text.value]
    matches_search = state_rows.categories.str.contains(search.value)
    # Keep businesses that have opening hours for the selected day.
    open_rows = state_rows.loc[
        state_rows[select_day.value.lower()] != 'None']
    new_data1 = open_rows.where(matches_search).dropna(
        thresh=18).sort_values(by=['stars'], ascending=False)

    # Every column comes from the freshly filtered frame. City, state and
    # zip used to be read from a different name ('new_data'), and 'state'
    # was listed twice.
    update_data = {
        'x': new_data1.longitude,
        'y': new_data1.latitude,
        'rating': new_data1.stars.astype(str),
        'state': new_data1.state,
        'name': new_data1.name,
        'address': new_data1.address,
        'city': new_data1.city,
        'Zip': new_data1.postal_code,
        'status': new_data1.is_open.astype(str),
        # NOTE(review): 'Day' repeats the name column -- confirm intent.
        'Day': new_data1.name
    }

    star_counts = Counter(new_data1.stars)
    update_data1 = {
        'x1': list(star_counts.keys()),
        'y1': list(star_counts.values())
    }

    total = sum(star_counts.values())
    great = star_counts[4.0] + star_counts[4.5] + star_counts[5.0]
    # An empty selection must not divide by zero.
    ratio = great / total if total else 0
    update_data2 = {
        'start': [0, ratio * 2 * pi],
        'end': [ratio * 2 * pi, 2 * pi],
        'ratio': [ratio, 1 - ratio],
        'color': ['darkgreen', 'darkred']
    }

    source.data = update_data
    source1.data = update_data1
    pie_source1.data = update_data2

    # NOTE(review): median-based map bounds and a rating colour mapper were
    # computed here but never applied to the plot; that dead code was
    # removed, including a 'mapper == False' line that raised
    # UnboundLocalError for fewer than three distinct ratings.
    plot.title.text = 'Yelp Rating about %s in %s on %s' % (
        search.value, text.value, select_day.value)
def create_scatter_plot(main_df):
    """
    Depict GDP against life expectancy for the 2013 rows of ``main_df``.

    Bubble size is interpolated from the country's population and the colour
    depicts its region.
    """
    snapshot = main_df.loc[2013]
    source = ColumnDataSource({
        'x': snapshot.gdp,
        'y': snapshot.life,
        'country': snapshot.Country,
        'population': snapshot.population,
        'region': snapshot.region,
    })

    # Population range of the whole frame maps onto marker sizes 5..75.
    populations = main_df.population
    size_mapper = LinearInterpolator(
        x=[populations.min(), populations.max()], y=[5, 75])
    colour_mapper = CategoricalColorMapper(
        factors=list(main_df.region.unique()), palette=Spectral6)

    fig = figure(title='Life Expectancy Against GDP in 2013',
                 x_axis_type='log',
                 x_range=(200, 200000),
                 y_range=(20, 100),
                 plot_width=1300,
                 plot_height=400)
    fig.circle(x='x',
               y='y',
               size={'field': 'population', 'transform': size_mapper},
               color={'field': 'region', 'transform': colour_mapper},
               alpha=0.7,
               legend='region',
               source=source)

    fig.xaxis[0].formatter = NumeralTickFormatter(format="$0,")
    fig.add_tools(HoverTool(tooltips="@country : $@x"))

    fig.legend.location = 'top_center'
    fig.legend.orientation = "horizontal"
    fig.xaxis.axis_label = "GDP Per Capita"
    fig.yaxis.axis_label = "Life Expectancy"
    return fig
def map_colors(data, sns_palette, n_colors):
    """
    Build a per-country colour mapper from a seaborn palette.

    :param data: frame with a ``Country`` column
    :param sns_palette: seaborn palette name or specification
    :param n_colors: number of colours to extract from the palette
    :return: ``(colormap, colors)`` -- the mapper and the hex colour strings
    """
    # Hex strings of the extracted colours.
    colors = sns.color_palette(sns_palette, n_colors).as_hex()
    countries = data['Country'].unique()
    return CategoricalColorMapper(palette=colors, factors=countries), colors
def make_plot(legend=False):
    """
    Build the Gapminder bubble chart for 1950 from the module-level ``data``.

    :param legend: when true the figure is wider and the legend is placed to
        the right; otherwise the legend is hidden
    :return: ``(figure, source, label)`` where ``label`` is the large year
        text drawn in the background
    """
    first_year = data.loc[1950]
    source = ColumnDataSource({
        'x': first_year.income,
        'y': first_year.life,
        'country': first_year.Country,
        'population': first_year.population,
        'region': first_year.region,
    })

    p = figure(height=400,
               width=800 if legend else 600,
               x_axis_type='log',
               x_range=(100, 100000),
               y_range=(0, 100),
               title='Gapminder',
               x_axis_label='Income',
               y_axis_label='Life expectancy',
               toolbar_location=None,
               tools=[HoverTool(tooltips='@country', show_arrow=False)])

    label = Label(x=100,
                  y=0,
                  text=str(1950),
                  text_font_size='70pt',
                  text_color='#eeeeee')
    p.add_layout(label)

    # Population range of the whole data set maps onto marker sizes 5..50.
    size_mapper = LinearInterpolator(
        x=[data.population.min(), data.population.max()], y=[5, 50])
    color_mapper = CategoricalColorMapper(
        factors=list(data.region.unique()),
        palette=Spectral6,
    )

    p.xaxis[0].formatter = NumeralTickFormatter(format="$0,")
    p.circle(x='x',
             y='y',
             size={'field': 'population', 'transform': size_mapper},
             color={'field': 'region', 'transform': color_mapper},
             alpha=0.6,
             source=source,
             legend='region')

    p.legend.border_line_color = None
    # Attach the legend to the right-hand side panel of the figure.
    p.right.append(p.legend[0])
    if legend:
        p.legend.location = (5, -15)
    else:
        p.legend.visible = False

    return p, source, label
def deaths():
    """
    Refresh the bar chart and data table for the selected report date.

    Takes the rows of the module-level ``data`` for ``select.value`` (or, for
    ``'latest'``, the day before the newest report date), ranks them by
    cumulative cases, computes the daily relative increase against the
    previous day, updates ``source`` and ``data_table_source`` and highlights
    the three countries with the most new deaths.
    """
    if select.value == 'latest':
        latest_day = (data["Date_reported"].max() -
                      datetime.timedelta(days=1)).strftime('%Y/%m/%d')
        result4 = data.loc[data["Date_reported"] == latest_day]
    else:
        result4 = data.loc[data["Date_reported"] == select.value]
    # An unconditional re-selection by select.value used to follow here; it
    # discarded the 'latest' branch result and has been removed.

    result4 = result4.sort_values(" Cumulative_cases", ascending=False)

    # Previous day's cumulative cases, joined per country.
    yesterday = data.loc[data["Date_reported"] == (
        result4['Date_reported'].max() - datetime.timedelta(days=1))]
    yesterday = yesterday[[" Cumulative_cases", " Country"]]
    yesterday = yesterday.rename(
        columns={' Cumulative_cases': 'yesterday_cases'}, inplace=False)
    result4 = pd.merge(result4,
                       yesterday,
                       left_on=' Country',
                       right_on=' Country',
                       how="left")

    # 'dzienny_przyrost' (Polish: daily increase), relative to the total.
    result4['dzienny_przyrost'] = (
        result4[' Cumulative_cases'] -
        result4['yesterday_cases']) / result4[' Cumulative_cases']
    result4['dzienny_przyrost'] = result4['dzienny_przyrost'].round(decimals=3)

    # Factors are cleared before being set to the new top-20 codes.
    plt.x_range.factors = []
    plt.x_range.factors = list(result4[' Country_code'][0:20])

    source2 = ColumnDataSource(
        data={
            'kraj': list(result4[' Country'][0:20]),
            'kod': list(result4[' Country_code'][0:20]),
            'cases': list(result4[' Cumulative_cases'][0:20]),
            'new': list(result4[' New_cases'][0:20]),
            'share': list(result4['share_of_all'][0:20]),
            'deaths': list(result4[' New_deaths'][0:20]),
            'increment': list(result4['dzienny_przyrost'][0:20]),
            'in_pop': list(result4['udzial_w_populacji'][0:20])
        })
    source.data = dict(source2.data)

    table = result4.copy()
    table["Date_reported"] = table["Date_reported"].dt.strftime('%d/%m/%Y')
    table = table[[
        'Date_reported', ' Country', ' Country_code', ' New_cases',
        ' Cumulative_cases', ' New_deaths', ' Cumulative_deaths',
        ' WHO_region', 'udzial_w_populacji'
    ]]
    data_table_source2 = ColumnDataSource(table)
    data_table_source.data = dict(data_table_source2.data)

    # Country codes of the three rows with the most new deaths.
    b = result4.sort_values(" New_deaths", ascending=False)[0:20]
    b = b[0:3]
    lista = list(b[' Country_code'])

    # Those countries are filled red through a categorical mapper.
    mapper = CategoricalColorMapper(
        palette=['red', 'red', 'red'],
        factors=list(result4.loc[result4[' Country_code'].isin(lista)]
                     [' Country'][0:3]))
    a.glyph.fill_color = dict(field='kraj', transform=mapper)
def show_embedding(emb, alldat, res, savefig=False, showfig=True):
    """
    Plot a 2-D embedding with image tooltips, coloured by ``specie``.

    Note that ``emb`` is modified in place: its first two columns are renamed
    to ``x``/``y`` and ``turns``, ``index`` and ``image`` columns are added.

    :param emb: embedding frame; must also hold a ``specie`` column
    :param alldat: frame providing ``turns`` and the index shown in tooltips
    :param res: items converted to embeddable images, one per row
    :param savefig: write ``UMAP_exploration.html`` when true
    :param showfig: show the figure when true
    """
    first_col, second_col = emb.columns[0], emb.columns[1]
    emb.rename(columns={first_col: "x", second_col: "y"}, inplace=True)
    emb['turns'] = [str(turn) for turn in alldat.turns]
    emb['index'] = [str(idx) for idx in alldat.index]
    emb['image'] = [embeddable_image(item) for item in res]

    datasource = ColumnDataSource(emb)
    specie_colours = CategoricalColorMapper(
        factors=np.unique(emb['specie']).tolist(),
        palette=[
            '#1F77B4', '#FF7F0E', '#2CA02C', '#D62728', '#9467BD', '#8C564B'
        ])

    plot_figure = figure(
        title='UMAP projection on diffusion components of all tracks',
        plot_width=900,
        plot_height=900,
        tools=('pan, wheel_zoom, reset'))
    plot_figure.title.text_font_size = '18pt'

    # HTML tooltip: the thumbnail followed by specie, turns and index.
    tooltip_html = (
        " <div> <div> "
        "<img src='@image' style='float: left; margin: 5px 5px 5px 5px'/> "
        "</div> <div> "
        "<span style='font-size: 16px'>@specie</span> "
        "<span style='font-size: 16px'>turns: @turns</span> "
        "<span style='font-size: 16px'>idx: @index</span> "
        "</div> </div> "
    )
    plot_figure.add_tools(HoverTool(tooltips=tooltip_html))

    plot_figure.circle('x',
                       'y',
                       source=datasource,
                       color={'field': 'specie', 'transform': specie_colours},
                       line_alpha=0.6,
                       fill_alpha=0.6,
                       size=4)

    if savefig:
        output_file("UMAP_exploration.html", mode="inline")
        save(plot_figure, resources="inline")
    if showfig:
        show(plot_figure)
def make_dataset(select_site, text_input, slider_time):
    """
    Load one site's data, classify each row and return it as a data source.

    Rows inside the slider's time window are labelled TP / FP / FN / TN by
    comparing the moving average with the entered threshold and the hourly
    value with the converted threshold. Rows matching none of the four
    conditions are labelled ERROR.

    Side effects: sets the module globals ``color_map``, ``convert_t`` and
    ``groupby``.

    :param select_site: widget whose ``value`` names the site CSV
    :param text_input: widget whose ``value`` is the threshold text
    :param slider_time: range widget whose ``value`` is ``(start, end)``
    :return: ColumnDataSource with the two value columns and ``cat``
    """
    global color_map, convert_t, groupby
    path = '/Users/farewell/Documents/EMCT/WOT/Bokeh_app/data/'
    df = pd.read_csv(path + select_site.value + '.csv')
    fun = pd.read_csv(path + '2017_format.csv')

    # Check time window.
    time_start = slider_time.value[0]
    time_end = slider_time.value[1]
    assert time_start < time_end, "Start must be less than end!"

    # Filter data to the window; '時間' is the time column.
    df = df[(df['時間'] >= time_start) & (df['時間'] <= time_end)]

    # Threshold converted through the site's linear-regression row; columns
    # 1 and 2 of that row are used as offset and divisor.
    fun = fun[fun['Location'] == select_site.value].values
    funList = fun.tolist()
    entered = float(text_input.value)
    thersheld = abs(entered)
    convert_t = (thersheld - funList[0][1]) / funList[0][2]
    hourly_limit = round(convert_t, 2)

    # '移動平均' is the moving average, '小時值' the hourly value.
    condition = []
    for moving_avg, hourly in zip(df['移動平均'], df['小時值']):
        if moving_avg >= entered and hourly >= hourly_limit:
            condition.append('TP')
        elif moving_avg >= entered and hourly < hourly_limit:
            condition.append('FP')
        elif moving_avg < entered and hourly > hourly_limit:
            condition.append('FN')
        elif moving_avg < entered and hourly < hourly_limit:
            condition.append('TN')
        else:
            condition.append('ERROR')
    df['cat'] = condition
    df = df.loc[:, ['移動平均', '小時值', 'cat']]
    groupby = df.groupby('cat')['小時值'].size()

    # Colour: Category10 has no entry below its smallest size, so take the
    # smallest palette and trim it when fewer categories are present.
    categories = df['cat'].unique()
    sized_palettes = d3['Category10']
    n_categories = len(categories)
    palette = sized_palettes[max(n_categories,
                                 min(sized_palettes))][:n_categories]
    color_map = CategoricalColorMapper(factors=categories, palette=palette)
    return ColumnDataSource(df)