def get_plot(df):
    """Build the sepal width-vs-length scatter chart for ``df``.

    The chart reads the ``sepal_length`` and ``sepal_width`` columns, gets
    larger axis and title fonts, and receives a default hover tool.

    Returns the chart object.
    """
    chart = Scatter(
        df,
        x='sepal_length',
        y='sepal_width',
        xlabel='Sepal Length [cm]',
        ylabel='Sepal Width [cm]',
        title='Sepal width vs. length',
    )

    # Both axes share the same label and tick font sizes (x first, then y).
    for axes in (chart.xaxis, chart.yaxis):
        axes.axis_label_text_font_size = '14pt'
        axes.major_label_text_font_size = '10pt'
    chart.title.text_font_size = '16pt'

    # The hover tool is added with default settings; tooltips still need
    # to be configured.
    chart.add_tools(HoverTool())
    return chart
def main():
    """Cluster crime locations with k-means and plot the clusters.

    This function will
        - load data from a csv
        - impute missing data with the column's mean
        - perform k-means clustering
        - produce an html scatter plot
    """
    # Load data from a CSV to a dataframe. ``read_csv`` with ``index_col=0``
    # and ``parse_dates=True`` is the documented replacement for
    # ``DataFrame.from_csv``, which is no longer available in pandas.
    with open(settings["crime_data"]) as in_data:
        crime_data = pd.read_csv(in_data, sep=',', index_col=0,
                                 parse_dates=True)

    # Mark missing cells with the sentinel the imputer looks for below.
    crime_data = crime_data.fillna(value=-999)

    # Load all numeric data into an array. The offense column from the crime
    # data is excluded.
    as_array = np.asfarray(crime_data[["X", "Y"]])

    # number of groups
    n_clusters = 40

    # Correct missing data
    imputer = Imputer(missing_values=-999, strategy="mean")
    patched = imputer.fit_transform(as_array)

    # cluster data
    cluster = KMeans(n_clusters=n_clusters)
    cluster.fit(patched)

    # assign the cluster labels to the crime data
    labels = cluster.labels_
    crime_data["labels"] = labels

    pdict = create_ordered_dict(crime_data, "labels")

    # NOTE(review): output locations are hard-coded to one user's desktop.
    crime_data.to_csv(r'C:\users\andrew_woizesko\desktop\knn.csv')
    np.savetxt(r'C:\users\andrew_woizesko\desktop\centers.csv',
               cluster.cluster_centers_)

    # location of output graph
    file_name = os.path.join(
        "..", 'tests', "kmeans_clusters_{0}.html".format(time_stamp()))
    output_file(file_name)

    # create our graph
    TOOLS = "pan,wheel_zoom,box_zoom,reset"
    scatter = Scatter(pdict.values(), title="Crime Clusters",
                      filename=file_name, tools=TOOLS)
    scatter.show()
def create_chart(df, attr1, attr2):
    """Chart two stats against each other for rows above both cut-offs.

    ``attr1`` and ``attr2`` must each be one of 'PTS', 'TRB', 'AST', 'BLK'
    or 'STL'. Rows are kept only when both stats exceed their cut-off; the
    resulting scatter is written to ``stats.html`` and shown.
    """
    cutoffs = {'PTS': 15, 'TRB': 10, 'AST': 7, 'BLK': 1, 'STL': 1}
    display_names = {
        'PTS': 'Points',
        'TRB': 'Rebounds',
        'AST': 'Assists',
        'BLK': 'Blocks',
        'STL': 'Steals',
    }
    stats = (attr1, attr2)

    # Build both filter expressions first, then apply them in turn.
    filters = [stat + " > " + str(cutoffs[stat]) for stat in stats]
    for expression in filters:
        df = df.query(expression)

    x_label, y_label = [display_names[stat] + ' PER 36' for stat in stats]
    hover_fields = [
        ('Player', '@Player'),
        (attr1, '@' + attr1),
        (attr2, '@' + attr2),
        ('Team', '@Tm'),
    ]

    chart = Scatter(df, x=attr1, y=attr2, xlabel=x_label, ylabel=y_label,
                    tooltips=hover_fields)
    output_file('stats.html')
    show(chart)
def scatter(df,x_axis,y_axis,title):
    """Return a Scatter chart of ``y_axis`` against ``x_axis``.

    Points are coloured by the ``x_axis`` column, each axis is labelled with
    its column name, and the legend is placed at the top right.
    """
    options = dict(
        x=x_axis,
        y=y_axis,
        color=x_axis,
        title=title,
        legend='top_right',
        xlabel=x_axis,
        ylabel=y_axis,
    )
    return Scatter(df, **options)
def Test():
    """Plot one runner's split durations per year from the marathon database.

    Reads the splits stored for the runner named "Barry,Smyth" from
    ``DublinMarathons.db``, converts the durations with ``Convert`` and shows
    a scatter of ``Split_Duration`` against ``Year`` in ``Scatter.html``.
    """
    columns = ['Split', 'Split_Start_Time', 'Split_Finish_Time',
               'Split_Duration', 'Year', 'Name']
    query = """
        Select Splits.Split, Splits.Split_Start_Time,
               Splits.Split_Finish_Time, Splits.Split_Duration,
               RaceDetails.Year, Runner.Name
        from ((Splits
               INNER JOIN Runner ON Splits.RunnerID=Runner.RunnerID)
               INNER JOIN RaceDetails ON Splits.RaceID=RaceDetails.RaceID)
        where Runner.Name="Barry,Smyth";
        """

    # ``with connection:`` only commits or rolls back; it does not close the
    # connection, so close it explicitly once the rows are fetched.
    conn = sqlite3.connect('DublinMarathons.db')
    try:
        rows = conn.cursor().execute(query).fetchall()
    finally:
        conn.close()

    df = pd.DataFrame(rows, columns=columns)
    df.Split_Duration = df.Split_Duration.map(Convert)

    output_file('Scatter.html')

    # NOTE(review): an earlier revision built a ColumnDataSource and a
    # HoverTool with "$Name"-style tooltips but never attached either to the
    # chart. They were dropped as dead code. If custom tooltips are wanted,
    # column values are referenced with "@column", not "$column".
    p = Scatter(df, x='Split_Duration', y='Year', tools="hover")
    show(p)
def abc_detail(abc_id):
    """Render the detail page for the ABC run selected by ``abc_id``.

    Points the configured history object at ``abc_id``. When model
    probabilities are available, a tabbed plot (probability, samples,
    particles, epsilon) is embedded; otherwise the page is rendered with a
    "no data" placeholder instead of a plot.
    """
    history = app.config["HISTORY"]
    history.id = abc_id
    abc = ABCInfo(history.get_abc())

    model_probabilities = history.get_model_probabilities()
    # Keep the column index as it was before the labels are formatted.
    model_ids = model_probabilities.columns
    model_probabilities.columns = [
        "{}".format(label) for label in model_probabilities.columns
    ]
    model_probabilities = model_probabilities.reset_index()

    # Nothing to plot: render the page with the placeholder.
    if len(model_probabilities) == 0:
        return render_template("abc_detail.html",
                               abc_id=abc_id,
                               plot=PlotScriptDiv(
                                   "", "Exception: No data found."),
                               BOKEH=BOKEH,
                               abc=abc)

    all_populations = history.get_all_populations()
    populations = all_populations[all_populations.t >= 0]

    particle_counts = history.get_nr_particles_per_population().reset_index()
    particle_counts = particle_counts.rename(
        columns={"index": "t", "t": "particles"})
    particles = particle_counts.query("t >= 0")

    melted = pd.melt(model_probabilities, id_vars="t", var_name="m",
                     value_name="p")
    prob_plot = Bar(melted, label="t", stack="m", values="p")
    prob_plot.ylabel = "p"

    # One tab per chart, in display order.
    panels = [Panel(child=prob_plot, title="Probability")]
    scatter_tabs = (
        ("Samples", "nr_samples", populations),
        ("Particles", "particles", particles),
        ("Epsilon", "epsilon", populations),
    )
    for tab_title, column, frame in scatter_tabs:
        panels.append(
            Panel(child=Scatter(x="t", y=column, data=frame),
                  title=tab_title))

    plot = PlotScriptDiv(*components(Tabs(tabs=panels)))
    return render_template("abc_detail.html",
                           abc_id=abc_id,
                           plot=plot,
                           BOKEH=BOKEH,
                           model_ids=model_ids,
                           abc=abc)
def createBokehChart(self):
    """Return a Scatter of the first value field against the first key field.

    Axis labels reuse the field names; legend and colour come from
    ``showLegend()`` and the ``color`` option.
    """
    frame = self.getWorkingPandasDataFrame()
    # Entries are evaluated top to bottom, so the accessors run in this order.
    chart_options = {
        'x': self.getKeyFields()[0],
        'y': self.getValueFields()[0],
        'xlabel': self.getKeyFields()[0],
        'ylabel': self.getValueFields()[0],
        'legend': self.showLegend(),
        'color': self.options.get("color"),
    }
    return Scatter(frame, **chart_options)
def create_plot(brand='AMC'):
    """Show a length-vs-trunk scatter for the ``df_auto`` rows of one brand."""
    is_brand = df_auto.brand == brand
    chart = Scatter(
        df_auto[is_brand],
        x='trunk',
        y='length',
        title="Length vs Trunk",
        legend="top_left",
        xlabel="Trunk",
        ylabel="Length",
    )
    show(chart)
def plot():
    """Fetch data for the submitted ticker and render it as a scatter chart.

    The ticker comes from the ``name_ticker`` form field. The CSV returned by
    the Quandl request is loaded into a DataFrame, its first column is plotted
    on x and its second on y, and the chart is embedded in ``home.html``.
    """
    ticker = request.form['name_ticker']
    apicall = ('https://www.quandl.com/api/v3/datasets/WIKI/' + ticker +
               '/data.csv?column_index=4'
               '&start_date=2012-11-01&end_date=2013-11-30')
    # NOTE(review): the API key is hard-coded in source and ``ticker`` is user
    # input placed in the URL unescaped; both should be revisited.
    apikey = '&api_key=yRdMoLRR-tk-oNmDdQpd'
    strcall = apicall + apikey
    response = requests.get(strcall)

    # Only one separator may be given; the data is comma separated.
    df = pd.read_csv(io.BytesIO(response.content), delimiter=',')

    # Plot the columns that the response actually contains, not the iris
    # column names this view used before.
    x_col, y_col = df.columns[:2]
    p = Scatter(df, x=x_col, y=y_col,
                title='{0} {1} vs. {2}'.format(ticker, y_col, x_col))
    p.title.text_font_size = '16pt'
    # Tooltips still need to be configured for a useful HoverTool.
    p.add_tools(HoverTool())

    script, div = components(p)
    return render_template('home.html', script=script, div=div)
def scatter(self, dataframe, x=None, y=None, width=None, height=None,
            color=None, title=None, xaxis_label=None, yaxis_label=None,
            label=None):
    """Build a Scatter chart of ``dataframe`` with optional hover labels.

    ``color`` and ``width`` fall back to ``self.__default_options__`` when not
    given; the final size comes from ``self._width_height``. When ``label``
    names a column, a hover tool is added that shows that column as "Id".
    ``xaxis_label`` / ``yaxis_label`` set the axis labels when provided.

    Returns the chart.
    """
    defaults = self.__default_options__
    if color is None:
        color = defaults.get('color', None)
    if width is None:
        width = defaults.get('width', None)
    width, height = self._width_height(width, height)

    # The base tools are always kept; hover is added only when there is a
    # label to show. (Written as ``TOOLS + ',hover' if label else ''`` the
    # conditional applied to the whole expression and removed every tool
    # when no label was given.)
    tools = TOOLS
    if label:
        tools += ',hover'

    scatter = Scatter(dataframe, x=x, y=y, width=width, height=height,
                      color=color, title=title, tools=tools)

    if label:
        hover = scatter.select_one(dict(type=HoverTool))
        hover.tooltips = [("Id", "@%s" % label)]
        # The tooltip refers to a column, so add that column to the glyph's
        # data source.
        renderer = scatter.select_one(dict(type=GlyphRenderer))
        renderer.data_source.data[label] = dataframe[label].tolist()

    if xaxis_label:
        scatter._xaxis.axis_label = xaxis_label
    if yaxis_label:
        scatter._yaxis.axis_label = yaxis_label

    return scatter
def bokeh_high_level_scatter(ag2):
    """Show a Dog-vs-Cat scatter of ``ag2`` coloured by the Livestock column.

    The chart is written to ``bokeh_high_scatter.html``.
    """
    palette = [
        '#7fc97f', '#beaed4', '#fdc086', '#ffff99',
        '#386cb0', '#f0027f', '#bf5b17',
    ]
    # Each tooltip row shows a column under its own name.
    hover_fields = [
        (name, "@" + name)
        for name in ("Cat", "Dog", "Livestock", "ZipCode")
    ]
    chart = Scatter(
        ag2,
        x='Cat',
        y='Dog',
        color=color('Livestock', palette=palette),
        tooltips=hover_fields,
    )
    output_file('bokeh_high_scatter.html')
    show(chart)
def eitc_plot(eitc):
    """Plot the EITC as a function of earned income.

    The blue scatter is written to ``eitc.html`` and shown.
    """
    chart_options = dict(
        x='earned_income',
        y='eitc',
        color='blue',
        title="EITC for Earned Income",
        legend='top_right',
        xlabel="earned_income",
        ylabel="eitc",
    )
    chart = Scatter(eitc, **chart_options)
    output_file("eitc.html")
    show(chart)
def scatter_plot(X, Y, xlabel='', ylabel='', title='', height=3):
    """Return a width-scaling Scatter chart of ``Y`` against ``X``.

    ``height`` is multiplied by the module-level ``ht`` and truncated to an
    int to get the plot height. Axis labels are set after construction.
    """
    frame = pd.DataFrame({'X': X, 'Y': Y})
    chart = Scatter(
        frame,
        'X',
        'Y',
        title=title,
        plot_height=int(height * ht),
        sizing_mode='scale_width',
    )
    chart.xaxis.axis_label = xlabel
    chart.yaxis.axis_label = ylabel
    return chart
def main():
    """Render the overview page with its bar, scatter and line charts."""
    # check recalculation request (an absent parameter never equals 'True')
    if request.args.get('recalculate') == 'True':
        betalyzer.recalculate()

    tickers = betalyzer.df_tickers
    half_width = dict(plot_width=550, plot_height=400)

    # build sector betas bar chart
    sector_betas = tickers.groupby('sector')['beta'].mean()
    bk_sector_betas = Bar(sector_betas, legend=None, **half_width)
    bk_sector_betas_script, bk_sector_betas_div = components(bk_sector_betas)

    # build market cap betas bar chart
    mktcap_betas = tickers.groupby('market_cap_decile')['beta'].mean()
    bk_mc_betas = Bar(mktcap_betas, legend=None, **half_width)
    bk_mc_betas_script, bk_mc_betas_div = components(bk_mc_betas)

    # build market cap scatter plot
    scatter = Scatter(tickers, x='market_cap_log', y='beta', **half_width)
    scatter_script, scatter_div = components(scatter)

    # build line plot for top three stocks
    top_tickers = tickers['ticker'].head(3)
    bk_history = Line(betalyzer.df_betas[top_tickers], **half_width)
    bk_history_script, bk_history_div = components(bk_history)

    context = dict(
        dt_tickers=tickers.to_dict(orient='records'),
        bk_sector_betas_script=bk_sector_betas_script,
        bk_sector_betas_div=bk_sector_betas_div,
        bk_mc_betas_script=bk_mc_betas_script,
        bk_mc_betas_div=bk_mc_betas_div,
        scatter_script=scatter_script,
        scatter_div=scatter_div,
        bk_history_script=bk_history_script,
        bk_history_div=bk_history_div,
    )
    return render_template('main.html', **context)
def scatter():
    """Build the iris petal scatter with its legend moved above the plot."""
    chart = Scatter(
        flowers,
        x=blend('petal_length', name='Length'),
        y=blend('petal_width', name='Width'),
        color='species',
        palette=Spectral4,
        legend=True,
        tools='tap,box_select,save',
        title="Fisher's Iris data set",
    )

    # Move the legend off-canvas: restyle the first legend and attach it
    # to the panel above the plot.
    top_legend = chart.legend[0]
    top_legend.border_line_color = None
    top_legend.orientation = 'horizontal'
    top_legend.location = (0, 0)
    chart.above.append(top_legend)

    return chart
def kmeanspp(X, k, num_iter=100):
    """Run ``num_iter`` assign/recompute rounds and show the clustering.

    Centroids come from ``init_centroids(X, k)``; each round assigns points
    with ``assign_to_clusters`` and recomputes centroids with
    ``recalc_centroids``. The points are then shown as an x/y scatter
    coloured by the cluster taken from the last assignment.

    Assumes ``X`` holds two-column points, since the frame is labelled
    ``x``/``y``/``cluster`` — TODO confirm against callers.

    Raises:
        ValueError: if ``num_iter`` is less than 1. Without at least one
            round there is no assignment to plot (this used to fail later
            with an unbound-variable error).
    """
    if num_iter < 1:
        raise ValueError("num_iter must be at least 1, got %r" % (num_iter,))

    centroids = init_centroids(X, k)
    for _ in range(num_iter):
        D = assign_to_clusters(X, centroids)
        centroids = recalc_centroids(X, D)

    # The second item of each assignment entry is used as the cluster label.
    cluster_labels = [d[1] for d in D]
    df = pd.concat([pd.DataFrame(X), pd.Series(cluster_labels)], axis=1)
    df.columns = ['x', 'y', 'cluster']

    p = Scatter(df, x='x', y='y', color='cluster')
    show(p)
def plot_2d_scatter(df, vectorizer, text='Body', to_plot='Label'):
    """Embed the texts in 2-D with MDS on cosine distance and show a scatter.

    ``df[text]`` is vectorised with ``vectorizer``, pairwise cosine distances
    are embedded in two dimensions, and the points are coloured by
    ``df[to_plot]``.
    """
    X = vectorizer.transform(df[text])
    cos_dist = 1 - cosine_similarity(X.todense())

    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=0)
    pos = mds.fit_transform(cos_dist)  # shape (n_samples, n_components)

    distance_df = pd.DataFrame(pos, columns=['x', 'y'])
    # Assign by position. ``distance_df`` has a fresh 0..n-1 index, so
    # assigning the Series itself would align on ``df``'s index and misplace
    # or drop labels whenever ``df`` is not indexed 0..n-1.
    distance_df[to_plot] = df[to_plot].values

    p = Scatter(distance_df, x='x', y='y',
                title="MDS: White House Posts",
                color=to_plot,
                legend="top_right")
    show(p)
def generate_scatter(table_data, form_data):
    """Generate scatter plot.

    Empty ``marker`` / ``color`` entries in ``form_data`` are replaced with
    ``None`` in place. The chart is saved to ``output.html`` and the result
    of ``build_html()`` is returned.
    """
    # Blank selections mean "not set"; the passed-in mapping is updated.
    for option in ('marker', 'color'):
        if form_data[option] == '':
            form_data[option] = None

    x_field = form_data['x']
    y_field = form_data['y']
    plot = Scatter(
        table_data,
        x=x_field,
        y=y_field,
        title=x_field + ' vs ' + y_field,
        color=form_data['color'],
        marker=form_data['marker'],
        tools='pan,wheel_zoom,box_zoom,reset,resize,hover,save',
    )
    plot.title.text_font_style = "bold"

    output_file("output.html")
    save(plot)
    return build_html()
def plt_scatter1(data, param_dict, mode='object', output_path=None):
    """Build a scatter chart described by ``param_dict`` and hand it to output.

    ``param_dict`` must contain ``'x'`` and ``'y'``. ``'color'`` is optional
    and defaults to ``'red'``; ``'size'`` is optional. When the first record's
    x or y value is a string, the matching range is replaced by the result of
    ``sort_axis``.

    Returns whatever ``do_output(p, mode, output_path)`` returns.
    """
    tooltips = create_tooltips(param_dict)
    x_field = param_dict['x']
    y_field = param_dict['y']

    # Resolve the colour before building the title. The title used to read
    # ``param_dict['color']`` unconditionally, which raised KeyError before
    # the 'red' fallback could ever apply.
    title = "{} by {}".format(y_field.upper(), x_field.upper())
    if 'color' in param_dict:
        color = param_dict['color']
        title += " (dot colors show {})".format(color.upper())
    else:
        color = 'red'

    p = Scatter(data=data, x=x_field, y=y_field,
                color=color, title=title,
                xlabel=x_field, ylabel=y_field,
                plot_width=1000, plot_height=600,
                tooltips=tooltips,
                legend_sort_field='color',
                legend_sort_direction='ascending')
    p.legend.background_fill_alpha = 0.8

    # Prefer an explicit size; otherwise vary the marker by the colour field
    # when one was supplied.
    if 'size' in param_dict:
        p.radius = param_dict['size']
    elif 'color' in param_dict:
        p.marker = param_dict['color']

    # String-valued axes get an explicitly sorted range.
    if isinstance(data[0][x_field], str):
        p.x_range = sort_axis(data, x_field, False, True)
    if isinstance(data[0][y_field], str):
        p.y_range = sort_axis(data, y_field, False, True)

    return do_output(p, mode, output_path)
def ticker(ticker):
    """Render the detail page for one ticker: line, scatter and histogram."""
    half_width = dict(plot_width=550, plot_height=400)

    # build line
    line = Line(betalyzer.df_betas[ticker], plot_width=1000, plot_height=400)
    bokeh_script, bokeh_div = components(line)

    # build scatter
    recent_changes = betalyzer.df_changes.head(betalyzer.window)
    scatter = Scatter(recent_changes, x=betalyzer.market, y=ticker,
                      **half_width)
    scatter_script, scatter_div = components(scatter)

    # build histogram
    pair_changes = betalyzer.df_changes[[ticker, 'SPY']].head(500)
    df_hist = pair_changes.unstack().reset_index()
    df_hist = df_hist.rename(columns={'level_0': 'ticker', 0: 'change'})
    hist = Histogram(df_hist, values='change', color='ticker', bins=20,
                     legend='top_right', **half_width)
    hist_script, hist_div = components(hist)

    context = dict(
        ticker=ticker,
        bokeh_script=bokeh_script,
        bokeh_div=bokeh_div,
        scatter_script=scatter_script,
        scatter_div=scatter_div,
        hist_script=hist_script,
        hist_div=hist_div,
        window=betalyzer.window,
        dt_ticker=betalyzer.df_tickers.loc[ticker].to_dict(),
    )
    return render_template('ticker.html', **context)
output_file("lines.html", title="line.py example")

# Every chart below shares the same y-label, size and toolset.
_chart_opts = dict(ylabel='measures', width=500, height=300, tools=TOOLS)

vline = Line(xyvalues, title="Lines VLine", **_chart_opts)
hline = Line(xyvalues, title="Lines HLine", **_chart_opts)
int_vline = Line(xyvalues, title="Lines VLine Interp", **_chart_opts)
int_hline = Line(xyvalues, title="Lines HLine Interp", **_chart_opts)

# Scatter input: for each column, the list of (index, value) pairs.
svalues = {}
for k in xyvalues.columns:
    svalues[k] = list(zip(index, xyvalues[k]))

scatter_point = Scatter(svalues, title="Scatter mouse", legend=True,
                        **_chart_opts)
scatter = Scatter(svalues, title="Scatter V Line", legend=True,
                  **_chart_opts)

int_point_line = Line(xyvalues, title="Lines Mouse Interp.", **_chart_opts)
point_line = Line(xyvalues, title="Lines Mouse", **_chart_opts)

# Configure the hover tool(s) selected from the HLine chart.
hhover = hline.select({'type': HoverTool})
hhover.mode = 'hline'
hhover.line_policy = 'next'
from bokeh.charts import Scatter, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Horsepower against miles per gallon for the autompg sample data.
p = Scatter(
    df,
    x='mpg',
    y='hp',
    title="HP vs MPG",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)

# Write the chart to scatter.html and open it.
output_file("scatter.html")
show(p)
import seaborn as sns

# In[2]:

# Load the data
exercise = sns.load_dataset('exercise')
output_notebook()
#output_file('test.html')

# * bokeh.charts

# In[3]:

# Scatter plot
p = Scatter(
    data=exercise,
    x='id',
    y='pulse',
    title='exercise dataset',
)
show(p)

# In[4]:

# Bar chart
p = Bar(
    data=exercise,
    values='pulse',
    label='diet',
    stack='kind',
    title='exercise dataset',
)
show(p)

# In[5]:

# Box plot
s2.triangle(x, y1, size=10, color="firebrick", alpha=0.5)

# NEW: create a new plot and share only one range
s3 = figure(width=250, height=250, x_range=s1.x_range, title=None)
s3.square(x, y2, size=10, color="olive", alpha=0.5)

# NEW: put the subplots in a gridplot
p = gridplot([[s1, s2, s3]], toolbar_location=None)

# show the results
show(p)

### Example from the high-level charts tutorial
from bokeh.charts import Scatter, output_file, show
from bokeh.sampledata.autompg import autompg as df

print(df)

# The chart plots weight (y) against mpg (x), so the title names those
# columns; it previously read "HP vs weight" although hp is not plotted.
p = Scatter(df, x='mpg', y='weight', title="Weight vs MPG", color="navy",
            xlabel="Miles Per Gallon", ylabel="weight")

output_file("scatter.html")
show(p)
from bokeh.charts import Scatter

# Keep only the columns of interest and group the rows by species.
df = flowers[["petal_length", "petal_width", "species"]]
g = df.groupby("species")
xyvalues = g

# Collect each group's (x, y) pairs in a dict keyed by group name. The
# first two columns of the group supply x and y.
pdict = OrderedDict()
for i in g.groups.keys():
    group = g.get_group(i)
    labels = group.columns
    xname, yname = labels[0], labels[1]
    x = getattr(group, xname)
    y = getattr(group, yname)
    pdict[i] = zip(x, y)

# any of the following commented are valid Scatter inputs
#xyvalues = pdict
#xyvalues = pd.DataFrame(xyvalues)
#xyvalues = xyvalues.values()
#xyvalues = np.array(xyvalues.values())

TOOLS = "resize,crosshair,pan,wheel_zoom,box_zoom,reset,previewsave"

scatter = Scatter(
    xyvalues,
    filename="iris_scatter.html",
    tools=TOOLS,
    ylabel='petal_width',
    facet=False,
)

# Title and legend first, then size the chart and show it.
scatter.title("iris dataset").legend("top_left")
scatter.width(600).height(400).show()
""" Iris example showing how chart operations specify combined variables.

A blend stacks several columns into one and gives the combined column a new
name, which is useful when the column itself acts as a kind of categorical
variable. Here, length and width are derived from the petal and sepal
measurements.
"""

from bokeh.charts import Scatter, output_file, show
from bokeh.charts.operations import blend
from bokeh.sampledata.iris import flowers as data

# Stack petal and sepal measurements into single length / width variables.
length = blend('petal_length', 'sepal_length', name='length')
width = blend('petal_width', 'sepal_width', name='width')

scatter = Scatter(
    data,
    x=length,
    y=width,
    color='species',
    title='x=petal_length+sepal_length, y=petal_width+sepal_width, color=species',
    legend='top_right',
)

output_file("iris_blend.html", title="iris_blend.py example")
show(scatter)
value_name='Count', var_name='Degree') vline = Line(data, y='Count', color='Degree', title="Lines VLine", ylabel='measures', tools=TOOLS) hline = Line(data, y='Count', color='Degree', title="Lines HLine", ylabel='measures', tools=TOOLS) int_vline = Line(data, y='Count', color='Degree', title="Lines VLine Interp", ylabel='measures', tools=TOOLS) int_hline = Line(data, y='Count', color='Degree', title="Lines HLine Interp", ylabel='measures', tools=TOOLS) scatter_point = Scatter(data, x='Year', y='Count', color='Degree', title="Scatter mouse", ylabel='measures', legend=True, tools=TOOLS) scatter = Scatter(data, x='Year', y='Count', color='Degree', title="Scatter V Line", ylabel='measures', legend=True, tools=TOOLS) int_point_line = Line(data, x='Year', y='Count', color='Degree', title="Lines Mouse Interp.", ylabel='measures', tools=TOOLS) point_line = Line(data, x='Year', y='Count', color='Degree', title="Lines Mouse", ylabel='measures', tools=TOOLS) hhover = hline.select(HoverTool) hhover.mode = 'hline' hhover.line_policy = 'next'
from bokeh.charts import Scatter, output_file, show
from bokeh.sampledata.iris import flowers as data

# Petal width against petal length; species selects both colour and marker.
scatter = Scatter(
    data,
    x='petal_length',
    y='petal_width',
    color='species',
    marker='species',
    title='Iris Dataset Color and Marker by Species',
    legend=True,
)

output_file("iris_simple.html", title="iris_simple.py example")
show(scatter)
scatter4 = Scatter(
    df, x='mpg', y='hp', color='cyl', marker='origin',
    title="x='mpg', y='hp', color='cyl', marker='origin'",
    xlabel="Miles Per Gallon", ylabel="Horsepower", legend='top_right')

# Example with nested json/dict like data, which has been pre-aggregated and
# pivoted
df2 = df_from_json(data)
# ``DataFrame.sort`` is no longer available in pandas; ``sort_values`` is its
# replacement for ordering rows by a column.
df2 = df2.sort_values('total', ascending=False)
df2 = df2.head(10)
df2 = pd.melt(df2, id_vars=['abbr', 'name'])

scatter5 = Scatter(
    df2, x='value', y='name', color='variable',
    title="x='value', y='name', color='variable'",
    xlabel="Medals", ylabel="Top 10 Countries", legend='bottom_right')

scatter6 = Scatter(
    flowers,
    x=blend('petal_length', 'sepal_length', name='length'),
    y=blend('petal_width', 'sepal_width', name='width'),
    color='species',
    title='x=petal_length+sepal_length, y=petal_width+sepal_width, color=species',
    legend='top_right')
# The long title needs a smaller font.
scatter6.title_text_font_size = '10pt'

output_file("scatter_multi.html", title="scatter_multi.py example")

# Lay the charts out two per row, with the last one on its own row.
show(vplot(
    hplot(scatter0, scatter1),
    hplot(scatter2, scatter3),
    hplot(scatter4, scatter5),
    hplot(scatter6)
))
def scatter_groups(xyvalues, fname, title, xlabel, ylabel):
    """Return a Scatter chart of ``xyvalues`` with a top-left legend.

    ``fname`` is passed as the chart's ``filename``; ``title``, ``xlabel``
    and ``ylabel`` are passed through unchanged.
    """
    toolset = "resize,crosshair,pan,wheel_zoom,box_zoom,reset,previewsave"
    chart_options = dict(
        filename=fname,
        title=title,
        legend="top_left",
        tools=toolset,
        xlabel=xlabel,
        ylabel=ylabel,
    )
    return Scatter(xyvalues, **chart_options)
import pandas as pd

from bokeh.charts import Scatter, output_file, show, vplot, hplot, defaults
from bokeh.charts.operations import blend
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.autompg import autompg as df
from bokeh.sampledata.iris import flowers
from bokeh.sampledata.olympics2014 import data

# Default size for every chart created below.
defaults.plot_width = 450
defaults.plot_height = 400

# Only x specified.
scatter0 = Scatter(
    df,
    x='mpg',
    title="x='mpg'",
    xlabel="Miles Per Gallon",
)

# x and y specified.
scatter1 = Scatter(
    df,
    x='mpg',
    y='hp',
    title="x='mpg', y='hp'",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
    legend='top_right',
)

# x and y specified, coloured by the 'cyl' column.
scatter2 = Scatter(
    df,
    x='mpg',
    y='hp',
    color='cyl',
    title="x='mpg', y='hp', color='cyl'",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
    legend='top_right',
)
from bokeh.charts import Scatter, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Horsepower against displacement, drawn with square markers.
p = Scatter(
    df,
    x='displ',
    y='hp',
    marker='square',
    title="HP vs DISPL",
    legend="top_left",
    xlabel="Displacement",
    ylabel="Horsepower",
)

output_file("scatter.html")
show(p)
from bokeh.charts import Scatter, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Horsepower against miles per gallon for the autompg sample data.
p = Scatter(
    df,
    x='mpg',
    y='hp',
    title="HP vs MPG",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)
p.logo = None  # remove Bokeh's logo

output_file("scatter.html")
show(p)
"""Uses bokeh for plotting"""

from bokeh.charts import Scatter, output_file, show
import pandas

# Five observations: X on the horizontal axis, Y on the vertical axis.
DF = pandas.DataFrame(
    {'X': [1, 2, 3, 4, 5], 'Y': [5, 6, 4, 5, 3]},
    columns=['X', 'Y'],
)

# pylint: disable=line-too-long
SC = Scatter(
    DF,
    x='X',
    y='Y',
    title='Temperature Observations',
    xlabel='Day of observations',
    ylabel='Temperature',
)

output_file('../output/Scatter_charts.html')
show(SC)
color='Degree', title="Lines VLine Interp", ylabel='measures', tools=TOOLS) int_hline = Line(data, y='Count', color='Degree', title="Lines HLine Interp", ylabel='measures', tools=TOOLS) scatter_point = Scatter(data, x='Year', y='Count', color='Degree', title="Scatter mouse", ylabel='measures', legend=True, tools=TOOLS) scatter = Scatter(data, x='Year', y='Count', color='Degree', title="Scatter V Line", ylabel='measures', legend=True, tools=TOOLS) int_point_line = Line(data, x='Year',