def visualisation():
    """Render a standalone HTML page with a bar chart of place counts for a city.

    The city is taken from the ``city_name`` query parameter and defaults to
    ``"Mumbai"`` when the parameter is absent.

    Returns:
        str: An HTML document that embeds the Bokeh chart's ``div`` and
        ``script`` together with the CSS/JS rendered by ``CDN``.
    """
    # A missing key is the only case that needs the fallback, so the default
    # argument replaces the former ``== None`` check.
    current_city_name = request.args.get("city_name", "Mumbai")

    # Filter on the requested city. The previous code compared against
    # ``dropdown.value`` instead, which left ``current_city_name`` unused and
    # meant the query parameter had no effect on the chart.
    subset = df_iplace[df_iplace['city'] == current_city_name]

    # Bars are labelled by place name and aggregated with "count".
    plot = bc.Bar(subset, "placename", values="placename", agg="count",
                  plot_width=1000, plot_height=1000, title="Popular places")

    # Generate the script and HTML for the plot
    script, div = components(plot)

    # The heading is now closed with </h1>; it used to stay open and wrap the
    # chart markup that follows it. Whitespace is otherwise unchanged.
    page = (
        " <!doctype html> <head> <title>Popular places</title> {bokeh_css} </head>"
        " <body> <h1>Most Popular Places visited by people!</h1>"
        " {div} {bokeh_js} {script} </body> "
    )

    # Return the webpage
    return page.format(script=script, div=div,
                       bokeh_css=CDN.render_css(), bokeh_js=CDN.render_js())
def visuals():
    """Return the "Bar Chart" page showing ``top_ten_states_dataFrame``.

    The chart is built from the ``top_ten_states_dataFrame`` name, which is
    not defined in this function and must exist at module level.

    Returns:
        The inline HTML page with the embedded Bokeh chart, or the rendered
        ``visuals.html`` template carrying the error text when assembling the
        page raises.
    """
    plot = bc.Bar(data=top_ten_states_dataFrame, values='count', label='State')
    script, div = components(plot)

    # Two fixes in the <head>:
    #   * the Bootstrap <script> tag now uses ``src`` (it used ``href``, which
    #     a script element ignores, so the file was never loaded);
    #   * jQuery is listed before Bootstrap's JS, which depends on it.
    # Everything else in the markup is unchanged.
    page = (
        ' <!DOCTYPE html> <html> <head> <title>Train Wreck Header</title>'
        ' <meta charset="utf-8">'
        ' <link rel="stylesheet" type="text/css" href="../static/css/bootstrap.min.css">'
        ' <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>'
        ' <script type="text/javascript" src="../static/js/bootstrap.min.js"></script>'
        ' {bokeh_css} </head>'
        ' <header style="background-color: #e6e6e6;">'
        ' <!-- <p> started working </p> -->'
        ' <nav class="navbar navbar-inverse" style="background-color: #4d4d4d;">'
        ' <div class="container-fluid"> <div class="navbar-header">'
        ' <a class="navbar-brand" href="/" style="color: white;">Train Wreck Analysis</a>'
        ' </div> <ul class="nav navbar-nav">'
        ' <li><a href="/">Home</a></li> <li><a href="/map/">Map</a></li>'
        ' <li><a href="/visuals/">Bar Chart</a></li>'
        ' </ul> </div> </nav> </header> <body>'
        ' <div class="container-fluid" style="margin-left: 20px; float:left;'
        ' border: solid; border-style: groove; border-width: 5px; height: 650px;'
        ' width: 700px; padding-top: 20px;">'
        ' {div} {bokeh_js} {script} </div> </body> </html> '
    )

    try:
        return page.format(script=script, div=div,
                           bokeh_css=CDN.render_css(), bokeh_js=CDN.render_js())
    except Exception as e:
        # Flash and pass on the message text. Previously the exception object
        # itself was flashed and the template always received ``error=None``.
        error = str(e)
        flash(error)
        return render_template('visuals.html', error=error)
def bokeh_bar(df):
    """Build a bar chart of ``number`` per ``animal`` and return its components.

    Adapted more or less directly from the Bokeh charts user guide:
    http://bokeh.pydata.org/en/0.11.1/docs/user_guide/charts.html

    Args:
        df: Data passed straight to ``bkc.Bar``; the chart refers to its
            ``'animal'`` and ``'number'`` columns.

    Returns:
        Whatever ``bke.components`` returns for the chart.
    """
    tooltip_markup = """<div>Aminal: @x</div><div>nubmer: @height</div>"""

    chart = bkc.Bar(
        df,
        'animal',
        values='number',
        title='aminals <3',
        tools=BOKEH_TOOLS,
    )

    # Replace the tooltip of the hover tool(s) selected from the chart.
    hover_selection = chart.select(dict(type=bkm.HoverTool))
    hover_selection.tooltips = tooltip_markup

    return bke.components(chart)
def states_visualization():
    """Plot the ten most frequent states as a bar chart in ``bar_graph.html``.

    Builds the state list from the module-level ``df2`` via ``modifyRow`` and
    ``createStatesList``, counts rows per state, prints the ten largest
    counts, and shows them with ``bc.Bar``.

    Returns:
        None. The chart is written through ``output_file`` / ``show``.
    """
    # Create a dataframe of city and state values.
    city_state_frame = modifyRow(df2)

    # Find the ten states with the most accidents.
    states_frame = createStatesList(city_state_frame)

    # Normalise case so differently capitalised spellings of a state are
    # counted together. The earlier ``DataFrame.apply(... .str.upper())``
    # call discarded its result, so it never took effect.
    state_names = states_frame['State'].astype(str).str.upper()

    # Keep only the text after the first comma (the whole value when there is
    # no comma) and trim surrounding whitespace.
    states_frame['State'] = state_names.map(
        lambda name: name.split(',', 1)[-1].strip())

    # One row per state with its number of occurrences in a 'count' column.
    state_counts = (states_frame.groupby(['State'])
                    .size()
                    .reset_index()
                    .rename(columns={0: 'count'}))

    top_ten_states_dataFrame = (state_counts
                                .sort_values(by='count', ascending=False)
                                .head(10))
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(top_ten_states_dataFrame)

    output_file('bar_graph.html')
    # ``values='count'`` matches how ``visuals`` charts the same frame; the
    # former ``count='count'`` is not a ``Bar`` keyword.
    plot = bc.Bar(data=top_ten_states_dataFrame, values='count', label='State')
    show(plot)
def learn4():
    """Chart the 'balance' column of ccFraud.csv and scatter a small sample.

    Steps:
        1. Read the CSV through the module-level Spark session ``ss``.
        2. Build a 5-bucket histogram of ``balance`` and draw it with
           Matplotlib (two subplots with different bar widths) and with
           Bokeh ``charts.Bar``.
        3. Sample rows per ``gender`` value, collect ``balance``,
           ``numTrans`` and ``numIntlTrans`` and draw ``balance`` against
           ``numTrans`` as Bokeh scatter charts.

    Returns:
        None. Output is printed and shown through the plotting libraries.
    """
    df1 = ss.read.csv('F:/Research/data/ccFraud.csv', header=True, inferSchema=True)
    df1.show()

    # Flatten the single-column rows into an RDD of plain balance values.
    data1 = df1.select('balance').rdd.flatMap(lambda row: row)
    print('1:', data1)
    data1 = data1.histogram(5)
    print('2:', data1)
    # The last entry of the first histogram element is dropped so 'bins'
    # lines up with 'freq' (presumably edges vs. counts; see RDD.histogram).
    data = {'bins': data1[0][:-1], 'freq': data1[1]}
    print('3:', data)

    # Matplotlib plots
    plt1 = plt.figure(figsize=(12, 9))
    subplot1 = plt1.add_subplot(2, 2, 1)
    subplot1.bar(x=data['bins'], height=data['freq'], width=4000)
    subplot1.set_title('balance')
    subplot2 = plt1.add_subplot(2, 2, 4)
    subplot2.bar(x=data['bins'], height=data['freq'], width=500)
    subplot2.set_title('balance')
    plt1.show()

    # Bokeh plot
    charts1 = charts.Bar(data, values='freq', label='bins',
                         title='Histogram of \'balance\'')
    charts.show(charts1)

    # Sample a fraction of 0.0002 (0.02%) of the rows for each of the gender
    # values 1 and 2, keeping only the three columns of interest.
    data_sample1 = df1.sampleBy('gender', {
        1: 0.0002,
        2: 0.0002
    }).select(['balance', 'numTrans', 'numIntlTrans'])
    print('0.02%采样后的表:')
    data_sample1.show()

    # Draw the 2D scatter plots: collect each sampled column into a list.
    data_multi = {
        elem: data_sample1.select(elem).rdd.flatMap(lambda row: row).collect()
        for elem in ['balance', 'numTrans', 'numIntlTrans']
    }
    print('点状图表:')
    print(len(data_multi), data_multi)

    # Keep just the two plotted columns, keyed by name. The previous
    # ``{list, list}`` literal was a set of lists, which raises
    # ``TypeError: unhashable type: 'list'``.
    data2 = {
        'balance': data_multi['balance'],
        'numTrans': data_multi['numTrans'],
    }
    charts2 = charts.Scatter(data=data_multi, x='balance', y='numTrans')
    charts.show(charts2)
    charts2 = charts.Scatter(data=data2, x='balance', y='numTrans')
    charts.show(charts2)
def popular_places():
    """Serve the admin page and, on a valid POST, a top-5 places chart.

    The city choices of ``AdminForm`` are filled from the distinct ``city``
    values of the ``placedetails`` table. When the form is submitted and
    validates, the rows of ``df_iplace`` for the chosen city are counted per
    place name and the first five counts are drawn as a bar chart.

    Returns:
        The rendered ``admin.html`` template (with chart data after a valid
        POST), or ``None`` when an exception occurred; the failure is
        reported on the console instead of being raised.
    """
    import traceback

    # Bind the name before the try block so the ``finally`` clause cannot
    # fail with a NameError when ``mysql.connect()`` itself raises.
    db = None
    try:
        db = mysql.connect()
        mycursor = db.cursor()
        mycursor.execute("select distinct(city) from placedetails")
        cities = [row[0] for row in mycursor]

        form = AdminForm()
        form.city.choices = [(city, city) for city in cities]

        if request.method == 'POST' and form.validate_on_submit():
            cname = request.form['city']
            subset = df_iplace[df_iplace['city'] == cname]
            places = subset['placename'].value_counts().to_frame()
            plot = bc.Bar(places.iloc[0:5, ], label="index", values="placename",
                          plot_width=1000, plot_height=700, legend="top_right",
                          bar_width=0.3, min_border=30,
                          xlabel="Places", ylabel="Count")
            script, div = components(plot)
            return render_template(
                "admin.html",
                form=form,
                script=script,
                title_text="Top 5 Places visited by people in ",
                div=div,
                bokeh_css=CDN.render_css(),
                bokeh_js=CDN.render_js(),
                city=cname)

        return render_template("admin.html", form=form)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt still propagate; the traceback makes the cause
        # visible instead of only the generic message.
        print("Exception occured in admin")
        traceback.print_exc()
    finally:
        if db is not None:
            db.close()
def loadplot(city):
    """Render ``plot.html`` with a bar chart of place counts for *city*.

    Args:
        city: Value compared against the ``city`` column of ``df_iplace``.

    Returns:
        The result of ``render_template`` for ``plot.html``, supplied with
        the chart's script/div, the CDN CSS/JS, a title prefix and the city.
    """
    city_rows = df_iplace[df_iplace['city'] == city]
    place_counts = city_rows['placename'].value_counts().to_frame()

    # Only the first five rows of the counts table are charted.
    chart_options = dict(
        label="index",
        values="placename",
        plot_width=1000,
        plot_height=700,
        legend="top_right",
        bar_width=0.3,
        min_border=30,
        xlabel="Places",
        ylabel="Count",
    )
    chart = bc.Bar(place_counts.iloc[:5], **chart_options)

    script, div = components(chart)
    return render_template(
        "plot.html",
        script=script,
        title_text="Top 5 Places visited by people in ",
        div=div,
        bokeh_css=CDN.render_css(),
        bokeh_js=CDN.render_js(),
        city=city,
    )
import pandas as pd
import pymongo
from bokeh import charts
from bokeh.io import push_notebook, show, output_notebook

# Send Bokeh output to the notebook.
output_notebook()

# Open the 'hfut' database using MongoClient's default connection settings.
client = pymongo.MongoClient()
db = client['hfut']

# Load every document of the 'student' collection, keeping only the
# student-ID ('学号'), name ('姓名') and gender ('性别') columns.
student_records = list(db['student'].find())
student_df = pd.DataFrame(student_records, columns=['学号', '姓名', '性别'])

# Integer-divide the student ID by 1,000,000 and store the quotient as the
# enrollment year ('入学年份').
student_df['入学年份'] = student_df['学号'] // 1000000

# Count students per enrollment year, stacking the bars by gender.
bar_options = dict(label='入学年份', values='性别', agg='count', stack='性别')
p = charts.Bar(student_df, **bar_options)
show(p)
# Pclass

# Counts of each 'Survived' value per passenger class, one column per value,
# plus the share of survivors in each class.
survived_pclass = df.groupby('Pclass')['Survived'].value_counts().unstack()
survived_pclass['Rate'] = survived_pclass[1] / (survived_pclass[0] + survived_pclass[1])
survived_pclass


# How does it look graphically?

# In[ ]:

bkp.output_notebook()

# Passenger counts per class, stacked by the 'Survived' value.
bar1 = bkc.Bar(
    df,
    label='Pclass',
    values='Survived',
    agg='count',
    stack=bkc.attributes.cat(columns='Survived', sort=False),
    tools='pan,box_zoom,reset,resize,save,hover',
    legend='top_left',
    plot_width=600,
    plot_height=300,
)

# Show the bar height, formatted as an integer, in the hover tooltip.
hover = bar1.select({'type': bkm.HoverTool})
hover.tooltips = {"Num": "@height{int}"}

bar1.yaxis.axis_label = 'Number of passengers'
bkp.show(bar1)


# Apparently, passengers from higher classes are more likely to survive, both in terms of number and percentage. Furthermore, the survival rates for Pclass 1 and 3 are both quite different from 0.5 and hence 'quite pure'. In other words, during prediction, if I see that a passenger is from Pclass 1 (3), I would likely bet that he/she will (not) survive. We should take this into account in our future models.

# - *Name*
#
# The next column in the data is 'Name'. What information can we possibly get from it? Hmm... how about the titles?

# In[ ]: