예제 #1
0
def visualisation():
    """Render a standalone HTML page with a bar chart of place counts for one city.

    The city is taken from the ``city_name`` query-string argument and falls
    back to ``"Mumbai"`` when that argument is absent.

    Returns:
        str: The HTML document with the Bokeh CSS/JS resources, the plot
        ``div`` and the plot ``script`` substituted into the page template.
    """
    current_city_name = request.args.get("city_name")
    if current_city_name is None:
        current_city_name = "Mumbai"

    # Filter on the city taken from the request so that the query argument
    # actually selects which rows are charted.
    subset = df_iplace[df_iplace['city'] == current_city_name]
    plot = bc.Bar(subset,
                  "placename",
                  values="placename",
                  agg="count",
                  plot_width=1000,
                  plot_height=1000,
                  title="Popular places")

    # Generate the script and HTML for the plot
    script, div = components(plot)

    # Return the webpage
    return """
    <!doctype html>
    <head>
     <title>Popular places</title>
     {bokeh_css}
    </head>
    <body>
     <h1>Most Popular Places visited by people!</h1>
     {div}
    
     {bokeh_js}
     {script}
    </body>
     """.format(script=script,
                div=div,
                bokeh_css=CDN.render_css(),
                bokeh_js=CDN.render_js())
def visuals():
    """Render the "Bar Chart" page: a Bokeh bar chart of ``top_ten_states_dataFrame``.

    Returns:
        The complete HTML page as a string. If building the chart or the page
        raises, the error text is flashed and the ``visuals.html`` template is
        rendered with that text as ``error``.
    """
    try:
        # Chart construction lives inside the try so the handler below can
        # actually report plotting failures.
        plot = bc.Bar(data=top_ten_states_dataFrame, values='count', label='State')
        script, div = components(plot)

        # jQuery is loaded before bootstrap.min.js, and the script tag uses
        # ``src`` (a ``href`` on <script> never loads the file).
        return '''
		<!DOCTYPE html>
		<html>
		<head>
			<title>Train Wreck Header</title>
			<meta charset="utf-8">
			<link rel="stylesheet" type="text/css" href="../static/css/bootstrap.min.css">
			<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
			<script type="text/javascript" src="../static/js/bootstrap.min.js"></script>
			{bokeh_css}
		</head>
		<header style="background-color: #e6e6e6;">
		<!--
			<p> started working </p> -->

			<nav class="navbar navbar-inverse" style="background-color: #4d4d4d;">
				<div class="container-fluid">

					<div class="navbar-header">
						<a class="navbar-brand" href="/" style="color: white;">Train Wreck Analysis</a>
					</div>

					<ul class="nav navbar-nav">
						<li><a href="/">Home</a></li>
						<li><a href="/map/">Map</a></li>
						<li><a href="/visuals/">Bar Chart</a></li>
					</ul>

				</div>
			</nav>
		</header>

		<body>
			<div class="container-fluid" style="margin-left: 20px; float:left; border: solid; border-style: groove; border-width: 5px; height: 650px; width: 700px; padding-top: 20px;">
			 {div}
			 {bokeh_js}
 			 {script}
			</div>
		</body>
		</html>
		'''.format(script=script,
             div=div,
             bokeh_css=CDN.render_css(),
             bokeh_js=CDN.render_js())

    except Exception as e:
        # Pass the message text (not the exception object) to flash(), and
        # hand the same text to the template instead of a constant None.
        error = str(e)
        flash(error)
        return render_template('visuals.html', error=error)
예제 #3
0
def bokeh_bar(df):
    """Build a bar chart of ``number`` per ``animal`` and return its embed components.

    Adapted more or less directly from the Bokeh 0.11.1 charts user guide:
    http://bokeh.pydata.org/en/0.11.1/docs/user_guide/charts.html

    Returns:
        Whatever ``bke.components`` produces for the finished chart.
    """
    chart_options = {
        'values': 'number',
        'title': 'aminals <3',
        'tools': BOKEH_TOOLS,
    }
    chart = bkc.Bar(df, 'animal', **chart_options)

    # Replace the default tooltip of the selected hover tool(s) with custom HTML.
    hover_tools = chart.select(dict(type=bkm.HoverTool))
    hover_tools.tooltips = """<div>Aminal: @x</div><div>nubmer: @height</div>"""

    return bke.components(chart)
def states_visualization():
    """Chart the ten states with the most accident records.

    Builds a city/state frame from ``df2`` (defined outside this function)
    via ``modifyRow`` and ``createStatesList``, normalises the ``State``
    column, counts the rows per state, prints the ten largest counts and
    shows them as a bar chart written to ``bar_graph.html``.
    """
    # Create a dataframe of city and state values.
    city_State_dataFrame = modifyRow(df2)

    # Find the top ten states with the most accidents.
    states_dataFrame = createStatesList(city_State_dataFrame)
    # apply() returns a new frame; keep the result so the upper-casing
    # actually takes effect.
    states_dataFrame = states_dataFrame.apply(lambda x: x.astype(str).str.upper())
    # Keep only the text after the first comma (the whole value when there is
    # no comma), without surrounding whitespace.
    states_dataFrame['State'] = states_dataFrame['State'].map(
        lambda value: value.strip().split(',', 1)[-1].strip())

    # One row per state with the number of records in a 'count' column.
    top_states_dataFrame = (states_dataFrame.groupby(['State']).size()
                            .reset_index().rename(columns={0: 'count'}))
    top_ten_states_dataFrame = top_states_dataFrame.sort_values(
        by='count', ascending=False).head(10)
    print(top_ten_states_dataFrame)

    output_file('bar_graph.html')
    # 'values' names the column that sets the bar heights, matching the
    # bc.Bar call used by visuals().
    plot = bc.Bar(data=top_ten_states_dataFrame, values='count', label='State')
    show(plot)
예제 #5
0
def learn4():
    """Explore the ccFraud CSV: histogram of ``balance`` and scatter plots of a sample.

    Reads ``F:/Research/data/ccFraud.csv`` through the Spark session ``ss``,
    buckets the ``balance`` column into five bins, draws that histogram with
    Matplotlib and with Bokeh charts, then samples rows by ``gender`` and
    draws ``balance`` against ``numTrans`` as Bokeh scatter charts.
    Intermediate values are printed along the way; nothing is returned.
    """
    df1 = ss.read.csv('F:/Research/data/ccFraud.csv',
                      header=True,
                      inferSchema=True)
    df1.show()
    data1 = df1.select('balance').rdd.flatMap(lambda row: row)
    print('1:', data1)
    data1 = data1.histogram(5)
    print('2:', data1)
    # histogram() yields (bucket boundaries, counts); drop the last boundary
    # so that 'bins' and 'freq' have the same length.
    data = {'bins': data1[0][:-1], 'freq': data1[1]}
    print('3:', data)

    # Matplotlib plots
    plt1 = plt.figure(figsize=(12, 9))
    subplot1 = plt1.add_subplot(2, 2, 1)
    subplot1.bar(x=data['bins'], height=data['freq'], width=4000)
    subplot1.set_title('balance')

    subplot2 = plt1.add_subplot(2, 2, 4)
    subplot2.bar(x=data['bins'], height=data['freq'], width=500)
    subplot2.set_title('balance')
    plt1.show()

    # Bokeh plot
    charts1 = charts.Bar(data,
                         values='freq',
                         label='bins',
                         title='Histogram of \'balance\'')
    charts.show(charts1)

    # Sample 0.02% (fraction 0.0002) of the rows for each gender value and
    # keep only the 'balance', 'numTrans' and 'numIntlTrans' columns.
    data_sample1 = df1.sampleBy('gender', {
        1: 0.0002,
        2: 0.0002
    }).select(['balance', 'numTrans', 'numIntlTrans'])
    print('0.02%采样后的表:')
    data_sample1.show()

    # Draw 2D scatter plots: collect each sampled column into a plain list.
    data_multi = {
        elem: data_sample1.select(elem).rdd.flatMap(lambda row: row).collect()
        for elem in ['balance', 'numTrans', 'numIntlTrans']
    }
    print('点状图表:')
    print(len(data_multi), data_multi)
    # Column-name -> values mapping. A set literal of the two lists would
    # raise TypeError because lists are unhashable.
    data2 = {
        'balance': data_multi['balance'],
        'numTrans': data_multi['numTrans'],
    }
    charts2 = charts.Scatter(data=data_multi, x='balance', y='numTrans')
    charts.show(charts2)
    charts2 = charts.Scatter(data=data2, x='balance', y='numTrans')
    charts.show(charts2)
예제 #6
0
def popular_places():
    """Serve the admin page and, on a valid POST, chart the chosen city's top places.

    The distinct ``city`` values of the ``placedetails`` table become the
    choices of the form's ``city`` field. When the form is submitted and
    validates, the first five rows of the ``placename`` value counts for the
    selected city are drawn as a bar chart and passed to the template.

    Returns:
        The rendered ``admin.html`` template, with the chart components when
        a valid form was posted and with only the form otherwise.

    Raises:
        Exception: Any error raised while handling the request is reported on
            stdout and then re-raised, so the view never silently returns
            ``None``.
    """
    db = None  # stays None if connecting fails, so ``finally`` can skip close()
    try:
        db = mysql.connect()
        mycursor = db.cursor()
        mycursor.execute("select distinct(city) from placedetails")
        cities = [row[0] for row in mycursor]
        form = AdminForm()
        form.city.choices = [(city, city) for city in cities]
        if request.method == 'POST' and form.validate_on_submit():
            cname = request.form['city']
            subset = df_iplace[df_iplace['city'] == cname]
            places = subset['placename'].value_counts().to_frame()
            plot = bc.Bar(places.iloc[0:5, ],
                          label="index",
                          values="placename",
                          plot_width=1000,
                          plot_height=700,
                          legend="top_right",
                          bar_width=0.3,
                          min_border=30,
                          xlabel="Places",
                          ylabel="Count")
            script, div = components(plot)
            return render_template(
                "admin.html",
                form=form,
                script=script,
                title_text="Top 5 Places visited by people in ",
                div=div,
                bokeh_css=CDN.render_css(),
                bokeh_js=CDN.render_js(),
                city=cname)
        return render_template("admin.html", form=form)
    except Exception:
        print("Exception occured in admin")
        # Re-raise so the framework reports the real failure instead of the
        # view falling through and returning None.
        raise
    finally:
        if db is not None:
            db.close()
예제 #7
0
def loadplot(city):
    """Render ``plot.html`` with a bar chart of place-name counts for *city*.

    Rows of ``df_iplace`` whose ``city`` equals *city* are selected, their
    ``placename`` values are counted, and the first five rows of that count
    table are charted.

    Returns:
        The result of ``render_template`` for ``plot.html``, given the chart's
        script/div, the Bokeh CDN resources, a title prefix and the city name.
    """
    city_rows = df_iplace[df_iplace['city'] == city]
    place_counts = city_rows['placename'].value_counts().to_frame()
    first_five = place_counts.iloc[0:5, ]

    chart_options = dict(
        label="index",
        values="placename",
        plot_width=1000,
        plot_height=700,
        legend="top_right",
        bar_width=0.3,
        min_border=30,
        xlabel="Places",
        ylabel="Count",
    )
    plot = bc.Bar(first_five, **chart_options)
    script, div = components(plot)

    template_context = dict(
        script=script,
        title_text="Top 5 Places visited by people in ",
        div=div,
        bokeh_css=CDN.render_css(),
        bokeh_js=CDN.render_js(),
        city=city,
    )
    return render_template("plot.html", **template_context)
예제 #8
0
import pandas as pd
import pymongo
from bokeh import charts
from bokeh.io import push_notebook, show, output_notebook
# Send Bokeh output to the notebook.
output_notebook()

# Connect with pymongo's default client settings and select the 'hfut' database.
client = pymongo.MongoClient()
db = client['hfut']

# Load every document of the 'student' collection, keeping three columns.
student_records = list(db['student'].find())
student_df = pd.DataFrame(student_records, columns=['学号', '姓名', '性别'])

# Integer-divide the '学号' column by 1,000,000 to derive the '入学年份' column.
# NOTE(review): assumes '学号' is numeric — confirm against the collection.
student_df['入学年份'] = student_df['学号'] // 1000000

# Count rows per '入学年份' value, stacked by '性别'.
p = charts.Bar(
    student_df,
    label='入学年份',
    values='性别',
    agg='count',
    stack='性别',
)
show(p)

# Pclass 
# Count rows for every (Pclass, Survived) pair, then pivot the Survived
# values into columns so each Pclass becomes one row.
survived_pclass = (
    df.groupby('Pclass')['Survived']
    .value_counts()
    .unstack()
)
# Share of rows with Survived == 1 within each Pclass.
survived_pclass['Rate'] = survived_pclass[1] / (survived_pclass[0] + survived_pclass[1])
# Bare expression: displays the table when run as a notebook cell.
survived_pclass


# How does it look graphically?

# In[ ]:


# Send Bokeh output to the notebook, then chart the row count per Pclass,
# stacked by the Survived value (categories kept unsorted).
bkp.output_notebook()
bar1 = bkc.Bar(
    df,
    label='Pclass',
    values='Survived',
    agg='count',
    stack=bkc.attributes.cat(columns='Survived', sort=False),
    tools='pan,box_zoom,reset,resize,save,hover',
    legend='top_left',
    plot_width=600,
    plot_height=300,
)
# Show each bar segment's height, formatted as an integer, in the hover tooltip.
hover = bar1.select({'type': bkm.HoverTool})
hover.tooltips = {"Num": "@height{int}"}
bar1.yaxis.axis_label = 'Number of passengers'
bkp.show(bar1)


# Apparently, passengers from higher classes are more likely to survive, both in terms of number and percentage. Furthermore, the survival rates for Pclass 1 and 3 are both quite different from 0.5 and hence 'quite pure'. In other words, during prediction, if I see that a passenger is from Pclass 1 (3), I would be likely to bet that he/she will (not) survive. We should take this into account in our future models.

# - *Name*
# 
# The next column in the data is 'Name'. What information can we possibly get from it? Hmm... how about the titles?

# In[ ]: