def ratioPlot(self):
    """Show a scatter plot of the men/women ratio data with hover tooltips.

    The data comes from ``nds.ratioAnalysis(self.menStats, self.womenStats)``;
    its ``ratioData`` mapping must provide the 'x', 'y', 'desc' and 'ratio'
    columns. The figure is opened with ``show``; nothing is returned.
    """
    ratioData = nds.ratioAnalysis(self.menStats, self.womenStats).ratioData

    # Tooltip rows read the 'ratio' and 'desc' columns of the data source.
    hover = HoverTool(
        tooltips=[
            ("ratio", "@ratio"),
            ("", "@desc"),
        ]
    )

    ratioSource = ColumnDataSource(
        data=dict(
            x=ratioData['x'],
            y=ratioData['y'],
            desc=ratioData['desc'],
            ratio=ratioData['ratio'],
        )
    )

    # The hover tool is registered once, on the figure. Glyph methods such as
    # scatter() accept glyph properties only, so no `tools` list is passed there.
    plot = figure(plot_width=450, plot_height=450, tools=[hover])
    plot.scatter(
        'x', 'y',
        size=10,
        fill_color=[
            '#F4C2C2', '#FF0000', '#D73B3E', '#800000',
            '#98FB98', '#03C03C', '#008000', '#006400',
        ],
        source=ratioSource,
        line_color=None,
    )

    show(plot)
Esempio n. 2
0
	def exibir(self):
		"""Show the three grade histograms laid out with hplot.

		The page is written to html/histograms.html. Returns without
		plotting when any series in ``self.dados`` is empty.
		"""
		if not all(len(self.dados[chave]) > 0 for chave in self.dados.keys()):
			return

		defaults.width, defaults.height = 400, 300

		def histograma(chave, titulo, rotulo_x, rotulo_y, faixas):
			# Every chart is responsive; only data, labels and bin count vary.
			return Histogram(self.dados[chave],
							title=titulo,
							xlabel=rotulo_x,
							ylabel=rotulo_y,
							responsive=True,
							bins=faixas)

		hist_pontuacao = histograma("pontos",
							"Grades por pontuação",
							"Pontuação",
							"Número de grades",
							30)
		hist_tamanho = histograma("tamanhos",
							"Grades por quantidade de disciplinas",
							"Número de disciplinas",
							"Número de grades",
							8)
		hist_pop = histograma("popularidade",
							"Ocorrências da disciplina x",
							"Disciplina",
							"Ocorrências nas grades",
							46)

		output_file("html/histograms.html")
		show(hplot(hist_pontuacao, hist_tamanho, hist_pop))
Esempio n. 3
0
def plot_histogram(same, diff):
    """Show a 20-bin density histogram comparing two samples.

    ``same`` is labelled "PAINS vs. PAINS" and ``diff`` "PAINS vs. ChEMBL".
    The chart is written to histogram_pains_self_diff.html with the x range
    fixed to 0..1.
    """
    series = {}
    series["PAINS vs. PAINS"] = same
    series["PAINS vs. ChEMBL"] = diff

    output_file("histogram_pains_self_diff.html")
    chart = Histogram(series, bins=20, density=True, legend=True)
    chart.x_range = Range1d(0, 1)
    show(chart)
Esempio n. 4
0
 def shows(self):
     """Build one summed Bar chart per value key and show them with vplot."""
     count = len(self.__vkey)
     stacked = [
         Bar(
             self.__data[idx],
             values=self.__vkey[idx],
             label=self.__lkey[idx],
             agg='sum',
             title=self.__title[idx],
             legend=self.__legend,
             width=1000,
         )
         for idx in range(count)
     ]

     show(vplot(*stacked))
def school_types_bar_chart():
    """Show a bar chart of school counts per establishment type for 2015 and 2016.

    Schools are loaded with ``get_schools``; each one is counted under its
    ``typeOfEstablishment`` label and the year stored in ``s.data['comp']``.
    The chart is written to visuals/establisment_types.html.

    Raises:
        KeyError: if a school's year is neither 2015 nor 2016.
    """
    schools_2016 = get_schools('urn/2016.urns.list', 2016)
    schools_2015 = get_schools('urn/2015.urns.list', 2015)

    # establishment type -> {year: number of schools}
    est_types = {}

    def _update_types(schools):
        for s in schools:
            est_type = s.data['typeOfEstablishment']['label']
            year = s.data['comp']
            # setdefault creates the zeroed counters only for a new type, so an
            # unexpected year raises KeyError without wiping existing counts.
            counts = est_types.setdefault(est_type, {2015: 0, 2016: 0})
            counts[year] += 1

    _update_types(schools_2015)
    _update_types(schools_2016)

    y_data = {'SR2015': [], 'SR2016': []}
    x_categories = []
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for est_type, yc in est_types.items():
        y_data['SR2015'].append(yc[2015])
        y_data['SR2016'].append(yc[2016])
        x_categories.append(est_type)

    output_file('visuals/establisment_types.html')
    bar = Bar(
        y_data,
        cat=x_categories,
        title='Establishment Types and count',
        xlabel='Type of Establishment',
        ylabel='Count',
        width=1000,
        height=600,
        legend=True,
    )

    show(bar)
def dotplot(pname, filename, data, title, ylabel):
    """Show a Dot chart of ``data`` (name -> value), saved as <filename>_<title>.html.

    The chart title is "<pname> : <title>".
    """
    heading = pname + " : " + title
    output_file(filename + "_" + title + ".html")
    # keys() and values() of an unmodified dict iterate in matching order.
    categories = list(data.keys())
    heights = list(data.values())
    chart = Dot(heights, cat=categories, title=heading, ylabel=ylabel,
                legend=False)
    show(chart)
def ts_vis_auto(steps,path):
    try:
        index_name,my_trend = parse_csv(path)
        if my_trend.count(0) <= 30:
            index_name,index_name_future,my_trend,future = holt_pred(steps,path) 
        else:
            index_name,index_name_future,my_trend,future = sarima(steps,path) 
        slope = np.polyfit(range(len(my_trend[-156:])),my_trend[-156:],1)[0]
        if slope >= 0:
            print 'This is a upward trending with slope: ', slope
            html_path = '/Users/royyang/Desktop/trending_project/html/'+path.split('/')[-1][:-4]+'.html'
    #            html_path = 'example.html'            
            output_file(html_path, title="bohek example")
            source1 = ColumnDataSource(
                    data=dict(
                        x1=index_name,
                        y1=my_trend,
                        Time1=[str(var).split()[0] for var in index_name],
                        Intensity1=my_trend 
                    )
                )
                
            source2 = ColumnDataSource(
                    data=dict(
                        x2=index_name_future,
                        y2=future,
                        Time1=[str(var).split()[0] for var in index_name_future],
                        Intensity1=[np.round(var,0) for var in future]
                    )
                )
            
            TOOLS = "pan,wheel_zoom,box_zoom,reset,save,hover"
            
            p = figure(x_axis_type="datetime",plot_width=1000, plot_height=600, tools=TOOLS)
            
            p.line('x1','y1', color='red',legend='Past',source=source1)
            p.circle('x1','y1',size = 5,color = 'red',source=source1)
            p.line('x2','y2', color='blue', legend='Future',source=source2)
            p.circle('x2','y2',size = 8,color = 'blue',source=source2)
            p.xaxis.axis_label="Time"
            p.yaxis.axis_label="Search Intensity"
            p.title = "Search Prediction of "+path.split('/')[-1].split('.')[0]
            p.background_fill= "#cccccc"
            p.grid.grid_line_color="white"
            p.legend.label_standoff = 20
            p.legend.glyph_width = 50
            p.legend.legend_spacing = 15
            p.legend.legend_padding = 1
            p.legend.orientation = "top_left"
            hover = p.select(dict(type=HoverTool))
            hover.tooltips = OrderedDict([
                ('Time', '@Time1'),
                ('Intensity', '@Intensity1'),
            ])
    #            save(p)
            show(p)
    except Exception as err:
        print 'There is no content in file: '+path
Esempio n. 8
0
 def _chart(self, chartcls, **kwargs):
   """Instantiate ``chartcls`` on ``self.frame``, then show or save it.

   Options are layered: the built-in defaults, then ``self.kwargs()``, then
   the caller's keyword arguments. A ``show`` keyword (default True) is
   consumed here: truthy shows the chart, falsy saves it. Returns the chart.
   """
   display = kwargs.pop('show', True)
   options = {'width': 1000, 'height': 500, 'legend': 'bottom_left'}
   for overrides in (self.kwargs(), kwargs):
     options.update(overrides)
   chart = chartcls(self.frame, **options)
   if display:
     charts.show(chart)
   else:
     charts.save(chart)
   return chart
Esempio n. 9
0
def getDistrib():
    """Request handler: serve the form on GET, build the borough box plot on POST.

    On POST the feature and year are read from the submitted form and stored
    in ``app_xplor.vars``. For each of the five boroughs a query is built with
    ``buildQuery`` and loaded with ``pd.read_json``. The combined rows, with
    missing values dropped, are drawn as a box plot per borough, written to
    templates/results.html, and the client is redirected to /goDistrib.
    """
    if request.method == 'GET':
        return render_template('distrib.html')

    # (borough label, [[year, dataset URL], ...]) in display order.
    # All boroughs are selected by default.
    boroughs = [
        ('Bronx', [
            [2009, 'https://data.cityofnewyork.us/resource/en2c-j6tw.json'],
            [2010, 'https://data.cityofnewyork.us/resource/n2s5-fumm.json'],
            [2011, 'https://data.cityofnewyork.us/resource/bawj-6bgn.json'],
            [2012, 'https://data.cityofnewyork.us/resource/3qfc-4tta.json'],
        ]),
        ('Brooklyn', [
            [2009, 'https://data.cityofnewyork.us/resource/rmv8-86p4.json'],
            [2010, 'https://data.cityofnewyork.us/resource/w6yt-hctp.json'],
            [2011, 'https://data.cityofnewyork.us/resource/5mw2-hzqx.json'],
            [2012, 'https://data.cityofnewyork.us/resource/bss9-579f.json'],
        ]),
        ('Manhattan', [
            [2009, 'https://data.cityofnewyork.us/resource/956m-xy24.json'],
            [2010, 'https://data.cityofnewyork.us/resource/ad4c-mphb.json'],
            [2011, 'https://data.cityofnewyork.us/resource/ikqj-pyhc.json'],
            [2012, 'https://data.cityofnewyork.us/resource/dvzp-h4k9.json'],
        ]),
        ('Queens', [
            [2009, 'https://data.cityofnewyork.us/resource/m59i-mqex.json'],
            [2010, 'https://data.cityofnewyork.us/resource/crbs-vur7.json'],
            [2011, 'https://data.cityofnewyork.us/resource/s3zn-tf7c.json'],
            [2012, 'https://data.cityofnewyork.us/resource/jcih-dj9q.json'],
        ]),
        ('Staten Island', [
            [2009, 'https://data.cityofnewyork.us/resource/cyfw-hfqk.json'],
            [2010, 'https://data.cityofnewyork.us/resource/wv4q-e75v.json'],
            [2011, 'https://data.cityofnewyork.us/resource/a5qt-5jpu.json'],
            [2012, 'https://data.cityofnewyork.us/resource/tkdy-59zg.json'],
        ]),
    ]
    # [database column name, display name] pairs.
    featureNames = [
        ['comparable_rental_2_market_value_per_sqft', 'Market value per square foot'],
        ['comparable_rental_2_full_market_value', 'Full market value'],
        ['comparable_rental_2_year_built', 'Year Built'],
        ['comparable_rental_2_gross_income_per_sqft', 'Gross income per square foot'],
    ]

    # The request was a POST: take the selections from the form.
    app_xplor.vars['feat'] = request.form['feat']
    app_xplor.vars['year'] = request.form['year']

    # Translate the feature's display name into its database column name.
    dbFeatureName = convertField(app_xplor.vars['feat'], featureNames)
    year = int(app_xplor.vars['year'])

    # Query every borough and tag its rows with the borough label.
    frames = []
    for label, tables in boroughs:
        raw = pd.read_json(buildQuery(year, tables, dbFeatureName))
        raw['Borough'] = label
        frames.append(raw)

    cleanData = pd.concat(frames).dropna()
    cleanData.columns = [app_xplor.vars['feat'], 'Borough']

    defaults.width = 450
    defaults.height = 350
    box_plot = BoxPlot(cleanData, label='Borough', title=str(app_xplor.vars['year']))
    output_file("templates/results.html")
    show(
        vplot(
            hplot(box_plot)
        )
    )
    return redirect('/goDistrib')
Esempio n. 10
0
    def plot(self, pct=False,
            output_file_path='temp_plot.html', title="", legend=True):
        """
        Plot the timeseries data in self.content using Bokeh.

        Parameters
        ----------
        pct: bool
            If True, show the percent change of the data instead of the
            data itself.
        output_file_path: str
            Path, including the name, for the output file.
        title: str
            The title of the graph and the html page. The percent-change
            chart is always titled "Percent Change".
        legend: bool
             Whether to include the legend or not.
        """
        # Output to static HTML file
        output_file(output_file_path, title=title)

        def _clean(frame):
            # Bokeh stumbles if the series starts with a nan (hopefully fixed
            # in Bokeh 0.9.4): keep only columns with at least
            # len(frame.columns) non-NaN values, then zero-fill the first row.
            frame = frame.dropna(thresh=len(frame.columns), axis=1)
            frame.iloc[0] = frame.iloc[0].fillna(0)
            return frame

        # NOTE(review): the level chart is built even when pct is True, as in
        # the original; confirm whether that is needed before removing it.
        p = TimeSeries(_clean(self.content), legend=legend, title=title,
                width=800, height=350)

        if pct:
            p2 = TimeSeries(_clean(self.content.pct_change()), legend=legend,
                title="Percent Change", width=800, height=350)
            show(p2)
        else:
            show(p)
    def export():
        nonlocal data

        import bokeh.charts as bch
        from bokeh.layouts import column
        # from bokeh.models import HoverTool, GlyphRenderer
        bch.output_file("Chart.html")

        data = data.iloc[1:, :]

        # TOOLS = "pan, wheel_zoom, box_zoom, crosshair, resize, reset "# , hover"

        title = "History (total result: {0:.2f} €)".format(result)
        if bonus > 0:
            title = title[:-1] + ", excluding Bonus: {0:.2f} €)".format(bonus)

        cols = ["Paid-in", "Balance", "Normalised", "Total Result"]
        if bonus > 0:
            cols = ["Paid-in", "Balance", "Normalised",
                    "Total Result incl Bonus", "Total Result"]

        tsline = bch.TimeSeries(data,
                                x="Time",
                                y=cols,
                                title=title,  # tools=TOOLS,
                                ylabel='Euro', legend=True,
                                width=1250, height=550)
        """
        from bokeh.models import HoverTool
        hover = HoverTool(
            tooltips=[
                ("index", "$index"),
                ("(x,y)", "($x, $y)"),
                ("desc", "$balance"),
                # ("test", data.iloc["$index", 4])
            ]
        )

        tsline.add_tools(hover)
        """

        if open_browser:
            bch.show(column(tsline))
        else:
            bch.save(column(tsline))

        import matplotlib.pyplot as plt
        import matplotlib
        matplotlib.style.use('ggplot')

        data.plot(x="Time", y=cols)
        plt.savefig("Chart.pdf")
Esempio n. 12
0
File: bok.py Progetto: vlall/pyrista
	def __init__(self, local_ip='n/a', public_ip='n/a', arp=None):
		"""Render the VLAN heat map and the IP/ARP table to output/switch.html.

		local_ip, public_ip: values listed in the 'Local' and 'Public' rows.
		arp: optional iterable of two-item entries. Item 1 goes to the
			"Source" column and item 0 to the "IP Address" column, each
			stripped of surrounding whitespace and parentheses.

		Raises RuntimeError when pandas is not installed.
		"""
		# A None default avoids sharing one mutable list between calls.
		if arp is None:
			arp = []

		# Set palette of colors for n vlans
		palette = [
		"#004529", "#006837", "#238443", "#41ab5d", "#78c679", "#addd8e",
		"#d9f0a3", "#f7fcb9", "#ffffe5","#084081", "#0868ac", "#2b8cbe", "#4eb3d3", "#7bccc4",
		"#a8ddb5", "#ccebc5", "#e0f3db", "#f7fcf0","#4d004b", "#810f7c", "#88419d", "#8c6bb1",
		"#EFEFEF","#CFCFCF","#5F5F5F","#000000", "#8c96c6", "#9ebcda", "#bfd3e6", "#e0ecf4",
		"#f7fcfd","#fff7fb", "#ece2f0", "#d0d1e6", "#a6bddb", "#67a9cf", "#3690c0", "#02818a",
		"#016c59", "#014636","#67001f", "#980043", "#ce1256","#e7298a", "#df65b0", "#c994c7",
		"#d4b9da", "#e7e1ef", "#f7f4f9", "#fff7ec", "#fee8c8", "#fdd49e", "#fdbb84", "#fc8d59",
		"#ef6548", "#d7301f", "#b30000", "#7f0000"
		]
		try:
			import pandas as pd
		except ImportError:
			raise RuntimeError("Data requires pandas (http://pandas.pydata.org) to be installed")

		vlans = pd.read_csv(join(dirname(__file__), "vlan.csv"))
		# Drop the last column, index the rows by the first column (as
		# strings), remove that column from the data and transpose.
		df = vlans[vlans.columns[:-1]]
		df2 = df.set_index(df[df.columns[0]].astype(str))
		df2.drop(df.columns[0], axis=1, inplace=True)
		df3 = df2.transpose()
		output_file("output/switch.html", title = 'vlan map')

		# Make heat map
		hm = HeatMap(df3, title="VLANs", width=950, palette=palette)

		# Make IP/ARP table
		x = ['Local', 'Public']
		y = [local_ip, public_ip]
		for entry in arp:
			x.append(entry[1].strip().strip("()"))
			y.append(entry[0].strip().strip("()"))
		source = ColumnDataSource(dict(
			sourceCol=x,
			ipCol=y,
		))
		columns = [
			TableColumn(field="sourceCol", title="Source"),
			TableColumn(field="ipCol", title="IP Address"),
		]

		data_table = DataTable(source=source, columns=columns, width=650, height=450)
		p = vform(hm, data_table)
		show(p)
def plot_accuracies_bokeh(accuracies, proteins, title, directory='C:\\uday\\gmu\\ngrams\\july_2016_results\\', ext='html'):
    """Show an unstacked Bar chart of accuracies per protein.

    ``proteins`` and ``accuracies`` are paired positionally. The chart is
    written to <directory>/<title>.<ext>; the directory is created when it
    does not exist.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
    output_file(os.path.join(directory, "%s.%s" % (title, ext)))

    # A protein listed more than once keeps its last accuracy entry.
    by_protein = dict(zip(proteins, accuracies))
    chart = Bar(
        SortedDict(by_protein),
        ['RF', 'SVM', 'KNN', 'GNB'],
        title=title,
        stacked=False,
        legend='top_right',
        ylabel="accuracy",
        tools="pan,wheel_zoom,box_zoom,reset,save",
    )
    show(chart)
Esempio n. 14
0
def aggregate(transaction_history):
    """Chart quarterly cash flows per transaction kind and in total.

    Every transaction's cash flow is added to the "net" total of its quarter
    ("<year>-Q<n>") and to the bucket matching its kind. HISTORICAL and
    CURRENT transactions share the "historical" bucket; other kinds only
    count towards "net". Two Bar charts (stacked by kind, and net) are
    written to test.html and shown with vplot.
    """
    net, buy, sell = {}, {}, {}
    interest, dividend, historical = {}, {}, {}

    # Checked in order; the first bucket whose kinds match gets the cash flow.
    routing = (
        ((Transaction.BUY,), buy),
        ((Transaction.SELL,), sell),
        ((Transaction.INTEREST,), interest),
        ((Transaction.DIVIDEND,), dividend),
        ((Transaction.HISTORICAL, Transaction.CURRENT), historical),
    )

    def _add(bucket, quarter, cashflow):
        bucket[quarter] = float(handleNull(bucket.get(quarter))) + float(cashflow)

    for t in transaction_history:
        quarter = "%s-Q%i" % (t.date.year, (t.date.month-1)//3+1)
        _add(net, quarter, t.cashflow)
        for kinds, bucket in routing:
            if any(t.kind == kind for kind in kinds):
                _add(bucket, quarter, t.cashflow)
                break

    df = pd.DataFrame({
        'net': pd.Series(addMissingQuarters(net)),
        'buy': pd.Series(addMissingQuarters(buy)),
        'sell': pd.Series(addMissingQuarters(sell)),
        'interest': pd.Series(addMissingQuarters(interest)),
        'dividend': pd.Series(addMissingQuarters(dividend)),
        'historical': pd.Series(addMissingQuarters(historical)),
    })
    df['label'] = df.index

    by_kind = Bar(
        df,
        values=blend('buy', 'sell', 'interest', 'dividend', 'historical',
                     name='cashflow', labels_name='cf'),
        label=cat(columns='label', sort=False),
        stack=cat(columns='cf', sort=False),
    )
    totals = Bar(df, values=blend('net'), label='label')

    output_file("test.html")

    show(vplot(by_kind, totals))
Esempio n. 15
0
    def itemBarPlot(self):
        """Show a Bar chart of the men's and women's item values per item.

        Categories are the keys of ``self.menPlot.itemZip``; the two series
        are the values of the men's and women's ``itemZip`` mappings.
        """
        itemBar = Bar(
            {'men':self.menPlot.itemZip.values(),'women':self.womenPlot.itemZip.values()},
            cat=self.menPlot.itemZip.keys(),
            # Merge conflict resolved in favour of HEAD ("Items"), which also
            # matches the x-axis label.
            legend=True,title="Items",
            width=900,height=450,
            xlabel='Items',ylabel="Count",
            tools=None
            )
        show(itemBar)
Esempio n. 16
0
def kmeanspp(X, k, num_iter=100):
    """Run the assign/recompute clustering loop on ``X`` and show the result.

    Centroids come from ``init_centroids(X, k)``. Each iteration assigns the
    points with ``assign_to_clusters`` and recomputes the centroids with
    ``recalc_centroids``. The plotted cluster of a point is element 1 of its
    entry in the last assignment. ``X`` must have exactly two columns, which
    are plotted as 'x' and 'y'.

    Raises:
        ValueError: if ``num_iter`` is less than 1, since no assignment
            would exist to plot.
    """
    if num_iter < 1:
        raise ValueError("num_iter must be at least 1")

    centroids = init_centroids(X, k)

    for _ in range(num_iter):
        D = assign_to_clusters(X, centroids)
        centroids = recalc_centroids(X, D)

    clusters = [d[1] for d in D]
    df = pd.concat([pd.DataFrame(X), pd.Series(clusters)], axis=1)
    df.columns = ['x', 'y', 'cluster']

    p = Scatter(df, x='x', y='y', color='cluster')

    show(p)
Esempio n. 17
0
def show_histogram(values, title, filename):
    """Show a 5-bin histogram of ``values``, written to ../results/<filename>.

    ``values`` becomes the single "stars" column of the plotted frame;
    ``title`` is used for both the page and the chart.
    """
    output_file('../results/{0}'.format(filename), title=title)
    df = pandas.DataFrame(OrderedDict(stars=values))

    hist = Histogram(df, bins=5, legend=True,
                     title=title,
                     ylabel="Frequency",
                     xlabel="Ratings",
                     width=800, height=800)
    show(hist)
Esempio n. 18
0
def plot_PRC(y_test, predicted):
    """
    Show the precision-recall curve for the model
    input: true label vector of the data, predicted value vector
    """
    # Precision and recall on the test set; the thresholds are not plotted
    precision, recall, _ = precision_recall_curve(y_test, predicted)

    chart = figure(title="Model Metrics (PRC)", plot_width=600, plot_height=600)
    chart.xaxis.axis_label = "Recall"
    chart.yaxis.axis_label = "Precision"
    chart.line(x=recall, y=precision, color="firebrick", line_width=4)

    show(chart)
Esempio n. 19
0
def plot_ROC(y_test, predicted):
    """
    Show the ROC curve for the model
    input: true label vector of the data, predicted value vector
    """
    # False and true positive rates; the thresholds are not plotted
    fpr, tpr, _ = roc_curve(y_test, predicted)

    chart = figure(title="Model Metrics (ROC)", plot_width=600, plot_height=600)
    chart.xaxis.axis_label = "False Positive Rate"
    chart.yaxis.axis_label = "True Positive Rate"
    chart.line(x=fpr, y=tpr, color="navy", line_width=4)

    show(chart)
Esempio n. 20
0
def plot_box(df, component):
    """Show a box plot of the x, y and theta values of one component.

    Rows of ``df`` whose 'component' equals ``component`` are split by
    'metric' ('x', 'y', 'theta'), each subset is passed through ``lineup``
    and the results are concatenated. Boxes are grouped by 'SN' and 'metric'
    and coloured by 'SN'; the chart is titled with the component.
    """
    dfComp = df[df['component'] == component]
    frames = [
        lineup(dfComp[dfComp['metric'] == metric], metric)
        for metric in ('x', 'y', 'theta')
    ]

    xytDf = pd.concat(frames)

    # The title previously used the undefined name `componentb`.
    p = BoxPlot(xytDf, values='value', label=['SN', 'metric'], color='SN', legend=None, title=component)
    show(p)
Esempio n. 21
0
def plot_size_count(dir,title):
    data = read_csv()
    data_size = map(lambda x: x/1024, data.SIZE.tolist())
    data_dir = data.D.tolist()
    length = len(data_size)
    data_to_show = []
    for i in range(0,length):
        if(data_dir[i] == dir):
            data_to_show.append(data_size[i])

    print "Total data:", sum(data_to_show) , "MB"
    
    
    p = charts.Histogram(data_to_show,bins=100,color='#FB9A99',title=title)
    charts.output_file("/tmp/"+title+".html",title=title)
    charts.show(p)
Esempio n. 22
0
def plot_shuffle_size_count(csv_file,dir,title):
    data = read_csv(csv_file)
    data_size = map(lambda x: x/1024, data.SIZE.tolist())
    data_dir = data.D.tolist()
    data_path = data.PATHNAME.tolist()
    length = len(data_size)
    data_to_show = []
    for i in range(0,length):
        if(data_dir[i] == dir and 'shuffle' in data_path[i]):
            data_to_show.append(data_size[i])

    print "Total shuffle data:", sum(data_to_show) , "MB"


    p = charts.Histogram(data_to_show,bins=100,color='#1F78B4',title=title)
    charts.output_file("/tmp/"+title+".html",title=title)
    charts.show(p)
Esempio n. 23
0
def make_plot(which_cuisine):
    """Show a bar chart of the top star rating per restaurant of one cuisine.

    Businesses are loaded from business2.pkl and filtered to rows whose
    ``category_id`` equals ``which_cuisine``. The bar height is the maximum
    ``avg_star`` per ``name``. The chart is written to templates/plots.html.
    """
    # review2.pkl was loaded here before but never used; the read was dropped.
    businessDf = pd.read_pickle("business2.pkl")
    p4 = Bar(
        businessDf[businessDf.category_id == which_cuisine],
        values="avg_star",
        label="name",
        agg="max",
        color="wheat",
        title="Best " + which_cuisine + " by star rating alone",
        xlabel="Restaurant name",
        ylabel="Star rating",
    )
    output_file("templates/plots.html")
    show(p4)
Esempio n. 24
0
def epochs_perf_plot(hist):
    """
    Show a line plot of training loss and accuracy by epoch
    input: nn history object exposing history['loss'] and history['acc']
    The number of epochs is the length of history['acc']; nothing is returned.
    """
    history = hist.history
    epochs = len(history['acc'])
    chart = figure(title="Model Performance (Training Set)", plot_width=600, plot_height=600)

    # (series key, line colour, legend label) in drawing order
    for key, colour, label in (('loss', "firebrick", "Loss"),
                               ('acc', "navy", "Accuracy")):
        chart.line(x=range(0, epochs), y=history[key],
                   color=colour, line_width=4, legend=label)

    chart.legend.orientation = "bottom_left"
    chart.xaxis.axis_label = "Epoch"

    show(chart)
def output_chart(issues_df,output_mode='static'):
    """Build the stacked status-count Bar chart and emit it in the chosen mode.

    The chart counts rows of ``issues_df`` per 'value_delivered', stacked by
    'status', and carries today's date in its title.

    output_mode:
        'static'      - save to ISSUES_FILE using CDN resources
        'notebook'    - show inline via output_notebook()
        anything else - show through a Bokeh server session at BOKEH_SERVER_IP
    """
    import datetime
    import bokeh
    from bokeh.models import HoverTool

    # Timestamp appended to the title
    stamp = datetime.datetime.now().strftime('%m/%d/%Y')
    chart_title = ISSUES_TITLE+" (Updated "+stamp+")"

    issues_chart = Bar(
        issues_df,
        label='value_delivered',
        values='status',
        agg='count',
        stack='status',
        title=chart_title,
        xlabel="Value Delivered",
        ylabel="Number of Use Cases",
        legend='top_right',
        tools='hover',
        color=brewer["GnBu"][3],
    )

    # Leave room for the page margin on both sides of the destination frame.
    margin = HTML_BODY_MARGIN * 2
    issues_chart.plot_width = DESTINATION_FRAME_WIDTH - margin
    issues_chart.plot_height = DESTINATION_FRAME_HEIGHT - margin
    issues_chart.logo = None
    issues_chart.toolbar_location = None

    hover = issues_chart.select(dict(type=HoverTool))
    hover.tooltips = [("Value Delivered", "$x")]

    #--- Configure output ---
    reset_output()

    if output_mode == 'static':
        # Static file.  CDN is most space efficient
        output_file(ISSUES_FILE, title=ISSUES_TITLE,
                    autosave=False, mode='cdn', root_dir=None)
        save(issues_chart, filename=ISSUES_FILE)
        return

    if output_mode == 'notebook':
        output_notebook()   # Show inline
    else:
        # Server (using internal server IP, rather than localhost or external)
        session = bokeh.session.Session(root_url=BOKEH_SERVER_IP, load_from_config=False)
        output_server("ddod_chart", session=session)
    show(issues_chart)
def locations_bar_chart():
    """Show a bar chart of school counts per location for 2015 and 2016.

    Schools are loaded with ``get_schools``. Each one is counted under its
    address "town" (falling back to "address3" when "town" is absent) and the
    year stored in ``s.data["comp"]``. The chart is written to
    visuals/locations_count.html.

    Raises:
        KeyError: if a school's year is neither 2015 nor 2016, or its address
            has neither "town" nor "address3".
    """
    schools_2016 = get_schools("urn/2016.urns.list", 2016)
    schools_2015 = get_schools("urn/2015.urns.list", 2015)

    # location -> {year: number of schools}
    locations = {}

    def _update_locations(schools):
        for s in schools:
            addr = s.data["address"]
            try:
                town = addr["town"]
            except KeyError:
                town = addr["address3"]
            year = s.data["comp"]
            # setdefault creates the zeroed counters only for a new location,
            # so an unexpected year raises KeyError without wiping counts.
            counts = locations.setdefault(town, {2015: 0, 2016: 0})
            counts[year] += 1

    _update_locations(schools_2015)
    _update_locations(schools_2016)

    y_data = {"SR2015": [], "SR2016": []}
    x_categories = []
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for loc, yc in locations.items():
        y_data["SR2015"].append(yc[2015])
        y_data["SR2016"].append(yc[2016])
        x_categories.append(loc)

    output_file("visuals/locations_count.html")
    bar = Bar(
        y_data,
        cat=x_categories,
        title="School count and locations",
        xlabel="Locations",
        ylabel="Count",
        width=1000,
        height=600,
        legend=True,
    )

    show(bar)
Esempio n. 27
0
def graph_feature_importances(model, feature_names, autoscale=True, headroom=0.05, width=10, summarized_columns=None):
	'''
	Author: Mike Bernico
	Purpose:
	Graphs the feature importances of a fitted model as a Bokeh bar chart,
	writes it to featureImportance.html and prints the importances.

	Parameters
	----------
	model = Fitted model exposing ``feature_importances_``.
	feature_names = A list of the feature names, in the same order as ``model.feature_importances_``.
	autoscale, headroom, width = Accepted for backward compatibility; the chart does not use them.
	summarized_columns = A list of substrings to summarize on, for dummy variables
		(e.g. ["day_"] sums every feature whose name contains "day_" into one "day_" entry).

	'''
	feature_dict = dict(zip(feature_names, model.feature_importances_))

	if summarized_columns:
		# Some dummy columns are to be summarized.
		for col_name in summarized_columns:
			matching = [name for name in feature_dict if col_name in name]
			# Replace all matching features by their sum, stored under col_name.
			feature_dict[col_name] = sum(feature_dict.pop(name) for name in matching)

	# Create a graph. Explicit lists keep the values aligned with their names
	# on both Python 2 and 3.
	results = pd.Series(list(feature_dict.values()), index=list(feature_dict.keys()))
	p = Bar(results, ylabel="Feature Importance", legend=None, logo=None)
	output_file("featureImportance.html")
	show(p)

	# Single-argument print() behaves the same on Python 2 and 3.
	print(feature_dict)
Esempio n. 28
0
def plot_addr_count(direction, title):
    """Show a 100-bin histogram of the BLOCK values for one direction.

    Rows from ``read_csv()`` are kept when their D column equals
    ``direction``. The chart is written to /tmp/<title>.html.

    Raises:
        ValueError: if ``direction`` is neither 'R' nor 'W'.
    """
    if direction not in ('R', 'W'):
        raise ValueError("direction must be 'R' or 'W', got %r" % (direction,))

    data = read_csv()
    data_addr = data.BLOCK.tolist()
    data_dir = data.D.tolist()
    selected = [addr for d, addr in zip(data_dir, data_addr) if d == direction]

    p = charts.Histogram(selected, bins=100, color='#FB9A99', title=title)
    charts.output_file("/tmp/%s.html" % title)
    charts.show(p)
Esempio n. 29
0
def get_trains_week(stat_code):
    """Show a histogram of one station's train times over a background image.

    The times are the first ``times`` entry of the ``dis_trains`` rows whose
    ``code`` equals ``stat_code``. They are binned into 120-wide bins over
    0..10080 (presumably minutes of a week, 7 * 1440; confirm against the
    data). The chart is written to test_bg_image.html.

    Returns:
        (hist, edges) as produced by ``np.histogram``.
    """
    sbn.set_style("white")
    stat_vals = dis_trains[dis_trains.code == stat_code]
    times = stat_vals.times.values[0]

    hist, edges = np.histogram(times, bins=range(0, 10081, 120))
    # One unit of headroom above the tallest bar.
    top = max(hist) + 1

    fig = figure(x_range=(0, 10080), y_range=(0, top))
    # Background image derived from the module-level `gradient`.
    d = np.sin(3*gradient)
    fig.image(image=[d], x=0, y=0, dw=10080, dh=top)
    fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="#036564", line_color="#033649")
    fig.xaxis[0].ticker = FixedTicker(ticks=[])
    fig.xaxis.major_label_orientation = "vertical"
    output_file("test_bg_image.html", title="Background image")
    show(fig)
    return hist, edges
Esempio n. 30
0
def bar_response(results_list, output_path):
    """Show one stacked Bar chart per frame in ``results_list``.

    Each frame is printed, then charted with 'hh_type' labels, 'perc_res'
    values and 'digital' stacks. Every chart is written to the same file,
    <output_path>/charts/test bar.html; the charts directory is created when
    missing.
    """
    output_dir = os.path.join(output_path, "charts")
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    target = os.path.join(output_dir, 'test bar.html')

    for frame in results_list:
        print(frame)
        chart = Bar(
            frame,
            label='hh_type',
            values='perc_res',
            stack='digital',
            title="a_title",
            legend='top_right',
            tools="pan,wheel_zoom,box_zoom,reset,hover,save",
        )

        tip = chart.select_one(HoverTool)
        tip.point_policy = "follow_mouse"
        tip.tooltips = [("count", "@height")]

        output_file(target)
        show(chart)
Esempio n. 31
0
# Bar chart of the hashtag frequencies held in `data`, in the given order.
bar = Bar(
    data,
    values='data',
    label=cat(columns='x', sort=False),
    title="Top Hashtags Used By Trump",
    legend=False,
    xlabel="Hashtags",
    ylabel="Number of Occurance",
)

# Split the (mention, frequency) pairs into parallel sequences.
labels_2, freq_2 = zip(*trump_most_common_mentions)
data_2 = {'data_2': freq_2, 'x_2': labels_2}
bar_2 = Bar(
    data_2,
    values='data_2',
    label=cat(columns='x_2', sort=False),
    title="Top User Mentions By Trump",
    legend=False,
    xlabel="User Mentions",
    ylabel="Number of Occurance",
)

# Both charts go into one row of the same output page.
output_file("trump_top_mentions.png.html")
show(row(bar, bar_2))

#%%
# Let's Geo Track Trump
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

# Coordinates
# Keep only the 'place' column of the rows whose handle is realDonaldTrump.
coord_frame = pd.DataFrame(
    tweet_df[tweet_df.handle == 'realDonaldTrump']['place'])
# Expand each 'place' value into the two new columns 'Long' and 'Lat'.
coord_frame[['Long', 'Lat']] = coord_frame['place'].apply(pd.Series)

# NOTE(review): presumably the longitude/latitude window of the map drawn
# later — confirm against the plotting code.
lon_min, lon_max = -130, -55
lat_min, lat_max = 20, 50

# Open matplotlib figure number 2 at 12 x 6 inches.
plt.figure(2, figsize=(12, 6))
from bokeh.charts import Histogram, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Order the rows by cylinder count, modifying the sample frame in place.
df.sort_values(by='cyl', inplace=True)

# Chart options are collected first so the constructor call stays short.
hp_chart_options = dict(
    values='hp',
    color='cyl',
    title="HP Distribution by Cylinder Count",
    legend='top_right',
)
hist = Histogram(df, **hp_chart_options)

# Send the output to a standalone HTML file and display the chart.
output_file("histogram_single.html", title="histogram_single.py example")
show(hist)
Esempio n. 33
0
# Reshape df2 to long form: 'abbr' and 'name' stay as identifier columns and
# every other column is unpivoted into 'variable' / 'value' pairs.
df2 = pd.melt(df2, id_vars=['abbr', 'name'])

# One marker per (name, variable) pair, coloured by the melted column name.
scatter5 = Scatter(df2,
                   x='value',
                   y='name',
                   color='variable',
                   title="x='value', y='name', color='variable'",
                   xlabel="Medals",
                   ylabel="Top 10 Countries",
                   legend='bottom_right')

# Each axis is built with blend() from two columns of `flowers`
# (`flowers` is defined outside this excerpt).
scatter6 = Scatter(
    flowers,
    x=blend('petal_length', 'sepal_length', name='length'),
    y=blend('petal_width', 'sepal_width', name='width'),
    color='species',
    title=
    'x=petal_length+sepal_length, y=petal_width+sepal_width, color=species',
    legend='top_right')

output_file("scatter_multi.html", title="scatter_multi.py example")

# Lay the charts out in a two-column grid.
# NOTE(review): scatter1 is not passed here -- confirm that is intentional.
show(
    gridplot(scatter0,
             scatter2,
             scatter3,
             scatter4,
             scatter5,
             scatter6,
             ncols=2))
Esempio n. 34
0
# RowPlot-Histogram

from bokeh.charts import Histogram, output_file, show
from bokeh.layouts import row
from bokeh.sampledata.autompg import autompg as df

##################################################

# Both charts share the same width so they line up in the row.
CHART_WIDTH = 400

# Plain histogram of the 'mpg' column.
hist = Histogram(
    df,
    values='mpg',
    title="Auto MPG Histogram",
    plot_width=CHART_WIDTH,
)

# Same column again, this time labelled and coloured by cylinder count.
by_cylinder_options = dict(
    values='mpg',
    label='cyl',
    color='cyl',
    legend='top_right',
    title="MPG Histogram by Cylinder Count",
    plot_width=CHART_WIDTH,
)
hist2 = Histogram(df, **by_cylinder_options)

# Write both charts, side by side, to a single HTML page.
output_file('hist.html')
side_by_side = row(hist, hist2)
show(side_by_side)
    elif (userFlag == '5'):

        if (dataOptions == 2):
            bubbleSortA(diseaseCounter, singleDiseaseList)

            ascendingData = {
                'Disease Name': singleDiseaseList,
                '# of Occurances': diseaseCounter
            }

            asc = Bar(ascendingData,
                      values='# of Occurances',
                      label=CatAttr(columns=['Disease Name'], sort=False),
                      title='U.S. Chronic Diseases(2007-2013)',
                      color=userColor)
            output_file("bar2.html")
            show(asc)
        elif (dataOptions == 3):
            bubbleSortD(diseaseCounter, singleDiseaseList)
            descendingData = {
                'Disease Name': singleDiseaseList,
                '# of Occurances': diseaseCounter
            }
            des = Bar(descendingData,
                      values='# of Occurances',
                      label=CatAttr(columns=['Disease Name'], sort=False),
                      title='U.S. Chronic Diseases(2007-2013)',
                      color=userColor)
            output_file("bar3.html")
            show(des)
Esempio n. 36
0
                     dash=['IBM', 'MSFT', 'AAPL'],
                     title="Timeseries (Line Explicit)",
                     tools=TOOLS,
                     ylabel='Stock Prices')

# Step chart: same three series, rendered with builder_type='step'.
tsstep = TimeSeries(data,
                    x='Date',
                    y=['IBM', 'MSFT', 'AAPL'],
                    legend=True,
                    builder_type='step',
                    title="Timeseries (Step)",
                    tools=TOOLS,
                    ylabel='Stock Prices')

# Point chart: builder_type='point', with marker and colour both keyed on
# the series names.
tspoint = TimeSeries(data,
                     x='Date',
                     y=['IBM', 'MSFT', 'AAPL'],
                     legend=True,
                     builder_type='point',
                     marker=['IBM', 'MSFT', 'AAPL'],
                     color=['IBM', 'MSFT', 'AAPL'],
                     title="Timeseries (Point)",
                     tools=TOOLS,
                     ylabel='Stock Prices')

output_file("timeseries.html", title="timeseries.py example")

# tsline and tsline2 are created earlier in the script (partly outside this
# excerpt); vplot() combines the four charts into one layout.
show(vplot(tsline, tsline2, tsstep, tspoint))
# # Using Bokeh

# In[44]:

# Route bokeh output to the notebook.
from bokeh.io import output_notebook
output_notebook()

# In[45]:

from bokeh.charts import Histogram, output_file, show

# Histogram of the 'SCORE' column of `samp`, using the chart defaults
# (no title, axis labels or bin count are passed here).
p1 = Histogram(samp['SCORE'])
# output_file("histogram.html")

show(p1)

# In[46]:

from bokeh.charts import Histogram, output_file, show
# NOTE(review): `df` is imported but not used in this cell.
from bokeh.sampledata.autompg import autompg as df

# 'SCORE' histogram of `mRests` with 15 bins, coloured by the 'GRADE' column.
# NOTE(review): p2 is built but never passed to show() in this excerpt.
p2 = Histogram(mRests,
               'SCORE',
               color='GRADE',
               title="Score Grouped by Grade",
               bins=15,
               legend='top_right')

# output_file("histogram_color.html")
Esempio n. 38
0
              stat='mean',
              legend='top_right')

# stat=None: no aggregation statistic is requested for 'fruit_count'.
hm9 = HeatMap(fruits, y='year', x='fruit', values='fruit_count', stat=None)

# Unemployment by year (x) and month (y); sort_dim={'x': False} is passed
# for the x dimension and no aggregation statistic is requested.
hm10 = HeatMap(unempl,
               x='Year',
               y='Month',
               values='Unemployment',
               stat=None,
               sort_dim={'x': False},
               width=900,
               plot_height=500)

output_file("hm11.html", title="Bokeh heatmap example (hm11.py)")

# hm1..hm8 are created earlier in the script (outside this excerpt).
# hm1..hm9 go into a one-column grid at 800x800; hm10 is placed below it.
show(
    column(
        gridplot(hm1,
                 hm2,
                 hm3,
                 hm4,
                 hm5,
                 hm6,
                 hm7,
                 hm8,
                 hm9,
                 ncols=1,
                 plot_width=800,
                 plot_height=800), hm10))
Esempio n. 39
0
class Dado:
    """A group of identical dice that are always rolled together.

    ``numero_de_dados`` is the number of dice per roll and
    ``numero_de_lados`` the number of faces on each die.
    """

    def __init__(self, numero_de_dados, numero_de_lados):
        self.numero_de_lados = numero_de_lados
        self.numero_de_dados = numero_de_dados

    def tirar_dados(self):
        """Roll every die once and return the face values as a list."""
        return [
            random.randint(1, self.numero_de_lados)
            for _ in range(self.numero_de_dados)
        ]


if __name__ == "__main__":
    # Roll two six-sided dice `tiradas` times and keep the total of each roll.
    dado_6 = Dado(2, 6)
    tiradas = 10000
    sumas = [sum(dado_6.tirar_dados()) for _ in range(tiradas)]
    print(len(sumas))

    # Histogram of the totals, written to an HTML file and displayed.
    titulo = 'Suma de dos dados con 10000 realizaciones'
    grafica = Histogram(sumas, title=titulo)

    output_file('suma_de_dados.html')
    show(grafica)
              width=800)

    output_file('ECG_DataQuality_of_' + str(ids[i]) + ".html",
                title="ECG_BAR_Plot")

    source = ColumnDataSource(data)

    columns = [
        TableColumn(field="Date", title="Date"),
        TableColumn(field="ACCEPTABLE", title="ACCEPTABLE(Hours)"),
        TableColumn(field="UNACCEPTABLE", title="UNACCEPTABLE(Hours)"),
        TableColumn(field="Total", title="Total(Hours)")
    ]
    data_table = DataTable(source=source,
                           columns=columns,
                           width=500,
                           height=400)

    show(hplot(bar, vform(data_table)))
    dataf['ACCEPTABLE'] = dataf['ACCEPTABLE'] + data['ACCEPTABLE']
    dataf['UNACCEPTABLE'] = dataf['UNACCEPTABLE'] + data['UNACCEPTABLE']
    dataf['Date'] = dataf['Date'] + data['Date']
    dataf['Total'] = dataf['Total'] + data['Total']
    dataf['ParticipantID'] = dataf['ParticipantID'] + [
        ids[i] for f in range(len(data['Total']))
    ]

df = pd.DataFrame(dataf)

df.to_excel('ECG.xlsx')
Esempio n. 41
0
import pandas as pd
from bokeh.charts import Horizon, output_file, show

def _read_quotes(url):
    """Load one CSV price table from *url*, parsing its 'Date' column."""
    return pd.read_csv(url, parse_dates=['Date'])


# Historical price tables for the three tickers.
AAPL = _read_quotes(
    "http://ichart.yahoo.com/table.csv?s=AAPL&a=0&b=1&c=2000&d=0&e=1&f=2010")
MSFT = _read_quotes(
    "http://ichart.yahoo.com/table.csv?s=MSFT&a=0&b=1&c=2000&d=0&e=1&f=2010")
IBM = _read_quotes(
    "http://ichart.yahoo.com/table.csv?s=IBM&a=0&b=1&c=2000&d=0&e=1&f=2010")

# One adjusted-close series per ticker; the dates are taken from the AAPL table.
data = {
    'AAPL': AAPL['Adj Close'],
    'Date': AAPL['Date'],
    'MSFT': MSFT['Adj Close'],
    'IBM': IBM['Adj Close'],
}

horizon_options = dict(
    x='Date',
    plot_width=800,
    plot_height=300,
    title="horizon plot using stock inputs",
)
hp = Horizon(data, **horizon_options)

output_file("horizon.html")
show(hp)
Esempio n. 42
0
# -*- coding:utf-8 -*-
__author__ = 'chunhui.zhang'

from bokeh.charts import Line, show, output_file

# Three timing columns measuring the same quantity, plus a 'test' column
# that alternates between 'foo' and 'bar'.
data = {
    'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
    'test': ['foo', 'bar'] * 7,
}

# The same column names drive the y values, the dash style and the colour,
# so every measured column gets its own dash/colour combination.
measures = ('python', 'pypy', 'jython')
line = Line(
    data,
    y=list(measures),
    dash=list(measures),
    color=list(measures),
    legend_sort_field='color',
    legend_sort_direction='ascending',
    title="Interpreter Sample Data",
    ylabel='Duration',
    legend=True,
)

output_file("line_single.html", title="line_single.py example")
show(line)
Esempio n. 43
0
# Simple line chart of the three series (`data` and `TOOLS` are defined
# earlier in the script, outside this excerpt).
tsline = TimeSeries(
    data, y=['IBM', 'MSFT', 'AAPL'], legend=True,
    title="Timeseries (Line)", tools=TOOLS, ylabel='Stock Prices',
    xlabel='Date')

# Line chart with colour and dash style explicitly keyed on the series names.
tsline2 = TimeSeries(
    data, y=['IBM', 'MSFT', 'AAPL'], legend=True,
    color=['IBM', 'MSFT', 'AAPL'], dash=['IBM', 'MSFT', 'AAPL'],
    title="Timeseries (Line Explicit)", tools=TOOLS, ylabel='Stock Prices',
    xlabel='Date')

# Same series rendered with builder_type='step'.
tsstep = TimeSeries(
    data, y=['IBM', 'MSFT', 'AAPL'], legend=True, builder_type='step',
    title="Timeseries (Step)", tools=TOOLS, ylabel='Stock Prices',
    xlabel='Date')

# Same series rendered with builder_type='point'; marker and colour are keyed
# on the series names.
tspoint = TimeSeries(
    data, y=['IBM', 'MSFT', 'AAPL'], legend=True, builder_type='point',
    marker=['IBM', 'MSFT', 'AAPL'], color=['IBM', 'MSFT', 'AAPL'],
    title="Timeseries (Point)", tools=TOOLS, ylabel='Stock Prices',
    xlabel='Date')

output_file("timeseries.html", title="timeseries.py example")

# All four charts are combined with column() into one layout.
show(column(tsline, tsline2, tsstep, tspoint))
Esempio n. 44
0
from bokeh.charts import Dot, show, output_file

# Table-like input: one entry per (sample, interpreter) measurement.
measurements = [
    ('1st', 'python', -2),
    ('2nd', 'python', 5),
    ('1st', 'pypy', 12),
    ('2nd', 'pypy', 40),
    ('1st', 'jython', 22),
    ('2nd', 'jython', 30),
]
data = {
    'sample': [sample for sample, _, _ in measurements],
    'interpreter': [interpreter for _, interpreter, _ in measurements],
    'timing': [timing for _, _, timing in measurements],
}

# Labels come from the 'interpreter' column, grouping from the 'sample'
# column; the 'timing' values are aggregated with the mean.
dot_options = dict(
    values='timing',
    label='interpreter',
    group='sample',
    agg='mean',
    title="Python Interpreter Sampling",
    legend='top_right',
    width=600,
)
dots = Dot(data, **dot_options)

output_file("dots.html")
show(dots)
Esempio n. 45
0
    def process_stats(self, input_path_file, output_path_file_csv,
                      output_path_file_html):
        """Export the read statistics as a table and plot them as bar charts.

        The JSON file ``input_path_file`` is loaded with ``orient='index'``,
        written tab-separated to ``output_path_file_csv + '.csv'`` and turned
        into two stacked bar charts that are saved to
        ``output_path_file_html + '.html'`` and displayed.
        """

        def chart_tools(tooltips):
            # Fresh tool instances for one chart; the hover tool comes first.
            return [
                HoverTool(tooltips=tooltips),
                PanTool(),
                BoxSelectTool(),
                BoxZoomTool(),
                WheelZoomTool(),
                ResizeTool(),
                ResetTool(),
            ]

        stats = pd.read_json(input_path_file, orient='index')
        stats.to_csv(output_path_file_csv + '.csv', index=True, sep='\t')

        # Chart 1: read processing (poly/single A removed or unmodified).
        # Every row of the table contributes three entries, one per condition.
        condition_labels = [
            'poly(A) removed', 'single(A) removed', 'unmodified'
        ]
        condition_columns = ['polya_removed', 'single_a_removed', 'unmodified']
        conditions, samples1, read_nr = [], [], []
        for sample, record in stats.iterrows():
            conditions += condition_labels
            samples1 += [sample] * 3
            read_nr += [record[column] for column in condition_columns]

        data1 = {
            'condition': conditions,
            'sample': samples1,
            'Nr. of reads': read_nr,
        }

        bar1 = Bar(data1,
                   values='Nr. of reads',
                   label='sample',
                   stack='condition',
                   agg='sum',
                   title="Input read types",
                   legend='top_right',
                   palette=['darkseagreen', 'salmon', 'darkslateblue'],
                   tools=chart_tools([("Sample", "@x"),
                                      ("Nr of reads", "@y")]))

        # Chart 2: read length after processing versus frequency, per sample.
        samples2, read_length, frequency = [], [], []
        for sample, record in stats.iterrows():
            length_counts = record['read_length_after_processing_and_freq']
            for length, count in length_counts.items():
                samples2 += [sample] * int(self.nr_items_pro(length))
                read_length.append(float(length))
                frequency.append(count)

        data2 = {
            'samples': samples2,
            'read length': read_length,
            'frequency': frequency,
        }

        bar2 = Bar(data2,
                   values='frequency',
                   label='read length',
                   stack='samples',
                   agg='sum',
                   title="Input read length and frequency",
                   legend='top_left',
                   palette=['darkseagreen', 'salmon', 'darkslateblue',
                            'olive'],
                   width=1200,
                   bar_width=1.0,
                   tools=chart_tools([("Read length", "@x"),
                                      ("Frequency", "@y")]))

        # Stack both charts vertically in one HTML page.
        bar = column(bar1, bar2)
        output_file(output_path_file_html + '.html')
        show(bar)
Esempio n. 46
0
# Set the title font size of line2 (created earlier, outside this excerpt).
line2.title_text_font_size = '11pt'

# Dash style and colour are both keyed on the three measured columns.
line3 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash=['python', 'pypy', 'jython'],
    color=['python', 'pypy', 'jython'],
    title=
    "Interpreters (x='date', y, dash, color=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True)
line3.title_text_font_size = '11pt'

# Colour keyed on the measured columns, dash style on the 'test' column;
# tooltips display the '@series' and '@test' fields.
line4 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash='test',
    color=['python', 'pypy', 'jython'],
    title=
    "Interpreters (x='date', y, color=['python', 'pypy', 'jython'], dash='test') with tooltips",
    ylabel='Duration',
    legend=True,
    tooltips=[('series', '@series'), ('test', '@test')])

output_file("line_multi.html", title="line examples")

# line, line0 and line1 are defined outside this excerpt. Each hplot() groups
# charts into one part of the layout and vplot() combines the parts.
show(vplot(hplot(line), hplot(line0, line1), hplot(line2, line3),
           hplot(line4)))
Esempio n. 47
0
# create a DataFrame with the sample data
df = pd.io.json.json_normalize(data['data'])

# filter by countries with at least one medal and sort by total, descending.
# DataFrame.sort() has been removed from pandas; sort_values() is the
# supported spelling of the same operation.
df = df[df['medals.total'] > 0]
df = df.sort_values(by="medals.total", ascending=False)

# get the countries and group the data by medal type
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# build a dict containing the grouped data (one array per medal type)
medals = OrderedDict(bronze=bronze, silver=silver, gold=gold)

# any of the following commented are valid BoxPlot inputs
#medals = pd.DataFrame(medals)
#medals = list(medals.values())
#medals = tuple(medals.values())
#medals = np.array(list(medals.values()))

output_file("boxplot.html")

# one box per medal type; outliers are drawn with circle markers
boxplot = BoxPlot(
    medals, marker='circle', outliers=True, title="boxplot test",
    xlabel="medal type", ylabel="medal count", width=800, height=600)

show(boxplot)
Esempio n. 48
0
from bokeh.charts import Scatter, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Horsepower against fuel economy, one colour per cylinder count.
scatter_options = dict(
    x='mpg',
    y='hp',
    color='cyl',
    title="HP vs MPG (shaded by CYL)",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)
p = Scatter(df, **scatter_options)

# Save the chart to an HTML file and display it.
output_file("scatter.html")
show(p)
Esempio n. 49
0
    def handle(self, *args, **options):
        """Score the reviews of the product CSV and persist the results.

        Reads the CSV at the hard-coded ``file_input`` path, computes the
        TextBlob polarity of every review text (column 9), saves one
        ``Sentiment`` row per review in the bucket its polarity falls into,
        saves the bucket counts (``SentimentCount``) and fractions
        (``SentimentPercentage``) and renders a bar chart of the counts to
        ``bar.html``.
        """
        # NOTE(review): machine-specific absolute path -- consider passing it
        # in through the command options.
        file_input = '/Users/Jason Plagens/Documents/Spring Semester 2016/ISA 406/Project/Django Project/isa406/sawa/data/Product Reviews - PowerBar Energy Blends.csv'

        #Initialize the sentiment factor lists
        terrible_list = []
        bad_list = []
        neutral_list = []
        good_list = []
        excellent_list = []

        #Initialize empty lists for review information
        activity_text_list = []
        clean_activity_text_list = []
        created_date_list = []
        rating_list = []
        activity_name_list = []
        title_list = []

        #Initialize empty lists for user who posted review information
        username_list = []
        age_list = []
        gender_list = []
        city_list = []

        #Initialize empty lists for media reach of review information
        facebook_list = []
        twitter_list = []
        offline_list = []

        # newline='' is what the csv module asks for, so that line breaks
        # embedded in quoted fields are handled by the reader itself.
        with open(file_input, encoding='utf-8', newline='') as csvfile:
            # A csv.reader can be consumed only once. Previously it was looped
            # over three times, so the user and media-reach lists always
            # stayed empty; everything is now collected in a single pass.
            for row in csv.reader(csvfile, delimiter=','):
                #Review data
                activity_text_list.append(row[9])
                created_date_list.append(row[1])
                rating_list.append(row[6])
                activity_name_list.append(row[7])
                title_list.append(row[8])

                #Data about the user who posted the review
                username_list.append(row[0])
                age_list.append(row[2])
                gender_list.append(row[3])
                city_list.append(row[4])

                #Media reach of the review
                # NOTE(review): these indices repeat the username/age/gender
                # columns above -- confirm the intended columns.
                facebook_list.append(row[0])
                twitter_list.append(row[2])
                offline_list.append(row[3])

        # NOTE(review): only activity_text_list is consumed below; the other
        # lists are collected but not used yet.

        #Print the total number of reviews that are in the file
        print("Total number of reviews = ", len(activity_text_list))

        #Bucket every review by its TextBlob polarity
        for list_item in activity_text_list:
            review_polarity = TextBlob(list_item).sentiment.polarity

            # Buckets: [-1, -0.5) terrible, [-0.5, 0) bad, 0 neutral,
            # (0, 0.5] good, (0.5, 1] excellent.
            if -1 <= review_polarity < -0.5:
                bucket, field = terrible_list, 'terrible_sentiment'
            elif -0.5 <= review_polarity < 0:
                bucket, field = bad_list, 'bad_sentiment'
            elif review_polarity == 0:
                bucket, field = neutral_list, 'neutral_sentiment'
            elif 0 < review_polarity <= 0.5:
                bucket, field = good_list, 'good_sentiment'
            elif 0.5 < review_polarity <= 1:
                bucket, field = excellent_list, 'excellent_sentiment'
            else:
                # A value outside [-1, 1] matches no bucket and is skipped.
                continue

            #Save into internal list, then into db
            bucket.append(review_polarity)
            Sentiment(**{field: review_polarity}).save()

        #Create object for count of sentiment and save to db
        ct_object = SentimentCount(ct_terrible=len(terrible_list),
                                   ct_bad=len(bad_list),
                                   ct_neutral=len(neutral_list),
                                   ct_good=len(good_list),
                                   ct_excellent=len(excellent_list))

        ct_object.save()

        #Create object for percentage of sentiment and save to db.
        # With an empty CSV every bucket is empty; dividing by 1 then stores
        # zeros instead of raising ZeroDivisionError.
        total_reviews = len(activity_text_list) or 1
        pt_object = SentimentPercentage(
            pt_terrible=(len(terrible_list) / total_reviews),
            pt_bad=(len(bad_list) / total_reviews),
            pt_neutral=(len(neutral_list) / total_reviews),
            pt_good=(len(good_list) / total_reviews),
            pt_excellent=(len(excellent_list) / total_reviews))

        pt_object.save()

        #Bar chart of the number of reviews per sentiment bucket
        data = {
            'sentiment factor':
            ['Terrible', 'Bad', 'Neutral', 'Good', 'Excellent'],
            'sentiment count': [
                len(terrible_list),
                len(bad_list),
                len(neutral_list),
                len(good_list),
                len(excellent_list)
            ]
        }

        p = Bar(data,
                values='sentiment count',
                label='sentiment factor',
                title="Sentiment of Reviews",
                legend='top_right',
                width=400)

        output_file("bar.html")

        show(p)
Esempio n. 50
0
from bokeh.charts import BoxPlot, output_file, show
from bokeh.sampledata.autompg import autompg as df

# 'origin' is the column naming the data source of each autompg row.
title = "MPG by Cylinders and Data Source, Colored by Cylinders"

# Boxes are labelled by two columns but coloured by only one of them, which
# visually groups the boxes that share a cylinder count.
box_options = dict(
    label=['cyl', 'origin'],
    values='mpg',
    color='cyl',
    title=title,
)
box_plot = BoxPlot(df, **box_options)

output_file("boxplot_single.html")
show(box_plot)
Esempio n. 51
0
                    title="label=['cyl', 'origin'] color='cyl'",
                    color='cyl')

# specify custom marker for outliers ('cross')
box_plot6 = BoxPlot(df,
                    label='cyl',
                    values='mpg',
                    marker='cross',
                    title="label='cyl', values='mpg', marker='cross'")

# color whisker by cylinder (whisker_color keyed on the 'cyl' column)
box_plot7 = BoxPlot(df,
                    label='cyl',
                    values='mpg',
                    whisker_color='cyl',
                    title="label='cyl', values='mpg', whisker_color='cyl'")

# remove outliers (outliers=False)
box_plot8 = BoxPlot(df,
                    label='cyl',
                    values='mpg',
                    outliers=False,
                    title="label='cyl', values='mpg', outliers=False")

# collect and display: box_plot..box_plot5 are created earlier in the script
# (outside this excerpt); each hplot() groups up to three charts and vplot()
# combines the groups.
output_file("boxplot.html")

show(
    vplot(hplot(box_plot, box_plot2, box_plot3),
          hplot(box_plot4, box_plot5, box_plot6), hplot(box_plot7, box_plot8)))
Esempio n. 52
0
# Example with nested json/dict like data, which has been pre-aggregated and pivoted
df2 = df_from_json(data)
# DataFrame.sort() has been removed from pandas; sort_values() performs the
# same descending sort on the 'total' column.
df2 = df2.sort_values(by='total', ascending=False)

# keep the ten rows with the largest totals, then unpivot every column other
# than 'abbr' and 'name' into 'variable' / 'value' pairs
df2 = df2.head(10)
df2 = pd.melt(df2, id_vars=['abbr', 'name'])

scatter5 = Scatter(df2,
                   x='value',
                   y='name',
                   color='variable',
                   title="x='value', y='name', color='variable'",
                   xlabel="Medals",
                   ylabel="Top 10 Countries",
                   legend='bottom_right')

# each axis is built with blend() from two columns of `flowers`
scatter6 = Scatter(
    flowers,
    x=blend('petal_length', 'sepal_length', name='length'),
    y=blend('petal_width', 'sepal_width', name='width'),
    color='species',
    title=
    'x=petal_length+sepal_length, y=petal_width+sepal_width, color=species',
    legend='top_right')

output_file("scatter_multi.html")

# scatter0..scatter4 are created earlier in the script (outside this excerpt)
show(
    vplot(hplot(scatter0, scatter1), hplot(scatter2, scatter3),
          hplot(scatter4, scatter5), hplot(scatter6)))
Esempio n. 53
0
# multiple columns used as the label
dot_plot5 = Dot(df, label=['cyl', 'origin'], values='mpg', agg='mean',
                title="label=['cyl', 'origin'] values='mpg' agg='mean'")

# mean mpg labelled by origin, with the 'cyl' column passed as stack
dot_plot6 = Dot(df, label='origin', values='mpg', agg='mean', stack='cyl',
                title="label='origin' values='mpg' agg='mean' stack='cyl'",
                legend='top_right')

# mean displacement labelled by cylinder count, grouped by origin
dot_plot7 = Dot(df, label='cyl', values='displ', agg='mean', group='origin',
                title="label='cyl' values='displ' agg='mean' group='origin'",
                legend='top_right')

# same layout for the 'neg_mpg' column, coloured by origin
# NOTE(review): 'neg_mpg' is not created in this excerpt -- presumably added
# to df earlier in the script; confirm.
dot_plot8 = Dot(df, label='cyl', values='neg_mpg', agg='mean', group='origin',
                color='origin', legend='top_right',
                title="label='cyl' values='neg_mpg' agg='mean' group='origin'")

# infer labels from index: 'cyl' becomes the index and no label is passed.
# This rebinds df, so it has to stay after the charts above.
df = df.set_index('cyl')
dot_plot9 = Dot(df, values='mpg', agg='mean', legend='top_right', title='inferred labels')

# collect and display: dot_plot..dot_plot4 are created earlier in the script
# (outside this excerpt); charts are arranged three per hplot()
output_file("dots_multi.html")

show(
    vplot(
        hplot(dot_plot, dot_plot2, dot_plot3),
        hplot(dot_plot4, dot_plot5, dot_plot6),
        hplot(dot_plot7, dot_plot8, dot_plot9)
    )
)
Esempio n. 54
0
                pullList.append(str(pull) + detColorNonFocus(pull))
        for pull in pullList:  # For each individual in a set of 5
            if pull[1] in pullColorStillNeed:  # if its still a color i need
                pullNumber = pullNumber + 1  # I pull it from the set
                totalPulls = totalPulls + 1
                orbCount = orbCount + orbsForPull(
                    pullNumber)  # purchasing and adding to orbs
                if pull[1:] in sampleStillWant:  # if the pull is a focus
                    sampleStillWant.remove(pull[1:])  # remove that
                    pullColorStillNeed.remove(pull[1])
                if int(pull[0]) >= 5:
                    totalPulls = 0
                otherPulls[pull[0]] = otherPulls[pull[0]] + 1

        if len(sampleStillWant) == 0:
            gotAllRare = True

    orbsUsed.append(orbCount)

# Summary of the simulation above: mean of the per-run orb counts.
print("Average orbs required for pulling",
      str(len(wantFocus)) + " focus heroes:", str(np.mean(orbsUsed)))
# Per-run averages of the otherPulls counters keyed '3'..'6'.
# NOTE(review): operator precedence makes the last value
# (otherPulls['6'] / sampleSize) - len(wantFocus) -- confirm that is intended.
print("You got an average of:\n", otherPulls['3'] / sampleSize, "3 stars\n",
      otherPulls['4'] / sampleSize, "4 stars\n", otherPulls['5'] / sampleSize,
      "5 stars\n", otherPulls['6'] / sampleSize - len(wantFocus),
      "other focus characters")

# Total counted pulls per run, and orbs spent per counted pull (3 decimals).
# NOTE(review): the second division raises ZeroDivisionError when all
# otherPulls counters are zero.
print("for an average of",
      sum(otherPulls.values()) / sampleSize, "new characters at a price of",
      round(sum(orbsUsed) / sum(otherPulls.values()), 3), 'orbs per character')
# Histogram of the orb count of every run.
show(Histogram(orbsUsed, plot_width=1800, plot_height=900))
Esempio n. 55
0
from collections import OrderedDict

import pandas as pd

from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.olympics2014 import data

# flatten the nested sample data into a DataFrame
df = pd.io.json.json_normalize(data['data'])

# filter by countries with at least one medal and sort by total, descending.
# DataFrame.sort() has been removed from pandas; sort_values() is the
# supported spelling of the same operation.
df = df[df['medals.total'] > 0]
df = df.sort_values(by="medals.total", ascending=False)

# get the countries and group the data by medal type
countries = df.abbr.values.tolist()
gold = df['medals.gold'].astype(float).values
silver = df['medals.silver'].astype(float).values
bronze = df['medals.bronze'].astype(float).values

# build a dict containing the grouped data (OrderedDict is imported above;
# the script previously used it without importing it)
medals = OrderedDict(bronze=bronze, silver=silver, gold=gold)

# any of the following commented are also valid Bar inputs
#medals = pd.DataFrame(medals)
#medals = list(medals.values())

output_file("stacked_bar.html")

# one stacked bar per country, built from the three medal arrays
bar = Bar(medals, countries, title="Stacked bars", stacked=True)

show(bar)
Esempio n. 56
0
from collections import OrderedDict

from bokeh.charts import Line, show, output_file

# One list of measurements per interpreter, kept in insertion order.
xyvalues = OrderedDict([
    ('python', [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111]),
    ('pypy', [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130]),
    ('jython', [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160]),
])

# Other input forms that Line also accepts:
#xyvalues = pd.DataFrame(xyvalues)
#xyvalues = xyvalues.values()
#xyvalues = np.array(xyvalues.values())

output_file("lines.html", title="line.py example")

line_options = dict(title="Lines", ylabel='measures', legend=True)
chart = Line(xyvalues, **line_options)

show(chart)
Esempio n. 57
0
from bokeh.charts import HeatMap, bins, output_file, show
import pandas as pd
# Tab-separated input file and the column types it is parsed with.
DATA_FILE = '../../samples/GSM188012.CEL'
dtype = dict(x=int, y=int, lux=float)
dataset = pd.read_csv(DATA_FILE, sep='\t', dtype=dtype)

# Both axes are passed through bins(); each cell shows the mean 'lux' value.
heatmap_options = dict(
    x=bins('x'),
    y=bins('y'),
    values='lux',
    title='Expression',
    stat='mean',
)
hm = HeatMap(dataset, **heatmap_options)

output_file("heatmap7.html", title="heatmap.py example")
show(hm)
Esempio n. 58
0
        workit.signalToBackground()
    ]
    csvwriter.writerow(stats)

    calc_inhibitions = workit.percentInhibition()
    list_inhibitions.append(calc_inhibitions)

# Combine the per-plate inhibition frames collected in the loop above and
# export them as '<inhibitions_output> <project_code> <project_date>.csv'.
all_inhibitions = concat(list_inhibitions)
all_inhibitions.to_csv(inhibitions_output + ' ' + project_code + ' ' +
                       project_date + '.csv',
                       index=False)

# Generate HeatMap visualizations using Bokeh library for each plate in percent inhibitions Dataframe.
output_file(viz_output + ' ' + project_code + ' ' + project_date + '.html')
graphs = []
for plate2 in barcodes:
    # Rows belonging to this plate only.
    aplate = all_inhibitions[(all_inhibitions['Barcode'] == plate2)]
    # One cell per (Column, Reverse Row) position; stat=None requests no
    # aggregation statistic.
    hm = HeatMap(aplate,
                 x='Column',
                 y='Reverse Row',
                 values='Percent Inhibition',
                 palette=RdYlBu3,
                 title=plate2,
                 stat=None,
                 hover_tool=True)
    graphs.append(hm)

# tuplize() is defined outside this excerpt.
# NOTE(review): presumably it arranges the charts into the rows that
# gridplot() expects -- confirm.
arranged_graphs = tuplize(graphs)
visualization = gridplot(arranged_graphs)
show(visualization)
Esempio n. 59
0
                bar_width=0.7,
                title="(v2) Valence with MSE")
# Arousal results: one bar chart for the 'r2' column ...
p_aro_r2 = Bar(result_ridge_aro,
               values='r2',
               legend=None,
               color='orange',
               bar_width=0.7,
               title="(a1) Arousal with R2 score")
# ... and one for the 'mse' column.
p_aro_mse = Bar(result_ridge_aro,
                values='mse',
                legend=None,
                color='orange',
                bar_width=0.7,
                title="(a2) Arousal with MSE")
# 2x2 grid: valence charts (created above, partly outside this excerpt) in
# the left column, arousal charts in the right column.
show(
    gridplot([[p_val_r2, p_aro_r2], [p_val_mse, p_aro_mse]],
             plot_width=450,
             plot_height=400))

# In[27]:

# Notebook cell: join the valence and arousal result tables, with the
# suffixes '_val' / '_aro' applied to overlapping column names.
result_ridge_val.join(result_ridge_aro, lsuffix='_val', rsuffix='_aro')

# ## SVR

# In[8]:

get_ipython().magic(u'timeit')
val_svr['r2']['all'], val_svr['mse']['all'] = test_regr(
    GridSearchCV(SVR(kernel='rbf', gamma=0.1),
                 cv=5,
                 n_jobs=-1,
Esempio n. 60
0
from bokeh.charts import Donut, show, output_file
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.olympics2014 import data

import pandas as pd

# Convert the json/dict sample data into a DataFrame with the helper.
df = df_from_json(data)

# Keep the countries with more than 8 medals in total, largest totals first.
has_many_medals = df['total'] > 8
df = df[has_many_medals]
df = df.sort_values(by="total", ascending=False)

# Long form: one row per (country, medal type) with its count.
melt_options = dict(
    id_vars=['abbr'],
    value_vars=['bronze', 'silver', 'gold'],
    value_name='medal_count',
    var_name='medal',
)
df = pd.melt(df, **melt_options)

# Donut labelled by country and medal type, sized by the medal count.
donut_options = dict(
    label=['abbr', 'medal'],
    values='medal_count',
    text_font_size='8pt',
    hover_text='medal_count',
)
d = Donut(df, **donut_options)

output_file("donut.html", title="donut.py example")
show(d)