Beispiel #1
0
def plotting(*name):
    """Draw the retention line charts for the module-level ``plot_df``.

    Two charts are stacked in one column: ``day1``/``day7``/``matched`` on
    top and ``single_day_retention`` underneath.

    Args:
        *name: Optional positional arguments. If any are given, the first is
            used as the output file stem (``name[0] + '.html'``); otherwise
            the charts are sent to the notebook.

    Returns:
        None. When ``plot_df`` is not available a hint is printed and
        nothing is plotted.
    """
    try:
        df = plot_df.copy()
    except (NameError, AttributeError):
        # plot_df is not defined yet (or is not something that can be copied).
        print('run interval_rate() first')
        return

    from bokeh.io import output_file, output_notebook, show
    from bokeh.charts import Line
    from bokeh.layouts import column

    p = Line(df[['day1', 'day7', 'matched']],
             plot_width=950,
             plot_height=400,
             legend='top_right')
    r = Line(df[['single_day_retention']],
             plot_width=950,
             plot_height=400,
             color='purple')
    layout = column(p, r)

    # Pick the output target first, then render once.
    if name:
        output_file(name[0] + '.html')
    else:
        output_notebook()
    show(layout)
Beispiel #2
0
def fnCreate_Chart_Line(df):
    """Build the "Date wise tweet graph" line chart and return its embed parts.

    Args:
        df: Data passed straight through to ``Line``.

    Returns:
        The ``(script, div)`` pair obtained from ``components(chart, CDN)``.
    """
    # Set the pandas HTML border option to 1.
    pd.options.html.border = 1

    chart_options = {
        'title': "Date wise tweet graph",
        'legend': False,
        'xlabel': 'Date',
        'ylabel': 'Count',
    }
    chart = Line(df, **chart_options)
    chart.logo = None

    embed_script, embed_div = components(chart, CDN)
    return embed_script, embed_div
Beispiel #3
0
def fnCreate_Chart_MultiLine(df):
    """Build the "Likes vs retweets visualization" chart and return its embed parts.

    Args:
        df: Data passed straight through to ``Line``.

    Returns:
        The ``(script, div)`` pair obtained from ``components(chart, CDN)``.
    """
    # Set the pandas HTML border option to 1.
    pd.options.html.border = 1

    chart_options = {
        'title': "Likes vs retweets visualization",
        'legend': "top_left",
        'xlabel': 'Date',
        'ylabel': 'Count',
    }
    chart = Line(df, **chart_options)
    chart.logo = None

    embed_script, embed_div = components(chart, CDN)
    return embed_script, embed_div
Beispiel #4
0
    def createBokehChart(self):
        """Create the line chart(s) for the current key and value fields.

        The working DataFrame is sorted by the first key field. When the
        "lineChartType" option is "subplots", one ``Line`` is built per value
        field; otherwise a single ``Line`` plots all value fields. If a
        "clusterby" option is set, a warning message is added first and the
        option is otherwise ignored.

        Returns:
            list: the ``Line`` charts that were created.
        """
        keyFields = self.getKeyFields()
        valueFields = self.getValueFields()
        workingFrame = self.getWorkingPandasDataFrame()
        xField = keyFields[0]
        data = workingFrame.sort_values(xField)
        useSubplots = self.options.get("lineChartType", "grouped") == "subplots"

        # 'Cluster By' never changes the charts here; it only produces a warning.
        if self.options.get("clusterby") is not None:
            if useSubplots:
                warning = "Warning: 'Cluster By' ignored when you have multiple Value Fields but subplots options selected"
            elif len(valueFields) > 1:
                warning = "Warning: 'Cluster By' ignored when you have multiple Value Fields but subplots option is not selected"
            else:
                warning = "Warning: 'Cluster By' ignored when grouped option with multiple Value Fields is selected"
            self.addMessage(warning)

        if useSubplots:
            # One chart per value field, sharing a total width of 800.
            return [
                Line(data,
                     x=xField,
                     y=valueField,
                     legend=self.showLegend(),
                     plot_width=int(800 / len(valueFields)))
                for valueField in valueFields
            ]

        # Grouped: every value field on one chart, coloured per field.
        return [
            Line(data,
                 x=xField,
                 y=valueFields,
                 color=valueFields,
                 legend=self.showLegend())
        ]
Beispiel #5
0
def fnCreate_Chart_MultiLine(df, strS, j=1):
    """Build the average-ticket-price chart and return its embed parts.

    Args:
        df: Data passed straight through to ``Line``.
        strS: Text appended to the chart title.
        j: When equal to 1 the legend is placed at "top_center"; for any
            other value ``legend=False`` is passed.

    Returns:
        The ``(script, div)`` pair obtained from ``components(chart, CDN)``.
    """
    # Set the pandas HTML border option to 1.
    pd.options.html.border = 1

    legend_position = "top_center" if j == 1 else False

    chart = Line(df,
                 title="Average of Ticket Price Date wise" + strS,
                 legend=legend_position,
                 xlabel='Travel Date',
                 ylabel='Average Ticket Price')

    # Legend styling.
    chart.legend.label_text_font_size = "7pt"
    chart.legend.orientation = "horizontal"
    chart.legend.click_policy = "hide"
    chart.logo = None

    embed_script, embed_div = components(chart, CDN)
    return embed_script, embed_div
def plot_state(state_data, varaible_names=()):
    """Build a step chart and a line chart of the selected state variables.

    Args:
        state_data: Object indexable by variable name
            (``state_data[name]``).
        varaible_names: Iterable of the names to plot. Defaults to an empty
            tuple; an immutable default avoids the shared-mutable-default
            pitfall of ``[]``.

    Returns:
        tuple: ``(step_chart, line_chart)``, both built from the same data.
    """
    data = {name: state_data[name] for name in varaible_names}

    palette = ("red", "green", "blue", "orange")

    # Each chart receives its own palette list, as in the original calls.
    step_chart = Step(data,
                      title="state variables - step graph",
                      legend="top_left",
                      ylabel='',
                      palette=list(palette))
    line_chart = Line(data,
                      title="state variables - line graph",
                      legend="top_left",
                      ylabel='',
                      palette=list(palette))

    return (step_chart, line_chart)
Beispiel #7
0
def abc_model(abc_id, model_id, t):
    """Render "model.html" with one tab per parameter of the given model.

    Args:
        abc_id: Assigned to ``history.id`` before anything is queried.
        model_id: Passed to ``history.get_distribution``.
        t: Either the string "max" (replaced by ``history.max_t``) or a value
            convertible with ``int``.

    Returns:
        The result of ``render_template("model.html", ...)``.
    """
    history = app.config["HISTORY"]
    history.id = abc_id
    t = history.max_t if t == "max" else int(t)

    df, w = history.get_distribution(model_id, t)
    df["CDF"] = w

    model_ids = history.get_model_probabilities().columns

    tabs = []
    for parameter in df:
        if parameter == "CDF":
            continue
        ordered = df[["CDF", parameter]].sort_values(parameter)
        cumulative = ordered.cumsum()
        # cumsum() also accumulated the parameter column; put the raw
        # (sorted) parameter values back so only "CDF" is cumulative.
        cumulative[parameter] = ordered[parameter]
        chart = Line(x=parameter, y="CDF", data=cumulative)
        tabs.append(Panel(child=chart, title=parameter))

    if tabs:
        plot = PlotScriptDiv(*components(Tabs(tabs=tabs)))
    else:
        plot = PlotScriptDiv("", "This model has no Parameters")

    return render_template("model.html",
                           abc_id=abc_id,
                           model_id=model_id,
                           plot=plot,
                           BOKEH=BOKEH,
                           model_ids=model_ids,
                           t=t,
                           available_t=list(range(history.max_t + 1)))
Beispiel #8
0
def loss_accuracy_plot(df, x, y):
    """Build a row of line charts, one per entry of ``y``, and wrap it in ``Plot``.

    Args:
        df: Data frame; missing values are replaced with 0 before plotting.
        x: Passed to every ``Line`` as its second positional argument.
        y: Iterable; each element is passed to one ``Line`` as its third
            positional argument.

    Returns:
        ``Plot(js_resources, css_resources, script, div)``.
    """
    filled = df.fillna(0)

    charts = []
    for y_entry in y:
        charts.append(Line(filled, x, y_entry, legend=True))

    layout = Row(*charts)
    script, div = components(layout, INLINE)
    js_resources = RESOURCE.render_js()
    css_resources = RESOURCE.render_css()
    return Plot(js_resources, css_resources, script, div)
Beispiel #9
0
    def line(self,
             dataframe,
             x=None,
             y=None,
             width=None,
             height=None,
             groups=None,
             palette=None,
             title="Line",
             xaxis_label=None,
             yaxis_label=None):
        """Create a ``Line`` chart from ``dataframe``.

        ``palette`` and ``width`` fall back to the entries of
        ``self.__default_options__`` when passed as None; the final size is
        whatever ``self._width_height(width, height)`` returns.

        Args:
            dataframe: Data handed to ``Line``.
            x, y: Forwarded to ``Line`` as ``x`` and ``y``.
            width, height: Requested chart size.
            groups: Forwarded to ``Line`` as ``color``.
            palette: Forwarded to ``Line`` as ``palette``.
            title: Chart title.
            xaxis_label, yaxis_label: Axis labels, applied only when truthy.

        Returns:
            The created ``Line`` chart.
        """
        if palette is None:
            palette = self.__default_options__.get('palette', None)
        if width is None:
            width = self.__default_options__.get('width', None)

        width, height = self._width_height(width, height)

        chart = Line(dataframe,
                     x=x,
                     y=y,
                     color=groups,
                     width=width,
                     height=height,
                     palette=palette,
                     title=title,
                     legend=True)

        if xaxis_label:
            chart._xaxis.axis_label = xaxis_label
        if yaxis_label:
            chart._yaxis.axis_label = yaxis_label

        return chart
Beispiel #10
0
    def line(self,
             dataframe,
             width=None,
             height=None,
             palette=None,
             title="Line",
             x_axis_label=None,
             y_axis_label=None,
             grid=None):
        """Create a ``Line`` chart from the transpose of ``dataframe``.

        Args:
            dataframe: Source data; ``dataframe.T`` is what gets plotted.
            width: Chart width; ``self.get_option('width')`` when None.
            height: Chart height; when falsy, width and height are both
                taken from ``self.golden_ratio(width, height)``.
            palette: Colour palette; ``self.get_option('palette')`` when
                None. It is then passed through ``self._palette`` together
                with the number of rows in ``dataframe``.
            title: Chart title.
            x_axis_label: Label for the x axis.
            y_axis_label: Label for the y axis.
            grid: Optional list-like; when given, the chart is appended to it
                and ``grid`` is returned instead of the chart.

        Returns:
            ``grid`` (with the chart appended) when ``grid`` is not None,
            otherwise the chart itself.
        """
        palette = self.get_option('palette') if palette is None else palette
        width = self.get_option('width') if width is None else width

        if not height:
            width, height = self.golden_ratio(width, height)

        palette = self._palette(palette, len(dataframe.index))

        # xlabel previously received y_axis_label, so x_axis_label was ignored.
        line = Line(dataframe.T,
                    legend="top_right",
                    color=palette,
                    ylabel=y_axis_label,
                    xlabel=x_axis_label,
                    title=title,
                    width=width,
                    height=height)

        if grid is not None:
            grid.append(line)
            return grid

        return line
Beispiel #11
0
def doPlot11(data, nrDataSource):
    """Build a responsive line chart of ``data`` and return its components.

    Args:
        data: Frame to plot; its first two column names become the axis
            labels.
        nrDataSource: Mapping whose 'name' entry is appended to the title.

    Returns:
        Whatever ``components`` returns for the chart with
        ``resources=None, wrap_script=False, wrap_plot_info=True``.
    """
    chart_title = "Line graph: " + nrDataSource['name']
    chart = Line(data,
                 title=chart_title,
                 xlabel=data.columns[0],
                 ylabel=data.columns[1],
                 responsive=True)
    return components(chart,
                      resources=None,
                      wrap_script=False,
                      wrap_plot_info=True)
Beispiel #12
0
    def showBacktestingResult(self):
        """Display the backtesting results.

        Writes the summary statistics from ``calculateBacktestingResult()``
        through ``self.output`` and then shows three stacked bokeh charts:
        the equity curve, the drawdown and a histogram of per-trade
        profit/loss.
        """
        result = self.calculateBacktestingResult()
        emit = self.output

        # Text summary
        emit('-' * 30)
        trade_times = result['timeList']
        emit('First Trade:\t%s' % trade_times[0])
        emit('Last Trade:\t%s' % result['timeList'][-1])

        trade_count = result['totalResult']
        emit('Total Trades:\t%s' % formatNumber(trade_count))
        capital = result['capital']
        emit('Total Return:\t%s' % formatNumber(capital))
        emit('Maximum Drawdown: \t%s' % formatNumber(min(result['drawdownList'])))

        # Per-trade averages
        emit('Ave Trade:\t%s' % formatNumber(capital / trade_count))
        emit('Ave Slippage:\t%s' %
             formatNumber(result['totalSlippage'] / result['totalResult']))
        emit('Ave Commission:\t%s' %
             formatNumber(result['totalCommission'] / result['totalResult']))

        emit('Win Ratio\t\t%s%%' % formatNumber(result['winningRate']))
        emit('Ave Win\t%s' % formatNumber(result['averageWinning']))
        emit('Ave Loss\t%s' % formatNumber(result['averageLosing']))
        emit('Profit Factor:\t%s' % formatNumber(result['profitLossRatio']))

        # Use Bokeh to plot
        from bokeh.charts import Area, Line, Histogram
        from bokeh.layouts import column
        from bokeh.io import show

        plotdata = {
            "TradeN": range(len(result['capitalList'])),
            "Equity Curve": result['capitalList'],
            "Maximum Drawdown": result['drawdownList'],
            "Profit/Loss": result['pnlList']
        }

        equity_chart = Line(plotdata,
                            x="TradeN",
                            y="Equity Curve",
                            color="blue",
                            width=1000,
                            height=300)
        drawdown_chart = Area(plotdata,
                              x="TradeN",
                              y="Maximum Drawdown",
                              color="tomato",
                              width=1000,
                              height=300)
        pnl_chart = Histogram(plotdata,
                              values="Profit/Loss",
                              bins=30,
                              color="green",
                              width=1000,
                              height=300)

        show(column(equity_chart, drawdown_chart, pnl_chart))
Beispiel #13
0
def create_line(data):
    """Return a ``Line`` chart of ``data`` built with the shared chart settings.

    The x axis is the 'year' column and the y series come from the
    module-level ``countries``.
    """
    chart_settings = dict(
        x='year',
        y=countries,
        legend=True,
        width=1400,
        height=300,
        ylabel='Energy use per capita',
        palette=['purple', 'green', 'blue', 'pink'],
    )
    return Line(data, **chart_settings)
Beispiel #14
0
def plot_close(tickr):
    """Chart the closing price of ``tickr`` for May 2017.

    Data for the dataset code ``'WIKI/' + tickr`` is fetched with ``qd.get``
    between 2017-5-1 and 2017-5-31; the column names of the result are
    printed and its 'Close' column is plotted.

    Args:
        tickr: Ticker symbol (string) appended to the 'WIKI/' prefix.

    Returns:
        The ``Line`` chart of the closing prices.
    """
    # NOTE(review): the API key is hard-coded in source; it should come from
    # configuration or the environment instead.
    qd.ApiConfig.api_key = "FtpsTU3J-q2x2pa2KBqV"

    dataset_code = 'WIKI/' + tickr
    mydata = qd.get(dataset_code, start_date="2017-5-1", end_date="2017-5-31")

    # print(...) with one argument behaves the same on Python 2 and 3.
    print(list(mydata))

    closing = mydata['Close']
    return Line(closing,
                title="Closing Price/ Day in May",
                width=500,
                height=400,
                xlabel='Dates',
                ylabel='Closing Price')
Beispiel #15
0
def plot_column(data, column_name, save=True, display=True):
    """Plot one column of ``data`` as a 1200x600 line chart.

    Args:
        data: Data handed to ``Line``.
        column_name: Column plotted on the y axis; also used as the file stem
            when saving.
        save: When truthy, direct output to ``'plots/<column_name>.html'``.
        display: When truthy, call ``show`` on the chart.

    Returns:
        The created ``Line`` chart.
    """
    chart = Line(data, y=column_name, plot_width=1200, plot_height=600)

    if save:
        target_path = 'plots/' + column_name + '.html'
        output_file(target_path)

    if display:
        show(chart)

    return chart
def doPlot11(data, nrDataSource):
    """Build a responsive log-y line chart of ``data`` and return its components.

    Args:
        data: Frame to plot; its first column is the x axis and its first two
            column names become the axis labels.
        nrDataSource: Mapping whose 'name' entry is appended to the title.

    Returns:
        Whatever ``components`` returns for the chart with
        ``resources=None, wrap_script=False, wrap_plot_info=True``.
    """
    x_column = data.columns[0]
    chart = Line(data,
                 y_mapper_type="log",
                 x=x_column,
                 xlabel=data.columns[0],
                 ylabel=data.columns[1],
                 title="Line graph: " + nrDataSource['name'],
                 responsive=True)

    # Fixed x-axis ticks: one every 5 units, 10 minor ticks in between.
    x_ticker = SingleIntervalTicker(interval=5, num_minor_ticks=10)
    chart._xaxis.ticker = x_ticker

    return components(chart,
                      resources=None,
                      wrap_script=False,
                      wrap_plot_info=True)
Beispiel #17
0
def trying():
    """Render 'trying.html' with a 30-day closing-price chart for ``app.vars``.

    Prices are requested from the Quandl WIKI/PRICES datatable for the window
    from 30 days ago up to today; the chart is annotated with the mean and
    standard deviation of the 'close' column.

    Returns:
        The result of ``render_template('trying.html', script=..., div=...)``.
    """
    now = datetime.datetime.now().strftime("%Y%m%d")
    now_30 = (datetime.datetime.now() - datetime.timedelta(days=30)).strftime("%Y%m%d")
    tick = app.vars

    query_url = ("https://www.quandl.com/api/v3/datatables/WIKI/PRICES.json?date.gte=" + now_30
                 + "&date.lt=" + now
                 + "&ticker=" + tick
                 + "&api_key=yvSB52TFjUsFTyZU-n--")
    stock_data = requests.get(query_url)

    datatable = stock_data.json()["datatable"]
    rows = datatable["data"]
    column_specs = datatable["columns"]

    df = pandas.DataFrame(rows)
    df.columns = [str(spec["name"]) for spec in column_specs]

    # Summary statistics shown as a caption on the chart.
    close_mean = "%.2f" % df[["close"]].mean()
    close_std = "%.2f" % df[["close"]].std()
    captions = "mean=" + close_mean + ", std=" + close_std

    chart = Line(df[["date", "close"]],
                 x="date",
                 y="close",
                 xlabel="date",
                 ylabel="closing price",
                 legend=False,
                 title=tick + " stock chart from " + now_30 + " to " + now)
    chart.add_layout(Label(x=400,
                           y=100,
                           x_units='screen',
                           y_units='screen',
                           text=captions,
                           text_font_size='9pt',
                           text_font_style="bold"))

    script, div = components(chart)
    return render_template('trying.html', script=script, div=div)
Beispiel #18
0
def HLevelLine():
    """Build a line chart of the module-level ``sample`` data.

    'Year' is the x axis; every column after the first is plotted as a
    series. The chart's hover tool is given year/temperature tooltips.

    Returns:
        The configured ``Line`` chart.
    """
    # Every column after the first becomes one y series.
    series_columns = list(sample.columns[1:])

    tool_names = 'pan,wheel_zoom,hover,crosshair,resize,reset'
    hover_rows = [("Year", "$~x"),
                  ("Temp", "$y")]

    chart = Line(sample,
                 x='Year',
                 y=series_columns,
                 title="Hight Level Bokeh Line Chart",
                 legend="bottom_left",
                 ylabel='Temp',
                 tools=tool_names,
                 width=600,
                 height=450,
                 responsive=True)

    # Attach the tooltips to the hover tool selected from the chart.
    hover = chart.select(HoverTool)
    hover.tooltips = hover_rows

    chart.logo = None

    return chart
Beispiel #19
0
def main():
    """Render 'main.html' with the overview charts.

    Optionally triggers ``betalyzer.recalculate()`` when the request carries
    ``recalculate=True``, then builds: mean beta per sector (bar), mean beta
    per market-cap decile (bar), beta against log market cap (scatter) and
    the beta history of the first three tickers (line).

    Returns:
        The result of ``render_template('main.html', ...)``.
    """
    # A missing 'recalculate' argument yields None, which never equals 'True'.
    if request.args.get('recalculate') == 'True':
        betalyzer.recalculate()

    tickers_frame = betalyzer.df_tickers

    # Sector betas bar chart
    by_sector = betalyzer.df_tickers.groupby('sector')['beta'].mean()
    sector_chart = Bar(by_sector, plot_width=550, plot_height=400, legend=None)
    sector_script, sector_div = components(sector_chart)

    # Market-cap decile betas bar chart
    by_decile = betalyzer.df_tickers.groupby('market_cap_decile')['beta'].mean()
    decile_chart = Bar(by_decile, plot_width=550, plot_height=400, legend=None)
    decile_script, decile_div = components(decile_chart)

    # Market cap vs. beta scatter plot
    cap_scatter = Scatter(betalyzer.df_tickers,
                          x='market_cap_log',
                          y='beta',
                          plot_width=550,
                          plot_height=400)
    scatter_script, scatter_div = components(cap_scatter)

    # Beta history for the first three tickers
    first_three = betalyzer.df_tickers['ticker'].head(3)
    history_chart = Line(betalyzer.df_betas[first_three],
                         plot_width=550,
                         plot_height=400)
    history_script, history_div = components(history_chart)

    template_context = dict(
        dt_tickers=betalyzer.df_tickers.to_dict(orient='records'),
        bk_sector_betas_script=sector_script,
        bk_sector_betas_div=sector_div,
        bk_mc_betas_script=decile_script,
        bk_mc_betas_div=decile_div,
        scatter_script=scatter_script,
        scatter_div=scatter_div,
        bk_history_script=history_script,
        bk_history_div=history_div,
    )
    return render_template('main.html', **template_context)
def get_variable(config):
    """Collect the tables and rank plot for one name lookup.

    Calls ``get_result(config.gender, config.decade, config.name)``, renders
    several HTML tables from the returned frames and saves a "Rank across
    Time" line chart to ``output/<name>_<gender>_<decade>.html``.

    Args:
        config: Object with ``name``, ``gender`` and ``decade`` attributes.

    Returns:
        dict with the keys 'plot_path', 'result_table', 'rank_table',
        'birth_table', 'top_table', 'rank', 'config', 'name' and 'message'.
        'message' is non-empty when the name returned by ``get_result``
        differs from the requested one.
    """
    from bokeh.charts import Line, save, output_file, ColumnDataSource

    name, rank, df, decade_df = get_result(config.gender, config.decade, config.name)

    # `!=` replaces the Python-2-only `<>` operator (same semantics).
    if config.name != name:
        message = "%s not found. Do you mean %s?" % (config.name, name)
    else:
        message = ""

    birth_table = df.pivot(index='Decade', columns='Name', values='Births').fillna(0).to_html()
    rank_table = df.pivot(index='Decade', columns='Name', values='Rank').fillna(0).to_html()
    # NOTE(review): DataFrame.sort() only exists in old pandas releases;
    # newer ones need sort_values() - confirm the pinned pandas version.
    result_table = df[df['Rank'] == rank][['Decade', 'Name', 'Rank']].sort('Decade').to_html(index=False)
    top_table = decade_df[(decade_df['Rank'] <= 8) &
                          (decade_df['Decade'] == config.decade)].sort('Rank').to_html(index=False)

    plot_path = "%s_%s_%i.html" % (config.name, config.gender, config.decade)
    output_file("output/" + plot_path, mode='inline')

    # One tooltip row per data column.
    tooltips = [(c, '@' + c) for c in df.columns]
    p = Line(df, x='Decade', y='Rank', title="Rank across Time", color='Name',
             xlabel="Decade", ylabel="Rank",
             tooltips=tooltips)
    # Overlay grey markers on every (Decade, Rank) point.
    p.circle('Decade', 'Rank', color='gray', alpha=0.5, source=ColumnDataSource(df))
    save(p)

    return {
        'plot_path': plot_path,
        'result_table': result_table,
        'rank_table': rank_table,
        'birth_table': birth_table,
        'top_table': top_table,
        'rank': rank,
        'config': config,
        'name': name,
        'message': message,
    }
Beispiel #21
0
def getPlot(ticker):
    """Return a line chart of the 'Open' price over 'Date' for ``ticker``.

    The data is downloaded from the Quandl WIKI dataset endpoint for the
    given ticker.

    Args:
        ticker: Ticker symbol. Previously this argument was overwritten with
            'GOOG' unconditionally; 'GOOG' is now only the fallback for a
            falsy value (None or empty string).

    Returns:
        The ``Line`` chart titled "Cool".
    """
    ticker = ticker or 'GOOG'

    # NOTE(review): the API key is hard-coded in source; it should come from
    # configuration or the environment instead.
    r = requests.get('https://www.quandl.com/api/v3/datasets/WIKI/' + ticker +
                     '/data.json?api_key=Eebg1xGXqxhS11D52xGs')
    asJson = json.loads(r.content)
    dataset = asJson['dataset_data']

    dataFormated = pd.DataFrame(np.array(dataset['data']),
                                columns=dataset['column_names'])
    # Convert the two plotted columns to proper datetime / numeric values.
    dataFormated['Date'] = pd.to_datetime(dataFormated.Date)
    dataFormated['Open'] = pd.to_numeric(dataFormated.Open)

    line = Line(dataFormated, x='Date', y='Open', title="Cool")

    return line
def plot_plan_bokeh_2(plan):
    """Plot a plan as a line of operator indices and show it.

    Each step's operator is converted with ``str``; distinct operator strings
    are numbered in order of first appearance, and the sequence of those
    numbers is plotted. The chart is written to 'plots_html/line.html' and
    shown.

    Args:
        plan: Iterable of ``(operator, value)`` pairs. It is traversed only
            once, so one-shot iterators work too.

    Returns:
        None.
    """
    operator_index = {}
    y = []
    for (o, _v) in plan:
        label = str(o)
        # setdefault hands out the next free index the first time a label is
        # seen and returns the existing index afterwards (O(1) per step).
        y.append(operator_index.setdefault(label, len(operator_index)))

    xyvalues = np.array(y)
    line = Line(xyvalues, title="plan", legend="top_left", ylabel='operator')

    output_file('plots_html/line.html')
    show(line)
Beispiel #23
0
def ticker(ticker):
    """Render 'ticker.html' with the charts for a single ticker.

    Builds the beta history line, a scatter of the ticker against
    ``betalyzer.market`` over the first ``betalyzer.window`` rows of
    ``df_changes``, and a histogram comparing the ticker's changes with
    'SPY' over the first 500 rows.

    Args:
        ticker: Column name in ``betalyzer.df_betas`` / ``df_changes`` and
            index label in ``betalyzer.df_tickers``.

    Returns:
        The result of ``render_template('ticker.html', ...)``.
    """
    # Beta history line
    history_chart = Line(betalyzer.df_betas[ticker],
                         plot_width=1000,
                         plot_height=400)
    bokeh_script, bokeh_div = components(history_chart)

    # Ticker vs. market scatter
    windowed_changes = betalyzer.df_changes.head(betalyzer.window)
    market_scatter = Scatter(windowed_changes,
                             x=betalyzer.market,
                             y=ticker,
                             plot_width=550,
                             plot_height=400)
    scatter_script, scatter_div = components(market_scatter)

    # Histogram of changes: reshape the two columns into long form with a
    # 'ticker' label column and a 'change' value column.
    recent_changes = betalyzer.df_changes[[ticker, 'SPY']].head(500)
    df_hist = recent_changes.unstack().reset_index()
    df_hist.rename(columns={'level_0': 'ticker', 0: 'change'}, inplace=True)
    change_hist = Histogram(df_hist,
                            values='change',
                            color='ticker',
                            bins=20,
                            legend='top_right',
                            plot_width=550,
                            plot_height=400)
    hist_script, hist_div = components(change_hist)

    template_context = dict(
        ticker=ticker,
        bokeh_script=bokeh_script,
        bokeh_div=bokeh_div,
        scatter_script=scatter_script,
        scatter_div=scatter_div,
        hist_script=hist_script,
        hist_div=hist_div,
        window=betalyzer.window,
        dt_ticker=betalyzer.df_tickers.loc[ticker].to_dict(),
    )
    return render_template('ticker.html', **template_context)
Beispiel #24
0
def generate_graph():
    """Fetch prices for ``app.selection`` and store the chart parts on ``app``.

    The Quandl WIKI/PRICES datatable is queried with ``app.selection`` as
    request parameters, the result is indexed by date and plotted, and the
    ``components`` of the chart are stored as ``app.s`` and ``app.d``.

    Returns:
        None.
    """
    url = 'https://www.quandl.com/api/v3/datatables/WIKI/PRICES.json?'
    response = requests.get(url, app.selection)
    datatable = response.json()['datatable']

    data = pd.DataFrame(datatable['data'])
    data.columns = pd.DataFrame(datatable['columns'])['name']

    def parse_iso_date(text):
        # 'YYYY-MM-DD' -> datetime
        year, month, day = text.split('-')
        return pd.datetime(int(year), int(month), int(day))

    data['date'] = data['date'].apply(parse_iso_date)
    data = data.set_index(['date'])

    chart_title = 'Time series data for ' + app.selection['ticker']
    chart = Line(data, title=chart_title, xlabel='date', ylabel='price ($)')
    app.s, app.d = components(chart)
Beispiel #25
0
from collections import OrderedDict
import time

import numpy as np

from bokeh.charts import Line, curdoc, cursession, output_server, show
from bokeh.models import GlyphRenderer

# Animated sine/cosine line chart pushed to a bokeh server session.

# Number of sample points along x.
N = 80
x = np.linspace(0, 4*np.pi, N)

# One series per key: sin(x) and cos(x).
xyvalues = OrderedDict(sin=np.sin(x), cos=np.cos(x))

output_server("line_animate")

chart = Line(xyvalues, title="Lines", ylabel='measures')

curdoc().add(chart)

show(chart)

# Grab the data source of the first glyph renderer so it can be updated.
renderer = chart.select(dict(type=GlyphRenderer))
ds = renderer[0].data_source

# Runs forever: the scale factor i sweeps 1 -> -1 -> 1 in 200 steps.
while True:
    for i in np.hstack((np.linspace(1, -1, 100), np.linspace(-1, 1, 100))):
        for k, values in xyvalues.items():
            if k != 'x':
                # Overwrite the 'y_<name>' column with the rescaled series.
                ds.data['y_%s'%k] = values * i
        # Store the updated data source in the session, then pause 50 ms.
        cursession().store_objects(ds)
        time.sleep(0.05)
# NOTE(review): `mydata` is not defined in this fragment; it is presumably a
# DataFrame created earlier in the script - confirm.
mydata.head()

# Plot with pandas.
mydata.plot(figsize=(12, 8))

# `sns` is not referenced below; presumably imported only for its effect on
# the plot styling - TODO confirm.
import seaborn as sns
mydata.plot(figsize=(12, 8))

# --- Bokeh: notebook output ---
from bokeh.plotting import show
from bokeh.io import output_notebook
from bokeh.charts import Line

# Original author's note: this does not work in Spyder.
output_notebook()
p = Line(mydata, legend='bottom_right')
p.width = 600
p.height = 400
show(p)

# --- Bokeh: standalone HTML output ---
from bokeh.plotting import figure
from bokeh.palettes import RdYlBu11
from bokeh.plotting import show, output_file, reset_output

output_file("bokehplot.html")

mytools = ['pan', 'box_zoom', 'resize', 'wheel_zoom',
           'reset']  # Does not allow save and question options
p = figure(width=800, height=600, tools=mytools, x_axis_type='datetime')
# Every column name except the last one.
cols = mydata.columns.values[:-1]
Beispiel #27
0
def make_plot():
    sel_cols = [output_cols[i] for i in chkbx.active]
    p = Line(data=df, x='Date', y=sel_cols, plot_height=400)
    return p
Beispiel #28
0
import pandas as pd
from bokeh.sampledata.degrees import data

defaults.width = 500
defaults.height = 300

TOOLS='box_zoom,box_select,hover,crosshair,resize,reset'

TOOLTIPS = [ ("y", "$~y"), ("x", "$~x") ]

data = data[['Biology', 'Business', 'Computer Science', "Year"]]
data = pd.melt(data, id_vars=['Year'],
               value_vars=['Biology', 'Business', 'Computer Science'],
               value_name='Count', var_name='Degree')

vline = Line(data, y='Count', color='Degree', title="Lines VLine", ylabel='measures',
             tools=TOOLS)

hline = Line(data, y='Count', color='Degree', title="Lines HLine",
             ylabel='measures', tools=TOOLS)

int_vline = Line(data, y='Count', color='Degree', title="Lines VLine Interp",
                 ylabel='measures', tools=TOOLS)

int_hline = Line(data, y='Count', color='Degree', title="Lines HLine Interp",
                 ylabel='measures', tools=TOOLS)

scatter_point = Scatter(data, x='Year', y='Count', color='Degree',
                        title="Scatter mouse", ylabel='measures', legend=True,
                        tools=TOOLS)

scatter = Scatter(data, x='Year', y='Count', color='Degree',
    return breath, status, vol


# Load the flow/pressure recording: skip 2 header and 2 footer lines; the
# three columns are named flow, paw and other.
df = pd.read_csv(r'c:\Research_data\DH_data\test-840-1.txt', engine = 'python', skiprows = 2, skipfooter = 2,
                 names = ['flow', 'paw', 'other'])

# Keep the row number as an 'index' column (used as the x axis below).
df.reset_index(drop = False, inplace = True)
df.drop(labels = 'other', axis = 1, inplace = True)
# Non-numeric entries become NaN.
df.flow = pd.to_numeric(df.flow, errors = 'coerce')
df.paw = pd.to_numeric(df.paw, errors = 'coerce')

df['breath'], df['status'], df['vol'] = count_breath(df.flow.values)
# Drop rows whose flow value could not be parsed.
df.dropna(axis = 0, how = 'all', subset = ['flow'], inplace = True)

# Running total of vol within each breath.
df.vol = df.groupby('breath')['vol'].cumsum()

p = Line(df, x = 'index', y = ['flow', 'paw', 'vol', 'breath', 'status'], color = 'red')
# Extra y range named 'flow' (0..30) with its own axis on the left.
p.extra_y_ranges = {'flow': Range1d(start = 0, end = 30)}
p.add_layout(LinearAxis(y_range_name = 'flow'), 'left')

check_button = CheckboxButtonGroup(labels = ['Double Stacked', 'Flow Limited', 'Ineffective Trigger'],
                                   active = [0, 0, 0])
next_button = Button(label = 'Next', type = 'success')
# Register a callable as the handler. The previous code called print('ok')
# immediately and passed its return value (None) to on_click.
next_button.on_click(lambda: print('ok'))
output_file('test.html')

show(vform(p, check_button, next_button))


Beispiel #30
0
defaults.height = 300

TOOLS = 'box_zoom,box_select,hover,crosshair,resize,reset'

TOOLTIPS = [("y", "$~y"), ("x", "$~x")]

data = data[['Biology', 'Business', 'Computer Science', "Year"]]
data = pd.melt(data,
               id_vars=['Year'],
               value_vars=['Biology', 'Business', 'Computer Science'],
               value_name='Count',
               var_name='Degree')

vline = Line(data,
             y='Count',
             color='Degree',
             title="Lines VLine",
             ylabel='measures',
             tools=TOOLS)

hline = Line(data,
             y='Count',
             color='Degree',
             title="Lines HLine",
             ylabel='measures',
             tools=TOOLS)

int_vline = Line(data,
                 y='Count',
                 color='Degree',
                 title="Lines VLine Interp",
                 ylabel='measures',
Beispiel #31
0
from bokeh.models import HoverTool
from bokeh.charts import Chart, Step, Line, Area, Scatter, Bar, vplot, hplot, show, output_file

from py import path
HERE = path.local(__file__).dirpath()

xyvalues = pd.read_csv(
    str(HERE.join("percent-bachelors-degrees-women-usa.csv")))
index = xyvalues.pop("Year")
xyvalues = xyvalues[['Biology', 'Business', 'Computer Science']]

TOOLS = 'box_zoom,box_select,hover,crosshair,resize,reset'
output_file("lines.html", title="line.py example")
vline = Line(xyvalues,
             title="Lines VLine",
             ylabel='measures',
             width=500,
             height=300,
             tools=TOOLS)
hline = Line(xyvalues,
             title="Lines HLine",
             ylabel='measures',
             width=500,
             height=300,
             tools=TOOLS)
int_vline = Line(xyvalues,
                 title="Lines VLine Interp",
                 ylabel='measures',
                 width=500,
                 height=300,
                 tools=TOOLS)
int_hline = Line(xyvalues,
Beispiel #32
0
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.charts import Line

# Three sample series of 14 measurements each, keyed by interpreter name.
# NOTE(review): keyword arguments only reach OrderedDict in source order on
# Python 3.6+; on older interpreters the series order is not guaranteed.
xyvalues = OrderedDict(
    python=[2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    pypy=[12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    jython=[22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
)

# Any of the following commented-out forms is also a valid Line input
# (per the original author):
#xyvalues = pd.DataFrame(xyvalues)
#xyvalues = xyvalues.values()
#xyvalues = np.array(xyvalues.values())

# Build the chart, then set the x label and legend position and show it
# through the chained calls.
line = Line(xyvalues, title="Lines", ylabel='measures', filename="lines.html")
line.xlabel('time').legend("top_left").show()
Beispiel #33
0
import numpy as np
import pandas as pd
from bokeh.models import HoverTool
from bokeh.charts import Chart, Step, Line, Area, Scatter, Bar, vplot, hplot, show, output_file

from py import path
HERE = path.local(__file__).dirpath()

xyvalues = pd.read_csv(str(HERE.join("percent-bachelors-degrees-women-usa.csv")))
index = xyvalues.pop("Year")
xyvalues = xyvalues[['Biology', 'Business', 'Computer Science']]


TOOLS='box_zoom,box_select,hover,crosshair,resize,reset'
output_file("lines.html", title="line.py example")
vline = Line(xyvalues, title="Lines VLine", ylabel='measures', width=500, height=300,
             tools=TOOLS)
hline = Line(xyvalues, title="Lines HLine", ylabel='measures', width=500, height=300,
             tools=TOOLS)
int_vline = Line(xyvalues, title="Lines VLine Interp", ylabel='measures', width=500, height=300,
             tools=TOOLS)
int_hline = Line(xyvalues, title="Lines HLine Interp", ylabel='measures', width=500, height=300,
             tools=TOOLS)
svalues = {}
# svalues['Business'] = [(i, v) for i, v in zip(index, xyvalues['Business'])]
for k in xyvalues.columns:
    svalues[k] = [(i, v) for i, v in zip(index, xyvalues[k])]
# # import pdb; pdb.set_trace()
scatter_point = Scatter(svalues, title="Scatter mouse", ylabel='measures', width=500, height=300,
             legend=True,
             tools=TOOLS)
scatter = Scatter(svalues, title="Scatter V Line", ylabel='measures', width=500, height=300,
Beispiel #34
0
# Sample dataset: three numeric columns that measure the same kind of thing,
# plus one categorical column alternating 'foo' / 'bar'.
data = {
    'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
    'test': ['foo', 'bar'] * 7,
}
df = pd.DataFrame(data)

# Attach one consecutive calendar day per row, starting 1/1/2015, as if the
# values had been sampled daily.
df['date'] = pd.date_range('1/1/2015', periods=len(df), freq='D')

# With no x/y given, a dataframe input plots each numerical column as a line.
line = Line(df)

# Same three columns selected explicitly, against the default x ...
line0 = Line(
    df,
    y=['python', 'pypy', 'jython'],
    title="Interpreters (y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)

# ... and against the 'date' column.
line1 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)
Beispiel #35
0
def _render_no_results(search_terms):
    """Render the 'no results' page for a single-term search."""
    return flask.render_template('no_results.html', query=search_terms,
                                 available_options=AVAILABLE_OPTIONS,
                                 search_mode='single')


def _rolling_mean(values, window):
    """
    Return the rolling mean of ``values`` over ``window`` observations.

    Uses the ``.rolling()`` accessor when the object provides it and falls
    back to the legacy ``pd.rolling_mean`` function otherwise, so the code
    runs on pandas releases from either side of that API change.
    """
    if hasattr(values, 'rolling'):
        return values.rolling(window).mean()
    return pd.rolling_mean(values, window)


def _gender_age_comparison(gender, gender_counts, background, query):
    """
    Build the age comparison frame for one gender code.

    :param gender: gender key to look up (the caller passes 'M' or 'F')
    :param gender_counts: per-gender match counts of the query
    :param background: per-gender age distribution of the whole corpus
    :param query: per-gender age distribution of the query
    :return: tuple ``(match_count, frame)``; when the query has no entry for
        ``gender`` the count is 0 and the frame holds only the background
    """
    try:
        match_count = gender_counts[gender]
        frame = pd.DataFrame({'background distribution': background[gender],
                              'query': _rolling_mean(query[gender], ROLLING_MEAN_FRAME)})
    except KeyError:
        match_count = 0
        frame = pd.DataFrame({'background distribution': background[gender]})
    # The charts read the x values from a regular column, not from the index.
    frame['i'] = frame.index
    return match_count, frame


def _age_line_chart(frame, title, query_color):
    """
    Build a Line chart of the columns in ``frame`` plotted against column 'i'.

    A fresh Range1d is created on every call so that no range object is
    shared between charts.
    """
    return Line(frame,
                x='i',
                title=title,
                xlabel='age',
                ylabel="percentage",
                x_range=Range1d(start=MIN_AGE, end=MAX_AGE),
                logo=None,
                toolbar_location="below",
                legend='top_right',
                color=['silver', query_color],
                webgl=False)


def do_single_search(request_form):
    """
    search method called from both welcome() and search()

    Runs the single-term query restricted to the selected language and
    country, compares its gender, age and region distributions with the
    totals for that language/country, and renders the result page with the
    corresponding charts.

    :param request_form: mapping with the keys "singleTermQuery" (the search
        term) and "languageAndRegion" (a "<language>:<country>" string)
    :return: the rendered 'single_term_results.html' page, or the rendered
        'no_results.html' page when the query fails or matches nothing
    """
    search_terms = request_form["singleTermQuery"].lower()
    language_var, country_var = request_form["languageAndRegion"].split(':', 1)
    region_filter = ["country_s:%s" % country_var, "langid_s:%s" % language_var]

    # NOTE(review): search_terms comes from the request and is interpolated
    # into the query string unescaped -- confirm whether query-syntax
    # characters should be escaped before they reach the search backend.
    try:
        specific_query = simple_query_totals({"query": "body_text_ws:%s" % search_terms,
                                              "filter": list(region_filter)})
    except (KeyError, HTTPError):
        return _render_no_results(search_terms)

    matches = specific_query['num_docs'].sum()

    # --- totals for everything in the selected language/country ---
    totals = simple_query_totals({"query": "*:*",
                                  "filter": list(region_filter)})

    gender_totals = totals.groupby('gender').num_docs.sum()

    age_totals = sort_and_filter_age(totals.groupby('age').num_docs.sum())
    age_totals_norm = age_totals / age_totals.sum()

    age_and_gender_totals = prepare_age_and_gender(totals)

    # --- gender ---
    gender_specific_query = specific_query.groupby('gender').num_docs.sum()
    abs_percentages = gender_specific_query / gender_totals
    share_sum = abs_percentages.sum()
    # A pandas sum is a NumPy float: dividing by a zero sum yields inf rather
    # than raising ZeroDivisionError, so the empty case is tested explicitly
    # ("not > 0" also covers NaN).
    if not share_sum > 0:
        return _render_no_results(search_terms)
    renormalizer = 1.0 / share_sum
    gender_query_adjusted = abs_percentages * renormalizer

    # --- age ---
    age_specific_query = sort_and_filter_age(specific_query.groupby('age').num_docs.sum())
    age_specific_query_norm = age_specific_query / age_specific_query.sum()
    compare_age_df = pd.DataFrame({'background distribution': age_totals_norm,
                                   'query': _rolling_mean(age_specific_query_norm, ROLLING_MEAN_FRAME)})
    compare_age_df['i'] = compare_age_df.index

    # --- age and gender ---
    age_and_gender_specific_query = prepare_age_and_gender(specific_query)
    age_specific_male_totals, compare_male_df = _gender_age_comparison(
        'M', gender_specific_query, age_and_gender_totals, age_and_gender_specific_query)
    age_specific_female_totals, compare_female_df = _gender_age_comparison(
        'F', gender_specific_query, age_and_gender_totals, age_and_gender_specific_query)

    # --- NUTS regions ---
    nuts_query = specific_query.groupby('nuts_3').num_docs.sum()
    nuts_total = nuts_query.sum()
    nuts_query_norm = nuts_query / nuts_total
    # Regions whose share of the matches lies above the median share.
    special_regions = nuts_query_norm > nuts_query_norm.median()
    flagged_regions = special_regions[special_regions].index
    outliers = ', '.join(sorted('%s (%s)' % (NUTS_NAMES[x], x) for x in flagged_regions))

    # --- plots ---
    gender_plot = Bar(gender_query_adjusted,
                      title="Gender distribution",
                      ylabel="percentage",
                      logo=None,
                      toolbar_location="below",
                      webgl=False)
    age_plot = _age_line_chart(compare_age_df, "Age distribution", 'red')
    age_gender_plot_M = _age_line_chart(compare_male_df, "Age distribution for men", 'green')
    age_gender_plot_F = _age_line_chart(compare_female_df, "Age distribution for women", 'blue')

    bokeh_script, (gender_plot_div, age_plot_div, age_gender_plot_F_div, age_gender_plot_M_div) = components(
        (gender_plot, age_plot, age_gender_plot_F, age_gender_plot_M))

    return flask.render_template('single_term_results.html',
                                 query=search_terms,
                                 matches=matches,
                                 bokeh_script=bokeh_script,
                                 gender_query_adjusted=gender_query_adjusted,
                                 gender_plot=gender_plot_div,
                                 age_plot=age_plot_div,
                                 age_gender_plot_F=age_gender_plot_F_div,
                                 age_gender_plot_M=age_gender_plot_M_div,
                                 country_code=country_var,
                                 map_views=MAP_VIEWS,
                                 nuts_query=nuts_query_norm.to_json(),
                                 outliers=outliers,
                                 gender_total=gender_specific_query.sum(),
                                 age_total=age_specific_query.sum(),
                                 age_total_M=age_specific_male_totals,
                                 age_total_F=age_specific_female_totals,
                                 nuts_total=nuts_total,
                                 available_options=AVAILABLE_OPTIONS)
Beispiel #36
0
from bokeh.charts import Line, show, output_file

# Sample data: three numeric series that all measure the same quantity, plus
# a categorical 'test' column that alternates between two labels.
data = {
    'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
    'test': ['foo', 'bar'] * 7,
}

# One chart for all three measures: naming the same columns for y, dash and
# color gives every series its own color and dash style.
line = Line(
    data,
    y=['python', 'pypy', 'jython'],
    dash=['python', 'pypy', 'jython'],
    color=['python', 'pypy', 'jython'],
    title="Interpreter Sample Data",
    ylabel='Duration',
    legend=True,
)

# Write the chart to a standalone HTML file and display it.
output_file("line_single.html", title="line_single.py example")
show(line)
Beispiel #37
0
# Give every row its own consecutive calendar day, as if the values had been
# sampled daily starting on that date.
df['date'] = pd.date_range('1/1/2015', periods=len(df), freq='D')

# With no x/y given, a dataframe input plots each numerical column as a line.
line = Line(df)

# Three series selected explicitly; x is left to the chart's default.
line0 = Line(
    df,
    y=['python', 'pypy', 'jython'],
    title="Interpreters (y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)

# Same series plotted against the 'date' column.
line1 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)

# As line1, with the dash style also keyed on the series.
line2 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y, dash=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)
line2.title_text_font_size = '11pt'

# As line2, with the color keyed on the series as well.
line3 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash=['python', 'pypy', 'jython'],
    color=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y, dash, color=['python', 'pypy', 'jython'])",
    ylabel='Duration',
    legend=True,
)
line3.title_text_font_size = '11pt'

# Color keyed on the series, dash keyed on the 'test' column, plus tooltips
# showing the series name and the 'test' value.
line4 = Line(
    df,
    x='date',
    y=['python', 'pypy', 'jython'],
    dash='test',
    color=['python', 'pypy', 'jython'],
    title="Interpreters (x='date', y, color=['python', 'pypy', 'jython'], dash='test') with tooltips",
    ylabel='Duration',
    legend=True,
    tooltips=[('series', '@series'), ('test', '@test')],
)