def report():
    """Render the distribution of movie ratings as an embedded Bokeh chart.

    Reads every non-monitoring row of the ``movie_ratings`` Hive table,
    counts rows per ``movie_ratings.rating`` value and embeds the resulting
    bar chart in the ``/main/embed.html`` template.

    Returns:
        ``encode_utf8`` applied to the rendered chart page. When no cursor is
        available or the query fails, the rendered
        ``/main/bi_connection_issue.html`` template is returned instead; when
        the query yields no rows, the rendered ``/main/bi_no_records.html``
        template is returned.
    """
    cursor = get_hive_cursor()
    if cursor is None:
        return render_template('/main/bi_connection_issue.html')

    # FIXME we probably want to create aggregates on hadoop
    # and cache them rather than returning the whole data
    # set here

    # we need to ignore monitoring pings which have rating user_id = -1
    # and movie_id = -1
    try:
        cursor.execute(
            "select * from movie_ratings where customer_id <> '-1' and movie_id <> '-1'",
            configuration={
                'hive.mapred.supports.subdirectories': 'true',
                'mapred.input.dir.recursive': 'true'
            })
    except Exception:
        # Any query failure is reported to the user as a connection issue.
        # ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit propagating.
        return render_template('/main/bi_connection_issue.html')

    df = as_pandas(cursor)
    if df.shape[0] == 0:
        return render_template('/main/bi_no_records.html')

    # Imported lazily, as in the original, so the module loads without
    # touching bokeh.charts until a report is actually requested.
    from bokeh.charts import Bar

    fig = Bar(
        df,
        label='movie_ratings.rating',
        values='movie_ratings.rating',
        agg='count',
        title='Distribution of movie ratings',
        legend=False
    )
    fig.plot_height = 400
    fig.xaxis.axis_label = 'Rating'
    fig.yaxis.axis_label = 'Count ( Rating )'

    js_resources = INLINE.render_js()
    css_resources = INLINE.render_css()
    script, div = components(fig)

    html = flask.render_template(
        '/main/embed.html',
        plot_script=script,
        plot_div=div,
        js_resources=js_resources,
        css_resources=css_resources,
    )
    return encode_utf8(html)
def output_chart(issues_df, output_mode='static'):
    """Build the stacked issues bar chart and send it to the selected output.

    Args:
        issues_df: Data handed to ``Bar``. Bars are labelled by its
            ``value_delivered`` column; the ``status`` column is counted and
            used for stacking.
        output_mode: ``'static'`` writes the chart to ``ISSUES_FILE``;
            ``'notebook'`` shows it inline; any other value publishes it to
            the Bokeh server at ``BOKEH_SERVER_IP``.
    """
    import datetime
    import bokeh
    from bokeh.models import HoverTool

    # Add timestamp to title
    chart_title = (ISSUES_TITLE + " (Updated "
                   + datetime.datetime.now().strftime('%m/%d/%Y') + ")")
    chart_options = dict(
        label='value_delivered',
        values='status',
        agg='count',
        stack='status',
        title=chart_title,
        xlabel="Value Delivered",
        ylabel="Number of Use Cases",
        legend='top_right',
        tools='hover',
        color=brewer["GnBu"][3],
    )
    issues_chart = Bar(issues_df, **chart_options)

    # Leave room for the HTML body margin on both sides of the frame.
    margin = HTML_BODY_MARGIN * 2
    issues_chart.plot_width = DESTINATION_FRAME_WIDTH - margin
    issues_chart.plot_height = DESTINATION_FRAME_HEIGHT - margin
    issues_chart.logo = None
    issues_chart.toolbar_location = None

    hover = issues_chart.select({'type': HoverTool})
    hover.tooltips = [("Value Delivered", "$x")]

    # --- Configure output ---
    reset_output()

    if output_mode == 'static':
        # Static file. CDN is most space efficient
        output_file(ISSUES_FILE, title=ISSUES_TITLE,
                    autosave=False, mode='cdn', root_dir=None)
        # Generate file
        save(issues_chart, filename=ISSUES_FILE)
        return

    if output_mode == 'notebook':
        output_notebook()
    else:
        # Server (using internal server IP, rather than localhost or external)
        session = bokeh.session.Session(root_url=BOKEH_SERVER_IP,
                                        load_from_config=False)
        output_server("ddod_chart", session=session)

    # Both the notebook and the server modes finish by showing the chart.
    show(issues_chart)
def plot_average_dts(df, hours):
    """Build a bar chart counting transfers per time bucket.

    Buckets widen as the time grows: steps of 1 up to 9, steps of 10 up to
    90 and steps of 100 up to 900. Every bar is labelled with the upper edge
    of its bucket.

    Args:
        df: Object whose ``'noao_time'`` entry iterates over transfer times
            (presumably minutes, going by the axis label - confirm against
            the caller).
        hours: Unused; kept so existing callers keep working.

    Returns:
        The ``Bar`` chart, 1000 wide and 500 high.
    """
    # Upper edge of every bucket: 1..9, 10..90, 100..900.
    edges = (list(range(1, 10))
             + list(range(10, 100, 10))
             + list(range(100, 1000, 100)))

    # Read the column once instead of once per bucket.
    times = list(df['noao_time'])

    counts = []
    lower = 0
    for upper in edges:
        # Buckets are (lower, upper]: a value sitting exactly on an edge is
        # counted in the bar labelled with that edge. The previous strict
        # comparison on both sides silently dropped such values.
        # Values <= 0 or > 900 are still left out, as before.
        counts.append(sum(1 for t in times if lower < t <= upper))
        lower = upper

    graph_info = {}
    graph_info['values'] = counts
    graph_info['Time in minutes'] = edges

    p = Bar(graph_info, values='values', label='Time in minutes',
            ylabel='Number of transfers', color='navy', title='DTS time plot')
    p.plot_height = 500
    p.plot_width = 1000
    return p
def output_chart(issues_df, output_mode='static'):
    """Build the stacked issues bar chart and send it to the selected output.

    Args:
        issues_df: Data handed to ``Bar``. Bars are labelled by its
            ``value_delivered`` column; the ``status`` column is counted and
            used for stacking.
        output_mode: ``'static'`` writes the chart to ``ISSUES_FILE``;
            ``'notebook'`` shows it inline; any other value publishes it to
            the Bokeh server at ``BOKEH_SERVER_IP``.
    """
    import datetime
    import bokeh
    from bokeh.models import HoverTool

    # Add timestamp to title
    chart_title = (ISSUES_TITLE + " (Updated "
                   + datetime.datetime.now().strftime('%m/%d/%Y') + ")")
    chart_options = dict(
        label='value_delivered',
        values='status',
        agg='count',
        stack='status',
        title=chart_title,
        xlabel="Value Delivered",
        ylabel="Number of Use Cases",
        legend='top_right',
        tools='hover',
        color=brewer["GnBu"][3],
    )
    issues_chart = Bar(issues_df, **chart_options)

    # Leave room for the HTML body margin on both sides of the frame.
    margin = HTML_BODY_MARGIN * 2
    issues_chart.plot_width = DESTINATION_FRAME_WIDTH - margin
    issues_chart.plot_height = DESTINATION_FRAME_HEIGHT - margin
    issues_chart.logo = None
    issues_chart.toolbar_location = None

    hover = issues_chart.select({'type': HoverTool})
    hover.tooltips = [("Value Delivered", "$x")]

    # --- Configure output ---
    reset_output()

    if output_mode == 'static':
        # Static file. CDN is most space efficient
        output_file(ISSUES_FILE, title=ISSUES_TITLE,
                    autosave=False, mode='cdn', root_dir=None)
        # Generate file
        save(issues_chart, filename=ISSUES_FILE)
        return

    if output_mode == 'notebook':
        output_notebook()
    else:
        # Server (using internal server IP, rather than localhost or external)
        session = bokeh.session.Session(root_url=BOKEH_SERVER_IP,
                                        load_from_config=False)
        output_server("ddod_chart", session=session)

    # Both the notebook and the server modes finish by showing the chart.
    show(issues_chart)
def generate_single_bar(df, time_str, color_dict, colors):
    """Build a styled bar chart of failure rate per drive model.

    Only rows whose ``percent_total`` is at least 3 are plotted.

    Args:
        df: DataFrame with ``percent_total``, ``model``, ``failure_rate``,
            ``color`` and ``count`` columns.
        time_str: Text appended to the ``"Failure Rate "`` chart title.
        color_dict: Currently unused - bar colours come from the ``color``
            column of ``df``. Kept for interface compatibility.
        colors: Currently unused. Kept for interface compatibility.

    Returns:
        The configured ``Bar`` chart (800 wide, 600 high).
    """
    df = df[df.percent_total >= 3]
    title = "Failure Rate " + time_str

    # The previous version also built a per-model ``fill`` colour list from
    # ``color_dict`` that was never passed to anything; it has been removed.
    source = ColumnDataSource(dict(
        color=list(df['color']),
        model=list(df['model']),
        failure_rate=list(df['failure_rate']),
        count=list(df['count']),
    ))

    plot = Bar(df, 'model', values='failure_rate', title=title, source=source,
               tools=['hover'], color='color', legend=None)

    hover = plot.select(dict(type=HoverTool))
    hover.tooltips = [
        ("Model ", "@model"),
        ("Failure rate ", "@y"),
        ("Number of drives", "@count"),
    ]
    hover.mode = 'mouse'

    plot.xaxis.axis_label = 'Model Serial Number'
    plot.yaxis.axis_label = 'Naive Failure Rate'
    # NOTE(review): the title font size is set here and again further down
    # through ``plot.title.text_font_size``; presumably only one of the two
    # spellings is right for the installed bokeh - confirm and drop the other.
    plot.title_text_font_size = "18px"

    # Strip grid, logo and outline for a clean look.
    plot.grid.grid_line_alpha = 0
    plot.ygrid.grid_line_color = None
    plot.toolbar.logo = None
    plot.outline_line_width = 0
    plot.outline_line_color = "white"

    plot.plot_height = 600
    plot.plot_width = 800

    plot.xaxis.major_tick_line_color = None
    plot.yaxis.major_tick_line_color = None
    plot.xaxis.axis_line_width = 2
    plot.yaxis.axis_line_width = 2

    plot.title.text_font_size = '16pt'
    plot.xaxis.axis_label_text_font_size = "14pt"
    plot.xaxis.major_label_text_font_size = "14pt"
    plot.yaxis.axis_label_text_font_size = "14pt"
    plot.yaxis.major_label_text_font_size = "14pt"
    return plot
def summarize_plans(state, age, npi):
    """Summarise currently valid plan premiums for a state and, optionally, a provider.

    Args:
        state: Key of the per-state table inside the HDF5 stores.
        age: Applicant age. Numeric values are clamped to 20..65 before the
            ``Age`` lookup; non-numeric values are passed through as text.
        npi: Provider identifier. ``''`` means no provider was given and only
            the state-wide averages are charted; otherwise the provider's
            plans are charted next to the state averages.

    Returns:
        dict with the keys ``num_plans``, ``script``, ``plot_div``,
        ``national_comp``, ``state_comp`` and ``plans`` (the rendered
        ``plans.html`` fragment, plans sorted by ``IndividualRate``).
    """
    todaysdate = str(datetime.now())

    # Clamp numerically. Comparing the *strings* (as before) is
    # lexicographic: '7' > '65' sent children to 65 and '100' < '20' sent
    # centenarians to 20.
    try:
        years = int(float(age))
    except (TypeError, ValueError, OverflowError):
        age = str(age)
    else:
        age = str(min(max(years, 20), 65))

    # NOTE: pandas resolves ``age`` and ``todaysdate`` inside this expression
    # from the local variables of the function calling read_hdf, so both
    # names must stay locals of this function.
    current_rates = '(Age==age) & (RateExpirationDate > todaysdate) & (RateEffectiveDate < todaysdate)'

    # filter based on what plans are current (not expired) and age
    filteredplans = pd.read_hdf(
        'webapp/data/plan-rates.h5', state,
        where=[current_rates],
        columns=['IndividualRate', 'MetalLevel', 'Age',
                 'URLForSummaryofBenefitsCoverage', 'PlanMarketingName'])
    stateave = filteredplans.IndividualRate.mean()
    myave = stateave

    statebardf = filteredplans.groupby('MetalLevel', as_index=False).mean()

    if npi == '':
        p = Bar(filteredplans.groupby('MetalLevel').IndividualRate.mean(),
                values='IndividualRate', xlabel="",
                ylabel="Montly Premium ($)")
    else:
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print('input npi is ' + npi)
        # read hdf5 of provider-plan pairings
        provnplanpd = pd.read_hdf('webapp/data/plan_providers.h5', state)
        # make a list of planids for the input npi
        planlst = provnplanpd[provnplanpd.npi == npi].plan_id.values
        print(planlst)
        # read in hdf5 of plan info (all columns this time)
        planinfo = pd.read_hdf('webapp/data/plan-rates.h5', state,
                               where=[current_rates])

        # for each planid, get the first entry and concatenate them together
        # in one go (order and duplicates of planlst are preserved).
        per_plan = [
            planinfo[planinfo.PlanId == plan_id].groupby(
                'PlanId', as_index=False).first()
            for plan_id in planlst
        ]
        if per_plan:
            filteredplans = pd.concat(per_plan)
        else:
            # Provider without any listed plan: continue with an empty
            # selection instead of failing on planlst[0].
            filteredplans = planinfo.iloc[0:0]

        provbardf = filteredplans.groupby('MetalLevel', as_index=False).mean()
        # A scalar is broadcast to every row. The state frame used to get a
        # list sized by the *provider* frame, which raises as soon as the two
        # frames cover a different number of metal levels.
        provbardf['average'] = 'provider rates'
        statebardf['average'] = 'state average'

        # NOTE(review): this is the mean of the state's per-metal-level
        # means, not of the provider's rates; provbardf may have been
        # intended - confirm before changing the figures shown to users.
        myave = statebardf.IndividualRate.mean()

        plotdf = pd.concat([provbardf, statebardf])
        print(plotdf)
        p = Bar(plotdf, label='MetalLevel', values='IndividualRate',
                group='average', legend='top_right', xlabel="",
                ylabel="Montly Premium ($)")

    p.logo = None
    p.plot_height = 400
    p.toolbar_location = None
    script, div = components(p)

    print(filteredplans.to_dict(orient='records'))

    return {
        'num_plans': len(filteredplans),
        'script': script,
        'plot_div': div,
        'national_comp': format_price_comp(
            float(age2nationalAverage(age)) - myave),
        'state_comp': format_price_comp(float(stateave) - myave),
        'plans': render_template(
            'plans.html',
            plans=filteredplans.sort_values(
                by='IndividualRate').to_dict(orient='records')),
    }
def summarize_plans(state, age, npi):
    """Summarise currently valid plan premiums for a state and, optionally, a provider.

    Args:
        state: Key of the per-state table inside the HDF5 stores.
        age: Applicant age. Numeric values are clamped to 20..65 before the
            ``Age`` lookup; non-numeric values are passed through as text.
        npi: Provider identifier. ``''`` means no provider was given and only
            the state-wide averages are charted; otherwise the provider's
            plans are charted next to the state averages.

    Returns:
        dict with the keys ``num_plans``, ``script``, ``plot_div``,
        ``national_comp``, ``state_comp`` and ``plans`` (the rendered
        ``plans.html`` fragment, plans sorted by ``IndividualRate``).
    """
    todaysdate = str(datetime.now())

    # Clamp numerically. Comparing the *strings* (as before) is
    # lexicographic: '7' > '65' sent children to 65 and '100' < '20' sent
    # centenarians to 20.
    try:
        years = int(float(age))
    except (TypeError, ValueError, OverflowError):
        age = str(age)
    else:
        age = str(min(max(years, 20), 65))

    # NOTE: pandas resolves ``age`` and ``todaysdate`` inside this expression
    # from the local variables of the function calling read_hdf, so both
    # names must stay locals of this function.
    current_rates = '(Age==age) & (RateExpirationDate > todaysdate) & (RateEffectiveDate < todaysdate)'

    # filter based on what plans are current (not expired) and age
    filteredplans = pd.read_hdf(
        'webapp/data/plan-rates.h5', state,
        where=[current_rates],
        columns=['IndividualRate', 'MetalLevel', 'Age',
                 'URLForSummaryofBenefitsCoverage', 'PlanMarketingName'])
    stateave = filteredplans.IndividualRate.mean()
    myave = stateave

    statebardf = filteredplans.groupby('MetalLevel', as_index=False).mean()

    if npi == '':
        p = Bar(filteredplans.groupby('MetalLevel').IndividualRate.mean(),
                values='IndividualRate', xlabel="",
                ylabel="Montly Premium ($)")
    else:
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print('input npi is ' + npi)
        # read hdf5 of provider-plan pairings
        provnplanpd = pd.read_hdf('webapp/data/plan_providers.h5', state)
        # make a list of planids for the input npi
        planlst = provnplanpd[provnplanpd.npi == npi].plan_id.values
        print(planlst)
        # read in hdf5 of plan info (all columns this time)
        planinfo = pd.read_hdf('webapp/data/plan-rates.h5', state,
                               where=[current_rates])

        # for each planid, get the first entry and concatenate them together
        # in one go (order and duplicates of planlst are preserved).
        per_plan = [
            planinfo[planinfo.PlanId == plan_id].groupby(
                'PlanId', as_index=False).first()
            for plan_id in planlst
        ]
        if per_plan:
            filteredplans = pd.concat(per_plan)
        else:
            # Provider without any listed plan: continue with an empty
            # selection instead of failing on planlst[0].
            filteredplans = planinfo.iloc[0:0]

        provbardf = filteredplans.groupby('MetalLevel', as_index=False).mean()
        # A scalar is broadcast to every row. The state frame used to get a
        # list sized by the *provider* frame, which raises as soon as the two
        # frames cover a different number of metal levels.
        provbardf['average'] = 'provider rates'
        statebardf['average'] = 'state average'

        # NOTE(review): this is the mean of the state's per-metal-level
        # means, not of the provider's rates; provbardf may have been
        # intended - confirm before changing the figures shown to users.
        myave = statebardf.IndividualRate.mean()

        plotdf = pd.concat([provbardf, statebardf])
        print(plotdf)
        p = Bar(plotdf, label='MetalLevel', values='IndividualRate',
                group='average', legend='top_right', xlabel="",
                ylabel="Montly Premium ($)")

    p.logo = None
    p.plot_height = 400
    p.toolbar_location = None
    script, div = components(p)

    print(filteredplans.to_dict(orient='records'))

    return {
        'num_plans': len(filteredplans),
        'script': script,
        'plot_div': div,
        'national_comp': format_price_comp(
            float(age2nationalAverage(age)) - myave),
        'state_comp': format_price_comp(float(stateave) - myave),
        'plans': render_template(
            'plans.html',
            plans=filteredplans.sort_values(
                by='IndividualRate').to_dict(orient='records')),
    }