def analyze(self):
    plots = False
    h5_filename = self.output_filename + '.h5'
    with tb.open_file(h5_filename, 'r+') as in_file_h5:
        raw_data = in_file_h5.root.raw_data[:]
        meta_data = in_file_h5.root.meta_data[:]
        hit_data = self.dut.interpret_raw_data(raw_data, meta_data)
        in_file_h5.createTable(in_file_h5.root, 'hit_data', hit_data, filters=self.filter_tables)
        hits = hit_data['col'].astype(np.uint16)
        hits = hits * 64
        hits = hits + hit_data['row']
        value = np.bincount(hits)
        value = np.pad(value, (0, 64 * 64 - value.shape[0]), 'constant')
        full_occupation = np.full(4096, 100, dtype=int)
        difference = full_occupation - value
        tot_diff = abs(np.sum(difference))
        if tot_diff < 10000:
            plots = True
        self.not_fired.append(tot_diff)
        logging.info('Shmoo plot entry: %s', str(tot_diff))

    if plots:
        occ_plot, H = plotting.plot_occupancy(h5_filename)
        tot_plot, _ = plotting.plot_tot_dist(h5_filename)
        lv1id_plot, _ = plotting.plot_lv1id_dist(h5_filename)
        output_file(self.output_filename + '.html', title=self.run_name)
        save(vplot(occ_plot, tot_plot, lv1id_plot))
        return H
def single_bokeh_graph(args, marked_fn, unmarked_corpus, token='compare', bin_count=400):
    print(marked_fn)
    unmarked_fn = os.path.basename(marked_fn)
    unmarked_text = clean_and_read_text(args.unmarked_corpus_folder + '/' + unmarked_fn)
    text = clean_and_read_text(marked_fn)
    if token == 'compare':
        # assumes that you've passed a True, so you're
        # trying to graph comparatively.
        locations, quote_n, bins = find_bin_counts(
            find_quote_characters(unmarked_text), bin_count)
        _, caret_n, _ = find_bin_counts(find_carets(text), bin_count)
        n = quote_n - caret_n
    elif token == 'caret':
        locations, n, bins = find_bin_counts(find_carets(text), bin_count)
    else:
        locations, n, bins = find_bin_counts(
            find_quoted_quotes(unmarked_text), bin_count)
    d_frame = pd.DataFrame(n, columns=['count'])
    output_file('bokeh_graphs/' +
                re.sub(r'\.txt', '', os.path.basename(marked_fn)) + '.html')
    p = Bar(d_frame, legend=False, plot_width=1200)
    p.xaxis.visible = False
    p.xgrid.visible = False
    save(p)
def graph_feature_importance(olddf, model):
    """
    Output: Plot of Feature Importance
    """
    lab_feats = sorted(zip(olddf.columns, model.feature_importances_),
                       key=lambda x: x[1])[::-1]
    total, cnt = 0, 0
    for n, v in lab_feats:
        total += v
        if total <= .98:
            cnt += 1
            print(cnt, n, v)

    graph_feats = lab_feats[:cnt]
    col_names = []
    feat_vals = []
    for name, val in graph_feats:
        col_names.append(name)
        feat_vals.append(val)

    df = pd.concat([pd.DataFrame(col_names, columns=['columns']),
                    pd.DataFrame(feat_vals, columns=['featimpt'])],
                   axis=1).sort_values(by='featimpt', ascending=False)
    print(df)

    p = Bar(df, 'columns', values='featimpt', title="Feature Importance From Model")
    output_file("templates/feat_impt.html", title="Feature Importance From Model")
    save(p)
def analyze(self):
    plots = False
    h5_filename = self.output_filename + '.h5'
    with tb.open_file(h5_filename, 'r+') as in_file_h5:
        raw_data = in_file_h5.root.raw_data[:]
        meta_data = in_file_h5.root.meta_data[:]
        hit_data = self.dut.interpret_raw_data(raw_data, meta_data)
        in_file_h5.createTable(in_file_h5.root, 'hit_data', hit_data, filters=self.filter_tables)
        hits = hit_data['col'].astype(np.uint16)
        hits = hits * 64
        hits = hits + hit_data['row']
        value = np.bincount(hits)
        value = np.pad(value, (0, 64 * 64 - value.shape[0]), 'constant')
        full_occupation = np.full(4096, 100, dtype=int)
        difference = full_occupation - value
        tot_diff = abs(np.sum(difference))
        if tot_diff < 400000:
            plots = True
        self.not_fired.append(tot_diff)
        logging.info('Shmoo plot entry: %s', str(tot_diff))

    if plots:
        occ_plot, H = plotting.plot_occupancy(h5_filename)
        tot_plot, _ = plotting.plot_tot_dist(h5_filename)
        lv1id_plot, _ = plotting.plot_lv1id_dist(h5_filename)
        output_file(self.output_filename + '.html', title=self.run_name)
        save(vplot(occ_plot, tot_plot, lv1id_plot))
        return H
def histogram_exptime():
    """Create a histogram showing the distribution of exposure times
    for the composite lightcurves.
    """
    logging.info('Creating exposure time histogram')

    data_dir = get_settings()['composite_dir']
    exptime_data = {}
    for dataset in glob.glob(os.path.join(data_dir, '*.fits')):
        with fits.open(dataset) as hdu:
            targname = os.path.split(dataset)[-1].split('_')[4]
            exptime = hdu[0].header['EXPTIME']
            if targname in exptime_data:
                exptime_data[targname] += exptime
            else:
                exptime_data[targname] = exptime

    charts.output_file(os.path.join(get_settings()['plot_dir'], 'exptime_histogram.html'))

    times = np.array(exptime_data.values())
    names = np.array(exptime_data.keys())
    indx = np.argsort(times)
    times = list(times[indx])[::-1][:30]
    names = list(names[indx])[::-1][:30]

    bar = charts.Bar(times, label=names, xlabel='Target', ylabel='Exptime (s)',
                     width=700, height=400, title='Cumulative Exptime per Target')
    bar.background_fill = '#cccccc'
    bar.outline_line_color = 'black'

    # change just some things about the x-grid
    #bar.xgrid.grid_line_color = None
    # change just some things about the y-grid
    #bar.ygrid.band_fill_alpha = 0.1
    #bar.ygrid.band_fill_color = "navy"
    #bar.toolbar_location = None
    #charts.show(bar)

    script, div = components(bar)

    plot_file = os.path.join(get_settings()['plot_dir'], 'exptime_histogram.html')
    charts.save(obj=bar, filename=plot_file)
    set_permissions(plot_file)
def _chart(self, chartcls, **kwargs):
    opts = dict(width=1000, height=500, legend='bottom_left')
    show = kwargs.pop('show', True)
    opts.update(self.kwargs())
    opts.update(kwargs)
    p = chartcls(self.frame, **opts)
    if show:
        charts.show(p)
    else:
        charts.save(p)
    return p
def dataset_dashboard(filename, plot_file=''):
    """Creates interactive bokeh 'dashboard' plot for the given filename

    Parameters
    ----------
    filename : str
        The path to the lightcurve
    plot_file : str
        The path to the PNG plot.  The user can supply this argument if they
        wish to update the plot or save to a specific location.
    """
    logging.info('Creating bokeh dashboard plots for {}'.format(filename))

    if not plot_file:
        plot_file = filename.replace('.fits', '.html')

    if os.path.exists(plot_file):
        os.remove(plot_file)

    bokeh.io.output_file(plot_file)
    TOOLS = 'pan,wheel_zoom,box_zoom,reset,resize,box_select,lasso_select,save'

    with fits.open(filename) as hdu:
        source = bokeh.models.ColumnDataSource(
            data={col: hdu[1].data[col] for col in hdu[1].data.names})

        endless_colors = itertools.cycle(palettes.Spectral6)
        colors = [endless_colors.next() for i in np.unique(hdu[1].data['dataset'])]

        dset_counts = Counter(hdu[1].data['dataset'])
        repeats = [dset_counts[key] for key in sorted(dset_counts.keys())]
        colors = np.repeat(colors, repeats)

        axes = []
        for key in ['gross', 'net', 'flux', 'error', 'background']:
            if len(axes) == 0:
                axes.append(figure(tools=TOOLS, plot_width=900, plot_height=350,
                                   title=key, toolbar_location='right'))
            else:
                axes.append(figure(tools=TOOLS, x_range=axes[0].x_range,
                                   plot_width=900, plot_height=350,
                                   title=key, toolbar_location='right'))

            axes[-1].circle(hdu[1].data['mjd'], hdu[1].data[key],
                            size=12, color=colors, fill_alpha=1)

    # put all the plots in a grid layout
    p = bokeh.io.vplot(*axes)

    charts.save(obj=p, filename=plot_file)
    charts.reset_output()
    del p
    set_permissions(plot_file)
def export():
    nonlocal data
    import bokeh.charts as bch
    from bokeh.layouts import column
    # from bokeh.models import HoverTool, GlyphRenderer
    bch.output_file("Chart.html")
    data = data.iloc[1:, :]
    # TOOLS = "pan, wheel_zoom, box_zoom, crosshair, resize, reset "  # , hover
    title = "History (total result: {0:.2f} €)".format(result)
    if bonus > 0:
        title = title[:-1] + ", excluding Bonus: {0:.2f} €)".format(bonus)
    cols = ["Paid-in", "Balance", "Normalised", "Total Result"]
    if bonus > 0:
        cols = ["Paid-in", "Balance", "Normalised",
                "Total Result incl Bonus", "Total Result"]
    tsline = bch.TimeSeries(data, x="Time", y=cols, title=title,  # tools=TOOLS,
                            ylabel='Euro', legend=True,
                            width=1250, height=550)
    """
    from bokeh.models import HoverTool
    hover = HoverTool(
        tooltips=[
            ("index", "$index"),
            ("(x,y)", "($x, $y)"),
            ("desc", "$balance"),
            # ("test", data.iloc["$index", 4])
        ]
    )
    tsline.add_tools(hover)
    """
    if open_browser:
        bch.show(column(tsline))
    else:
        bch.save(column(tsline))

    import matplotlib.pyplot as plt
    import matplotlib
    matplotlib.style.use('ggplot')
    data.plot(x="Time", y=cols)
    plt.savefig("Chart.pdf")
def generate():
    # Obtain both the matches and item JSONS
    matches = getItems(suffix='matches', apiKey=apiKey)
    items = getItems(suffix='items', apiKey=apiKey)

    # Convert both to dataframe
    match_df = pd.DataFrame(matches)
    item_df = pd.DataFrame(items)

    # Filter to only the relevant columns
    item_df = item_df[['_id', 'bought', 'minPrice']]
    item_df = item_df.rename(columns={'bought': 'bought_item'})
    match_df = match_df[['itemID', 'bought', 'matchedPrice', 'dateBought']]
    match_df['dateBought'] = pd.to_datetime(match_df['dateBought'], yearfirst=True,
                                            exact=False, format='%y-%m-%d')
    match_df = match_df.rename(columns={'bought': 'bought_match'})

    # Merge DFs on item ids
    matched_items = match_df.merge(item_df, how='left', left_on='itemID', right_on='_id')
    matched_items = matched_items.loc[matched_items['bought_match'] == 1]
    matched_items['Profit'] = (matched_items['matchedPrice'] - matched_items['minPrice'])
    matched_items = matched_items.reset_index(drop=True)

    output_file('templates/sales-data.html')
    source = ColumnDataSource(matched_items)
    columns = [
        TableColumn(field="dateBought", title="Date",
                    formatter=DateFormatter(format='dd/mm/yy')),
        TableColumn(field="matchedPrice", title="Matched Price"),
        TableColumn(field="minPrice", title="Posted Price"),
        TableColumn(field="Profit", title="Profit")
    ]
    data_table = DataTable(source=source, columns=columns, width=600, height=500)
    save(widgetbox(data_table))

    #t = app.jinja_environment.get_template(name='sales-data.html')
    #return(t)
    return render_template('sales-data.html')
def do_output(p, mode, output_path=''):
    if mode == 'object':
        return p
    elif mode == 'embed':
        script, div = components(p)
        return [script, div]
    else:
        output_file(output_path)
        if mode == 'show':
            show(p)
        elif mode == 'save':
            save(p)
        return True
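# A minimal usage sketch for do_output() above -- a hypothetical example, not part of
# the original snippet. It assumes the module-level imports the function itself relies
# on (components, output_file, show, save) and builds a throwaway figure to dispatch.
from bokeh.plotting import figure

p = figure(plot_width=300, plot_height=300)
p.line([1, 2, 3], [4, 6, 5])

fig = do_output(p, mode='object')                    # returns the figure object unchanged
script, div = do_output(p, mode='embed')             # returns embeddable <script>/<div> strings
do_output(p, mode='save', output_path='demo.html')   # writes demo.html and returns True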
def generate_histogram(table_data, form_data):
    """Generate histogram."""
    if form_data['color'] == '':
        color = 'blue'
    else:
        color = form_data['color']
    plot = Histogram(table_data, values=form_data['column'], color=color,
                     tools='pan,wheel_zoom,box_zoom,reset,resize,hover,save')
    plot.title.text_font_style = "bold"
    output_file("output.html")
    save(plot)
    return build_html()
def static_bar_plot(plug):
    han2 = prepare_han2(plug)
    start_time = str(han2.head(1).READING_DATETIME.values[0])[:10]
    end_time = str(han2.tail(1).READING_DATETIME.values[0])[:10]
    # customer = han2.CUSTOMER_ID.unique()[0]
    customer = '10082576'  # FIXME
    plug = han2.PLUG_NAME.unique()[0]

    output_file("templates/plot.html")
    p = Bar(han2, 'HOUR', values='READING_DELTA',
            title="Total Energy Used from %s to %s" % (start_time, end_time),
            ylabel="kWh Consumed, %s at site %s" % (plug, customer),
            xlabel='Hour in Day (5 means 5:00am to 5:59am)')
    save(p)
def output_chart(issues_df, output_mode='static'):
    import datetime
    import bokeh
    from bokeh.models import HoverTool

    # Add timestamp to title
    issues_chart = Bar(issues_df, label='value_delivered', values='status',
                       agg='count', stack='status',
                       title=ISSUES_TITLE + " (Updated " +
                       datetime.datetime.now().strftime('%m/%d/%Y') + ")",
                       xlabel="Value Delivered", ylabel="Number of Use Cases",
                       legend='top_right', tools='hover',
                       color=brewer["GnBu"][3])

    issues_chart.plot_width = DESTINATION_FRAME_WIDTH - (HTML_BODY_MARGIN * 2)
    issues_chart.plot_height = DESTINATION_FRAME_HEIGHT - (HTML_BODY_MARGIN * 2)
    issues_chart.logo = None
    issues_chart.toolbar_location = None

    hover = issues_chart.select(dict(type=HoverTool))
    hover.tooltips = [("Value Delivered", "$x")]

    #--- Configure output ---
    reset_output()

    if output_mode == 'static':
        # Static file.  CDN is most space efficient
        output_file(ISSUES_FILE, title=ISSUES_TITLE, autosave=False,
                    mode='cdn', root_dir=None)
        # Generate file
        save(issues_chart, filename=ISSUES_FILE)
    elif output_mode == 'notebook':
        output_notebook()  # Show inline
        show(issues_chart)
    else:
        # Server (using internal server IP, rather than localhost or external)
        session = bokeh.session.Session(root_url=BOKEH_SERVER_IP,
                                        load_from_config=False)
        output_server("ddod_chart", session=session)
        show(issues_chart)
def generate_bar(table_data, form_data):
    """Generate Bar plot."""
    if form_data['group'] == '':
        form_data['group'] = False
    plot = Bar(table_data, label=form_data['label'], values=form_data['values'],
               agg=form_data['agg'],
               title=form_data['label'] + ' vs ' + form_data['values'],
               group=form_data['group'],
               tools='pan,wheel_zoom,box_zoom,reset,resize,hover,save')
    plot.title.text_font_style = "bold"
    output_file("output.html")
    save(plot)
    return build_html()
def generate_scatter(table_data, form_data):
    """Generate scatter plot."""
    if form_data['marker'] == '':
        form_data['marker'] = None
    if form_data['color'] == '':
        form_data['color'] = None
    plot = Scatter(table_data, x=form_data['x'], y=form_data['y'],
                   title=form_data['x'] + ' vs ' + form_data['y'],
                   color=form_data['color'], marker=form_data['marker'],
                   tools='pan,wheel_zoom,box_zoom,reset,resize,hover,save')
    plot.title.text_font_style = "bold"
    output_file("output.html")
    save(plot)
    return build_html()
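# Hypothetical form_data payloads for the generate_* helpers above -- a sketch only;
# the keys mirror what each function reads, the column names are made up, and
# table_data can be any pandas DataFrame containing those columns.
scatter_form = {'x': 'height', 'y': 'weight', 'color': '', 'marker': ''}
bar_form = {'label': 'city', 'values': 'sales', 'agg': 'sum', 'group': ''}
hist_form = {'column': 'age', 'color': ''}
# generate_scatter(table_data, scatter_form)
# generate_bar(table_data, bar_form)
# generate_histogram(table_data, hist_form)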
def bar_opt_elem():
    """Create a bar chart showing the number of composite lightcurves for
    each COS & STIS optical element.
    """
    logging.info('Creating optical element bar chart')

    # Query for data
    query = session.query(Metadata.instrume, Metadata.opt_elem).all()
    instrumes = [result[0] for result in query]
    opt_elems = [result[1] for result in query]
    opt_elems_set = sorted(list(set(opt_elems)))

    # Initialize dictionaries that store all optical elements
    cos_dict, stis_dict = {}, {}
    for opt_elem in opt_elems_set:
        cos_dict[opt_elem] = 0
        stis_dict[opt_elem] = 0

    # Count number of opt_elems
    for instrument, opt_elem in zip(instrumes, opt_elems):
        if instrument == 'COS':
            cos_dict[opt_elem] += 1
        elif instrument == 'STIS':
            stis_dict[opt_elem] += 1

    # Determine plotting values
    cat = list(opt_elems_set)
    xyvalues = OrderedDict()
    xyvalues['COS'] = [cos_dict[opt_elem] for opt_elem in opt_elems_set]
    xyvalues['STIS'] = [stis_dict[opt_elem] for opt_elem in opt_elems_set]

    # Make plots
    bar = charts.Bar(xyvalues, label=cat, xlabel='Optical Element',
                     ylabel='# of Lightcurves', stacked=True, legend='top_right')
    bar.background_fill = '#cccccc'
    bar.outline_line_color = 'black'

    charts.output_file(os.path.join(get_settings()['plot_dir'], 'opt_elem.html'))
    plot_file = os.path.join(get_settings()['plot_dir'], 'opt_elem.html')
    charts.save(obj=bar, filename=plot_file)
    set_permissions(plot_file)
def prepare_and_draw_matrix(dh_mat2, heading_list, disc_list, outfile):
    totals = {}
    for d in disc_list:
        total = 0
        for h in heading_list:
            if dh_mat2[h].get(d, None) is not None:
                total += dh_mat2[h][d]
        totals[d] = total

    section = []
    d_type = []
    percent = []
    for d in disc_list:
        for h in heading_list:
            if dh_mat2[h].get(d, None) is None:
                section.append(h)
                d_type.append(d)
                percent.append(0.0)
            else:
                section.append(h)
                d_type.append(d)
                percent.append(100.0 * dh_mat2[h][d] / totals[d])

    data = {'Section': section, 'Discourse Type': d_type, 'Percentage': percent}

    color = ColorAttr(bin=True, palette=gray(6), sort=True, ascending=False)
    hm = HeatMap(data, x='Discourse Type', y='Section', values='Percentage',
                 stat=None, plot_height=260, legend=False, color=color)
    output_file(outfile + '1.html', mode='cdn', root_dir=None)
    save(hm)

    hm1 = HeatMap(data, x='Discourse Type', y='Section', values='Percentage',
                  stat=None, plot_height=260, legend=True, color=color)
    output_file(outfile + '2.html', mode='cdn', root_dir=None)
    save(hm1)
def analyze(self):
    h5_filename = self.output_filename + '.h5'
    with tb.open_file(h5_filename, 'r+') as in_file_h5:
        raw_data = in_file_h5.root.raw_data[:]
        meta_data = in_file_h5.root.meta_data[:]
        hit_data = self.dut.interpret_raw_data(raw_data, meta_data)
        in_file_h5.createTable(in_file_h5.root, 'hit_data', hit_data, filters=self.filter_tables)

    occ_plot, H = plotting.plot_occupancy(h5_filename)
    tot_plot, _ = plotting.plot_tot_dist(h5_filename)
    lv1id_plot, _ = plotting.plot_lv1id_dist(h5_filename)

    output_file(self.output_filename + '.html', title=self.run_name)
    save(vplot(occ_plot, tot_plot, lv1id_plot))

    return H
def analyze(self):
    h5_filename = self.output_filename + '.h5'
    with tb.open_file(h5_filename, 'r+') as in_file_h5:
        raw_data = in_file_h5.root.raw_data[:]
        meta_data = in_file_h5.root.meta_data[:]
        hit_data = self.dut.interpret_raw_data(raw_data, meta_data)
        in_file_h5.createTable(in_file_h5.root, 'hit_data', hit_data, filters=self.filter_tables)

    status_plot = plotting.plot_status(h5_filename)
    occ_plot, H = plotting.plot_occupancy(h5_filename)
    tot_plot, _ = plotting.plot_tot_dist(h5_filename)
    lv1id_plot, _ = plotting.plot_lv1id_dist(h5_filename)
    scan_pix_hist, _ = plotting.scan_pix_hist(h5_filename)

    output_file(self.output_filename + '.html', title=self.run_name)
    save(vplot(hplot(occ_plot, tot_plot, lv1id_plot), scan_pix_hist, status_plot))
def stock_graph(ticker):
    quandl_apikey = "mWZdFiBfbyUzVK2xeec6"
    quandl_header = "https://www.quandl.com/api/v3/datasets/WIKI"
    today = datetime.date.today()
    lastmonth = today - datetime.timedelta(days=31)  # not so accurate...
    dateformat = "%Y-%m-%d"
    end_date = today.strftime(dateformat)
    start_date = lastmonth.strftime(dateformat)
    stock = ticker

    qurl = 'https://www.quandl.com/api/v3/datasets/WIKI/%s.json?column_index=4&' \
           'start_date=%s&end_date=%s&api_key=mWZdFiBfbyUzVK2xeec6' % (stock, start_date, end_date)
    r = requests.get(qurl)
    if r.status_code != requests.codes.ok:
        return 0

    print(r.url)
    print(r.json())
    print(json.dumps(r.json(), sort_keys=True, indent=4 * ' '))
    #print(json.loads(r.json()))
    tsdata = r.json()['dataset']['data']
    print(json.dumps(tsdata))

    # format data as pandas' time series data
    s = pd.Series()
    for price in tsdata:
        s.set_value(pd.Timestamp(price[0]), price[1])
    print(s.is_time_series)
    print(s)

    graph_title = "30 Day Stock Price of " + stock.encode('ascii', 'ignore')
    tsline = TimeSeries(s, title=graph_title, ylabel='Stock Prices',
                        xlabel='Date', legend=True)
    # output_file("timeseries.html", autosave=True)
    save(obj=tsline, filename='templates/timeseries.html')
    # save(column(tsline))
    return 1
def analyze(self):
    h5_filename = self.output_filename + '.h5'
    with tb.open_file(h5_filename, 'r+') as in_file_h5:
        raw_data = in_file_h5.root.raw_data[:]
        meta_data = in_file_h5.root.meta_data[:]
        hit_data = self.dut.interpret_raw_data(raw_data, meta_data)
        in_file_h5.createTable(in_file_h5.root, 'hit_data', hit_data, filters=self.filter_tables)

    analysis.analyze_threshold_scan(h5_filename)
    status_plot = plotting.plot_status(h5_filename)
    occ_plot, H = plotting.plot_occupancy(h5_filename)
    tot_plot, _ = plotting.plot_tot_dist(h5_filename)
    lv1id_plot, _ = plotting.plot_lv1id_dist(h5_filename)
    scan_pix_hist, _ = plotting.scan_pix_hist(h5_filename)
    t_dac = plotting.t_dac_plot(h5_filename)

    output_file(self.output_filename + '.html', title=self.run_name)
    save(vplot(hplot(occ_plot, tot_plot, lv1id_plot), scan_pix_hist, t_dac, status_plot))
def plot_dataset(filename, plot_file=''):
    """Create an interactive bokeh lightcurve plot for the given filename

    Parameters
    ----------
    filename : str
        The path to the lightcurve
    plot_file : str
        The path to the PNG plot.  The user can supply this argument if they
        wish to update the plot or save to a specific location.
    """
    logging.info('Creating bokeh lightcurve for {}'.format(filename))

    path, name = os.path.split(filename)
    if not plot_file:
        plot_file = name.replace('.fits', '.html')

    if os.path.exists(plot_file):
        os.remove(plot_file)

    TOOLS = 'pan,wheel_zoom,box_zoom,box_select,lasso_select,reset,resize,save'
    charts.output_file(plot_file)

    p = figure(tools=TOOLS, toolbar_location='above', logo='grey', plot_width=700)
    p.background_fill = "#cccccc"

    with fits.open(filename) as hdu:
        p.circle(hdu[1].data['MJD'], hdu[1].data['FLUX'],
                 size=12, line_color="black", fill_alpha=0.8)

    p.xaxis.axis_label = 'Time (MJD)'
    p.yaxis.axis_label = 'Net (cnts/sec)'
    p.grid.grid_line_color = 'white'

    charts.save(obj=p, filename=plot_file)
    charts.reset_output()
    del p
    set_permissions(plot_file)
def prediction_scatter_plot(result_df, show_plot, save_plot, title="", file_name=None):
    p = figure(plot_width=600, plot_height=600, title=title)
    p.xaxis.axis_label = "real"
    p.yaxis.axis_label = "predict"
    p.x_range = Range1d(-4, 4)
    p.y_range = Range1d(-4, 4)
    p.circle(result_df['real'], result_df['prediction'], size=3, alpha=0.2)
    if save_plot:
        output_file(file_name)
        save(p)
    if show_plot:
        show(p)
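# A hypothetical call to prediction_scatter_plot() above -- a sketch only; the
# 'real' and 'prediction' column names are the ones the function expects, and
# the values are made up.
import pandas as pd

toy = pd.DataFrame({'real': [0.1, -1.2, 2.0], 'prediction': [0.3, -0.9, 1.7]})
prediction_scatter_plot(toy, show_plot=False, save_plot=True,
                        title="toy example", file_name="toy_scatter.html")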
def main(self, stats_days, stats_weeks, stats_remaining):
    days_chart = self.build_velocity_days(stats_days)
    weeks_chart = self.build_velocity_weeks(stats_weeks)
    remaining_types = self.chart_remaining_types(stats_remaining)
    remaining_assignees = self.chart_remaining_assignees(stats_remaining)
    remaining_days = self.chart_remaining_days(stats_remaining)

    bokeh_layout = layout([
        [days_chart],
        [weeks_chart],
        [remaining_types, remaining_assignees, remaining_days]
    ], sizing_mode='stretch_both')

    # output to static HTML file
    output_file(self.config.filepath_charts + 'index.html',
                title='[' + self.time.get_current_date().strftime("%Y-%m-%d") +
                      '] - Jira Metrics, built on: ' +
                      self.time.get_current_date().strftime("%Y-%m-%d"))
    save(bokeh_layout)
    show(bokeh_layout)
def plot_opt(name):
    with open('{}.txt'.format(name), 'r') as f:
        data = f.read()
    N = int(re.findall(r'\[I(\d*) \d*\]\n.* .* .*', data)[0])
    B = int(re.findall(r'\[I\d* (\d*)\]\n.* .* .*', data)[0])
    TP = ['with', 'without']
    T1 = [float(x) for x in re.findall(r'\[I\d* \d*\]\n(.*) .* .*', data)]
    T2 = [float(x) for x in re.findall(r'\[I\d* \d*\]\n.* (.*) .*', data)]
    T3 = [float(x) for x in re.findall(r'\[I\d* \d*\]\n.* .* (.*)', data)]
    T1 += [float(x) for x in re.findall(r'\[I\d* \d*\]\n.* .* .*\n(.*) .* .*', data)]
    T2 += [float(x) for x in re.findall(r'\[I\d* \d*\]\n.* .* .*\n.* (.*) .*', data)]
    T3 += [float(x) for x in re.findall(r'\[I\d* \d*\]\n.* .* .*\n.* .* (.*)', data)]
    df = pd.DataFrame({
        'type': TP,
        'computation': T1,
        'communication': T2,
        'memcpy': T3
    })
    bar = Bar(df,
              label=['type'],
              values=blend('computation', 'communication', 'memcpy',
                           name='times', labels_name='time'),
              xlabel='comparison',
              ylabel='total time(s)',
              stack=cat(columns=['time'], sort=False),
              title='{}'.format(name))
    output_file('{}.html'.format(name), title='N={}, B={}'.format(N, B))
    save(bar)
def plot_weak(name):
    with open('{}.tot'.format(name), 'r') as f:
        data = f.read()
    Ns = [int(x) for x in re.findall(r'\[I(\d*) \d*\]\n.* .* .*', data)]
    Bs = [int(x) for x in re.findall(r'\[I\d* (\d*)\]\n.* .* .*', data)]
    T1s = [float(x) for x in re.findall(r'\[I\d* \d*\]\n(.*) .* .*', data)]
    T2s = [float(x) for x in re.findall(r'\[I\d* \d*\]\n.* (.*) .*', data)]
    T3s = [float(x) for x in re.findall(r'\[I\d* \d*\]\n.* .* (.*)', data)]
    """
    if 'cuda' not in name:
        Ns += Ns
        Bs += Bs
        T1s += [float(x) for x in re.findall('\[I\d* \d*\]\n.* .* .*\n(.*) .* .*', data)]
        T2s += [float(x) for x in re.findall('\[I\d* \d*\]\n.* .* .*\n.* (.*) .*', data)]
        T3s += [float(x) for x in re.findall('\[I\d* \d*\]\n.* .* .*\n.* .* (.*)', data)]
    """
    df = pd.DataFrame({
        'N': Ns,
        'B': Bs,
        'computation': T1s,
        'communication': T2s,
        'memcpy': T3s
    })
    bar = Bar(df,
              label=['N', 'B'],
              values=blend('computation', 'communication', 'memcpy',
                           name='times', labels_name='time'),
              xlabel='#node',
              ylabel='total time(s)',
              stack=cat(columns=['time'], sort=False),
              title='Weak Scalability')
    output_file('weak_{}.html'.format(name))
    save(bar)
def get_variable(config):
    from bokeh.embed import components
    name, rank, df, decade_df = get_result(config.gender, config.decade, config.name)

    if config.name != name:
        message = "%s not found. Do you mean %s?" % (config.name, name)
    else:
        message = ""

    birth_table = df.pivot(index='Decade', columns='Name', values='Births').fillna(0).to_html()
    rank_table = df.pivot(index='Decade', columns='Name', values='Rank').fillna(0).to_html()
    result_table = df[df['Rank'] == rank][['Decade', 'Name', 'Rank']].sort('Decade').to_html(index=False)
    top_table = decade_df[(decade_df['Rank'] <= 8) &
                          (decade_df['Decade'] == config.decade)].sort('Rank').to_html(index=False)

    from bokeh.charts import Line, save, output_file, ColumnDataSource
    from bokeh.resources import INLINE
    plot_path = "%s_%s_%i.html" % (config.name, config.gender, config.decade)
    output_file("output/" + plot_path, mode='inline')
    tooltips = [(c, '@' + c) for c in df.columns]
    p = Line(df, x='Decade', y='Rank', title="Rank across Time", color='Name',
             xlabel="Decade", ylabel="Rank", tooltips=tooltips)
    p.circle('Decade', 'Rank', color='gray', alpha=0.5, source=ColumnDataSource(df))
    save(p)
    #script, div = components(p)

    return {
        'plot_path': plot_path,
        'result_table': result_table,
        'rank_table': rank_table,
        'birth_table': birth_table,
        'top_table': top_table,
        'rank': rank,
        'config': config,
        'name': name,
        'message': message
    }
def create_html(report, filename, full):
    output_file(filename)

    structure = (("Tasks", [
        ("Task progress", create_ctime, False),
        ("Task summary", create_task_summary, False),
        ("Task durations", create_task_durations, False)
    ]), ("Monitoring", [
        ("CPU & Memory usage", create_monitoring, False)
    ]), ("Scheduling", [
        ("Timeline", create_timelines, True),
        ("Pending tasks", create_pending_tasks, False),
        ("Scheduling time", create_scheduling_time, False),
        ("Worker load", create_worker_load, False)
    ]), ("Communication", [
        ("Transfer per tasks", create_ctransfer, False),
        ("Transfer per nodes", create_transfer, False)
    ]))

    tabs = []
    for name, subtabs in structure:
        print("Tab:", name)
        tabs2 = []
        for name2, fn, full_only in subtabs:
            if full_only and not full:
                print(" - {} ... SKIPPED".format(name2))
                f = Div(text="""Chart is disabled. Use parameter
                        <strong>--full</strong> to enable this graph.""")
            else:
                print(" - {} ...".format(name2))
                f = fn(report)
            tabs2.append(Panel(child=f, title=name2))
        tabs.append(Panel(child=Tabs(tabs=tabs2), title=name))

    print("Saving results ...")
    main = Tabs(tabs=tabs)
    save(main)
def tdc_table(self, scanrange): h5_filename = self.output_filename + '.h5' with tb.open_file(h5_filename, 'r+') as in_file_h5: raw_data = in_file_h5.root.raw_data[:] meta_data = in_file_h5.root.meta_data[:] if (meta_data.shape[0] == 0): return repeat_command = in_file_h5.root.meta_data.attrs.kwargs a = repeat_command.rfind("repeat_command: ") repeat_command = repeat_command[a + len("repeat_command: "):a + len("repeat_command: ") + 7] a = repeat_command.rfind("\n") repeat_command = int(repeat_command[0:a]) param, index = np.unique(meta_data['scan_param_id'], return_index=True) pxl_list = [] for p in param: pix_no = int(p) / int(len(self.inj_charge)) pxl_list.append(self.pixel_list[pix_no][0] * 64 + self.pixel_list[pix_no][1]) index = index[1:] index = np.append(index, meta_data.shape[0]) index = index - 1 stops = meta_data['index_stop'][index] split = np.split(raw_data, stops) avg_tdc = [] avg_tdc_err = [] avg_del = [] avg_del_err = [] hits = [] deletelist = () for i in range(len(split[:-1])): # loop on pulses rwa_data_param = split[i] tdc_data = rwa_data_param & 0xFFF # take last 12 bit tdc_delay = (rwa_data_param & 0x0FF00000) >> 20 counter = 0.0 TOT_sum = 0.0 DEL_sum = 0.0 if (tdc_data.shape[0] == 0 or tdc_data.shape[0] == 1): counter = 1.0 for j in range(tdc_data.shape[0]): # loop on repeats if (j > 0): counter += 1 TOT_sum += tdc_data[j] DEL_sum += tdc_delay[j] if (counter > 1): hits.append(counter) avg_tdc.append((float(TOT_sum) / float(counter)) * 1.5625) avg_tdc_err.append(1.5625 / (np.sqrt(12.0 * counter))) avg_del.append((float(DEL_sum) / float(counter)) * 1.5625) avg_del_err.append(1.5625 / (np.sqrt(12.0 * counter))) else: deletelist = np.append(deletelist, i) pxl_list = np.delete(pxl_list, deletelist) newpix = [0] pix_no_old = pxl_list[0] runparam = 0 for p in pxl_list: if p != pix_no_old: newpix = np.append(newpix, runparam) pix_no_old = p runparam = runparam + 1 addedvalues = 0 for pixels in range(len(newpix)): missingvalues = 0 if newpix[pixels] == newpix[-1]: missingvalues = scanrange - abs(newpix[pixels] + addedvalues - len(hits)) else: if abs(newpix[pixels] - newpix[pixels + 1]) < scanrange: missingvalues = scanrange - abs(newpix[pixels] - newpix[pixels + 1]) if missingvalues != 0: hits = np.insert(hits, newpix[pixels] + addedvalues, np.zeros(missingvalues)) avg_tdc = np.insert(avg_tdc, newpix[pixels] + addedvalues, np.zeros(missingvalues)) avg_tdc_err = np.insert(avg_tdc_err, newpix[pixels] + addedvalues, np.zeros(missingvalues)) avg_del = np.insert(avg_del, newpix[pixels] + addedvalues, np.zeros(missingvalues)) avg_del_err = np.insert(avg_del_err, newpix[pixels] + addedvalues, np.zeros(missingvalues)) pxl_list = np.insert(pxl_list, newpix[pixels] + addedvalues, (pxl_list[newpix[pixels] + addedvalues]) * np.ones(missingvalues)) addedvalues = addedvalues + missingvalues injections = [] for pixels in range(int(len(pxl_list) / len(self.inj_charge))): for i in range(len(self.inj_charge)): injections = np.append(injections, self.inj_charge[i]) pix, stop = np.unique(pxl_list, return_index=True) stop = np.sort(stop) stop = list(stop) stop.append(len(avg_tdc)) repeat_command_dic={} repeat_command_dic['repeat_command']=repeat_command avg_tab = np.rec.fromarrays([injections, pxl_list, hits, avg_tdc, avg_tdc_err, avg_del, avg_del_err], dtype=[('charge', float), ('pixel_no', int), ('hits', int), ('tot_ns', float), ('err_tot_ns', float), ('delay_ns', float), ('err_delay_ns', float)]) tdc_table=in_file_h5.createTable(in_file_h5.root, 'tdc_data', avg_tab, filters=self.filter_tables) 
tdc_table.attrs.repeat_command = repeat_command_dic thresholds = () expfit0 = () expfit1 = () expfit2 = () expfit3 = () pixels = () for i in range(len(stop) - 1): s1 = int(stop[i]) s2 = int(stop[i + 1]) A, mu, sigma = analysis.fit_scurve(hits[s1:s2], injections[s1:s2],repeat_command) if np.max(hits[s1:s2]) > (repeat_command + 200): # or mu > 3000: thresholds = np.append(thresholds, 0) expfit0 = np.append(expfit0, 0) expfit1 = np.append(expfit1, 0) expfit2 = np.append(expfit2, 0) expfit3 = np.append(expfit3, 0) pixels = np.append(pixels, pxl_list[s1]) continue for values in range(s1, s2): if injections[values] >= 5 / 4 * mu: s1 = values break numberer = 0 hitvaluesold = hits[-1] for hitvalues in hits[s1:s2]: if abs(hitvalues - hitvaluesold) <= 1 and hitvalues != 0: break numberer = numberer + 1 hitvaluesold = hitvalues if numberer == len(avg_del[s1:s2]): numberer = 0 expfit = analysis.fit_exp(injections[s1:s2], avg_del[s1:s2], mu, abs(numberer)) startexp = -expfit[0] * np.log((25.0 + np.min(avg_del[s1:s2]) - expfit[3]) / expfit[2]) - expfit[1] if np.isnan(startexp) or startexp >= 2000: startexp = 0 thresholds = np.append(thresholds, startexp) expfit0 = np.append(expfit0, expfit[0]) expfit1 = np.append(expfit1, expfit[1]) expfit2 = np.append(expfit2, expfit[2]) expfit3 = np.append(expfit3, expfit[3]) pixels = np.append(pixels, pxl_list[s1]) thresh = np.rec.fromarrays([pixels, thresholds, expfit0, expfit1, expfit2, expfit3], dtype=[('pixel_no', int), ('td_threshold', float), ('expfit0', float), ('expfit1', float), ('expfit2', float), ('expfit3', float)]) in_file_h5.createTable(in_file_h5.root, 'td_threshold', thresh, filters=self.filter_tables) p1, p2, single_scan = plotting.plot_timewalk(h5_filename) output_file(self.output_filename + '.html', title=self.run_name) status = plotting.plot_status(h5_filename) save(hplot(vplot(p1, p2, status), single_scan))
def distribution_plot(df, target, value):
    name = value + "distribution overlay"
    hist = Histogram(df, values=value, color=target, legend='top_right')
    output_file(join('plots', name + '.html'))
    save(hist)
def boxplot(df, target, value):
    name = target + "against" + value + "boxplot"
    p = BoxPlot(df, values=value, label=target, title=name)
    output_file(join('plots', name + ".html"))
    save(p)
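# Hypothetical usage of distribution_plot() / boxplot() above -- a sketch only,
# assuming a small frame with a categorical target column, one numeric column,
# and an existing plots/ directory for the output HTML files.
import pandas as pd

toy = pd.DataFrame({'label': ['a', 'a', 'b', 'b'], 'score': [1.0, 1.5, 2.0, 2.5]})
distribution_plot(toy, target='label', value='score')  # Histogram of 'score' coloured by 'label'
boxplot(toy, target='label', value='score')            # BoxPlot of 'score' grouped by 'label'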
def preProcess(theFileName): df = pd.read_csv(str(theFileName)) if 'Unnamed: 0' in df.columns: df = df.drop('Unnamed: 0', axis=1) labBin = sklearn.preprocessing.LabelBinarizer() df['y'] = labBin.fit_transform(df['y']) dp = pd.get_dummies(df) X = dp.drop('y', axis=1) y = dp[['y']] # get the features theFeatures = X.columns # convert the dataframes to arrays X = X.values y = y.values y.shape = np.shape(y)[0] yOrig = y[:] # need this later for plotting feature impacts # and carry out feature scaling X = StandardScaler().fit_transform(X) #======================================================================= # apply random undersampling if labels are imbalanced labelSkewness = 100 * np.sum(y) * 1. / np.shape(y)[0] if np.min([labelSkewness, 100 - labelSkewness]) < (100. / 3.): rus = RandomUnderSampler(verbose=0) X, y = rus.fit_sample(X, y) #======================================================================= # select optimal number of features thisModel = LogisticRegression(penalty='l1', C=1) rfecv = RFECV(estimator=thisModel, step=1, cv=StratifiedKFold(y, n_folds=3), scoring='f1') Xt = rfecv.fit_transform(X, y) optimalNumberOfFeatures = rfecv.n_features_ introReport = [ 'Optimal Number of Attributes: ' + str(optimalNumberOfFeatures), 'The following attributes are the most influential to the outcome' ] #======================================================================= # plot number of selected features VS cross-validation scores plt.figure(figsize=(12, 8)) plt.xlabel("Number of Attributes", fontsize=20) plt.ylabel("Score", fontsize=20) plt.title("Attribute Selection", fontsize=25) plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_) imgOne = 'static/thePlot.jpg' plt.savefig('flask_files/' + imgOne, dpi=300) #======================================================================= # get the feature feature importance rankings model = RandomForestClassifier(n_estimators=300) model.fit(X, y) theImportances = list(model.feature_importances_) sortedImportances = sorted(theImportances, reverse=True) # ...and print the selected features along with their weights and ranks tableOne = [] for ii in range(1, optimalNumberOfFeatures + 1): tableOne.append( dict(Feature=str(theFeatures[theImportances.index( sortedImportances[ii - 1])]), Weight=str(sortedImportances[ii - 1]), Rank=str(ii))) #======================================================================= # plot histogram of the most important feature thisFeature = 0 allThoseFeatures = dp[theFeatures[theImportances.index( sortedImportances[thisFeature])]] plt.figure(figsize=(12, 8)) combinedOutcomes = plt.hist(allThoseFeatures, bins=10) # plt.hist(allThoseFeatures, bins=10) plt.xlabel('Attribute: ' + theFeatures[theImportances.index(sortedImportances[0])], fontsize=20) plt.ylabel('Count', fontsize=20) plt.title('Impact of the Most Influential Attribute', fontsize=25) imgTwo = 'static/theHist.jpg' plt.savefig('flask_files/' + imgTwo, dpi=300) #======================================================================= # plot impact of the most important feature positiv = allThoseFeatures[yOrig == 1] negativ = allThoseFeatures[yOrig == 0] plt.figure(figsize=(12, 8)) negA = plt.hist(negativ, bins=combinedOutcomes[1]) posA = plt.hist(positiv, bins=combinedOutcomes[1]) # yUpperLimit = np.max([negA[0], posA[0]])*1.01 # plt.subplot(1,2,1) # plt.hist(negativ,bins=combinedOutcomes[1]) # plt.ylim(ymax = yUpperLimit*1.01, ymin = 0) # plt.xlabel(theFeatures[theImportances.index(sortedImportances[thisFeature])], fontsize=16) # plt.ylabel('Count', 
fontsize=16) # plt.title('Negative', fontsize=20) # # plt.subplot(1,2,2) # plt.hist(positiv,bins=combinedOutcomes[1]) # plt.ylim(ymax = yUpperLimit, ymin = 0) # plt.xlabel(theFeatures[theImportances.index(sortedImportances[thisFeature])], fontsize=16) # plt.title('Positive',fontsize=20) # # imgThree = 'static/theNegPosHist.jpg' # plt.savefig('flask_files/'+imgThree, dpi=300) #======================================================================= a = posA[0] b = negA[0] c = combinedOutcomes[0] posImpact = np.divide(a, c) negImpact = np.divide(b, c) midPoints = [] for i in range(1, len(combinedOutcomes[1])): midPoints.append( (combinedOutcomes[1][i] + combinedOutcomes[1][i - 1]) / 2.) for i in range(len(posImpact)): if np.isnan(posImpact[i]): posImpact[i] = 0 if np.isnan(negImpact[i]): negImpact[i] = 0 plt.figure(figsize=(12, 8)) plt.hold(True) plt.plot(midPoints, posImpact, '.', markersize=20, label='Positive') plt.plot(midPoints, negImpact, 'r.', markersize=20, label='Negative') plt.legend(prop={'size': 20}) plt.xlabel(theFeatures[theImportances.index( sortedImportances[thisFeature])], fontsize=16) plt.ylabel('Relative Impact', fontsize=20) plt.grid() imgThree = 'static/theNegPosHist.jpg' plt.savefig('flask_files/' + imgThree, dpi=300) #======================================================================= # generate plots for report (this is save to an "html" file) from bokeh.charts import Histogram, output_file, show, save, gridplot from bokeh.plotting import figure plotList = [] for i in range(optimalNumberOfFeatures): thisFeatureIs = theFeatures[theImportances.index(sortedImportances[i])] allThoseFeatures = dp[thisFeatureIs] combinedOutcomes = plt.hist(allThoseFeatures, bins=10) positiv = allThoseFeatures[yOrig == 1] negativ = allThoseFeatures[yOrig == 0] negA = plt.hist(negativ, bins=combinedOutcomes[1]) posA = plt.hist(positiv, bins=combinedOutcomes[1]) posImpact = np.divide(posA[0], combinedOutcomes[0]) negImpact = np.divide(negA[0], combinedOutcomes[0]) midPoints = [] for i in range(1, len(combinedOutcomes[1])): midPoints.append( (combinedOutcomes[1][i] + combinedOutcomes[1][i - 1]) / 2.) 
for i in range(len(posImpact)): if np.isnan(posImpact[i]): posImpact[i] = 0 if np.isnan(negImpact[i]): negImpact[i] = 0 hist0 = Histogram(dp, values=thisFeatureIs, color='blue', title="Impact of " + thisFeatureIs, bins=10) plot0 = figure() plot0.xaxis.axis_label = thisFeatureIs plot0.yaxis.axis_label = "Relative Impact" # plot0.title = "Relative Impact of " + thisFeatureIs plot0.circle(midPoints, list(negImpact), size=10, color="red", alpha=0.9, legend='Negative') plot0.circle(midPoints, list(posImpact), size=10, color="green", alpha=0.9, legend='Positive') plotList.append([hist0, plot0]) output_file("flask_files/static/Report.html", title="Report") hist = gridplot(plotList) save(hist) #======================================================================= # specify the models to run tests with theModels = { 'Logistic Regression': LogisticRegression(penalty='l1'), 'LDA': LinearDiscriminantAnalysis(), 'SVM': SVC(kernel='linear'), 'Random Forest': RandomForestClassifier(n_estimators=300) } # ...then display the results of the tests classifierComparisons = [] for aModel in theModels: model = theModels[aModel] results = cross_validation.cross_val_score(model, Xt, y, scoring='f1', cv=StratifiedKFold( y, n_folds=3)) classifierComparisons.append( dict(Classifier=aModel, Score=np.max(results))) #======================================================================= # display the plots theJPGs = [imgOne, imgTwo, imgThree] #======================================================================= return introReport, tableOne, optimalNumberOfFeatures, classifierComparisons, theJPGs
cursor.execute(sql)

sql = """INSERT INTO prod1 SELECT prod_id, COUNT(cust_id) AS frequency FROM new_schema.cust2 GROUP BY prod_id ORDER BY prod_id;"""
cursor.execute(sql)

sql = """select * from prod1;"""
cursor.execute(sql)
data = cursor.fetchall()
print(data)

df = pd.DataFrame([[ij for ij in i] for i in data])
df.rename(columns={0: 'Product ID', 1: 'Frequency'}, inplace=True)
df = df.sort_values(['Product ID'], ascending=[1])
print(df.head(1536))

#from bokeh.sampledata.autompg import autompg as df
from bokeh.charts import Scatter, output_file, show, save
from bokeh.models import HoverTool

scatter = Scatter(df, x='Product ID', y='Frequency', tools='hover', legend=False)
hover = scatter.select(dict(type=HoverTool))
hover.tooltips = [("Product ID", "$x{int}"), ("Frequency", "$y{int}")]
print(df.head())

output_file('prodscatter.html')
save(scatter)
# import pandas module
import pandas as pd
# import Donut, output_file, and save
from bokeh.charts import Donut, output_file, save

# name "icon" as the results from the csv file
icon = pd.read_csv('dataproject.csv')

# create pie chart
p = Donut(icon, label="precipitation")
output_file('piechart.html')

# save chart
save(p)
def jwst_inventory(instruments=JWST_INSTRUMENTS,
                   dataproducts=['image', 'spectrum', 'cube'],
                   caom=False, plot=False):
    """Gather a full inventory of all JWST data in each instrument
    service by instrument/dtype

    Parameters
    ----------
    instruments: sequence
        The list of instruments to count
    dataproducts: sequence
        The types of dataproducts to count
    caom: bool
        Query CAOM service
    plot: bool
        Return a pie chart of the data

    Returns
    -------
    astropy.table.table.Table
        The table of record counts for each instrument and mode
    """
    logging.info('Searching database...')

    # Iterate through instruments
    inventory, keywords = [], {}
    for instrument in instruments:
        ins = [instrument]
        for dp in dataproducts:
            count = instrument_inventory(instrument, dataproduct=dp, caom=caom)
            ins.append(count)

        # Get the total
        ins.append(sum(ins[-3:]))

        # Add it to the list
        inventory.append(ins)

        # Add the keywords to the dict
        keywords[instrument] = instrument_keywords(instrument, caom=caom)

    logging.info('Completed database search for {} instruments and {} data products.'
                 .format(instruments, dataproducts))

    # Make the table
    all_cols = ['instrument'] + dataproducts + ['total']
    table = pd.DataFrame(inventory, columns=all_cols)

    # Melt the table
    table = pd.melt(table, id_vars=['instrument'], value_vars=dataproducts,
                    value_name='files', var_name='dataproduct')

    # Plot it
    if plot:
        # Determine plot location and names
        output_dir = get_config()['outputs']

        if caom:
            output_filename = 'database_monitor_caom'
        else:
            output_filename = 'database_monitor_jwst'

        # Make the plot
        plt = Donut(table, label=['instrument', 'dataproduct'], values='files',
                    text_font_size='12pt', hover_text='files',
                    name="JWST Inventory", plot_width=600, plot_height=600)

        # Save the plot as full html
        html_filename = output_filename + '.html'
        outfile = os.path.join(output_dir, 'monitor_mast', html_filename)
        output_file(outfile)
        save(plt)
        set_permissions(outfile)

        logging.info('Saved Bokeh plots as HTML file: {}'.format(html_filename))

        # Save the plot as components
        plt.sizing_mode = 'stretch_both'
        script, div = components(plt)

        div_outfile = os.path.join(output_dir, 'monitor_mast',
                                   output_filename + "_component.html")
        with open(div_outfile, 'w') as f:
            f.write(div)
            f.close()
        set_permissions(div_outfile)

        script_outfile = os.path.join(output_dir, 'monitor_mast',
                                      output_filename + "_component.js")
        with open(script_outfile, 'w') as f:
            f.write(script)
            f.close()
        set_permissions(script_outfile)

        logging.info('Saved Bokeh components files: {}_component.html and {}_component.js'
                     .format(output_filename, output_filename))

    return table, keywords
from bokeh.charts import Scatter, output_file, save
from bokeh.charts import HeatMap, output_file, save
from bokeh.layouts import gridplot
from bokeh.plotting import figure, save, output_file

#read file
df = pd.read_csv('file.csv')

#Plots a heatmap
p = HeatMap(df, x='Date', y='Temperature', values=None, stat='count',
            xgrid=False, ygrid=False, hover_tool=True, hover_text='Temperature',
            plot_width=2000, plot_height=1000)
output_file('HeatMap.html')
#Saves graph
save(p)

#box = BoxPlot(df, values='Temperature', label='Date', title="Temperature Date Box Plot", plot_width=400)

#Creates a Scatter Plot
v = Scatter(df, x='Date', y='Temperature', color='red', title="Date vs. Temperature",
            legend='top_right', xlabel="Date", ylabel="Temperature",
            plot_width=2000, plot_height=1000)
output_file('Scatter.html')
#Saves Graph as a html file
save(v)

#Creates array for Temperature
Temperatures = np.array(df['Temperature'])

#Creates array for Date
def single_timewalk_plot(h5_filename):
    output_file('/home/mark/Desktop/Stuff/single.html')
    p1, p2, single_scan = plot_timewalk2(h5_filename)
    save(hplot(vplot(p1, p2), single_scan))
def configuration_piechart():
    """Create a piechart showing distribution of configurations for each
    instrument/detector combination.
    """
    logging.info('Creating target piechart')

    data_dir = get_settings()['composite_dir']
    configs = {}
    for dataset in glob.glob(os.path.join(data_dir, '*.fits')):
        instrument = os.path.split(dataset)[1].split('_')[3]
        grating = os.path.split(dataset)[1].split('_')[5]
        cenwave = os.path.split(dataset)[1].split('_')[6]
        if not instrument in configs:
            configs[instrument] = ['{}/{}'.format(grating, cenwave)]
        else:
            configs[instrument].append('{}/{}'.format(grating, cenwave))

    # COS FUV
    try:
        settings = Counter(configs['cos-fuv'])
        charts.output_file(os.path.join(get_settings()['plot_dir'], 'pie_config_cos_fuv.html'))
        plot = charts.Donut(settings.values(), settings.keys(), width=1200, height=600,
                            title='COS FUV breakdown')
        plot_file = os.path.join(get_settings()['plot_dir'], 'pie_config_cos_fuv.html')
        charts.save(obj=plot, filename=plot_file)
        set_permissions(plot_file)
    except KeyError:
        logging.info('No COS FUV datasets')

    # COS NUV
    try:
        settings = Counter(configs['cos-nuv'])
        charts.output_file(os.path.join(get_settings()['plot_dir'], 'pie_config_cos_nuv.html'))
        plot = charts.Donut(settings.values(), settings.keys(), width=1200, height=600,
                            title='COS NUV breakdown')
        plot_file = os.path.join(get_settings()['plot_dir'], 'pie_config_cos_nuv.html')
        charts.save(obj=plot, filename=plot_file)
        set_permissions(plot_file)
    except KeyError:
        logging.info('No COS NUV datasets')

    # STIS FUV
    try:
        settings = Counter(configs['stis-fuv-mama'])
        charts.output_file(os.path.join(get_settings()['plot_dir'], 'pie_config_stis_fuv.html'))
        plot = charts.Donut(settings.values(), settings.keys(), width=1200, height=600,
                            title='STIS FUV breakdown')
        plot_file = os.path.join(get_settings()['plot_dir'], 'pie_config_stis_fuv.html')
        charts.save(obj=plot, filename=plot_file)
        set_permissions(plot_file)
    except KeyError:
        logging.info('No STIS FUV-MAMA datasets')

    # STIS NUV
    try:
        settings = Counter(configs['stis-nuv-mama'])
        charts.output_file(os.path.join(get_settings()['plot_dir'], 'pie_config_stis_nuv.html'))
        plot = charts.Donut(settings.values(), settings.keys(), width=1200, height=600,
                            title='STIS NUV breakdown')
        plot_file = os.path.join(get_settings()['plot_dir'], 'pie_config_stis_nuv.html')
        charts.save(obj=plot, filename=plot_file)
        set_permissions(plot_file)
    except KeyError:
        logging.info('No STIS NUV datasets')
"minutes", values="duration", color="color", title=project, background_fill_alpha=0, border_fill_alpha=0, outline_line_alpha=0, xgrid=False, legend=None, toolbar_location=None, width=900, height=400) # plot styling plot.axis.axis_label = None plot.xaxis.axis_label = None plot.axis.major_tick_line_color = None plot.axis.minor_tick_line_color = None # plot.xaxis[0].ticker.desired_num_ticks = 5 # plot.xaxis.major_label_text_alpha = 0 plot.xaxis.major_label_orientation = pi / 2 for g in plot.renderers: if "GlyphRenderer" in str(g.__repr__): g.glyph.line_alpha = 0 all_plots.append(plot) output_file(html_output, title="rvt_pulse", mode="inline") save(column(all_plots))
cursor = db.cursor()
cursor.execute("select cust_id,quantity from new_schema.cust2 where prod_id=21975 ;")
data = cursor.fetchall()
print(data)

df = pd.DataFrame([[ij for ij in i] for i in data])
df.rename(columns={0: 'Customer ID', 1: 'Quantity'}, inplace=True)
df = df.sort_values(['Customer ID'], ascending=[1])
#df['new'] = 0
df = df.groupby("Customer ID").sum()
print(df.head(10))

from bokeh.charts import Bar, output_file, save
from bokeh.models import HoverTool

bar = Bar(df, 'Customer ID', values='Quantity', title="test chart",
          responsive=True, tools='hover', legend=False)
hover = bar.select(dict(type=HoverTool))
hover.tooltips = [("Customer ID", "$x"), ("Quantity", "$y")]

output_file('listofcust.html')
save(bar)
# import pandas module
import pandas as pd
# import Histogram, output_file, and save
from bokeh.charts import Histogram, output_file, save

# name "wind" as the results from the csv file
wind = pd.read_csv('dataproject3.csv')

# create histogram
h = Histogram(wind, title="Wind Speeds")
output_file('Histogram.html')

# save histogram
save(h)