def init_plot(crawl_name):
    """Return the autoload script for the monitor plot of ``crawl_name``.

    The document named ``crawl_name`` is loaded through a ``Session``. When
    it already has children, the first child is reused as the plot;
    otherwise a placeholder "Crawler Monitor" figure is created. The plot
    is then added to the document, the document is stored, and the script
    produced by ``autoload_server`` is returned.
    """
    session = Session()
    document = Document()
    session.use_doc(crawl_name)
    session.load_document(document)

    if not document.context.children:
        output_server(crawl_name)

        # TODO: Remove these when Bokeh is upgraded
        # placeholders or Bokeh can't inject properly
        now = np.datetime64(datetime.now())
        placeholder_x = Range1d(now, now + 1)
        placeholder_y = ["urls"]

        # styling suggested by Bryan
        plot = figure(
            title="Crawler Monitor",
            tools="hover",
            x_axis_type="datetime",
            y_axis_location="right",
            x_range=placeholder_x,
            y_range=placeholder_y,
            width=1200,
            height=600,
        )
        plot.toolbar_location = None
        plot.xgrid.grid_line_color = None
    else:
        plot = document.context.children[0]

    document.add(plot)
    session.store_document(document)

    # TODO: Looks like a Bokeh bug, probably not repeatable with current code
    # (strips the u'' prefix from the embedded model id).
    raw_script = autoload_server(plot, session)
    return raw_script.replace("'modelid': u'", "'modelid': '")
def init_plot(crawl_name):
    """Return the autoload script for the monitor plot of ``crawl_name``.

    The document named ``crawl_name`` is loaded through a ``Session``. When
    it already has children, the first child is reused as the plot;
    otherwise a placeholder "Crawler Monitor" figure is created. The plot
    is then added to the document, the document is stored, and the script
    produced by ``autoload_server`` is returned.
    """
    session = Session()
    document = Document()
    session.use_doc(crawl_name)
    session.load_document(document)

    if not document.context.children:
        output_server(crawl_name)

        # TODO: Remove these when Bokeh is upgraded
        # placeholders or Bokeh can't inject properly
        now = np.datetime64(datetime.now())
        placeholder_x = Range1d(now, now + 1)
        placeholder_y = ["urls"]

        # styling suggested by Bryan
        plot = figure(
            title="Crawler Monitor",
            tools="hover",
            x_axis_type="datetime",
            y_axis_location="right",
            x_range=placeholder_x,
            y_range=placeholder_y,
            width=1200,
            height=600,
        )
        plot.toolbar_location = None
        plot.xgrid.grid_line_color = None
    else:
        plot = document.context.children[0]

    document.add(plot)
    session.store_document(document)

    # TODO: Looks like a Bokeh bug, probably not repeatable with current code
    # (strips the u'' prefix from the embedded model id).
    raw_script = autoload_server(plot, session)
    return raw_script.replace("'modelid': u'", "'modelid': '")
def wrapper(*args, **kwargs):
    """Call ``func`` and store its result in a newly published document.

    ``func`` and ``url`` are taken from the enclosing scope (not visible
    here). The returned object is tagged with the session's ``docid`` and
    ``root_url`` before it is added to the document and stored.
    """
    doc = Document()
    server_session = Session(name=url, root_url=url)
    server_session.use_doc(doc.docid)
    server_session.load_document(doc)
    server_session.publish()

    # Switch off automatic add/store; the result is added and stored
    # explicitly below.
    doc.autoadd = False
    doc.autostore = False

    result = func(*args, **kwargs)
    result._docid = server_session.docid
    result._root_url = server_session.root_url

    doc.add(result)
    server_session.store_document(doc)
    return result
def wrapper(*args, **kwargs):
    """Call ``func``, attach an autoload tag to its result and store it.

    ``func``, ``url`` and ``prefix`` are taken from the enclosing scope (not
    visible here). A uniquely named document (``prefix`` + a random UUID) is
    selected on the session, the result of ``func`` receives the
    ``autoload_server`` output as its ``tag`` attribute, and the current
    document is stored.
    """
    unique_name = prefix + str(uuid.uuid4())
    server_session = Session(name=url, root_url=url)
    server_session.use_doc(unique_name)
    server_session.load_document(curdoc())

    # Switch off automatic add/store; the result is added and stored
    # explicitly below.
    curdoc().autoadd = False
    curdoc().autostore = False

    result = func(*args, **kwargs)
    result.tag = embed.autoload_server(result, server_session)
    curdoc().add(result)

    stored = server_session.store_document(curdoc())
    logger.debug("stored: %s", str(stored))
    return result
def wrapper(*args, **kwargs):
    """Call ``func``, attach an autoload tag to its result and store it.

    ``func``, ``url`` and ``prefix`` are taken from the enclosing scope (not
    visible here). A uniquely named document (``prefix`` + a random UUID) is
    selected on the session, the result of ``func`` receives the
    ``autoload_server`` output as its ``tag`` attribute, and the current
    document is stored.
    """
    unique_name = prefix + str(uuid.uuid4())
    server_session = Session(name=url, root_url=url)
    server_session.use_doc(unique_name)
    server_session.load_document(curdoc())

    # Switch off automatic add/store; the result is added and stored
    # explicitly below.
    curdoc().autoadd = False
    curdoc().autostore = False

    result = func(*args, **kwargs)
    result.tag = embed.autoload_server(result, server_session)
    curdoc().add(result)

    stored = server_session.store_document(curdoc())
    logger.debug("stored: %s", str(stored))
    return result
import pandas as pd from bokeh.models import (Plot, ColumnDataSource, DataRange1d, FactorRange, LinearAxis, CategoricalAxis, Grid, Glyph) from bokeh.models.widgets import (DateRangeSlider, HBox, VBox, Paragraph, Select, VBoxModelForm) from bokeh.glyphs import Rect from bokeh.document import Document from bokeh.session import Session import employment_data_reader as emp from employment_utils import get_country_for_byte, get_jobtype_for_byte document = Document() session = Session() session.use_doc('employment_server') session.load_document(document) days_of_week = {0: "Monday", 1: "Tuesday", 2: "Wednesday", 3: "Thursday", 4: "Friday", 5: "Saturday", 6: "Sunday"} bounds_start = datetime.date(2012, 11, 1) bounds_end = datetime.date(2013, 12, 30) start = datetime.date(2013, 5, 26) end = datetime.date(2013, 7, 5) source_country = ColumnDataSource(data=dict()) source_dow = ColumnDataSource(data=dict()) source_jobtype = ColumnDataSource(data=dict()) source_par = Paragraph() country_choices = ["Dominican Republic___________________", "Colombia_____________________________", "Mexico_______________________________", "Peru_________________________________",
class Population(object):
    """Population pyramid with year and location selectors.

    State is kept in the ``population_reveal`` document; every selection
    change recomputes the pyramid data and stores the document again.
    """

    # Initial selection; overwritten per instance by the change callbacks.
    year = 2010
    location = "World"

    def __init__(self):
        """Load the server document and the population data frame."""
        from bokeh.objects import ColumnDataSource
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population_reveal')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

    def render(self):
        """Build plot and layout, add the layout to the document, fill data."""
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        """Create ``self.plot`` with axes, grids, both quad glyphs and legend."""
        from bokeh.objects import (Plot, DataRange1d, LinearAxis, Grid, Glyph,
                                   Legend, SingleIntervalTicker)
        from bokeh.glyphs import Quad

        source = self.source_pyramid
        x_range = DataRange1d(sources=[source.columns("male"),
                                       source.columns("female")])
        y_range = DataRange1d(sources=[source.columns("groups")])

        self.plot = Plot(title="Widgets", x_range=x_range, y_range=y_range,
                         plot_width=600, plot_height=600)

        bottom_axis = LinearAxis(plot=self.plot)
        self.plot.below.append(bottom_axis)
        left_axis = LinearAxis(plot=self.plot,
                               ticker=SingleIntervalTicker(interval=5))
        self.plot.left.append(left_axis)

        # The grids are only constructed, never appended explicitly;
        # presumably ``plot=`` attaches them -- TODO confirm.
        Grid(plot=self.plot, dimension=0, ticker=bottom_axis.ticker)
        Grid(plot=self.plot, dimension=1, ticker=left_axis.ticker)

        def add_bars(left, right, color):
            # One horizontal bar per age group, spanning groups..shifted.
            quad = Quad(left=left, right=right, bottom="groups",
                        top="shifted", fill_color=color)
            renderer = Glyph(data_source=source, xdata_range=x_range,
                             ydata_range=y_range, glyph=quad)
            self.plot.renderers.append(renderer)
            return renderer

        male_bars = add_bars("male", 0, "blue")
        female_bars = add_bars(0, "female", "violet")

        legend = Legend(plot=self.plot,
                        legends=dict(Male=[male_bars], Female=[female_bars]))
        self.plot.renderers.append(legend)

    def on_year_change(self, obj, attr, old, new):
        """Change callback of the year select: store ``new`` as int, refresh."""
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        """Change callback of the location select: store ``new``, refresh."""
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        """Create the two selects and stack them above the plot."""
        from bokeh.widgetobjects import Select, HBox, VBox

        year_options = list(map(str, sorted(self.df.Year.unique())))
        location_options = sorted(self.df.Location.unique())

        year_select = Select(title="Year:", value="2010",
                             options=year_options)
        location_select = Select(title="Location:", value="World",
                                 options=location_options)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        selectors = HBox(children=[year_select, location_select])
        self.layout = VBox(children=[selectors, self.plot])

    def update_pyramid(self):
        """Recompute the pyramid columns for the current selection and store.

        Male shares are negated so that they extend to the left of zero.
        """
        frame = self.df
        selected = frame[(frame.Location == self.location) &
                         (frame.Year == self.year)]

        males = selected[selected.Sex == "Male"]
        females = selected[selected.Sex == "Female"]
        total = males.Value.sum() + females.Value.sum()

        age_starts = males.AgeGrpStart.tolist()

        self.source_pyramid.data = dict(
            groups=age_starts,
            # Upper edge of each bar: the next group's start; the last bar
            # gets a fixed height of 5.
            shifted=age_starts[1:] + [age_starts[-1] + 5],
            male=-males.Value / total,
            female=females.Value / total,
        )
        self.session.store_document(self.document)
class Dashboard(object): """ A collection of plots and widgets served by bokeh-server. """ def __init__(self, port = BK_SERVER_PORT, doc_name = DOC_NAME, colors = COLORS, show = False, ): """ Initialize Bokeh session and document. """ self.document = Document() self.port = port self.show = show self.doc_name = doc_name name="http://*****:*****@type"), ]) seabornify(plot) return plot # Update Routine # -------------- def update(self): self.session.store_objects(self.data_source) # Run # --- def run(self, poll_interval=1): self.session.store_document(self.document) link = self.session.object_link(self.document.context) if self.show: import webbrowser webbrowser.open(link) else: print("Please visit %s to see the plots (press ctrl-C to exit)" % link) try: while True: self.session.load_document(self.document) sleep(poll_interval) except KeyboardInterrupt: print() except ConnectionError: print("Connection to bokeh-server was terminated.") if __name__ == "__main__": args = parse_args() dashboard = Dashboard(**vars(args)) dashboard.run()
class DashBoard(object):
    """Crawler dashboard kept in the ``crawler_dashboard`` document.

    Combines the plots exposed by ``Harvest``, ``Domain`` and ``Termite``
    into one layout and keeps refreshing their sources in ``run``.
    """

    def __init__(self):
        """Load the document, create the plot providers and store the layout."""
        self.document = Document()
        self.session = Session()
        self.session.use_doc('crawler_dashboard')
        self.session.load_document(self.document)

        self.harvest = Harvest()
        self.domain = Domain()
        self.termite = Termite()

        self.document.add(self.create_layout())
        self.session.store_document(self.document)

    def render(self):
        """Rebuild the layout, add it to the document and refresh the data."""
        self.create_layout()
        self.document.add(self.layout)
        self.update_data()

    def create_layout(self):
        """Assemble, remember (``self.layout``) and return the layout."""
        harvest_row = HBox(children=[self.harvest.plot,
                                     self.harvest.rate_plot])
        domain_column = VBox(
            children=[
                self.domain.sort_relevant_plot,
                self.domain.sort_crawled_plot,
                self.domain.sort_frontier_plot,
            ],
            width=200,
        )
        domain_row = HBox(children=[domain_column])

        self.layout = VBox(children=[harvest_row, domain_row,
                                     self.termite.plot])
        return self.layout

    def update_data(self):
        """Refresh every provider's source(s) and store the document."""
        self.harvest.source = self.harvest.update_source()

        relevant, crawled, frontier = self.domain.update_source()
        self.domain.sort_relevant_source = relevant
        self.domain.sort_crawled_source = crawled
        self.domain.sort_frontier_source = frontier

        termite_data, termite_source = self.termite.update_source()
        self.termite.data = termite_data
        self.termite.source = termite_source

        self.session.store_document(self.document)

    def run(self, poll_interval=0.5):
        """Refresh and reload in a loop until interrupted or disconnected.

        ``poll_interval`` is the pause between iterations, in seconds.
        """
        try:
            while True:
                self.update_data()
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            # Ctrl-C ends the loop.
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
class DataTables(object):
    """Filterable data table and scatter plot of the ``mpg`` data frame.

    The widgets live in the ``data_tables_server`` document. Each filter
    select narrows ``mpg`` to rows matching the chosen value; every change
    rebuilds ``self.source`` and stores the document through the session.
    """

    def __init__(self):
        """Load the server document, build the layout and store it."""
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        # ``None`` means "no filtering on this column".
        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        """Build and return the layout: filter selects, plot and data table."""
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        def filter_select(title, choices, handler):
            # Every select offers "All" (no filtering) ahead of the values.
            select = Select(title=title, value="All",
                            options=["All"] + choices)
            select.on_change('value', handler)
            return select

        manufacturer_select = filter_select(
            "Manufacturer:", manufacturers, self.on_manufacturer_change)
        model_select = filter_select(
            "Model:", models, self.on_model_change)
        transmission_select = filter_select(
            "Transmission:", transmissions, self.on_transmission_change)
        drive_select = filter_select(
            "Drive:", drives, self.on_drive_change)
        class_select = filter_select(
            "Class:", classes, self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", title="Manufacturer",
                        editor=SelectEditor(options=manufacturers),
                        formatter=StringFormatter(font_style="bold")),
            TableColumn(field="model", title="Model",
                        editor=StringEditor(completions=models)),
            TableColumn(field="displ", title="Displacement",
                        editor=NumberEditor(step=0.1),
                        formatter=NumberFormatter(format="0.0")),
            TableColumn(field="year", title="Year", editor=IntEditor()),
            TableColumn(field="cyl", title="Cylinders", editor=IntEditor()),
            TableColumn(field="trans", title="Transmission",
                        editor=SelectEditor(options=transmissions)),
            TableColumn(field="drv", title="Drive",
                        editor=SelectEditor(options=drives)),
            TableColumn(field="class", title="Class",
                        editor=SelectEditor(options=classes)),
            TableColumn(field="cty", title="City MPG", editor=IntEditor()),
            TableColumn(field="hwy", title="Highway MPG", editor=IntEditor()),
        ]
        data_table = DataTable(source=self.source, columns=columns,
                               editable=True)

        xdr = DataRange1d()
        ydr = DataRange1d()
        plot = Plot(title=None, x_range=xdr, y_range=ydr,
                    plot_width=800, plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)

        # City and highway mileage share the row index as x coordinate.
        cty_glyph = Circle(x="index", y="cty", fill_color="#396285", size=8,
                           fill_alpha=0.5, line_alpha=0.5)
        hwy_glyph = Circle(x="index", y="hwy", fill_color="#CE603D", size=8,
                           fill_alpha=0.5, line_alpha=0.5)
        cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph)
        hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph)

        tooltips = [
            ("Manufacturer", "@manufacturer"),
            ("Model", "@model"),
            ("Displacement", "@displ"),
            ("Year", "@year"),
            ("Cylinders", "@cyl"),
            ("Transmission", "@trans"),
            ("Drive", "@drv"),
            ("Class", "@class"),
        ]
        cty_hover_tool = HoverTool(plot=plot, renderers=[cty],
                                   tooltips=tooltips + [("City MPG", "@cty")])
        hwy_hover_tool = HoverTool(plot=plot, renderers=[hwy],
                                   tooltips=tooltips + [("Highway MPG", "@hwy")])
        select_tool = BoxSelectTool(plot=plot, renderers=[cty, hwy],
                                    dimensions=['width'])
        plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool])
        plot.renderers.extend([cty, hwy, ygrid])

        controls = VBox(children=[manufacturer_select, model_select,
                                  transmission_select, drive_select,
                                  class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, data_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        """Change callback: filter by manufacturer (``"All"`` clears it)."""
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        """Change callback: filter by model (``"All"`` clears it)."""
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        """Change callback: filter by transmission (``"All"`` clears it)."""
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        """Change callback: filter by drive (``"All"`` clears it)."""
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        """Change callback: filter by class (``"All"`` clears it)."""
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        """Apply the active filters to ``mpg``, refresh the source, store."""
        active_filters = (
            ("manufacturer", self.manufacturer_filter),
            ("model", self.model_filter),
            ("trans", self.transmission_filter),
            ("drv", self.drive_filter),
            ("class", self.class_filter),
        )
        df = mpg
        for column, wanted in active_filters:
            if wanted:
                df = df[df[column] == wanted]

        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        """Print the document link and poll the server for updates.

        Parameters
        ----------
        do_view : bool
            When true, pass the link to ``view`` as well as printing it.
        poll_interval : float
            Polling interval handed to ``Session.poll_document``.
        """
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view:
            view(link)
        print("\npress ctrl-C to exit")
        # Forward the caller's interval; it used to be accepted but ignored.
        self.session.poll_document(self.document, poll_interval)
class DataTables(object):
    """Filterable Handsontable and scatter plot of the ``mpg`` data frame.

    The widgets live in the ``data_tables_server`` document. Each filter
    select narrows ``mpg`` to rows matching the chosen value; every change
    rebuilds ``self.source`` and stores the document through the session.
    """

    def __init__(self):
        """Load the server document, build the layout and store it."""
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        # ``None`` means "no filtering on this column".
        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        """Build and return the layout: filter selects, plot and table."""
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        def filter_select(title, choices, handler):
            # Every select offers "All" (no filtering) ahead of the values.
            select = Select(title=title, value="All",
                            options=["All"] + choices)
            select.on_change('value', handler)
            return select

        manufacturer_select = filter_select(
            "Manufacturer:", manufacturers, self.on_manufacturer_change)
        model_select = filter_select(
            "Model:", models, self.on_model_change)
        transmission_select = filter_select(
            "Transmission:", transmissions, self.on_transmission_change)
        drive_select = filter_select(
            "Drive:", drives, self.on_drive_change)
        class_select = filter_select(
            "Class:", classes, self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", header="Manufacturer",
                        type="autocomplete", source=manufacturers),
            TableColumn(field="model", header="Model",
                        type="autocomplete", source=models),
            TableColumn(field="displ", header="Displacement",
                        type="numeric", format="0.00"),
            TableColumn(field="year", header="Year", type="numeric"),
            TableColumn(field="cyl", header="Cylinders", type="numeric"),
            TableColumn(field="trans", header="Transmission",
                        type="dropdown", strict=True, source=transmissions),
            TableColumn(field="drv", header="Drive",
                        type="autocomplete", strict=True, source=drives),
            TableColumn(field="class", header="Class",
                        type="autocomplete", strict=True, source=classes),
            TableColumn(field="cty", header="City MPG", type="numeric"),
            TableColumn(field="hwy", header="Highway MPG", type="numeric"),
        ]
        handson_table = HandsonTable(source=self.source, columns=columns,
                                     sorting=True)

        xdr = DataRange1d(sources=[self.source.columns("index")])
        ydr = DataRange1d(sources=[self.source.columns("cty"),
                                   self.source.columns("hwy")])
        plot = Plot(title=None, data_sources=[self.source], x_range=xdr,
                    y_range=ydr, plot_width=800, plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)

        # City and highway mileage share the row index as x coordinate.
        cty = Glyph(data_source=self.source,
                    glyph=Circle(x="index", y="cty", fill_color="#396285",
                                 size=8, fill_alpha=0.5, line_alpha=0.5))
        hwy = Glyph(data_source=self.source,
                    glyph=Circle(x="index", y="hwy", fill_color="#CE603D",
                                 size=8, fill_alpha=0.5, line_alpha=0.5))

        select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False)
        plot.tools.append(select_tool)
        overlay = BoxSelectionOverlay(tool=select_tool)
        plot.renderers.extend([cty, hwy, ygrid, overlay])

        controls = VBox(children=[manufacturer_select, model_select,
                                  transmission_select, drive_select,
                                  class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, handson_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        """Change callback: filter by manufacturer (``"All"`` clears it)."""
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        """Change callback: filter by model (``"All"`` clears it)."""
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        """Change callback: filter by transmission (``"All"`` clears it)."""
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        """Change callback: filter by drive (``"All"`` clears it)."""
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        """Change callback: filter by class (``"All"`` clears it)."""
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        """Apply the active filters to ``mpg``, refresh the source, store."""
        active_filters = (
            ("manufacturer", self.manufacturer_filter),
            ("model", self.model_filter),
            ("trans", self.transmission_filter),
            ("drv", self.drive_filter),
            ("class", self.class_filter),
        )
        df = mpg
        for column, wanted in active_filters:
            if wanted:
                df = df[df[column] == wanted]

        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        """Print the document link and poll the server for updates.

        Parameters
        ----------
        do_view : bool
            When true, pass the link to ``view`` as well as printing it.
        poll_interval : float
            Polling interval handed to ``Session.poll_document``.
        """
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view:
            view(link)
        print("\npress ctrl-C to exit")
        # Forward the caller's interval; it used to be accepted but ignored.
        self.session.poll_document(self.document, poll_interval)
class DashBoard(object):
    """Crawler dashboard kept in the ``crawler_dashboard`` document.

    Combines the plots exposed by ``Harvest`` and ``Domain`` (both created
    from ``path``) into one layout and keeps refreshing their sources in
    ``run``. The session is created against ``url``.
    """

    def __init__(self, path, url):
        """Load the document, create the plot providers and store the layout."""
        self.document = Document()
        self.session = Session(name=url, root_url=url)
        self.session.use_doc('crawler_dashboard')
        self.session.load_document(self.document)

        self.harvest = Harvest(path)
        self.domain = Domain(path)

        self.document.add(self.create_layout())
        self.session.store_document(self.document)

    def render(self):
        """Rebuild the layout, add it to the document and refresh the data."""
        self.create_layout()
        self.document.add(self.layout)
        self.update_data()

    def create_layout(self):
        """Assemble, remember (``self.layout``) and return the layout."""
        harvest_row = HBox(children=[self.harvest.plot,
                                     self.harvest.rate_plot])
        domain_column = VBox(
            children=[
                self.domain.sort_relevant_plot,
                self.domain.sort_crawled_plot,
                self.domain.sort_frontier_plot,
            ],
            width=200,
        )
        domain_row = HBox(children=[domain_column])

        self.layout = VBox(children=[harvest_row, domain_row])
        return self.layout

    def update_data(self):
        """Refresh every provider's source(s) and store the document."""
        self.harvest.source = self.harvest.update_source()

        relevant, crawled, frontier = self.domain.update_source()
        self.domain.sort_relevant_source = relevant
        self.domain.sort_crawled_source = crawled
        self.domain.sort_frontier_source = frontier

        self.session.store_document(self.document)

    def run(self, poll_interval=0.5):
        """Refresh and reload in a loop until interrupted or disconnected.

        ``poll_interval`` is the pause between iterations, in seconds.
        """
        try:
            while True:
                self.update_data()
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            # Ctrl-C ends the loop.
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
class DataTables(object):
    """Filterable Handsontable and scatter plot of the ``mpg`` data frame.

    The widgets live in the ``data_tables_server`` document. Each filter
    select narrows ``mpg`` to rows matching the chosen value; every change
    rebuilds ``self.source`` and stores the document through the session.
    """

    def __init__(self):
        """Load the server document, build the layout and store it."""
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        # ``None`` means "no filtering on this column".
        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        """Build and return the layout: filter selects, plot and table."""
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        def filter_select(title, choices, handler):
            # Every select offers "All" (no filtering) ahead of the values.
            select = Select(title=title, value="All",
                            options=["All"] + choices)
            select.on_change('value', handler)
            return select

        manufacturer_select = filter_select(
            "Manufacturer:", manufacturers, self.on_manufacturer_change)
        model_select = filter_select(
            "Model:", models, self.on_model_change)
        transmission_select = filter_select(
            "Transmission:", transmissions, self.on_transmission_change)
        drive_select = filter_select(
            "Drive:", drives, self.on_drive_change)
        class_select = filter_select(
            "Class:", classes, self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", header="Manufacturer",
                        type="autocomplete", source=manufacturers),
            TableColumn(field="model", header="Model",
                        type="autocomplete", source=models),
            TableColumn(field="displ", header="Displacement",
                        type="numeric", format="0.00"),
            TableColumn(field="year", header="Year", type="numeric"),
            TableColumn(field="cyl", header="Cylinders", type="numeric"),
            TableColumn(field="trans", header="Transmission",
                        type="dropdown", strict=True, source=transmissions),
            TableColumn(field="drv", header="Drive",
                        type="autocomplete", strict=True, source=drives),
            TableColumn(field="class", header="Class",
                        type="autocomplete", strict=True, source=classes),
            TableColumn(field="cty", header="City MPG", type="numeric"),
            TableColumn(field="hwy", header="Highway MPG", type="numeric"),
        ]
        handson_table = HandsonTable(source=self.source, columns=columns,
                                     sorting=True)

        x_range = DataRange1d(sources=[self.source.columns("index")])
        y_range = DataRange1d(sources=[self.source.columns("cty"),
                                       self.source.columns("hwy")])
        plot = Plot(title=None, data_sources=[self.source], x_range=x_range,
                    y_range=y_range, plot_width=800, plot_height=300)
        bottom_axis = LinearAxis(plot=plot)
        plot.below.append(bottom_axis)
        left_axis = LinearAxis(plot=plot)
        horizontal_grid = Grid(plot=plot, dimension=1,
                               ticker=left_axis.ticker)
        plot.left.append(left_axis)

        # City and highway mileage share the row index as x coordinate.
        cty = Glyph(data_source=self.source,
                    glyph=Circle(x="index", y="cty", fill_color="green"))
        hwy = Glyph(data_source=self.source,
                    glyph=Circle(x="index", y="hwy", fill_color="red"))

        select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False)
        plot.tools.append(select_tool)
        overlay = BoxSelectionOverlay(tool=select_tool)
        plot.renderers.extend([cty, hwy, horizontal_grid, overlay])

        controls = VBox(children=[manufacturer_select, model_select,
                                  transmission_select, drive_select,
                                  class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        return VBox(children=[top_panel, handson_table])

    def on_manufacturer_change(self, obj, attr, _, value):
        """Change callback: filter by manufacturer (``"All"`` clears it)."""
        self.manufacturer_filter = value if value != "All" else None
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        """Change callback: filter by model (``"All"`` clears it)."""
        self.model_filter = value if value != "All" else None
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        """Change callback: filter by transmission (``"All"`` clears it)."""
        self.transmission_filter = value if value != "All" else None
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        """Change callback: filter by drive (``"All"`` clears it)."""
        self.drive_filter = value if value != "All" else None
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        """Change callback: filter by class (``"All"`` clears it)."""
        self.class_filter = value if value != "All" else None
        self.update_data()

    def update_data(self):
        """Apply the active filters to ``mpg``, refresh the source, store."""
        active_filters = (
            ("manufacturer", self.manufacturer_filter),
            ("model", self.model_filter),
            ("trans", self.transmission_filter),
            ("drv", self.drive_filter),
            ("class", self.class_filter),
        )
        df = mpg
        for column, wanted in active_filters:
            if wanted:
                df = df[df[column] == wanted]

        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, poll_interval=0.5):
        """Print the document link, then reload the document in a loop.

        ``poll_interval`` is the pause between reloads, in seconds. The loop
        ends on Ctrl-C or when the server connection fails.
        """
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
class DataTables(object):
    """Filterable data table and scatter plot of the ``mpg`` data frame.

    The widgets live in the ``data_tables_server`` document. Each filter
    select narrows ``mpg`` to rows matching the chosen value; every change
    rebuilds ``self.source`` and stores the document through the session.
    """

    def __init__(self):
        """Load the server document, build the layout and store it."""
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        # ``None`` means "no filtering on this column".
        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        """Build and return the layout: filter selects, plot and data table."""
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        def filter_select(title, choices, handler):
            # Every select offers "All" (no filtering) ahead of the values.
            select = Select(title=title, value="All",
                            options=["All"] + choices)
            select.on_change('value', handler)
            return select

        manufacturer_select = filter_select(
            "Manufacturer:", manufacturers, self.on_manufacturer_change)
        model_select = filter_select(
            "Model:", models, self.on_model_change)
        transmission_select = filter_select(
            "Transmission:", transmissions, self.on_transmission_change)
        drive_select = filter_select(
            "Drive:", drives, self.on_drive_change)
        class_select = filter_select(
            "Class:", classes, self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", title="Manufacturer",
                        editor=SelectEditor(options=manufacturers),
                        formatter=StringFormatter(font_style="bold")),
            TableColumn(field="model", title="Model",
                        editor=StringEditor(completions=models)),
            TableColumn(field="displ", title="Displacement",
                        editor=NumberEditor(step=0.1),
                        formatter=NumberFormatter(format="0.0")),
            TableColumn(field="year", title="Year", editor=IntEditor()),
            TableColumn(field="cyl", title="Cylinders", editor=IntEditor()),
            TableColumn(field="trans", title="Transmission",
                        editor=SelectEditor(options=transmissions)),
            TableColumn(field="drv", title="Drive",
                        editor=SelectEditor(options=drives)),
            TableColumn(field="class", title="Class",
                        editor=SelectEditor(options=classes)),
            TableColumn(field="cty", title="City MPG", editor=IntEditor()),
            TableColumn(field="hwy", title="Highway MPG", editor=IntEditor()),
        ]
        data_table = DataTable(source=self.source, columns=columns,
                               editable=True)

        xdr = DataRange1d()
        ydr = DataRange1d()
        plot = Plot(title=None, x_range=xdr, y_range=ydr,
                    plot_width=800, plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)

        # City and highway mileage share the row index as x coordinate.
        cty_glyph = Circle(x="index", y="cty", fill_color="#396285", size=8,
                           fill_alpha=0.5, line_alpha=0.5)
        hwy_glyph = Circle(x="index", y="hwy", fill_color="#CE603D", size=8,
                           fill_alpha=0.5, line_alpha=0.5)
        cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph)
        hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph)

        tooltips = [
            ("Manufacturer", "@manufacturer"),
            ("Model", "@model"),
            ("Displacement", "@displ"),
            ("Year", "@year"),
            ("Cylinders", "@cyl"),
            ("Transmission", "@trans"),
            ("Drive", "@drv"),
            ("Class", "@class"),
        ]
        cty_hover_tool = HoverTool(plot=plot, renderers=[cty],
                                   tooltips=tooltips + [("City MPG", "@cty")])
        hwy_hover_tool = HoverTool(plot=plot, renderers=[hwy],
                                   tooltips=tooltips + [("Highway MPG", "@hwy")])
        select_tool = BoxSelectTool(plot=plot, renderers=[cty, hwy],
                                    dimensions=['width'])
        plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool])
        plot.renderers.extend([cty, hwy, ygrid])

        controls = VBox(children=[manufacturer_select, model_select,
                                  transmission_select, drive_select,
                                  class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, data_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        """Change callback: filter by manufacturer (``"All"`` clears it)."""
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        """Change callback: filter by model (``"All"`` clears it)."""
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        """Change callback: filter by transmission (``"All"`` clears it)."""
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        """Change callback: filter by drive (``"All"`` clears it)."""
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        """Change callback: filter by class (``"All"`` clears it)."""
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        """Apply the active filters to ``mpg``, refresh the source, store."""
        active_filters = (
            ("manufacturer", self.manufacturer_filter),
            ("model", self.model_filter),
            ("trans", self.transmission_filter),
            ("drv", self.drive_filter),
            ("class", self.class_filter),
        )
        df = mpg
        for column, wanted in active_filters:
            if wanted:
                df = df[df[column] == wanted]

        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        """Print the document link and poll the server for updates.

        Parameters
        ----------
        do_view : bool
            When true, pass the link to ``view`` as well as printing it.
        poll_interval : float
            Polling interval handed to ``Session.poll_document``.
        """
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view:
            view(link)
        print("\npress ctrl-C to exit")
        # Forward the caller's interval; it used to be accepted but ignored.
        self.session.poll_document(self.document, poll_interval)
class PlottingExtension(SimpleExtension):
    """Base class for extensions doing Bokeh plotting.

    Parameters
    ----------
    document_name : str
        The name of the Bokeh document. Use a different name for each
        experiment if you are storing your plots.
    start_server : bool, optional
        Whether to try and start the Bokeh plotting server. Defaults to
        ``False``. The server started is not persistent i.e. after shutting
        it down you will lose your plots. If you want to store your plots,
        start the server manually using the ``bokeh-server`` command. Also
        see the warning above.
    server_url : str, optional
        Url of the bokeh-server. Ex: when starting the bokeh-server with
        ``bokeh-server --ip 0.0.0.0`` at ``alice``, server_url should be
        ``http://alice:5006``. When not specified the default configured
        by ``bokeh_server`` in ``.blocksrc`` will be used. Defaults to
        ``http://localhost:5006/``.
    clear_document : bool, optional
        Whether or not to clear the contents of the server-side document
        upon creation. If `False`, previously existing plots within the
        document will be kept. Defaults to `True`.

    """

    def __init__(self, document_name, server_url=None, start_server=False,
                 clear_document=True, **kwargs):
        self.document_name = document_name
        self.server_url = (config.bokeh_server if server_url is None
                           else server_url)
        self.start_server = start_server
        # `_start_server_process` returns the Popen handle, so `self.sub`
        # really holds the server process (or None when none was started).
        self.sub = self._start_server_process() if self.start_server else None
        self.session = Session(root_url=self.server_url)
        self.document = Document()
        self._setup_document(clear_document)
        super(PlottingExtension, self).__init__(**kwargs)

    def _start_server_process(self):
        """Spawn a ``bokeh-server`` subprocess and return its handle.

        The handle is also stored on ``self.sub`` so that callers which
        ignore the return value (``__setstate__``) keep working.

        Returns
        -------
        sub : Popen
            The handle of the freshly started server process.

        """
        def preexec_fn():
            """Prevents the server from dying on training interrupt."""
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        # Only memory works with subprocess, need to wait for it to start
        logger.info('Starting plotting server on localhost:5006')
        # NOTE(review): stdout/stderr are piped but never read in this
        # class; confirm that a chatty server cannot fill the pipe buffers.
        self.sub = Popen('bokeh-server --ip 0.0.0.0 '
                         '--backend memory'.split(),
                         stdout=PIPE, stderr=PIPE, preexec_fn=preexec_fn)
        time.sleep(2)
        logger.info('Plotting server PID: {}'.format(self.sub.pid))
        return self.sub

    def _setup_document(self, clear_document=False):
        """Bind the session to the named document and load it.

        Parameters
        ----------
        clear_document : bool, optional
            When true, ``clear()`` is called on the document after it has
            been loaded. Defaults to ``False``.

        """
        self.session.use_doc(self.document_name)
        self.session.load_document(self.document)
        if clear_document:
            self.document.clear()
        self._document_setup_done = True

    def __getstate__(self):
        """Return the picklable state: no process, session or push thread."""
        state = self.__dict__.copy()
        state['sub'] = None
        state.pop('session', None)
        state.pop('_push_thread', None)
        return state

    def __setstate__(self, state):
        """Restore the state, restarting the server and session if needed."""
        self.__dict__.update(state)
        if self.start_server:
            self._start_server_process()
        self.session = Session(root_url=self.server_url)
        # The document is re-attached lazily, on the next call to `do`.
        self._document_setup_done = False

    def do(self, which_callback, *args):
        """Make sure the document is set up (needed after unpickling)."""
        if not self._document_setup_done:
            self._setup_document()

    @property
    def push_thread(self):
        """The push thread, created and started on first access."""
        if not hasattr(self, '_push_thread'):
            self._push_thread = PushThread(self.session, self.document)
            self._push_thread.start()
        return self._push_thread

    def store(self, obj):
        """Queue `obj` on the push thread with the ``PUT`` command."""
        self.push_thread.put(obj, PushThread.PUT)

    def push(self, which_callback):
        """Queue `which_callback` on the push thread with ``PUSH``."""
        self.push_thread.put(which_callback, PushThread.PUSH)
class Population(object):
    """Population pyramid backed by a Bokeh server document.

    The plot and its two ``Select`` widgets are added to the
    ``'population_reveal'`` document; every widget change recomputes the
    pyramid data and stores the document through the session.
    """

    # Initial selection; also used as the initial widget values.
    year = 2010
    location = "World"

    def __init__(self):
        """Connect to the document, load the data and render once."""
        from bokeh.models import ColumnDataSource
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population_reveal')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

        # just render at the initialization
        self._render()

    def _render(self):
        """Build plot and layout, add them to the document, push data."""
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        """Create ``self.plot``: two quad glyphs, axes, grids and legend."""
        from bokeh.models import (Plot, DataRange1d, LinearAxis, Grid,
                                  Legend, SingleIntervalTicker)
        from bokeh.models.glyphs import Quad

        xdr = DataRange1d(sources=[self.source_pyramid.columns("male"),
                                   self.source_pyramid.columns("female")])
        ydr = DataRange1d(sources=[self.source_pyramid.columns("groups")])

        self.plot = Plot(title="Widgets", x_range=xdr, y_range=ydr,
                         plot_width=600, plot_height=600)

        xaxis = LinearAxis()
        self.plot.add_layout(xaxis, 'below')
        yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5))
        self.plot.add_layout(yaxis, 'left')

        self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

        # Male bars extend to the left of zero, female bars to the right.
        male_quad = Quad(left="male", right=0, bottom="groups",
                         top="shifted", fill_color="#3B8686")
        male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad)

        female_quad = Quad(left=0, right="female", bottom="groups",
                           top="shifted", fill_color="#CFF09E")
        female_quad_glyph = self.plot.add_glyph(self.source_pyramid,
                                                female_quad)

        self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph],
                                                 Female=[female_quad_glyph])))

    def on_year_change(self, obj, attr, old, new):
        """Widget callback: select the year `new` and refresh the data."""
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        """Widget callback: select the location `new` and refresh."""
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        """Create the year/location selectors and ``self.layout``."""
        from bokeh.models.widgets import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        # Start the widgets on the current selection so that they cannot
        # disagree with the data `update_pyramid` actually shows.
        year_select = Select(title="Year:", value=str(self.year),
                             options=years)
        location_select = Select(title="Location:", value=self.location,
                                 options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(year_select, location_select)
        self.layout = VBox(controls, self.plot)

    def update_pyramid(self):
        """Recompute the pyramid columns for the current selection.

        Male shares are stored negated. The new columns replace
        ``self.source_pyramid.data`` and the document is stored through
        the session. A selection without rows yields empty columns
        instead of raising ``IndexError``.
        """
        pyramid = self.df[(self.df.Location == self.location) &
                          (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()
        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        # Top edge of each bar is the next group's start; the last bar is
        # closed 5 above its own start. Nothing to close when empty.
        shifted = (groups[1:] + [groups[-1] + 5]) if groups else []

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
class Population(object):
    """Population pyramid backed by a Bokeh server document.

    Construction only connects to the ``'population'`` document and loads
    the data; call :meth:`render` to build the plot and widgets and add
    them to the document.
    """

    # Initial selection; also used as the initial widget values.
    year = 2010
    location = "World"

    def __init__(self):
        """Connect to the document and load the population data."""
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.models import ColumnDataSource
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

    def render(self):
        """Build plot and layout, add them to the document, push data."""
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        """Create ``self.plot``: two quad glyphs, axes, grids and legend."""
        from bokeh.models import (
            Plot, DataRange1d, LinearAxis, Grid, Legend,
            SingleIntervalTicker
        )
        from bokeh.models.glyphs import Quad

        xdr = DataRange1d()
        ydr = DataRange1d()

        self.plot = Plot(title=None, x_range=xdr, y_range=ydr,
                         plot_width=600, plot_height=600)

        xaxis = LinearAxis()
        self.plot.add_layout(xaxis, 'below')
        yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5))
        self.plot.add_layout(yaxis, 'left')

        self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

        # Male bars extend to the left of zero, female bars to the right.
        male_quad = Quad(left="male", right=0, bottom="groups",
                         top="shifted", fill_color="#3B8686")
        male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad)

        female_quad = Quad(left=0, right="female", bottom="groups",
                           top="shifted", fill_color="#CFF09E")
        female_quad_glyph = self.plot.add_glyph(self.source_pyramid,
                                                female_quad)

        self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph],
                                                 Female=[female_quad_glyph])))

    def on_year_change(self, obj, attr, old, new):
        """Widget callback: select the year `new` and refresh the data."""
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        """Widget callback: select the location `new` and refresh."""
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        """Create the year/location selectors and ``self.layout``."""
        from bokeh.models.widgets import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        # Start the widgets on the current selection so that they cannot
        # disagree with the data `update_pyramid` actually shows.
        year_select = Select(title="Year:", value=str(self.year),
                             options=years)
        location_select = Select(title="Location:", value=self.location,
                                 options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(year_select, location_select)
        self.layout = VBox(controls, self.plot)

    def update_pyramid(self):
        """Recompute the pyramid columns for the current selection.

        Male shares are stored negated. The new columns replace
        ``self.source_pyramid.data`` and the document is stored through
        the session. A selection without rows yields empty columns
        instead of raising ``IndexError``.
        """
        pyramid = self.df[(self.df.Location == self.location) &
                          (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()
        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        # Top edge of each bar is the next group's start; the last bar is
        # closed 5 above its own start. Nothing to close when empty.
        shifted = (groups[1:] + [groups[-1] + 5]) if groups else []

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
class NutchUrlTrails:
    """
    Class for managing URL Trails visualizations
    """

    @staticmethod
    def strip_url(url):
        """
        Make a URL safe for visualization in Bokeh server

        URLs longer than URL_CHAR_WIDTH are shortened to their head and
        tail joined by '...'; for widths of at least 6 the result is never
        longer than URL_CHAR_WIDTH.

        :param url: a URL to be shortened/stripped
        :return: The stripped URL
        """
        # TODO: remove protocol-stripping on next Bokeh release
        stripped_url = (url.replace('https://', '')
                        .replace('http://', '')
                        .replace(':', '_')
                        .replace('-', '_'))

        if len(stripped_url) <= URL_CHAR_WIDTH:
            return stripped_url

        half = int(URL_CHAR_WIDTH / 2)
        # Reserve three characters of the tail for the ellipsis. Slice
        # from an explicit start index: `s[-0:]` would be the whole string.
        tail_len = max(half - 3, 0)
        tail = stripped_url[len(stripped_url) - tail_len:]
        return stripped_url[:half] + '...' + tail

    @staticmethod
    def jtime_to_datetime(t):
        """
        Convert a Java-format Epoch time stamp into np.datetime64 object

        :param t: Java-format Epoch time stamp (milliseconds)
        :return: A np.datetime64 scalar
        """
        return np.datetime64(datetime.fromtimestamp(t / 1000.0))

    def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS):
        """
        Create a NutchUrlTrails instance for visualizing a running Nutch
        crawl in real-time using Bokeh

        :param crawl_name: The name of the crawl (as identified by the
            queue); also used as the Bokeh document name
        :param num_urls: The number of URLs to display in the visualization
        :return: A NutchUrlTrails instance
        """
        self.crawl_name = crawl_name
        self.num_urls = num_urls
        # url -> start time of fetches that have not finished yet
        self.open_urls = {}
        # url -> (start time, end time) of finished fetches
        self.closed_urls = {}
        # glyphs added by the previous plot_urls() call, if any
        self.old_segments = None
        self.old_circles = None

        self.session = Session()
        self.session.use_doc(self.crawl_name)
        self.document = Document()

        con = Connection()
        exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False)
        queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name)
        self.queue = con.SimpleQueue(name=queue)

    def handle_messages(self):
        """
        Get and parse up to 250 messages from the queue then plot.
        Break early if less.
        """
        for _ in range(250):
            try:
                m = self.queue.get(block=True, timeout=1)
            except Empty:
                break
            self.parse_message(m)

        self.plot_urls()

    def parse_message(self, message):
        """
        Parse a single message arriving from the queue. Updates list of
        open/closed urls.

        :param message: A message from the queue
        :raises ValueError: if the event type is neither START nor END
        """
        print(message.body)
        message = json.loads(message.body)
        url = message["url"]
        event_type = message["eventType"]

        if event_type == "START":
            self.open_urls[url] = NutchUrlTrails.jtime_to_datetime(
                message["timestamp"])
        elif event_type == "END":
            if url in self.open_urls:
                end_t = NutchUrlTrails.jtime_to_datetime(message["timestamp"])
                self.closed_urls[url] = (self.open_urls[url], end_t)
                del self.open_urls[url]
            else:
                # TODO: Log mismatched messages instead of just swallowing them
                pass
        else:
            raise ValueError("Unexpected message type")

    def plot_urls(self):
        """
        Visualize crawler activity by showing the most recently crawled
        URLs and the fetch time.
        """
        self.session.load_document(self.document)
        plot = self.document.context.children[0]

        # don't plot if no URLs available
        if not (self.open_urls or self.closed_urls):
            return

        # x0/x, left and right boundaries of segments, correspond to fetch time
        x0 = []
        x = []
        # y-axis, name of URL being fetched
        urls = []

        # maintain x and URL of circles in a separate list
        circles = []
        circle_urls = []

        current_time = np.datetime64(datetime.now())

        # For open URLs (not completed fetching), draw a segment from
        # start time to now
        for url, start_t in self.open_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(current_time)
            urls.append(url)

        # For closed URLs (completed fetching), draw a segment from start
        # to end time, and a circle as well.
        for url, (start_t, end_t) in self.closed_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(end_t)
            circles.append(end_t)
            urls.append(url)
            circle_urls.append(url)

        x0 = np.asarray(x0)
        x = np.asarray(x)
        circles = np.asarray(circles)

        # sort segments, most recently started first
        sort_index = np.argsort(x0)[::-1]
        x0 = x0[sort_index]
        x = x[sort_index]
        urls = [urls[i] for i in sort_index]

        # sort circles, most recently finished first
        if self.closed_urls:
            circle_sort_index = np.argsort(circles)[::-1]
            circles = circles[circle_sort_index]
            circle_urls = [circle_urls[i] for i in circle_sort_index]

        # Filter to the latest num_urls URLs (the arrays are newest-first)
        # filter segments
        active_x0 = x0[:self.num_urls]
        active_x = x[:self.num_urls]
        active_urls = urls[:self.num_urls]

        plot.x_range.start = active_x0.min()
        # Same instant the open segments end at.
        plot.x_range.end = current_time
        plot.y_range.factors = active_urls

        # make sure these are turned back on!
        # turn y axis grid lines back on (first Grid renderer only)
        for r in plot.renderers:
            if isinstance(r, Grid):
                r.grid_line_color = 'black'
                break

        # turn tickers and their labels back on
        for axis in (plot.right[0], plot.below[0]):
            axis.minor_tick_line_color = 'black'
            axis.major_tick_line_color = 'black'
            axis.major_label_text_font_size = '12pt'

        # TODO: Find a more correct way to remove old segment/circle glyphs
        # Circles were added after segments, so they are popped first.
        if self.old_circles:
            plot.renderers.pop()
            self.old_circles = None

        if self.old_segments:
            plot.renderers.pop()
            self.old_segments = None

        segment_source = ColumnDataSource(dict(x0=active_x0,
                                               x1=active_x,
                                               urls=active_urls))

        self.old_segments = Segment(x0="x0", y0="urls", x1="x1", y1="urls",
                                    line_color="orange", line_width=10)
        plot.add_glyph(segment_source, self.old_segments)

        if self.closed_urls and PLOT_CIRCLES:
            # filter circles (some of these might not be displayed)
            active_circles = circles[:self.num_urls]
            active_circle_urls = circle_urls[:self.num_urls]

            circle_source = ColumnDataSource(dict(x=active_circles,
                                                  urls=active_circle_urls))

            self.old_circles = Circle(x="x", y="urls", size=12,
                                      fill_color="green",
                                      line_color="orange", line_width=2)
            plot.add_glyph(circle_source, self.old_circles)

        self.session.store_document(self.document, dirty_only=False)
class PlottingExtension(Callback):
    """Base class for extensions doing Bokeh plotting.

    Parameters
    ----------
    document_name : str
        The name of the Bokeh document. Use a different name for each
        experiment if you are storing your plots.
    server_url : str, optional
        Url of the bokeh-server. Ex: when starting the bokeh-server with
        ``bokeh-server --ip 0.0.0.0`` at ``alice``, server_url should be
        ``http://alice:5006``. Defaults to http://localhost:5006.
    clear_document : bool, optional
        Whether or not to clear the contents of the server-side document
        upon creation. If `False`, previously existing plots within the
        document will be kept. Defaults to `True`.

    Notes
    -----
    The ``start_server`` option was removed, as it had a number of flaws
    incl. zombie bokeh-server processes.

    """

    def __init__(self, document_name, server_url=None, clear_document=True):
        super(PlottingExtension, self).__init__()
        self.document_name = document_name
        self.server_url = (DEFAULT_SRV_URL if server_url is None
                           else server_url)
        self.session = Session(root_url=self.server_url)
        self.document = Document()
        self._setup_document(clear_document)

    def _setup_document(self, clear_document=False):
        """Bind the session to the named document and load it.

        Parameters
        ----------
        clear_document : bool, optional
            When true, ``clear()`` is called on the document after it has
            been loaded. Defaults to ``False``.

        """
        self.session.use_doc(self.document_name)
        self.session.load_document(self.document)
        if clear_document:
            self.document.clear()
        self._document_setup_done = True

    def __getstate__(self):
        """Return the picklable state: no session and no push thread."""
        state = self.__dict__.copy()
        state.pop('_sub', None)
        state.pop('session', None)
        state.pop('_push_thread', None)
        return state

    def __setstate__(self, state):
        """Restore the state and open a fresh session."""
        self.__dict__.update(state)
        self.session = Session(root_url=self.server_url)
        # The document is re-attached lazily, on the next `on_callback`.
        self._document_setup_done = False

    def on_callback(self, logs=None):
        """Make sure the document is set up (needed after unpickling).

        Parameters
        ----------
        logs : dict, optional
            Not used by this base implementation. The default is ``None``
            rather than a shared mutable ``{}``.

        """
        if not self._document_setup_done:
            self._setup_document()

    @property
    def push_thread(self):
        """The push thread, created and started on first access."""
        if not hasattr(self, '_push_thread'):
            self._push_thread = PushThread(self.session, self.document)
            self._push_thread.start()
        return self._push_thread

    def store_data(self, obj):
        """Queue `obj` on the push thread with the ``PUT`` command."""
        self.push_thread.put(obj, PushThread.PUT)

    def push_document(self, after_training=False):
        """Queue `after_training` on the push thread with ``PUSH``."""
        self.push_thread.put(after_training, PushThread.PUSH)
class NutchUrlTrails:
    """
    Class for managing URL Trails visualizations
    """

    @staticmethod
    def strip_url(url):
        """
        Make a URL safe for visualization in Bokeh server

        URLs longer than URL_CHAR_WIDTH are shortened to their head and
        tail joined by '...'; for widths of at least 6 the result is never
        longer than URL_CHAR_WIDTH.

        :param url: a URL to be shortened/stripped
        :return: The stripped URL
        """
        # TODO: remove protocol-stripping on next Bokeh release
        stripped_url = (url.replace('https://', '')
                        .replace('http://', '')
                        .replace(':', '_')
                        .replace('-', '_'))

        if len(stripped_url) <= URL_CHAR_WIDTH:
            return stripped_url

        half = int(URL_CHAR_WIDTH / 2)
        # Reserve three characters of the tail for the ellipsis. Slice
        # from an explicit start index: `s[-0:]` would be the whole string.
        tail_len = max(half - 3, 0)
        tail = stripped_url[len(stripped_url) - tail_len:]
        return stripped_url[:half] + '...' + tail

    @staticmethod
    def jtime_to_datetime(t):
        """
        Convert a Java-format Epoch time stamp into np.datetime64 object

        :param t: Java-format Epoch time stamp (milliseconds)
        :return: A np.datetime64 scalar
        """
        return np.datetime64(datetime.fromtimestamp(t / 1000.0))

    def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS):
        """
        Create a NutchUrlTrails instance for visualizing a running Nutch
        crawl in real-time using Bokeh

        :param crawl_name: The name of the crawl (as identified by the
            queue); also used as the Bokeh document name
        :param num_urls: The number of URLs to display in the visualization
        :return: A NutchUrlTrails instance
        """
        self.crawl_name = crawl_name
        self.num_urls = num_urls
        # url -> start time of fetches that have not finished yet
        self.open_urls = {}
        # url -> (start time, end time) of finished fetches
        self.closed_urls = {}
        # glyphs added by the previous plot_urls() call, if any
        self.old_segments = None
        self.old_circles = None

        self.session = Session()
        self.session.use_doc(self.crawl_name)
        self.document = Document()

        con = Connection()
        exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False)
        queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name)
        self.queue = con.SimpleQueue(name=queue)

    def handle_messages(self):
        """
        Get and parse up to 250 messages from the queue then plot.
        Break early if less.
        """
        for _ in range(250):
            try:
                m = self.queue.get(block=True, timeout=1)
            except Empty:
                break
            self.parse_message(m)

        self.plot_urls()

    def parse_message(self, message):
        """
        Parse a single message arriving from the queue. Updates list of
        open/closed urls.

        :param message: A message from the queue
        :raises ValueError: if the event type is neither START nor END
        """
        print(message.body)
        message = json.loads(message.body)
        url = message["url"]
        event_type = message["eventType"]

        if event_type == "START":
            self.open_urls[url] = NutchUrlTrails.jtime_to_datetime(
                message["timestamp"])
        elif event_type == "END":
            if url in self.open_urls:
                end_t = NutchUrlTrails.jtime_to_datetime(message["timestamp"])
                self.closed_urls[url] = (self.open_urls[url], end_t)
                del self.open_urls[url]
            else:
                # TODO: Log mismatched messages instead of just swallowing them
                pass
        else:
            raise ValueError("Unexpected message type")

    def plot_urls(self):
        """
        Visualize crawler activity by showing the most recently crawled
        URLs and the fetch time.
        """
        self.session.load_document(self.document)
        plot = self.document.context.children[0]

        # don't plot if no URLs available
        if not (self.open_urls or self.closed_urls):
            return

        # x0/x, left and right boundaries of segments, correspond to fetch time
        x0 = []
        x = []
        # y-axis, name of URL being fetched
        urls = []

        # maintain x and URL of circles in a separate list
        circles = []
        circle_urls = []

        current_time = np.datetime64(datetime.now())

        # For open URLs (not completed fetching), draw a segment from
        # start time to now
        for url, start_t in self.open_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(current_time)
            urls.append(url)

        # For closed URLs (completed fetching), draw a segment from start
        # to end time, and a circle as well.
        for url, (start_t, end_t) in self.closed_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(end_t)
            circles.append(end_t)
            urls.append(url)
            circle_urls.append(url)

        x0 = np.asarray(x0)
        x = np.asarray(x)
        circles = np.asarray(circles)

        # sort segments, most recently started first
        sort_index = np.argsort(x0)[::-1]
        x0 = x0[sort_index]
        x = x[sort_index]
        urls = [urls[i] for i in sort_index]

        # sort circles, most recently finished first
        if self.closed_urls:
            circle_sort_index = np.argsort(circles)[::-1]
            circles = circles[circle_sort_index]
            circle_urls = [circle_urls[i] for i in circle_sort_index]

        # Filter to the latest num_urls URLs (the arrays are newest-first)
        # filter segments
        active_x0 = x0[:self.num_urls]
        active_x = x[:self.num_urls]
        active_urls = urls[:self.num_urls]

        plot.x_range.start = active_x0.min()
        # Same instant the open segments end at.
        plot.x_range.end = current_time
        plot.y_range.factors = active_urls

        # make sure these are turned back on!
        # turn y axis grid lines back on (first Grid renderer only)
        for r in plot.renderers:
            if isinstance(r, Grid):
                r.grid_line_color = 'black'
                break

        # turn tickers and their labels back on
        for axis in (plot.right[0], plot.below[0]):
            axis.minor_tick_line_color = 'black'
            axis.major_tick_line_color = 'black'
            axis.major_label_text_font_size = '12pt'

        # TODO: Find a more correct way to remove old segment/circle glyphs
        # Circles were added after segments, so they are popped first.
        if self.old_circles:
            plot.renderers.pop()
            self.old_circles = None

        if self.old_segments:
            plot.renderers.pop()
            self.old_segments = None

        segment_source = ColumnDataSource(dict(x0=active_x0,
                                               x1=active_x,
                                               urls=active_urls))

        self.old_segments = Segment(x0="x0", y0="urls", x1="x1", y1="urls",
                                    line_color="orange", line_width=10)
        plot.add_glyph(segment_source, self.old_segments)

        if self.closed_urls and PLOT_CIRCLES:
            # filter circles (some of these might not be displayed)
            active_circles = circles[:self.num_urls]
            active_circle_urls = circle_urls[:self.num_urls]

            circle_source = ColumnDataSource(dict(x=active_circles,
                                                  urls=active_circle_urls))

            self.old_circles = Circle(x="x", y="urls", size=12,
                                      fill_color="green",
                                      line_color="orange", line_width=2)
            plot.add_glyph(circle_source, self.old_circles)

        self.session.store_document(self.document, dirty_only=False)
import pandas as pd

from bokeh.objects import (
    Plot, ColumnDataSource, DataRange1d, FactorRange,
    LinearAxis, CategoricalAxis, Grid, Glyph,
)
from bokeh.widgetobjects import (
    DateRangeSlider, HBox, VBox, Paragraph, Select, VBoxModelForm,
)
from bokeh.glyphs import Rect
from bokeh.document import Document
from bokeh.session import Session

import employment_data_reader as emp
from employment_utils import get_country_for_byte, get_jobtype_for_byte

# Attach a fresh document to the 'employment_server' document on the
# Bokeh server and load its current contents.
document = Document()
session = Session()
session.use_doc('employment_server')
session.load_document(document)

# Weekday names keyed by number, Monday being 0.
days_of_week = dict(enumerate((
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)))

# NOTE(review): `datetime` is not imported in the visible part of the
# file; presumably `import datetime` precedes this chunk -- confirm.
# Outer date bounds and the initially selected date range.
bounds_start = datetime.date(2012, 11, 1)
bounds_end = datetime.date(2013, 12, 30)
start = datetime.date(2013, 5, 26)
end = datetime.date(2013, 7, 5)

# Starts empty.
source_country = ColumnDataSource(data=dict())
class PlottingExtension(SimpleExtension):
    """Base class for extensions doing Bokeh plotting.

    Parameters
    ----------
    document_name : str
        The name of the Bokeh document. Use a different name for each
        experiment if you are storing your plots.
    start_server : bool, optional
        Whether to try and start the Bokeh plotting server. Defaults to
        ``False``. The server started is not persistent i.e. after shutting
        it down you will lose your plots. If you want to store your plots,
        start the server manually using the ``bokeh-server`` command. Also
        see the warning above.
    server_url : str, optional
        Url of the bokeh-server. Ex: when starting the bokeh-server with
        ``bokeh-server --ip 0.0.0.0`` at ``alice``, server_url should be
        ``http://alice:5006``. When not specified the default configured
        by ``bokeh_server`` in ``.blocksrc`` will be used. Defaults to
        ``http://localhost:5006/``.
    clear_document : bool, optional
        Whether or not to clear the contents of the server-side document
        upon creation. If `False`, previously existing plots within the
        document will be kept. Defaults to `True`.

    """

    def __init__(self, document_name, server_url=None, start_server=False,
                 clear_document=True, **kwargs):
        self.document_name = document_name
        self.server_url = (config.bokeh_server if server_url is None
                           else server_url)
        self.start_server = start_server
        # `_start_server_process` returns the Popen handle, so `self.sub`
        # really holds the server process (or None when none was started).
        self.sub = self._start_server_process() if self.start_server else None
        self.session = Session(root_url=self.server_url)
        self.document = Document()
        self._setup_document(clear_document)
        super(PlottingExtension, self).__init__(**kwargs)

    def _start_server_process(self):
        """Spawn a ``bokeh-server`` subprocess and return its handle.

        The handle is also stored on ``self.sub`` so that callers which
        ignore the return value (``__setstate__``) keep working.

        Returns
        -------
        sub : Popen
            The handle of the freshly started server process.

        """
        def preexec_fn():
            """Prevents the server from dying on training interrupt."""
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        # Only memory works with subprocess, need to wait for it to start
        logger.info('Starting plotting server on localhost:5006')
        # NOTE(review): stdout/stderr are piped but never read in this
        # class; confirm that a chatty server cannot fill the pipe buffers.
        self.sub = Popen('bokeh-server --ip 0.0.0.0 '
                         '--backend memory'.split(),
                         stdout=PIPE, stderr=PIPE, preexec_fn=preexec_fn)
        time.sleep(2)
        logger.info('Plotting server PID: {}'.format(self.sub.pid))
        return self.sub

    def _setup_document(self, clear_document=False):
        """Bind the session to the named document and load it.

        Parameters
        ----------
        clear_document : bool, optional
            When true, ``clear()`` is called on the document after it has
            been loaded. Defaults to ``False``.

        """
        self.session.use_doc(self.document_name)
        self.session.load_document(self.document)
        if clear_document:
            self.document.clear()
        self._document_setup_done = True

    def __getstate__(self):
        """Return the picklable state: no process, session or push thread."""
        state = self.__dict__.copy()
        state['sub'] = None
        state.pop('session', None)
        state.pop('_push_thread', None)
        return state

    def __setstate__(self, state):
        """Restore the state, restarting the server and session if needed."""
        self.__dict__.update(state)
        if self.start_server:
            self._start_server_process()
        self.session = Session(root_url=self.server_url)
        # The document is re-attached lazily, on the next call to `do`.
        self._document_setup_done = False

    def do(self, which_callback, *args):
        """Make sure the document is set up (needed after unpickling)."""
        if not self._document_setup_done:
            self._setup_document()

    @property
    def push_thread(self):
        """The push thread, created and started on first access."""
        if not hasattr(self, '_push_thread'):
            self._push_thread = PushThread(self.session, self.document)
            self._push_thread.start()
        return self._push_thread

    def store(self, obj):
        """Queue `obj` on the push thread with the ``PUT`` command."""
        self.push_thread.put(obj, PushThread.PUT)

    def push(self, which_callback):
        """Queue `which_callback` on the push thread with ``PUSH``."""
        self.push_thread.put(which_callback, PushThread.PUSH)