def init_plot(crawl_name): session = Session() document = Document() session.use_doc(crawl_name) session.load_document(document) if document.context.children: plot = document.context.children[0] else: output_server(crawl_name) # TODO: Remove these when Bokeh is upgraded # placeholders or Bokeh can't inject properly current = np.datetime64(datetime.now()) xdr = Range1d(current, current + 1) ydr = ["urls"] # styling suggested by Bryan plot = figure(title="Crawler Monitor", tools="hover", x_axis_type="datetime", y_axis_location="right", x_range=xdr, y_range=ydr, width=1200, height=600) plot.toolbar_location = None plot.xgrid.grid_line_color = None document.add(plot) session.store_document(document) script = autoload_server(plot, session) #TODO: Looks like a Bokeh bug, probably not repeatable with current code script = script.replace("'modelid': u'", "'modelid': '") return script
def init_plot(crawl_name): session = Session() document = Document() session.use_doc(crawl_name) session.load_document(document) if document.context.children: plot = document.context.children[0] else: output_server(crawl_name) # TODO: Remove these when Bokeh is upgraded # placeholders or Bokeh can't inject properly current = np.datetime64(datetime.now()) xdr = Range1d(current, current + 1) ydr = ["urls"] # styling suggested by Bryan plot = figure(title="Crawler Monitor", tools="hover", x_axis_type="datetime", y_axis_location="right", x_range=xdr, y_range=ydr, width=1200, height=600) plot.toolbar_location = None plot.xgrid.grid_line_color = None document.add(plot) session.store_document(document) script = autoload_server(plot, session) #TODO: Looks like a Bokeh bug, probably not repeatable with current code script = script.replace("'modelid': u'", "'modelid': '") return script
def wrapper(*args, **kwargs): document = Document() session = Session(name=url, root_url=url) session.use_doc(document.docid) session.load_document(document) session.publish() document.autoadd = False document.autostore = False obj = func(*args, **kwargs) obj._docid = session.docid obj._root_url = session.root_url document.add(obj) session.store_document(document) return obj
def wrapper(*args, **kwargs): docname = prefix + str(uuid.uuid4()) session = Session(name=url, root_url=url) session.use_doc(docname) session.load_document(curdoc()) curdoc().autoadd = False curdoc().autostore = False obj = func(*args, **kwargs) tag = embed.autoload_server(obj, session) obj.tag = tag curdoc().add(obj) changed = session.store_document(curdoc()) logger.debug("stored: %s", str(changed)) return obj
def wrapper(*args, **kwargs): docname = prefix + str(uuid.uuid4()) session = Session(name=url, root_url=url) session.use_doc(docname) session.load_document(curdoc()) curdoc().autoadd = False curdoc().autostore = False obj = func(*args, **kwargs) tag = embed.autoload_server(obj, session) obj.tag = tag curdoc().add(obj) changed = session.store_document(curdoc()) logger.debug("stored: %s", str(changed)) return obj
class NutchUrlTrails: """ Class for managing URL Trails visualizations """ @staticmethod def strip_url(url): """ Make a URL safe for visualization in Bokeh server :param url: a URL to be shortened/stripped :return: The stripped URL """ # TODO: remove protocol-stripping on next Bokeh release stripped_url = url.replace('https://', '').replace('http://', '').replace( ':', '_').replace('-', '_') if len(stripped_url) <= URL_CHAR_WIDTH: return stripped_url else: return stripped_url[:int(URL_CHAR_WIDTH / 2)] + '...' + stripped_url[ -int(URL_CHAR_WIDTH / 2) - 3:] @staticmethod def jtime_to_datetime(t): """ Convert a Java-format Epoch time stamp into np.datetime64 object :param t: Java-format Epoch time stamp (milliseconds) :return: A np.datetime64 scalar """ return np.datetime64(datetime.fromtimestamp(t / 1000.0)) def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS): """ Create a NutchUrlTrails instance for visualizing a running Nutch crawl in real-time using Bokeh :param name: The name of the crawl (as identified by the queue) :param num_urls: The number of URLs to display in the visualization :return: A NutchUrLTrails instance """ self.crawl_name = crawl_name self.num_urls = num_urls self.open_urls = {} self.closed_urls = {} self.old_segments = None self.old_circles = None self.session = Session() self.session.use_doc(self.crawl_name) self.document = Document() con = Connection() exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False) queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name) self.queue = con.SimpleQueue(name=queue) def handle_messages(self): """ Get and parse up to 250 messages from the queue then plot. Break early if less. """ for i in range(250): try: m = self.queue.get(block=True, timeout=1) self.parse_message(m) except Empty: break self.plot_urls() def parse_message(self, message): """ Parse a single message arriving from the queue. Updates list of open/closed urls. :param message: A message from the queue """ print(message.body) message = json.loads(message.body) url = message["url"] if message["eventType"] == "START": self.open_urls[url] = NutchUrlTrails.jtime_to_datetime( message["timestamp"]) elif message["eventType"] == "END": if url in self.open_urls: self.closed_urls[url] = (self.open_urls[url], NutchUrlTrails.jtime_to_datetime( message["timestamp"])) del self.open_urls[url] else: # TODO: Log mismatched messages instead of just swallowing them pass else: raise Exception("Unexpected message type") def plot_urls(self): """ Visualize crawler activity by showing the most recently crawled URLs and the fetch time. """ self.session.load_document(self.document) plot = self.document.context.children[0] # don't plot if no URLs available if not (self.open_urls or self.closed_urls): return # x0/x0, left and right boundaries of segments, correspond to fetch time x0 = [] x = [] # y-axis, name of URL being fetched urls = [] # maintain x and URL of circles in a separate list circles = [] circle_urls = [] current_time = np.datetime64(datetime.now()) # For open URLs (not completed fetching), draw a segment from start time to now for url, start_t in self.open_urls.items(): url = NutchUrlTrails.strip_url(url) x0.append(start_t) x.append(current_time) urls.append(url) # For closed URLs (completed fetching), draw a segment from start to end time, and a circle as well. for url, (start_t, end_t) in self.closed_urls.items(): url = NutchUrlTrails.strip_url(url) x0.append(start_t) x.append(end_t) circles.append(end_t) urls.append(url) circle_urls.append(url) x0 = np.asarray(x0) x = np.asarray(x) circles = np.asarray(circles) # sort segments sort_index = np.argsort(x0)[::-1] x0 = x0[sort_index] x = x[sort_index] urls = [urls[i] for i in sort_index] # sort circles if self.closed_urls: circle_sort_index = np.argsort(circles)[::-1] circles = circles[circle_sort_index] circle_urls = [circle_urls[i] for i in circle_sort_index] # Filter to latest num_url URLs (ascending order) # filter segments active_x0 = x0[:self.num_urls] active_x = x[:self.num_urls] active_urls = urls[:self.num_urls] min_x = min(active_x0) plot.x_range.start = min_x plot.x_range.end = np.datetime64(datetime.now()) plot.y_range.factors = active_urls # make sure these are turned back on! # turn y axis grid lines back on for r in plot.renderers: if type(r) == Grid: r.grid_line_color = 'black' break # turn tickers and their labels back on plot.right[0].minor_tick_line_color = 'black' plot.right[0].major_tick_line_color = 'black' plot.right[0].major_label_text_font_size = '12pt' plot.below[0].minor_tick_line_color = 'black' plot.below[0].major_tick_line_color = 'black' plot.below[0].major_label_text_font_size = '12pt' # TODO: Find a more correct way to remove old segment/circle glyphs if self.old_circles: plot.renderers.pop() self.old_circles = None if self.old_segments: plot.renderers.pop() self.old_segments = None segment_source = ColumnDataSource( dict(x0=active_x0, x1=active_x, urls=active_urls)) self.old_segments = Segment(x0="x0", y0="urls", x1="x1", y1="urls", line_color="orange", line_width=10) plot.add_glyph(segment_source, self.old_segments) if self.closed_urls and PLOT_CIRCLES: # filter circles (some of these might not be displayed) active_circles = circles[:self.num_urls] active_circle_urls = circle_urls[:self.num_urls] circle_source = ColumnDataSource( dict(x=active_circles, urls=active_circle_urls)) self.old_circles = Circle(x="x", y="urls", size=12, fill_color="green", line_color="orange", line_width=2) plot.add_glyph(circle_source, self.old_circles) self.session.store_document(self.document, dirty_only=False)
class DataTables(object): def __init__(self): self.document = Document() self.session = Session() self.session.use_doc('data_tables_server') self.session.load_document(self.document) self.manufacturer_filter = None self.model_filter = None self.transmission_filter = None self.drive_filter = None self.class_filter = None self.source = ColumnDataSource() self.update_data() self.document.add(self.create()) self.session.store_document(self.document) def create(self): manufacturers = sorted(mpg["manufacturer"].unique()) models = sorted(mpg["model"].unique()) transmissions = sorted(mpg["trans"].unique()) drives = sorted(mpg["drv"].unique()) classes = sorted(mpg["class"].unique()) manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers) manufacturer_select.on_change('value', self.on_manufacturer_change) model_select = Select(title="Model:", value="All", options=["All"] + models) model_select.on_change('value', self.on_model_change) transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions) transmission_select.on_change('value', self.on_transmission_change) drive_select = Select(title="Drive:", value="All", options=["All"] + drives) drive_select.on_change('value', self.on_drive_change) class_select = Select(title="Class:", value="All", options=["All"] + classes) class_select.on_change('value', self.on_class_change) columns = [ TableColumn(field="manufacturer", header="Manufacturer", type="autocomplete", source=manufacturers), TableColumn(field="model", header="Model", type="autocomplete", source=models), TableColumn(field="displ", header="Displacement", type="numeric", format="0.00"), TableColumn(field="year", header="Year", type="numeric"), TableColumn(field="cyl", header="Cylinders", type="numeric"), TableColumn(field="trans", header="Transmission", type="dropdown", strict=True, source=transmissions), TableColumn(field="drv", header="Drive", type="autocomplete", strict=True, source=drives), TableColumn(field="class", header="Class", type="autocomplete", strict=True, source=classes), TableColumn(field="cty", header="City MPG", type="numeric"), TableColumn(field="hwy", header="Highway MPG", type="numeric"), ] handson_table = HandsonTable(source=self.source, columns=columns, sorting=True) xdr = DataRange1d(sources=[self.source.columns("index")]) #xdr = FactorRange(factors=manufacturers) ydr = DataRange1d(sources=[self.source.columns("cty"), self.source.columns("hwy")]) plot = Plot(title=None, data_sources=[self.source], x_range=xdr, y_range=ydr, plot_width=800, plot_height=300) xaxis = LinearAxis(plot=plot) plot.below.append(xaxis) yaxis = LinearAxis(plot=plot) ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker) plot.left.append(yaxis) cty = Glyph(data_source=self.source, glyph=Circle(x="index", y="cty", fill_color="green")) hwy = Glyph(data_source=self.source, glyph=Circle(x="index", y="hwy", fill_color="red")) select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False) plot.tools.append(select_tool) overlay = BoxSelectionOverlay(tool=select_tool) plot.renderers.extend([cty, hwy, ygrid, overlay]) controls = VBox(children=[manufacturer_select, model_select, transmission_select, drive_select, class_select], width=200) top_panel = HBox(children=[controls, plot]) layout = VBox(children=[top_panel, handson_table]) return layout def on_manufacturer_change(self, obj, attr, _, value): self.manufacturer_filter = None if value == "All" else value self.update_data() def on_model_change(self, obj, attr, _, value): self.model_filter = None if value == "All" else value self.update_data() def on_transmission_change(self, obj, attr, _, value): self.transmission_filter = None if value == "All" else value self.update_data() def on_drive_change(self, obj, attr, _, value): self.drive_filter = None if value == "All" else value self.update_data() def on_class_change(self, obj, attr, _, value): self.class_filter = None if value == "All" else value self.update_data() def update_data(self): df = mpg if self.manufacturer_filter: df = df[df["manufacturer"] == self.manufacturer_filter] if self.model_filter: df = df[df["model"] == self.model_filter] if self.transmission_filter: df = df[df["trans"] == self.transmission_filter] if self.drive_filter: df = df[df["drv"] == self.drive_filter] if self.class_filter: df = df[df["class"] == self.class_filter] self.source.data = ColumnDataSource.from_df(df) self.session.store_document(self.document) def run(self, poll_interval=0.5): link = self.session.object_link(self.document.context) print("Please visit %s to see the plots (press ctrl-C to exit)" % link) try: while True: self.session.load_document(self.document) time.sleep(poll_interval) except KeyboardInterrupt: print() except ConnectionError: print("Connection to bokeh-server was terminated")
dropdown = Dropdown(label="Dropdown button", type="warning", menu=menu) dropdown.on_click(dropdown_handler) menu = [("Item 1", "foo"), ("Item 2", "bar"), None, ("Item 3", "baz")] split = Dropdown(label="Split button", type="danger", menu=menu, default_action="baz") split.on_click(split_handler) checkbox_group = CheckboxGroup(labels=["Option 1", "Option 2", "Option 3"], active=[0, 1]) checkbox_group.on_click(checkbox_group_handler) radio_group = RadioGroup(labels=["Option 1", "Option 2", "Option 3"], active=0) radio_group.on_click(radio_group_handler) checkbox_button_group = CheckboxButtonGroup(labels=["Option 1", "Option 2", "Option 3"], active=[0, 1]) checkbox_button_group.on_click(checkbox_button_group_handler) radio_button_group = RadioButtonGroup(labels=["Option 1", "Option 2", "Option 3"], active=0) radio_button_group.on_click(radio_button_group_handler) vbox = VBox(children=[button, toggle, dropdown, split, checkbox_group, radio_group, checkbox_button_group, radio_button_group]) document.add(vbox) session.store_document(document) if __name__ == "__main__": link = session.object_link(document.context) print("Please visit %s to see the plots" % link) view(link) print("\npress ctrl-C to exit") session.poll_document(document)
def make_layout(): plot, source = make_plot() columns = [ TableColumn(field="dates", title="Date", editor=DateEditor(), formatter=DateFormatter()), TableColumn(field="downloads", title="Downloads", editor=IntEditor()), ] data_table = DataTable(source=source, columns=columns, width=400, height=400, editable=True) button = Button(label="Randomize data", type="success") button.on_click(click_handler) buttons = VBox(children=[button]) vbox = VBox(children=[buttons, plot, data_table]) return vbox document.add(make_layout()) session.store_document(document) if __name__ == "__main__": link = session.object_link(document.context) print("Please visit %s to see the plots" % link) view(link) print("\npress ctrl-C to exit") session.poll_document(document)
class Population(object): year = 2010 location = "World" def __init__(self): from bokeh.objects import ColumnDataSource from bokeh.document import Document from bokeh.session import Session from bokeh.sampledata.population import load_population self.document = Document() self.session = Session() self.session.use_doc('population_reveal') self.session.load_document(self.document) self.df = load_population() self.source_pyramid = ColumnDataSource(data=dict()) def render(self): self.pyramid_plot() self.create_layout() self.document.add(self.layout) self.update_pyramid() def pyramid_plot(self): from bokeh.objects import (Plot, DataRange1d, LinearAxis, Grid, Glyph, Legend, SingleIntervalTicker) from bokeh.glyphs import Quad xdr = DataRange1d(sources=[self.source_pyramid.columns("male"), self.source_pyramid.columns("female")]) ydr = DataRange1d(sources=[self.source_pyramid.columns("groups")]) self.plot = Plot(title="Widgets", x_range=xdr, y_range=ydr, plot_width=600, plot_height=600) xaxis = LinearAxis(plot=self.plot) self.plot.below.append(xaxis) yaxis = LinearAxis(plot=self.plot, ticker=SingleIntervalTicker(interval=5)) self.plot.left.append(yaxis) xgrid = Grid(plot=self.plot, dimension=0, ticker=xaxis.ticker) ygrid = Grid(plot=self.plot, dimension=1, ticker=yaxis.ticker) male_quad = Quad(left="male", right=0, bottom="groups", top="shifted", fill_color="blue") male_quad_glyph = Glyph(data_source=self.source_pyramid, xdata_range=xdr, ydata_range=ydr, glyph=male_quad) self.plot.renderers.append(male_quad_glyph) female_quad = Quad(left=0, right="female", bottom="groups", top="shifted", fill_color="violet") female_quad_glyph = Glyph(data_source=self.source_pyramid, xdata_range=xdr, ydata_range=ydr, glyph=female_quad) self.plot.renderers.append(female_quad_glyph) legend = Legend(plot=self.plot, legends=dict(Male=[male_quad_glyph], Female=[female_quad_glyph])) self.plot.renderers.append(legend) def on_year_change(self, obj, attr, old, new): self.year = int(new) self.update_pyramid() def on_location_change(self, obj, attr, old, new): self.location = new self.update_pyramid() def create_layout(self): from bokeh.widgetobjects import Select, HBox, VBox years = list(map(str, sorted(self.df.Year.unique()))) locations = sorted(self.df.Location.unique()) year_select = Select(title="Year:", value="2010", options=years) location_select = Select(title="Location:", value="World", options=locations) year_select.on_change('value', self.on_year_change) location_select.on_change('value', self.on_location_change) controls = HBox(children=[year_select, location_select]) self.layout = VBox(children=[controls, self.plot]) def update_pyramid(self): pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)] male = pyramid[pyramid.Sex == "Male"] female = pyramid[pyramid.Sex == "Female"] total = male.Value.sum() + female.Value.sum() male_percent = -male.Value / total female_percent = female.Value / total groups = male.AgeGrpStart.tolist() shifted = groups[1:] + [groups[-1] + 5] self.source_pyramid.data = dict( groups=groups, shifted=shifted, male=male_percent, female=female_percent, ) self.session.store_document(self.document)
class Dashboard(object): """ A collection of plots and widgets served by bokeh-server. """ def __init__(self, port = BK_SERVER_PORT, doc_name = DOC_NAME, colors = COLORS, show = False, ): """ Initialize Bokeh session and document. """ self.document = Document() self.port = port self.show = show self.doc_name = doc_name name="http://*****:*****@type"), ]) seabornify(plot) return plot # Update Routine # -------------- def update(self): self.session.store_objects(self.data_source) # Run # --- def run(self, poll_interval=1): self.session.store_document(self.document) link = self.session.object_link(self.document.context) if self.show: import webbrowser webbrowser.open(link) else: print("Please visit %s to see the plots (press ctrl-C to exit)" % link) try: while True: self.session.load_document(self.document) sleep(poll_interval) except KeyboardInterrupt: print() except ConnectionError: print("Connection to bokeh-server was terminated.") if __name__ == "__main__": args = parse_args() dashboard = Dashboard(**vars(args)) dashboard.run()
class DashBoard(object): def __init__(self): self.document = Document() self.session = Session() self.session.use_doc('crawler_dashboard') self.session.load_document(self.document) #self.harvest_source = ColumnDataSource(data=dict()) #self.domain_relevant_source = ColumnDataSource(data=dict()) #self.domain_crawled_source = ColumnDataSource(data=dict()) #self.domain_frontier_source = ColumnDataSource(data=dict()) #self.handson_source = ColumnDataSource(data=dict()) #self.termite_source = ColumnDataSource(data=dict()) self.harvest = Harvest() self.domain = Domain() #handson = Handson() self.termite = Termite() self.document.add(self.create_layout()) self.session.store_document(self.document) def render(self): self.create_layout() self.document.add(self.layout) self.update_data() def create_layout(self): #button = Button(label="Randomize data", type="success") #button.on_click(update_data) #top_panel = HBox(children=[button, self.harvest.plot, self.harvest.rate_plot]) top_panel = HBox(children=[self.harvest.plot, self.harvest.rate_plot]) domains = VBox(children=[self.domain.sort_relevant_plot, self.domain.sort_crawled_plot, self.domain.sort_frontier_plot], width=200) #middle_panel = HBox(children=[domains, handson.plot]) middle_panel = HBox(children=[domains]) layout = VBox(children=[top_panel, middle_panel, self.termite.plot]) self.layout = layout return layout def update_data(self): self.harvest.source = self.harvest.update_source() self.domain.sort_relevant_source, self.domain.sort_crawled_source, self.domain.sort_frontier_source = self.domain.update_source() self.termite.data, self.termite.source = self.termite.update_source() #self.session.store_objects(ds) self.session.store_document(self.document) def run(self, poll_interval=0.5): #link = self.session.object_link(self.document.context) #print("Please visit %s to see the plots (press ctrl-C to exit)" % link) try: while True: self.update_data() self.session.load_document(self.document) time.sleep(poll_interval) except KeyboardInterrupt: print() except ConnectionError: print("Connection to bokeh-server was terminated")
class DataTables(object): def __init__(self): self.document = Document() self.session = Session() self.session.use_doc('data_tables_server') self.session.load_document(self.document) self.manufacturer_filter = None self.model_filter = None self.transmission_filter = None self.drive_filter = None self.class_filter = None self.source = ColumnDataSource() self.update_data() self.document.add(self.create()) self.session.store_document(self.document) def create(self): manufacturers = sorted(mpg["manufacturer"].unique()) models = sorted(mpg["model"].unique()) transmissions = sorted(mpg["trans"].unique()) drives = sorted(mpg["drv"].unique()) classes = sorted(mpg["class"].unique()) manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers) manufacturer_select.on_change('value', self.on_manufacturer_change) model_select = Select(title="Model:", value="All", options=["All"] + models) model_select.on_change('value', self.on_model_change) transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions) transmission_select.on_change('value', self.on_transmission_change) drive_select = Select(title="Drive:", value="All", options=["All"] + drives) drive_select.on_change('value', self.on_drive_change) class_select = Select(title="Class:", value="All", options=["All"] + classes) class_select.on_change('value', self.on_class_change) columns = [ TableColumn(field="manufacturer", title="Manufacturer", editor=SelectEditor(options=manufacturers), formatter=StringFormatter(font_style="bold")), TableColumn(field="model", title="Model", editor=StringEditor(completions=models)), TableColumn(field="displ", title="Displacement", editor=NumberEditor(step=0.1), formatter=NumberFormatter(format="0.0")), TableColumn(field="year", title="Year", editor=IntEditor()), TableColumn(field="cyl", title="Cylinders", editor=IntEditor()), TableColumn(field="trans", title="Transmission", editor=SelectEditor(options=transmissions)), TableColumn(field="drv", title="Drive", editor=SelectEditor(options=drives)), TableColumn(field="class", title="Class", editor=SelectEditor(options=classes)), TableColumn(field="cty", title="City MPG", editor=IntEditor()), TableColumn(field="hwy", title="Highway MPG", editor=IntEditor()), ] data_table = DataTable(source=self.source, columns=columns, editable=True) xdr = DataRange1d() ydr = DataRange1d() plot = Plot(title=None, x_range=xdr, y_range=ydr, plot_width=800, plot_height=300) xaxis = LinearAxis(plot=plot) plot.below.append(xaxis) yaxis = LinearAxis(plot=plot) ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker) plot.left.append(yaxis) cty_glyph = Circle(x="index", y="cty", fill_color="#396285", size=8, fill_alpha=0.5, line_alpha=0.5) hwy_glyph = Circle(x="index", y="hwy", fill_color="#CE603D", size=8, fill_alpha=0.5, line_alpha=0.5) cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph) hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph) tooltips = [ ("Manufacturer", "@manufacturer"), ("Model", "@model"), ("Displacement", "@displ"), ("Year", "@year"), ("Cylinders", "@cyl"), ("Transmission", "@trans"), ("Drive", "@drv"), ("Class", "@class"), ] cty_hover_tool = HoverTool(plot=plot, renderers=[cty], tooltips=tooltips + [("City MPG", "@cty")]) hwy_hover_tool = HoverTool(plot=plot, renderers=[hwy], tooltips=tooltips + [("Highway MPG", "@hwy")]) select_tool = BoxSelectTool(plot=plot, renderers=[cty, hwy], dimensions=['width']) plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool]) plot.renderers.extend([cty, hwy, ygrid]) controls = VBox(children=[manufacturer_select, model_select, transmission_select, drive_select, class_select], width=200) top_panel = HBox(children=[controls, plot]) layout = VBox(children=[top_panel, data_table]) return layout def on_manufacturer_change(self, obj, attr, _, value): self.manufacturer_filter = None if value == "All" else value self.update_data() def on_model_change(self, obj, attr, _, value): self.model_filter = None if value == "All" else value self.update_data() def on_transmission_change(self, obj, attr, _, value): self.transmission_filter = None if value == "All" else value self.update_data() def on_drive_change(self, obj, attr, _, value): self.drive_filter = None if value == "All" else value self.update_data() def on_class_change(self, obj, attr, _, value): self.class_filter = None if value == "All" else value self.update_data() def update_data(self): df = mpg if self.manufacturer_filter: df = df[df["manufacturer"] == self.manufacturer_filter] if self.model_filter: df = df[df["model"] == self.model_filter] if self.transmission_filter: df = df[df["trans"] == self.transmission_filter] if self.drive_filter: df = df[df["drv"] == self.drive_filter] if self.class_filter: df = df[df["class"] == self.class_filter] self.source.data = ColumnDataSource.from_df(df) self.session.store_document(self.document) def run(self, do_view=False, poll_interval=0.5): link = self.session.object_link(self.document.context) print("Please visit %s to see the plots" % link) if do_view: view(link) print("\npress ctrl-C to exit") self.session.poll_document(self.document)
class DataTables(object): def __init__(self): self.document = Document() self.session = Session() self.session.use_doc('data_tables_server') self.session.load_document(self.document) self.manufacturer_filter = None self.model_filter = None self.transmission_filter = None self.drive_filter = None self.class_filter = None self.source = ColumnDataSource() self.update_data() self.document.add(self.create()) self.session.store_document(self.document) def create(self): manufacturers = sorted(mpg["manufacturer"].unique()) models = sorted(mpg["model"].unique()) transmissions = sorted(mpg["trans"].unique()) drives = sorted(mpg["drv"].unique()) classes = sorted(mpg["class"].unique()) manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers) manufacturer_select.on_change('value', self.on_manufacturer_change) model_select = Select(title="Model:", value="All", options=["All"] + models) model_select.on_change('value', self.on_model_change) transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions) transmission_select.on_change('value', self.on_transmission_change) drive_select = Select(title="Drive:", value="All", options=["All"] + drives) drive_select.on_change('value', self.on_drive_change) class_select = Select(title="Class:", value="All", options=["All"] + classes) class_select.on_change('value', self.on_class_change) columns = [ TableColumn(field="manufacturer", header="Manufacturer", type="autocomplete", source=manufacturers), TableColumn(field="model", header="Model", type="autocomplete", source=models), TableColumn(field="displ", header="Displacement", type="numeric", format="0.00"), TableColumn(field="year", header="Year", type="numeric"), TableColumn(field="cyl", header="Cylinders", type="numeric"), TableColumn(field="trans", header="Transmission", type="dropdown", strict=True, source=transmissions), TableColumn(field="drv", header="Drive", type="autocomplete", strict=True, source=drives), TableColumn(field="class", header="Class", type="autocomplete", strict=True, source=classes), TableColumn(field="cty", header="City MPG", type="numeric"), TableColumn(field="hwy", header="Highway MPG", type="numeric"), ] handson_table = HandsonTable(source=self.source, columns=columns, sorting=True) xdr = DataRange1d(sources=[self.source.columns("index")]) #xdr = FactorRange(factors=manufacturers) ydr = DataRange1d( sources=[self.source.columns("cty"), self.source.columns("hwy")]) plot = Plot(title=None, data_sources=[self.source], x_range=xdr, y_range=ydr, plot_width=800, plot_height=300) xaxis = LinearAxis(plot=plot) plot.below.append(xaxis) yaxis = LinearAxis(plot=plot) ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker) plot.left.append(yaxis) cty = Glyph(data_source=self.source, glyph=Circle(x="index", y="cty", fill_color="#396285", size=8, fill_alpha=0.5, line_alpha=0.5)) hwy = Glyph(data_source=self.source, glyph=Circle(x="index", y="hwy", fill_color="#CE603D", size=8, fill_alpha=0.5, line_alpha=0.5)) select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False) plot.tools.append(select_tool) overlay = BoxSelectionOverlay(tool=select_tool) plot.renderers.extend([cty, hwy, ygrid, overlay]) controls = VBox(children=[ manufacturer_select, model_select, transmission_select, drive_select, class_select ], width=200) top_panel = HBox(children=[controls, plot]) layout = VBox(children=[top_panel, handson_table]) return layout def on_manufacturer_change(self, obj, attr, _, value): self.manufacturer_filter = None if value == "All" else value self.update_data() def on_model_change(self, obj, attr, _, value): self.model_filter = None if value == "All" else value self.update_data() def on_transmission_change(self, obj, attr, _, value): self.transmission_filter = None if value == "All" else value self.update_data() def on_drive_change(self, obj, attr, _, value): self.drive_filter = None if value == "All" else value self.update_data() def on_class_change(self, obj, attr, _, value): self.class_filter = None if value == "All" else value self.update_data() def update_data(self): df = mpg if self.manufacturer_filter: df = df[df["manufacturer"] == self.manufacturer_filter] if self.model_filter: df = df[df["model"] == self.model_filter] if self.transmission_filter: df = df[df["trans"] == self.transmission_filter] if self.drive_filter: df = df[df["drv"] == self.drive_filter] if self.class_filter: df = df[df["class"] == self.class_filter] self.source.data = ColumnDataSource.from_df(df) self.session.store_document(self.document) def run(self, do_view=False, poll_interval=0.5): link = self.session.object_link(self.document.context) print("Please visit %s to see the plots" % link) if do_view: view(link) print("\npress ctrl-C to exit") self.session.poll_document(self.document)
class DashBoard(object): def __init__(self, path, url): self.document = Document() self.session = Session(name=url, root_url=url) #self.session = Session('load_from_config=False') self.session.use_doc('crawler_dashboard') self.session.load_document(self.document) #self.harvest_source = ColumnDataSource(data=dict()) #self.domain_relevant_source = ColumnDataSource(data=dict()) #self.domain_crawled_source = ColumnDataSource(data=dict()) #self.domain_frontier_source = ColumnDataSource(data=dict()) #self.handson_source = ColumnDataSource(data=dict()) #self.termite_source = ColumnDataSource(data=dict()) self.harvest = Harvest(path) self.domain = Domain(path) #handson = Handson() #self.termite = Termite() self.document.add(self.create_layout()) self.session.store_document(self.document) def render(self): self.create_layout() self.document.add(self.layout) self.update_data() def create_layout(self): #button = Button(label="Randomize data", type="success") #button.on_click(update_data) #top_panel = HBox(children=[button, self.harvest.plot, self.harvest.rate_plot]) top_panel = HBox(children=[self.harvest.plot, self.harvest.rate_plot]) domains = VBox(children=[ self.domain.sort_relevant_plot, self.domain.sort_crawled_plot, self.domain.sort_frontier_plot ], width=200) #middle_panel = HBox(children=[domains, handson.plot]) middle_panel = HBox(children=[domains]) layout = VBox(children=[top_panel, middle_panel]) self.layout = layout return layout def update_data(self): self.harvest.source = self.harvest.update_source() self.domain.sort_relevant_source, self.domain.sort_crawled_source, self.domain.sort_frontier_source = self.domain.update_source( ) #self.termite.data, self.termite.source = self.termite.update_source() #self.session.store_objects(ds) self.session.store_document(self.document) def run(self, poll_interval=0.5): #link = self.session.object_link(self.document.context) #print("Please visit %s to see the plots (press ctrl-C to exit)" % link) try: while True: self.update_data() self.session.load_document(self.document) time.sleep(poll_interval) except KeyboardInterrupt: print() except ConnectionError: print("Connection to bokeh-server was terminated")
class DataTables(object): def __init__(self): self.document = Document() self.session = Session() self.session.use_doc('data_tables_server') self.session.load_document(self.document) self.manufacturer_filter = None self.model_filter = None self.transmission_filter = None self.drive_filter = None self.class_filter = None self.source = ColumnDataSource() self.update_data() self.document.add(self.create()) self.session.store_document(self.document) def create(self): manufacturers = sorted(mpg["manufacturer"].unique()) models = sorted(mpg["model"].unique()) transmissions = sorted(mpg["trans"].unique()) drives = sorted(mpg["drv"].unique()) classes = sorted(mpg["class"].unique()) manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers) manufacturer_select.on_change('value', self.on_manufacturer_change) model_select = Select(title="Model:", value="All", options=["All"] + models) model_select.on_change('value', self.on_model_change) transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions) transmission_select.on_change('value', self.on_transmission_change) drive_select = Select(title="Drive:", value="All", options=["All"] + drives) drive_select.on_change('value', self.on_drive_change) class_select = Select(title="Class:", value="All", options=["All"] + classes) class_select.on_change('value', self.on_class_change) columns = [ TableColumn(field="manufacturer", title="Manufacturer", editor=SelectEditor(options=manufacturers), formatter=StringFormatter(font_style="bold")), TableColumn(field="model", title="Model", editor=StringEditor(completions=models)), TableColumn(field="displ", title="Displacement", editor=NumberEditor(step=0.1), formatter=NumberFormatter(format="0.0")), TableColumn(field="year", title="Year", editor=IntEditor()), TableColumn(field="cyl", title="Cylinders", editor=IntEditor()), TableColumn(field="trans", title="Transmission", editor=SelectEditor(options=transmissions)), TableColumn(field="drv", title="Drive", editor=SelectEditor(options=drives)), TableColumn(field="class", title="Class", editor=SelectEditor(options=classes)), TableColumn(field="cty", title="City MPG", editor=IntEditor()), TableColumn(field="hwy", title="Highway MPG", editor=IntEditor()), ] data_table = DataTable(source=self.source, columns=columns, editable=True) xdr = DataRange1d() ydr = DataRange1d() plot = Plot(title=None, x_range=xdr, y_range=ydr, plot_width=800, plot_height=300) xaxis = LinearAxis(plot=plot) plot.below.append(xaxis) yaxis = LinearAxis(plot=plot) ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker) plot.left.append(yaxis) cty_glyph = Circle(x="index", y="cty", fill_color="#396285", size=8, fill_alpha=0.5, line_alpha=0.5) hwy_glyph = Circle(x="index", y="hwy", fill_color="#CE603D", size=8, fill_alpha=0.5, line_alpha=0.5) cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph) hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph) tooltips = [ ("Manufacturer", "@manufacturer"), ("Model", "@model"), ("Displacement", "@displ"), ("Year", "@year"), ("Cylinders", "@cyl"), ("Transmission", "@trans"), ("Drive", "@drv"), ("Class", "@class"), ] cty_hover_tool = HoverTool(plot=plot, renderers=[cty], tooltips=tooltips + [("City MPG", "@cty")]) hwy_hover_tool = HoverTool(plot=plot, renderers=[hwy], tooltips=tooltips + [("Highway MPG", "@hwy")]) select_tool = BoxSelectTool(plot=plot, renderers=[cty, hwy], dimensions=['width']) plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool]) plot.renderers.extend([cty, hwy, ygrid]) controls = VBox(children=[ manufacturer_select, model_select, transmission_select, drive_select, class_select ], width=200) top_panel = HBox(children=[controls, plot]) layout = VBox(children=[top_panel, data_table]) return layout def on_manufacturer_change(self, obj, attr, _, value): self.manufacturer_filter = None if value == "All" else value self.update_data() def on_model_change(self, obj, attr, _, value): self.model_filter = None if value == "All" else value self.update_data() def on_transmission_change(self, obj, attr, _, value): self.transmission_filter = None if value == "All" else value self.update_data() def on_drive_change(self, obj, attr, _, value): self.drive_filter = None if value == "All" else value self.update_data() def on_class_change(self, obj, attr, _, value): self.class_filter = None if value == "All" else value self.update_data() def update_data(self): df = mpg if self.manufacturer_filter: df = df[df["manufacturer"] == self.manufacturer_filter] if self.model_filter: df = df[df["model"] == self.model_filter] if self.transmission_filter: df = df[df["trans"] == self.transmission_filter] if self.drive_filter: df = df[df["drv"] == self.drive_filter] if self.class_filter: df = df[df["class"] == self.class_filter] self.source.data = ColumnDataSource.from_df(df) self.session.store_document(self.document) def run(self, do_view=False, poll_interval=0.5): link = self.session.object_link(self.document.context) print("Please visit %s to see the plots" % link) if do_view: view(link) print("\npress ctrl-C to exit") self.session.poll_document(self.document)
class Population(object): year = 2010 location = "World" def __init__(self): from bokeh.models import ColumnDataSource from bokeh.document import Document from bokeh.session import Session from bokeh.sampledata.population import load_population self.document = Document() self.session = Session() self.session.use_doc('population_reveal') self.session.load_document(self.document) self.df = load_population() self.source_pyramid = ColumnDataSource(data=dict()) # just render at the initialization self._render() def _render(self): self.pyramid_plot() self.create_layout() self.document.add(self.layout) self.update_pyramid() def pyramid_plot(self): from bokeh.models import (Plot, DataRange1d, LinearAxis, Grid, Legend, SingleIntervalTicker) from bokeh.models.glyphs import Quad xdr = DataRange1d(sources=[self.source_pyramid.columns("male"), self.source_pyramid.columns("female")]) ydr = DataRange1d(sources=[self.source_pyramid.columns("groups")]) self.plot = Plot(title="Widgets", x_range=xdr, y_range=ydr, plot_width=600, plot_height=600) xaxis = LinearAxis() self.plot.add_layout(xaxis, 'below') yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5)) self.plot.add_layout(yaxis, 'left') self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker)) self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker)) male_quad = Quad(left="male", right=0, bottom="groups", top="shifted", fill_color="#3B8686") male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad) female_quad = Quad(left=0, right="female", bottom="groups", top="shifted", fill_color="#CFF09E") female_quad_glyph = self.plot.add_glyph(self.source_pyramid, female_quad) self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph], Female=[female_quad_glyph]))) def on_year_change(self, obj, attr, old, new): self.year = int(new) self.update_pyramid() def on_location_change(self, obj, attr, old, new): self.location = new self.update_pyramid() def create_layout(self): from bokeh.models.widgets import Select, HBox, VBox years = list(map(str, sorted(self.df.Year.unique()))) locations = sorted(self.df.Location.unique()) year_select = Select(title="Year:", value="2010", options=years) location_select = Select(title="Location:", value="World", options=locations) year_select.on_change('value', self.on_year_change) location_select.on_change('value', self.on_location_change) controls = HBox(year_select, location_select) self.layout = VBox(controls, self.plot) def update_pyramid(self): pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)] male = pyramid[pyramid.Sex == "Male"] female = pyramid[pyramid.Sex == "Female"] total = male.Value.sum() + female.Value.sum() male_percent = -male.Value / total female_percent = female.Value / total groups = male.AgeGrpStart.tolist() shifted = groups[1:] + [groups[-1] + 5] self.source_pyramid.data = dict( groups=groups, shifted=shifted, male=male_percent, female=female_percent, ) self.session.store_document(self.document)
class Population(object): year = 2010 location = "World" def __init__(self): from bokeh.document import Document from bokeh.session import Session from bokeh.models import ColumnDataSource from bokeh.sampledata.population import load_population self.document = Document() self.session = Session() self.session.use_doc('population') self.session.load_document(self.document) self.df = load_population() self.source_pyramid = ColumnDataSource(data=dict()) def render(self): self.pyramid_plot() self.create_layout() self.document.add(self.layout) self.update_pyramid() def pyramid_plot(self): from bokeh.models import ( Plot, DataRange1d, LinearAxis, Grid, Legend, SingleIntervalTicker ) from bokeh.models.glyphs import Quad xdr = DataRange1d() ydr = DataRange1d() self.plot = Plot(title=None, x_range=xdr, y_range=ydr, plot_width=600, plot_height=600) xaxis = LinearAxis() self.plot.add_layout(xaxis, 'below') yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5)) self.plot.add_layout(yaxis, 'left') self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker)) self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker)) male_quad = Quad(left="male", right=0, bottom="groups", top="shifted", fill_color="#3B8686") male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad) female_quad = Quad(left=0, right="female", bottom="groups", top="shifted", fill_color="#CFF09E") female_quad_glyph = self.plot.add_glyph(self.source_pyramid, female_quad) self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph], Female=[female_quad_glyph]))) def on_year_change(self, obj, attr, old, new): self.year = int(new) self.update_pyramid() def on_location_change(self, obj, attr, old, new): self.location = new self.update_pyramid() def create_layout(self): from bokeh.models.widgets import Select, HBox, VBox years = list(map(str, sorted(self.df.Year.unique()))) locations = sorted(self.df.Location.unique()) year_select = Select(title="Year:", value="2010", options=years) location_select = Select(title="Location:", value="World", options=locations) year_select.on_change('value', self.on_year_change) location_select.on_change('value', self.on_location_change) controls = HBox(year_select, location_select) self.layout = VBox(controls, self.plot) def update_pyramid(self): pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)] male = pyramid[pyramid.Sex == "Male"] female = pyramid[pyramid.Sex == "Female"] total = male.Value.sum() + female.Value.sum() male_percent = -male.Value / total female_percent = female.Value / total groups = male.AgeGrpStart.tolist() shifted = groups[1:] + [groups[-1] + 5] self.source_pyramid.data = dict( groups=groups, shifted=shifted, male=male_percent, female=female_percent, ) self.session.store_document(self.document)
class NutchUrlTrails: """ Class for managing URL Trails visualizations """ @staticmethod def strip_url(url): """ Make a URL safe for visualization in Bokeh server :param url: a URL to be shortened/stripped :return: The stripped URL """ # TODO: remove protocol-stripping on next Bokeh release stripped_url = url.replace('https://', '').replace('http://', '').replace(':', '_').replace('-', '_') if len(stripped_url) <= URL_CHAR_WIDTH: return stripped_url else: return stripped_url[:int(URL_CHAR_WIDTH/2)] + '...' + stripped_url[-int(URL_CHAR_WIDTH/2)-3:] @staticmethod def jtime_to_datetime(t): """ Convert a Java-format Epoch time stamp into np.datetime64 object :param t: Java-format Epoch time stamp (milliseconds) :return: A np.datetime64 scalar """ return np.datetime64(datetime.fromtimestamp(t/1000.0)) def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS): """ Create a NutchUrlTrails instance for visualizing a running Nutch crawl in real-time using Bokeh :param name: The name of the crawl (as identified by the queue) :param num_urls: The number of URLs to display in the visualization :return: A NutchUrLTrails instance """ self.crawl_name = crawl_name self.num_urls = num_urls self.open_urls = {} self.closed_urls = {} self.old_segments = None self.old_circles = None self.session = Session() self.session.use_doc(self.crawl_name) self.document = Document() con = Connection() exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False) queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name) self.queue = con.SimpleQueue(name=queue) def handle_messages(self): """ Get and parse up to 250 messages from the queue then plot. Break early if less. """ for i in range(250): try: m = self.queue.get(block=True, timeout=1) self.parse_message(m) except Empty: break self.plot_urls() def parse_message(self, message): """ Parse a single message arriving from the queue. Updates list of open/closed urls. :param message: A message from the queue """ print(message.body) message = json.loads(message.body) url = message["url"] if message["eventType"] == "START": self.open_urls[url] = NutchUrlTrails.jtime_to_datetime(message["timestamp"]) elif message["eventType"] == "END": if url in self.open_urls: self.closed_urls[url] = (self.open_urls[url], NutchUrlTrails.jtime_to_datetime(message["timestamp"])) del self.open_urls[url] else: # TODO: Log mismatched messages instead of just swallowing them pass else: raise Exception("Unexpected message type") def plot_urls(self): """ Visualize crawler activity by showing the most recently crawled URLs and the fetch time. """ self.session.load_document(self.document) plot = self.document.context.children[0] # don't plot if no URLs available if not (self.open_urls or self.closed_urls): return # x0/x0, left and right boundaries of segments, correspond to fetch time x0 = [] x = [] # y-axis, name of URL being fetched urls = [] # maintain x and URL of circles in a separate list circles = [] circle_urls = [] current_time = np.datetime64(datetime.now()) # For open URLs (not completed fetching), draw a segment from start time to now for url, start_t in self.open_urls.items(): url = NutchUrlTrails.strip_url(url) x0.append(start_t) x.append(current_time) urls.append(url) # For closed URLs (completed fetching), draw a segment from start to end time, and a circle as well. for url, (start_t, end_t) in self.closed_urls.items(): url = NutchUrlTrails.strip_url(url) x0.append(start_t) x.append(end_t) circles.append(end_t) urls.append(url) circle_urls.append(url) x0 = np.asarray(x0) x = np.asarray(x) circles = np.asarray(circles) # sort segments sort_index = np.argsort(x0)[::-1] x0 = x0[sort_index] x = x[sort_index] urls = [urls[i] for i in sort_index] # sort circles if self.closed_urls: circle_sort_index = np.argsort(circles)[::-1] circles = circles[circle_sort_index] circle_urls = [circle_urls[i] for i in circle_sort_index] # Filter to latest num_url URLs (ascending order) # filter segments active_x0 = x0[:self.num_urls] active_x = x[:self.num_urls] active_urls = urls[:self.num_urls] min_x = min(active_x0) plot.x_range.start = min_x plot.x_range.end = np.datetime64(datetime.now()) plot.y_range.factors = active_urls # make sure these are turned back on! # turn y axis grid lines back on for r in plot.renderers: if type(r) == Grid: r.grid_line_color = 'black' break # turn tickers and their labels back on plot.right[0].minor_tick_line_color = 'black' plot.right[0].major_tick_line_color = 'black' plot.right[0].major_label_text_font_size = '12pt' plot.below[0].minor_tick_line_color = 'black' plot.below[0].major_tick_line_color = 'black' plot.below[0].major_label_text_font_size = '12pt' # TODO: Find a more correct way to remove old segment/circle glyphs if self.old_circles: plot.renderers.pop() self.old_circles = None if self.old_segments: plot.renderers.pop() self.old_segments = None segment_source = ColumnDataSource(dict(x0=active_x0, x1=active_x, urls=active_urls)) self.old_segments = Segment(x0="x0", y0="urls", x1="x1", y1="urls", line_color="orange", line_width=10) plot.add_glyph(segment_source, self.old_segments) if self.closed_urls and PLOT_CIRCLES: # filter circles (some of these might not be displayed) active_circles = circles[:self.num_urls] active_circle_urls = circle_urls[:self.num_urls] circle_source = ColumnDataSource(dict(x=active_circles, urls=active_circle_urls)) self.old_circles = Circle(x="x", y="urls", size=12, fill_color="green", line_color="orange", line_width=2) plot.add_glyph(circle_source, self.old_circles) self.session.store_document(self.document, dirty_only=False)