Beispiel #1
0
def init_plot(crawl_name):
    session = Session()
    document = Document()
    session.use_doc(crawl_name)
    session.load_document(document)

    if document.context.children:
        plot = document.context.children[0]
    else:
        output_server(crawl_name)
        # TODO: Remove these when Bokeh is upgraded
        # placeholders or Bokeh can't inject properly
        current = np.datetime64(datetime.now())
        xdr = Range1d(current, current + 1)
        ydr = ["urls"]

        # styling suggested by Bryan
        plot = figure(title="Crawler Monitor",
                      tools="hover",
                      x_axis_type="datetime",
                      y_axis_location="right",
                      x_range=xdr,
                      y_range=ydr,
                      width=1200,
                      height=600)
        plot.toolbar_location = None
        plot.xgrid.grid_line_color = None

    document.add(plot)
    session.store_document(document)
    script = autoload_server(plot, session)

    #TODO: Looks like a Bokeh bug, probably not repeatable with current code
    script = script.replace("'modelid': u'", "'modelid': '")
    return script
Beispiel #2
0
def init_plot(crawl_name):
    session = Session()
    document = Document()
    session.use_doc(crawl_name)
    session.load_document(document)

    if document.context.children:
        plot = document.context.children[0]
    else:
        output_server(crawl_name)
        # TODO: Remove these when Bokeh is upgraded
        # placeholders or Bokeh can't inject properly
        current = np.datetime64(datetime.now())
        xdr = Range1d(current, current + 1)
        ydr = ["urls"]

        # styling suggested by Bryan
        plot = figure(title="Crawler Monitor", tools="hover",
                      x_axis_type="datetime", y_axis_location="right", x_range=xdr, y_range=ydr,
                      width=1200, height=600)
        plot.toolbar_location = None
        plot.xgrid.grid_line_color = None

    document.add(plot)
    session.store_document(document)
    script = autoload_server(plot, session)

    #TODO: Looks like a Bokeh bug, probably not repeatable with current code
    script = script.replace("'modelid': u'", "'modelid': '")
    return script
        def wrapper(*args, **kwargs):
            document = Document()
            session = Session(name=url, root_url=url)
            session.use_doc(document.docid)
            session.load_document(document)
            session.publish()
            document.autoadd = False
            document.autostore = False

            obj = func(*args, **kwargs)
            obj._docid = session.docid
            obj._root_url = session.root_url

            document.add(obj)
            session.store_document(document)
            return obj
Beispiel #4
0
        def wrapper(*args, **kwargs):
            docname = prefix + str(uuid.uuid4())
            session = Session(name=url, root_url=url)
            session.use_doc(docname)
            session.load_document(curdoc())
            curdoc().autoadd = False
            curdoc().autostore = False

            obj = func(*args, **kwargs)
            tag = embed.autoload_server(obj, session)
            obj.tag = tag

            curdoc().add(obj)
            changed = session.store_document(curdoc())

            logger.debug("stored: %s", str(changed))

            return obj
Beispiel #5
0
        def wrapper(*args, **kwargs):
            docname = prefix + str(uuid.uuid4())
            session = Session(name=url, root_url=url)
            session.use_doc(docname)
            session.load_document(curdoc())
            curdoc().autoadd = False
            curdoc().autostore = False

            obj = func(*args, **kwargs)
            tag = embed.autoload_server(obj, session)
            obj.tag = tag

            curdoc().add(obj)
            changed = session.store_document(curdoc())

            logger.debug("stored: %s", str(changed))

            return obj
Beispiel #6
0
class NutchUrlTrails:
    """
    Class for managing URL Trails visualizations
    """
    @staticmethod
    def strip_url(url):
        """
        Make a URL safe for visualization in Bokeh server
        :param url: a URL to be shortened/stripped
        :return: The stripped URL
        """
        # TODO: remove protocol-stripping on next Bokeh release
        stripped_url = url.replace('https://',
                                   '').replace('http://', '').replace(
                                       ':', '_').replace('-', '_')

        if len(stripped_url) <= URL_CHAR_WIDTH:
            return stripped_url
        else:
            return stripped_url[:int(URL_CHAR_WIDTH /
                                     2)] + '...' + stripped_url[
                                         -int(URL_CHAR_WIDTH / 2) - 3:]

    @staticmethod
    def jtime_to_datetime(t):
        """
        Convert a Java-format Epoch time stamp into np.datetime64 object
        :param t: Java-format Epoch time stamp (milliseconds)
        :return: A np.datetime64 scalar
        """
        return np.datetime64(datetime.fromtimestamp(t / 1000.0))

    def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS):
        """
        Create a NutchUrlTrails instance for visualizing a running Nutch crawl in real-time using Bokeh
        :param name: The name of the crawl (as identified by the queue)
        :param num_urls: The number of URLs to display in the visualization
        :return: A NutchUrLTrails instance
        """
        self.crawl_name = crawl_name
        self.num_urls = num_urls
        self.open_urls = {}
        self.closed_urls = {}
        self.old_segments = None
        self.old_circles = None

        self.session = Session()
        self.session.use_doc(self.crawl_name)
        self.document = Document()

        con = Connection()

        exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False)
        queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name)
        self.queue = con.SimpleQueue(name=queue)

    def handle_messages(self):
        """
        Get and parse up to 250 messages from the queue then plot.  Break early if less.
        """

        for i in range(250):
            try:
                m = self.queue.get(block=True, timeout=1)
                self.parse_message(m)
            except Empty:
                break
        self.plot_urls()

    def parse_message(self, message):
        """
        Parse a single message arriving from the queue.  Updates list of open/closed urls.
        :param message: A message from the queue
        """
        print(message.body)
        message = json.loads(message.body)
        url = message["url"]
        if message["eventType"] == "START":
            self.open_urls[url] = NutchUrlTrails.jtime_to_datetime(
                message["timestamp"])
        elif message["eventType"] == "END":
            if url in self.open_urls:
                self.closed_urls[url] = (self.open_urls[url],
                                         NutchUrlTrails.jtime_to_datetime(
                                             message["timestamp"]))
                del self.open_urls[url]
            else:
                # TODO: Log mismatched messages instead of just swallowing them
                pass
        else:
            raise Exception("Unexpected message type")

    def plot_urls(self):
        """
        Visualize crawler activity by showing the most recently crawled URLs and the fetch time.
        """

        self.session.load_document(self.document)
        plot = self.document.context.children[0]

        # don't plot if no URLs available
        if not (self.open_urls or self.closed_urls):
            return

        # x0/x0, left and right boundaries of segments, correspond to fetch time
        x0 = []
        x = []
        # y-axis, name of URL being fetched
        urls = []

        # maintain x and URL of circles in a separate list
        circles = []
        circle_urls = []

        current_time = np.datetime64(datetime.now())

        # For open URLs (not completed fetching), draw a segment from start time to now
        for url, start_t in self.open_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(current_time)
            urls.append(url)

        # For closed URLs (completed fetching), draw a segment from start to end time, and a circle as well.
        for url, (start_t, end_t) in self.closed_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(end_t)
            circles.append(end_t)
            urls.append(url)
            circle_urls.append(url)

        x0 = np.asarray(x0)
        x = np.asarray(x)
        circles = np.asarray(circles)

        # sort segments
        sort_index = np.argsort(x0)[::-1]
        x0 = x0[sort_index]
        x = x[sort_index]
        urls = [urls[i] for i in sort_index]

        # sort circles
        if self.closed_urls:
            circle_sort_index = np.argsort(circles)[::-1]
            circles = circles[circle_sort_index]
            circle_urls = [circle_urls[i] for i in circle_sort_index]

        # Filter to latest num_url URLs (ascending order)
        # filter segments
        active_x0 = x0[:self.num_urls]
        active_x = x[:self.num_urls]
        active_urls = urls[:self.num_urls]

        min_x = min(active_x0)
        plot.x_range.start = min_x
        plot.x_range.end = np.datetime64(datetime.now())
        plot.y_range.factors = active_urls

        # make sure these are turned back on!
        # turn y axis grid lines back on
        for r in plot.renderers:
            if type(r) == Grid:
                r.grid_line_color = 'black'
                break

        # turn tickers and their labels back on
        plot.right[0].minor_tick_line_color = 'black'
        plot.right[0].major_tick_line_color = 'black'
        plot.right[0].major_label_text_font_size = '12pt'
        plot.below[0].minor_tick_line_color = 'black'
        plot.below[0].major_tick_line_color = 'black'
        plot.below[0].major_label_text_font_size = '12pt'

        # TODO: Find a more correct way to remove old segment/circle glyphs
        if self.old_circles:
            plot.renderers.pop()
            self.old_circles = None
        if self.old_segments:
            plot.renderers.pop()
            self.old_segments = None

        segment_source = ColumnDataSource(
            dict(x0=active_x0, x1=active_x, urls=active_urls))

        self.old_segments = Segment(x0="x0",
                                    y0="urls",
                                    x1="x1",
                                    y1="urls",
                                    line_color="orange",
                                    line_width=10)
        plot.add_glyph(segment_source, self.old_segments)

        if self.closed_urls and PLOT_CIRCLES:
            # filter circles (some of these might not be displayed)
            active_circles = circles[:self.num_urls]
            active_circle_urls = circle_urls[:self.num_urls]

            circle_source = ColumnDataSource(
                dict(x=active_circles, urls=active_circle_urls))

            self.old_circles = Circle(x="x",
                                      y="urls",
                                      size=12,
                                      fill_color="green",
                                      line_color="orange",
                                      line_width=2)
            plot.add_glyph(circle_source, self.old_circles)

        self.session.store_document(self.document, dirty_only=False)
Beispiel #7
0
class DataTables(object):

    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:", value="All", options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:", value="All", options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:", value="All", options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", header="Manufacturer", type="autocomplete", source=manufacturers),
            TableColumn(field="model", header="Model", type="autocomplete", source=models),
            TableColumn(field="displ", header="Displacement", type="numeric", format="0.00"),
            TableColumn(field="year", header="Year", type="numeric"),
            TableColumn(field="cyl", header="Cylinders", type="numeric"),
            TableColumn(field="trans", header="Transmission", type="dropdown", strict=True, source=transmissions),
            TableColumn(field="drv", header="Drive", type="autocomplete", strict=True, source=drives),
            TableColumn(field="class", header="Class", type="autocomplete", strict=True, source=classes),
            TableColumn(field="cty", header="City MPG", type="numeric"),
            TableColumn(field="hwy", header="Highway MPG", type="numeric"),
        ]
        handson_table = HandsonTable(source=self.source, columns=columns, sorting=True)

        xdr = DataRange1d(sources=[self.source.columns("index")])
        #xdr = FactorRange(factors=manufacturers)
        ydr = DataRange1d(sources=[self.source.columns("cty"), self.source.columns("hwy")])
        plot = Plot(title=None, data_sources=[self.source], x_range=xdr, y_range=ydr, plot_width=800, plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty = Glyph(data_source=self.source, glyph=Circle(x="index", y="cty", fill_color="green"))
        hwy = Glyph(data_source=self.source, glyph=Circle(x="index", y="hwy", fill_color="red"))
        select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False)
        plot.tools.append(select_tool)
        overlay = BoxSelectionOverlay(tool=select_tool)
        plot.renderers.extend([cty, hwy, ygrid, overlay])

        controls = VBox(children=[manufacturer_select, model_select, transmission_select, drive_select, class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, handson_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
Beispiel #8
0
dropdown = Dropdown(label="Dropdown button", type="warning", menu=menu)
dropdown.on_click(dropdown_handler)

menu = [("Item 1", "foo"), ("Item 2", "bar"), None, ("Item 3", "baz")]
split = Dropdown(label="Split button", type="danger", menu=menu, default_action="baz")
split.on_click(split_handler)

checkbox_group = CheckboxGroup(labels=["Option 1", "Option 2", "Option 3"], active=[0, 1])
checkbox_group.on_click(checkbox_group_handler)

radio_group = RadioGroup(labels=["Option 1", "Option 2", "Option 3"], active=0)
radio_group.on_click(radio_group_handler)

checkbox_button_group = CheckboxButtonGroup(labels=["Option 1", "Option 2", "Option 3"], active=[0, 1])
checkbox_button_group.on_click(checkbox_button_group_handler)

radio_button_group = RadioButtonGroup(labels=["Option 1", "Option 2", "Option 3"], active=0)
radio_button_group.on_click(radio_button_group_handler)

vbox = VBox(children=[button, toggle, dropdown, split, checkbox_group, radio_group, checkbox_button_group, radio_button_group])

document.add(vbox)
session.store_document(document)

if __name__ == "__main__":
    link = session.object_link(document.context)
    print("Please visit %s to see the plots" % link)
    view(link)
    print("\npress ctrl-C to exit")
    session.poll_document(document)
Beispiel #9
0
def make_layout():
    plot, source = make_plot()
    columns = [
        TableColumn(field="dates",
                    title="Date",
                    editor=DateEditor(),
                    formatter=DateFormatter()),
        TableColumn(field="downloads", title="Downloads", editor=IntEditor()),
    ]
    data_table = DataTable(source=source,
                           columns=columns,
                           width=400,
                           height=400,
                           editable=True)
    button = Button(label="Randomize data", type="success")
    button.on_click(click_handler)
    buttons = VBox(children=[button])
    vbox = VBox(children=[buttons, plot, data_table])
    return vbox


document.add(make_layout())
session.store_document(document)

if __name__ == "__main__":
    link = session.object_link(document.context)
    print("Please visit %s to see the plots" % link)
    view(link)
    print("\npress ctrl-C to exit")
    session.poll_document(document)
Beispiel #10
0
class Population(object):

    year = 2010
    location = "World"

    def __init__(self):
        from bokeh.objects import ColumnDataSource
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population_reveal')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

    def render(self):
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        from bokeh.objects import (Plot, DataRange1d, LinearAxis, Grid, Glyph,
                                   Legend, SingleIntervalTicker)
        from bokeh.glyphs import Quad

        xdr = DataRange1d(sources=[self.source_pyramid.columns("male"),
                                   self.source_pyramid.columns("female")])
        ydr = DataRange1d(sources=[self.source_pyramid.columns("groups")])

        self.plot = Plot(title="Widgets", x_range=xdr, y_range=ydr, plot_width=600, plot_height=600)

        xaxis = LinearAxis(plot=self.plot)
        self.plot.below.append(xaxis)
        yaxis = LinearAxis(plot=self.plot, ticker=SingleIntervalTicker(interval=5))
        self.plot.left.append(yaxis)

        xgrid = Grid(plot=self.plot, dimension=0, ticker=xaxis.ticker)
        ygrid = Grid(plot=self.plot, dimension=1, ticker=yaxis.ticker)

        male_quad = Quad(left="male", right=0, bottom="groups", top="shifted", fill_color="blue")
        male_quad_glyph = Glyph(data_source=self.source_pyramid,
                                xdata_range=xdr, ydata_range=ydr, glyph=male_quad)
        self.plot.renderers.append(male_quad_glyph)

        female_quad = Quad(left=0, right="female", bottom="groups", top="shifted",
                           fill_color="violet")
        female_quad_glyph = Glyph(data_source=self.source_pyramid,
                                  xdata_range=xdr, ydata_range=ydr, glyph=female_quad)
        self.plot.renderers.append(female_quad_glyph)

        legend = Legend(plot=self.plot, legends=dict(Male=[male_quad_glyph],
                        Female=[female_quad_glyph]))
        self.plot.renderers.append(legend)

    def on_year_change(self, obj, attr, old, new):
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        from bokeh.widgetobjects import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        year_select = Select(title="Year:", value="2010", options=years)
        location_select = Select(title="Location:", value="World", options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(children=[year_select, location_select])
        self.layout = VBox(children=[controls, self.plot])

    def update_pyramid(self):
        pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()

        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        shifted = groups[1:] + [groups[-1] + 5]

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
Beispiel #11
0
class Dashboard(object):
    """
    A collection of plots and widgets served by bokeh-server.
    """

    def __init__(self,
            port = BK_SERVER_PORT,
            doc_name = DOC_NAME,
            colors = COLORS,
            show = False,
        ):
        """
        Initialize Bokeh session and document.
        """

        self.document = Document()

        self.port = port
        self.show = show
        self.doc_name = doc_name

        name="http://*****:*****@type"),
        ])

        seabornify(plot)
        return plot

# Update Routine
# --------------

    def update(self):
        self.session.store_objects(self.data_source)

# Run
# ---

    def run(self, poll_interval=1):

        self.session.store_document(self.document)

        link = self.session.object_link(self.document.context)
        if self.show:
            import webbrowser
            webbrowser.open(link)
        else:
            print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.session.load_document(self.document)
                sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated.")

if __name__ == "__main__":

    args = parse_args()

    dashboard = Dashboard(**vars(args))
    dashboard.run()
Beispiel #12
0
class DashBoard(object):

    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('crawler_dashboard')
        self.session.load_document(self.document)

        #self.harvest_source = ColumnDataSource(data=dict())
        #self.domain_relevant_source = ColumnDataSource(data=dict())
        #self.domain_crawled_source = ColumnDataSource(data=dict())
        #self.domain_frontier_source = ColumnDataSource(data=dict())
        #self.handson_source = ColumnDataSource(data=dict())
        #self.termite_source = ColumnDataSource(data=dict())

        self.harvest = Harvest()
        self.domain = Domain()
        #handson = Handson()
        self.termite = Termite()

        self.document.add(self.create_layout())
        self.session.store_document(self.document)

    def render(self):
        self.create_layout()
        self.document.add(self.layout)
        self.update_data()

    def create_layout(self):

        #button = Button(label="Randomize data", type="success")
        #button.on_click(update_data)
        #top_panel = HBox(children=[button, self.harvest.plot, self.harvest.rate_plot])
        top_panel = HBox(children=[self.harvest.plot, self.harvest.rate_plot])
        domains = VBox(children=[self.domain.sort_relevant_plot, self.domain.sort_crawled_plot, self.domain.sort_frontier_plot], width=200)   
        #middle_panel = HBox(children=[domains, handson.plot])
        middle_panel = HBox(children=[domains])
        layout = VBox(children=[top_panel, middle_panel, self.termite.plot])
        self.layout = layout
        return layout

    def update_data(self):

        self.harvest.source = self.harvest.update_source()
        self.domain.sort_relevant_source, self.domain.sort_crawled_source, self.domain.sort_frontier_source = self.domain.update_source()
        self.termite.data, self.termite.source = self.termite.update_source()
        #self.session.store_objects(ds)
        self.session.store_document(self.document)

        
    def run(self, poll_interval=0.5):
        #link = self.session.object_link(self.document.context)
        #print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.update_data()
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
Beispiel #13
0
class DataTables(object):

    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:", value="All", options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:", value="All", options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:", value="All", options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", title="Manufacturer", editor=SelectEditor(options=manufacturers), formatter=StringFormatter(font_style="bold")),
            TableColumn(field="model",        title="Model",        editor=StringEditor(completions=models)),
            TableColumn(field="displ",        title="Displacement", editor=NumberEditor(step=0.1),              formatter=NumberFormatter(format="0.0")),
            TableColumn(field="year",         title="Year",         editor=IntEditor()),
            TableColumn(field="cyl",          title="Cylinders",    editor=IntEditor()),
            TableColumn(field="trans",        title="Transmission", editor=SelectEditor(options=transmissions)),
            TableColumn(field="drv",          title="Drive",        editor=SelectEditor(options=drives)),
            TableColumn(field="class",        title="Class",        editor=SelectEditor(options=classes)),
            TableColumn(field="cty",          title="City MPG",     editor=IntEditor()),
            TableColumn(field="hwy",          title="Highway MPG",  editor=IntEditor()),
        ]
        data_table = DataTable(source=self.source, columns=columns, editable=True)

        xdr = DataRange1d()
        ydr = DataRange1d()
        plot = Plot(title=None, x_range=xdr, y_range=ydr, plot_width=800, plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty_glyph = Circle(x="index", y="cty", fill_color="#396285", size=8, fill_alpha=0.5, line_alpha=0.5)
        hwy_glyph = Circle(x="index", y="hwy", fill_color="#CE603D", size=8, fill_alpha=0.5, line_alpha=0.5)
        cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph)
        hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph)
        tooltips = [
            ("Manufacturer", "@manufacturer"),
            ("Model", "@model"),
            ("Displacement", "@displ"),
            ("Year", "@year"),
            ("Cylinders", "@cyl"),
            ("Transmission", "@trans"),
            ("Drive", "@drv"),
            ("Class", "@class"),
        ]
        cty_hover_tool = HoverTool(plot=plot, renderers=[cty], tooltips=tooltips + [("City MPG", "@cty")])
        hwy_hover_tool = HoverTool(plot=plot, renderers=[hwy], tooltips=tooltips + [("Highway MPG", "@hwy")])
        select_tool = BoxSelectTool(plot=plot, renderers=[cty, hwy], dimensions=['width'])
        plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool])
        plot.renderers.extend([cty, hwy, ygrid])

        controls = VBox(children=[manufacturer_select, model_select, transmission_select, drive_select, class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, data_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view: view(link)
        print("\npress ctrl-C to exit")
        self.session.poll_document(self.document)
Beispiel #14
0
class DataTables(object):
    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:",
                                     value="All",
                                     options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:",
                              value="All",
                              options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:",
                                     value="All",
                                     options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:",
                              value="All",
                              options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:",
                              value="All",
                              options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer",
                        header="Manufacturer",
                        type="autocomplete",
                        source=manufacturers),
            TableColumn(field="model",
                        header="Model",
                        type="autocomplete",
                        source=models),
            TableColumn(field="displ",
                        header="Displacement",
                        type="numeric",
                        format="0.00"),
            TableColumn(field="year", header="Year", type="numeric"),
            TableColumn(field="cyl", header="Cylinders", type="numeric"),
            TableColumn(field="trans",
                        header="Transmission",
                        type="dropdown",
                        strict=True,
                        source=transmissions),
            TableColumn(field="drv",
                        header="Drive",
                        type="autocomplete",
                        strict=True,
                        source=drives),
            TableColumn(field="class",
                        header="Class",
                        type="autocomplete",
                        strict=True,
                        source=classes),
            TableColumn(field="cty", header="City MPG", type="numeric"),
            TableColumn(field="hwy", header="Highway MPG", type="numeric"),
        ]
        handson_table = HandsonTable(source=self.source,
                                     columns=columns,
                                     sorting=True)

        xdr = DataRange1d(sources=[self.source.columns("index")])
        #xdr = FactorRange(factors=manufacturers)
        ydr = DataRange1d(
            sources=[self.source.columns("cty"),
                     self.source.columns("hwy")])
        plot = Plot(title=None,
                    data_sources=[self.source],
                    x_range=xdr,
                    y_range=ydr,
                    plot_width=800,
                    plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty = Glyph(data_source=self.source,
                    glyph=Circle(x="index",
                                 y="cty",
                                 fill_color="#396285",
                                 size=8,
                                 fill_alpha=0.5,
                                 line_alpha=0.5))
        hwy = Glyph(data_source=self.source,
                    glyph=Circle(x="index",
                                 y="hwy",
                                 fill_color="#CE603D",
                                 size=8,
                                 fill_alpha=0.5,
                                 line_alpha=0.5))
        select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False)
        plot.tools.append(select_tool)
        overlay = BoxSelectionOverlay(tool=select_tool)
        plot.renderers.extend([cty, hwy, ygrid, overlay])

        controls = VBox(children=[
            manufacturer_select, model_select, transmission_select,
            drive_select, class_select
        ],
                        width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, handson_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view: view(link)
        print("\npress ctrl-C to exit")
        self.session.poll_document(self.document)
Beispiel #15
0
class DashBoard(object):
    def __init__(self, path, url):
        self.document = Document()
        self.session = Session(name=url, root_url=url)
        #self.session = Session('load_from_config=False')
        self.session.use_doc('crawler_dashboard')
        self.session.load_document(self.document)

        #self.harvest_source = ColumnDataSource(data=dict())
        #self.domain_relevant_source = ColumnDataSource(data=dict())
        #self.domain_crawled_source = ColumnDataSource(data=dict())
        #self.domain_frontier_source = ColumnDataSource(data=dict())
        #self.handson_source = ColumnDataSource(data=dict())
        #self.termite_source = ColumnDataSource(data=dict())
        self.harvest = Harvest(path)
        self.domain = Domain(path)
        #handson = Handson()
        #self.termite = Termite()

        self.document.add(self.create_layout())
        self.session.store_document(self.document)

    def render(self):
        self.create_layout()
        self.document.add(self.layout)
        self.update_data()

    def create_layout(self):

        #button = Button(label="Randomize data", type="success")
        #button.on_click(update_data)
        #top_panel = HBox(children=[button, self.harvest.plot, self.harvest.rate_plot])
        top_panel = HBox(children=[self.harvest.plot, self.harvest.rate_plot])
        domains = VBox(children=[
            self.domain.sort_relevant_plot, self.domain.sort_crawled_plot,
            self.domain.sort_frontier_plot
        ],
                       width=200)
        #middle_panel = HBox(children=[domains, handson.plot])
        middle_panel = HBox(children=[domains])
        layout = VBox(children=[top_panel, middle_panel])
        self.layout = layout
        return layout

    def update_data(self):

        self.harvest.source = self.harvest.update_source()
        self.domain.sort_relevant_source, self.domain.sort_crawled_source, self.domain.sort_frontier_source = self.domain.update_source(
        )
        #self.termite.data, self.termite.source = self.termite.update_source()
        #self.session.store_objects(ds)
        self.session.store_document(self.document)

    def run(self, poll_interval=0.5):
        #link = self.session.object_link(self.document.context)
        #print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.update_data()
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
Beispiel #16
0
class DataTables(object):
    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:",
                                     value="All",
                                     options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:",
                              value="All",
                              options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:",
                                     value="All",
                                     options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:",
                              value="All",
                              options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:",
                              value="All",
                              options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer",
                        title="Manufacturer",
                        editor=SelectEditor(options=manufacturers),
                        formatter=StringFormatter(font_style="bold")),
            TableColumn(field="model",
                        title="Model",
                        editor=StringEditor(completions=models)),
            TableColumn(field="displ",
                        title="Displacement",
                        editor=NumberEditor(step=0.1),
                        formatter=NumberFormatter(format="0.0")),
            TableColumn(field="year", title="Year", editor=IntEditor()),
            TableColumn(field="cyl", title="Cylinders", editor=IntEditor()),
            TableColumn(field="trans",
                        title="Transmission",
                        editor=SelectEditor(options=transmissions)),
            TableColumn(field="drv",
                        title="Drive",
                        editor=SelectEditor(options=drives)),
            TableColumn(field="class",
                        title="Class",
                        editor=SelectEditor(options=classes)),
            TableColumn(field="cty", title="City MPG", editor=IntEditor()),
            TableColumn(field="hwy", title="Highway MPG", editor=IntEditor()),
        ]
        data_table = DataTable(source=self.source,
                               columns=columns,
                               editable=True)

        xdr = DataRange1d()
        ydr = DataRange1d()
        plot = Plot(title=None,
                    x_range=xdr,
                    y_range=ydr,
                    plot_width=800,
                    plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty_glyph = Circle(x="index",
                           y="cty",
                           fill_color="#396285",
                           size=8,
                           fill_alpha=0.5,
                           line_alpha=0.5)
        hwy_glyph = Circle(x="index",
                           y="hwy",
                           fill_color="#CE603D",
                           size=8,
                           fill_alpha=0.5,
                           line_alpha=0.5)
        cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph)
        hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph)
        tooltips = [
            ("Manufacturer", "@manufacturer"),
            ("Model", "@model"),
            ("Displacement", "@displ"),
            ("Year", "@year"),
            ("Cylinders", "@cyl"),
            ("Transmission", "@trans"),
            ("Drive", "@drv"),
            ("Class", "@class"),
        ]
        cty_hover_tool = HoverTool(plot=plot,
                                   renderers=[cty],
                                   tooltips=tooltips + [("City MPG", "@cty")])
        hwy_hover_tool = HoverTool(plot=plot,
                                   renderers=[hwy],
                                   tooltips=tooltips +
                                   [("Highway MPG", "@hwy")])
        select_tool = BoxSelectTool(plot=plot,
                                    renderers=[cty, hwy],
                                    dimensions=['width'])
        plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool])
        plot.renderers.extend([cty, hwy, ygrid])

        controls = VBox(children=[
            manufacturer_select, model_select, transmission_select,
            drive_select, class_select
        ],
                        width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, data_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view: view(link)
        print("\npress ctrl-C to exit")
        self.session.poll_document(self.document)
Beispiel #17
0
class Population(object):

    year = 2010
    location = "World"

    def __init__(self):
        from bokeh.models import ColumnDataSource
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population_reveal')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

        # just render at the initialization
        self._render()

    def _render(self):
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        from bokeh.models import (Plot, DataRange1d, LinearAxis, Grid,
                                  Legend, SingleIntervalTicker)
        from bokeh.models.glyphs import Quad

        xdr = DataRange1d(sources=[self.source_pyramid.columns("male"),
                                   self.source_pyramid.columns("female")])
        ydr = DataRange1d(sources=[self.source_pyramid.columns("groups")])

        self.plot = Plot(title="Widgets", x_range=xdr, y_range=ydr,
                         plot_width=600, plot_height=600)

        xaxis = LinearAxis()
        self.plot.add_layout(xaxis, 'below')
        yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5))
        self.plot.add_layout(yaxis, 'left')

        self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

        male_quad = Quad(left="male", right=0, bottom="groups", top="shifted",
                         fill_color="#3B8686")
        male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad)

        female_quad = Quad(left=0, right="female", bottom="groups", top="shifted",
                           fill_color="#CFF09E")
        female_quad_glyph = self.plot.add_glyph(self.source_pyramid, female_quad)

        self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph],
                                                 Female=[female_quad_glyph])))

    def on_year_change(self, obj, attr, old, new):
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        from bokeh.models.widgets import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        year_select = Select(title="Year:", value="2010", options=years)
        location_select = Select(title="Location:", value="World", options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(year_select, location_select)
        self.layout = VBox(controls, self.plot)

    def update_pyramid(self):
        pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()

        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        shifted = groups[1:] + [groups[-1] + 5]

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
Beispiel #18
0
class Population(object):

    year = 2010
    location = "World"

    def __init__(self):
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.models import ColumnDataSource
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

    def render(self):
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        from bokeh.models import (
            Plot, DataRange1d, LinearAxis, Grid, Legend,
            SingleIntervalTicker
        )
        from bokeh.models.glyphs import Quad

        xdr = DataRange1d()
        ydr = DataRange1d()

        self.plot = Plot(title=None, x_range=xdr, y_range=ydr,
                         plot_width=600, plot_height=600)

        xaxis = LinearAxis()
        self.plot.add_layout(xaxis, 'below')
        yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5))
        self.plot.add_layout(yaxis, 'left')

        self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

        male_quad = Quad(left="male", right=0, bottom="groups", top="shifted",
                         fill_color="#3B8686")
        male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad)

        female_quad = Quad(left=0, right="female", bottom="groups", top="shifted",
                           fill_color="#CFF09E")
        female_quad_glyph = self.plot.add_glyph(self.source_pyramid, female_quad)

        self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph],
                                                 Female=[female_quad_glyph])))

    def on_year_change(self, obj, attr, old, new):
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        from bokeh.models.widgets import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        year_select = Select(title="Year:", value="2010", options=years)
        location_select = Select(title="Location:", value="World", options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(year_select, location_select)
        self.layout = VBox(controls, self.plot)

    def update_pyramid(self):
        pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()

        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        shifted = groups[1:] + [groups[-1] + 5]

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
Beispiel #19
0
class NutchUrlTrails:
    """
    Class for managing URL Trails visualizations
    """

    @staticmethod
    def strip_url(url):
        """
        Make a URL safe for visualization in Bokeh server
        :param url: a URL to be shortened/stripped
        :return: The stripped URL
        """
        # TODO: remove protocol-stripping on next Bokeh release
        stripped_url = url.replace('https://', '').replace('http://', '').replace(':', '_').replace('-', '_')

        if len(stripped_url) <= URL_CHAR_WIDTH:
            return stripped_url
        else:
            return stripped_url[:int(URL_CHAR_WIDTH/2)] + '...' + stripped_url[-int(URL_CHAR_WIDTH/2)-3:]

    @staticmethod
    def jtime_to_datetime(t):
        """
        Convert a Java-format Epoch time stamp into np.datetime64 object
        :param t: Java-format Epoch time stamp (milliseconds)
        :return: A np.datetime64 scalar
        """
        return np.datetime64(datetime.fromtimestamp(t/1000.0))

    def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS):
        """
        Create a NutchUrlTrails instance for visualizing a running Nutch crawl in real-time using Bokeh
        :param name: The name of the crawl (as identified by the queue)
        :param num_urls: The number of URLs to display in the visualization
        :return: A NutchUrLTrails instance
        """
        self.crawl_name = crawl_name
        self.num_urls = num_urls
        self.open_urls = {}
        self.closed_urls = {}
        self.old_segments = None
        self.old_circles = None
        
        self.session = Session()
        self.session.use_doc(self.crawl_name)
        self.document = Document()

        con = Connection()

        exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False)
        queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name)
        self.queue = con.SimpleQueue(name=queue)

    def handle_messages(self):
        """
        Get and parse up to 250 messages from the queue then plot.  Break early if less.
        """

        for i in range(250):
            try:
                m = self.queue.get(block=True, timeout=1)
                self.parse_message(m)
            except Empty:
                break
        self.plot_urls()

    def parse_message(self, message):
        """
        Parse a single message arriving from the queue.  Updates list of open/closed urls.
        :param message: A message from the queue
        """
        print(message.body)
        message = json.loads(message.body)
        url = message["url"]
        if message["eventType"] == "START":
            self.open_urls[url] = NutchUrlTrails.jtime_to_datetime(message["timestamp"])
        elif message["eventType"] == "END":
            if url in self.open_urls:
                self.closed_urls[url] = (self.open_urls[url], NutchUrlTrails.jtime_to_datetime(message["timestamp"]))
                del self.open_urls[url]
            else:
                # TODO: Log mismatched messages instead of just swallowing them
                pass
        else:
            raise Exception("Unexpected message type")

    def plot_urls(self):
        """
        Visualize crawler activity by showing the most recently crawled URLs and the fetch time.
        """

        self.session.load_document(self.document)
        plot = self.document.context.children[0]

        # don't plot if no URLs available
        if not (self.open_urls or self.closed_urls):
            return

        # x0/x0, left and right boundaries of segments, correspond to fetch time
        x0 = []
        x = []
        # y-axis, name of URL being fetched
        urls = []

        # maintain x and URL of circles in a separate list
        circles = []
        circle_urls = []

        current_time = np.datetime64(datetime.now())

        # For open URLs (not completed fetching), draw a segment from start time to now
        for url, start_t in self.open_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(current_time)
            urls.append(url)

        # For closed URLs (completed fetching), draw a segment from start to end time, and a circle as well.
        for url, (start_t, end_t) in self.closed_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(end_t)
            circles.append(end_t)
            urls.append(url)
            circle_urls.append(url)

        x0 = np.asarray(x0)
        x = np.asarray(x)
        circles = np.asarray(circles)

        # sort segments
        sort_index = np.argsort(x0)[::-1]
        x0 = x0[sort_index]
        x = x[sort_index]
        urls = [urls[i] for i in sort_index]

        # sort circles
        if self.closed_urls:
            circle_sort_index = np.argsort(circles)[::-1]
            circles = circles[circle_sort_index]
            circle_urls = [circle_urls[i] for i in circle_sort_index]

        # Filter to latest num_url URLs (ascending order)
        # filter segments
        active_x0 = x0[:self.num_urls]
        active_x = x[:self.num_urls]
        active_urls = urls[:self.num_urls]

        min_x = min(active_x0)
        plot.x_range.start = min_x
        plot.x_range.end = np.datetime64(datetime.now())
        plot.y_range.factors = active_urls

        # make sure these are turned back on!
        # turn y axis grid lines back on
        for r in plot.renderers:
            if type(r) == Grid:
                r.grid_line_color = 'black'
                break

        # turn tickers and their labels back on
        plot.right[0].minor_tick_line_color = 'black'
        plot.right[0].major_tick_line_color = 'black'
        plot.right[0].major_label_text_font_size = '12pt'
        plot.below[0].minor_tick_line_color = 'black'
        plot.below[0].major_tick_line_color = 'black'
        plot.below[0].major_label_text_font_size = '12pt'

        # TODO: Find a more correct way to remove old segment/circle glyphs
        if self.old_circles:
            plot.renderers.pop()
            self.old_circles = None
        if self.old_segments:
            plot.renderers.pop()
            self.old_segments = None

        segment_source = ColumnDataSource(dict(x0=active_x0,
                                               x1=active_x,
                                               urls=active_urls))

        self.old_segments = Segment(x0="x0", y0="urls", x1="x1", y1="urls", line_color="orange", line_width=10)
        plot.add_glyph(segment_source, self.old_segments)

        if self.closed_urls and PLOT_CIRCLES:
            # filter circles (some of these might not be displayed)
            active_circles = circles[:self.num_urls]
            active_circle_urls = circle_urls[:self.num_urls]

            circle_source = ColumnDataSource(dict(x=active_circles, urls=active_circle_urls))

            self.old_circles = Circle(x="x", y="urls", size=12, fill_color="green", line_color="orange", line_width=2)
            plot.add_glyph(circle_source, self.old_circles)

        self.session.store_document(self.document, dirty_only=False)