Ejemplo n.º 1
0
def init_plot(crawl_name):
    session = Session()
    document = Document()
    session.use_doc(crawl_name)
    session.load_document(document)

    if document.context.children:
        plot = document.context.children[0]
    else:
        output_server(crawl_name)
        # TODO: Remove these when Bokeh is upgraded
        # placeholders or Bokeh can't inject properly
        current = np.datetime64(datetime.now())
        xdr = Range1d(current, current + 1)
        ydr = ["urls"]

        # styling suggested by Bryan
        plot = figure(title="Crawler Monitor",
                      tools="hover",
                      x_axis_type="datetime",
                      y_axis_location="right",
                      x_range=xdr,
                      y_range=ydr,
                      width=1200,
                      height=600)
        plot.toolbar_location = None
        plot.xgrid.grid_line_color = None

    document.add(plot)
    session.store_document(document)
    script = autoload_server(plot, session)

    #TODO: Looks like a Bokeh bug, probably not repeatable with current code
    script = script.replace("'modelid': u'", "'modelid': '")
    return script
Ejemplo n.º 2
0
def init_plot(crawl_name):
    session = Session()
    document = Document()
    session.use_doc(crawl_name)
    session.load_document(document)

    if document.context.children:
        plot = document.context.children[0]
    else:
        output_server(crawl_name)
        # TODO: Remove these when Bokeh is upgraded
        # placeholders or Bokeh can't inject properly
        current = np.datetime64(datetime.now())
        xdr = Range1d(current, current + 1)
        ydr = ["urls"]

        # styling suggested by Bryan
        plot = figure(title="Crawler Monitor", tools="hover",
                      x_axis_type="datetime", y_axis_location="right", x_range=xdr, y_range=ydr,
                      width=1200, height=600)
        plot.toolbar_location = None
        plot.xgrid.grid_line_color = None

    document.add(plot)
    session.store_document(document)
    script = autoload_server(plot, session)

    #TODO: Looks like a Bokeh bug, probably not repeatable with current code
    script = script.replace("'modelid': u'", "'modelid': '")
    return script
Ejemplo n.º 3
0
        def wrapper(*args, **kwargs):
            document = Document()
            session = Session(name=url, root_url=url)
            session.use_doc(document.docid)
            session.load_document(document)
            session.publish()
            document.autoadd = False
            document.autostore = False

            obj = func(*args, **kwargs)
            obj._docid = session.docid
            obj._root_url = session.root_url

            document.add(obj)
            session.store_document(document)
            return obj
Ejemplo n.º 4
0
        def wrapper(*args, **kwargs):
            docname = prefix + str(uuid.uuid4())
            session = Session(name=url, root_url=url)
            session.use_doc(docname)
            session.load_document(curdoc())
            curdoc().autoadd = False
            curdoc().autostore = False

            obj = func(*args, **kwargs)
            tag = embed.autoload_server(obj, session)
            obj.tag = tag

            curdoc().add(obj)
            changed = session.store_document(curdoc())

            logger.debug("stored: %s", str(changed))

            return obj
Ejemplo n.º 5
0
        def wrapper(*args, **kwargs):
            docname = prefix + str(uuid.uuid4())
            session = Session(name=url, root_url=url)
            session.use_doc(docname)
            session.load_document(curdoc())
            curdoc().autoadd = False
            curdoc().autostore = False

            obj = func(*args, **kwargs)
            tag = embed.autoload_server(obj, session)
            obj.tag = tag

            curdoc().add(obj)
            changed = session.store_document(curdoc())

            logger.debug("stored: %s", str(changed))

            return obj
Ejemplo n.º 6
0
import pandas as pd
from bokeh.models import (Plot, ColumnDataSource, DataRange1d, FactorRange,
                           LinearAxis, CategoricalAxis, Grid, Glyph)
from bokeh.models.widgets import (DateRangeSlider, HBox, VBox, Paragraph,
                                  Select, VBoxModelForm)
from bokeh.glyphs import Rect
from bokeh.document import Document
from bokeh.session import Session

import employment_data_reader as emp
from employment_utils import get_country_for_byte, get_jobtype_for_byte

document = Document()
session = Session()
session.use_doc('employment_server')
session.load_document(document)

days_of_week = {0: "Monday", 1: "Tuesday", 2: "Wednesday", 3: "Thursday",
                4: "Friday", 5: "Saturday", 6: "Sunday"}
bounds_start = datetime.date(2012, 11, 1)
bounds_end = datetime.date(2013, 12, 30)
start = datetime.date(2013, 5, 26)
end = datetime.date(2013, 7, 5)
source_country = ColumnDataSource(data=dict())
source_dow = ColumnDataSource(data=dict())
source_jobtype = ColumnDataSource(data=dict())
source_par = Paragraph()
country_choices = ["Dominican Republic___________________",
                   "Colombia_____________________________",
                   "Mexico_______________________________",
                   "Peru_________________________________",
Ejemplo n.º 7
0
class Population(object):

    year = 2010
    location = "World"

    def __init__(self):
        from bokeh.objects import ColumnDataSource
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population_reveal')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

    def render(self):
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        from bokeh.objects import (Plot, DataRange1d, LinearAxis, Grid, Glyph,
                                   Legend, SingleIntervalTicker)
        from bokeh.glyphs import Quad

        xdr = DataRange1d(sources=[self.source_pyramid.columns("male"),
                                   self.source_pyramid.columns("female")])
        ydr = DataRange1d(sources=[self.source_pyramid.columns("groups")])

        self.plot = Plot(title="Widgets", x_range=xdr, y_range=ydr, plot_width=600, plot_height=600)

        xaxis = LinearAxis(plot=self.plot)
        self.plot.below.append(xaxis)
        yaxis = LinearAxis(plot=self.plot, ticker=SingleIntervalTicker(interval=5))
        self.plot.left.append(yaxis)

        xgrid = Grid(plot=self.plot, dimension=0, ticker=xaxis.ticker)
        ygrid = Grid(plot=self.plot, dimension=1, ticker=yaxis.ticker)

        male_quad = Quad(left="male", right=0, bottom="groups", top="shifted", fill_color="blue")
        male_quad_glyph = Glyph(data_source=self.source_pyramid,
                                xdata_range=xdr, ydata_range=ydr, glyph=male_quad)
        self.plot.renderers.append(male_quad_glyph)

        female_quad = Quad(left=0, right="female", bottom="groups", top="shifted",
                           fill_color="violet")
        female_quad_glyph = Glyph(data_source=self.source_pyramid,
                                  xdata_range=xdr, ydata_range=ydr, glyph=female_quad)
        self.plot.renderers.append(female_quad_glyph)

        legend = Legend(plot=self.plot, legends=dict(Male=[male_quad_glyph],
                        Female=[female_quad_glyph]))
        self.plot.renderers.append(legend)

    def on_year_change(self, obj, attr, old, new):
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        from bokeh.widgetobjects import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        year_select = Select(title="Year:", value="2010", options=years)
        location_select = Select(title="Location:", value="World", options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(children=[year_select, location_select])
        self.layout = VBox(children=[controls, self.plot])

    def update_pyramid(self):
        pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()

        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        shifted = groups[1:] + [groups[-1] + 5]

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
Ejemplo n.º 8
0
class Dashboard(object):
    """
    A collection of plots and widgets served by bokeh-server.
    """

    def __init__(self,
            port = BK_SERVER_PORT,
            doc_name = DOC_NAME,
            colors = COLORS,
            show = False,
        ):
        """
        Initialize Bokeh session and document.
        """

        self.document = Document()

        self.port = port
        self.show = show
        self.doc_name = doc_name

        name="http://*****:*****@type"),
        ])

        seabornify(plot)
        return plot

# Update Routine
# --------------

    def update(self):
        self.session.store_objects(self.data_source)

# Run
# ---

    def run(self, poll_interval=1):

        self.session.store_document(self.document)

        link = self.session.object_link(self.document.context)
        if self.show:
            import webbrowser
            webbrowser.open(link)
        else:
            print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.session.load_document(self.document)
                sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated.")

if __name__ == "__main__":

    args = parse_args()

    dashboard = Dashboard(**vars(args))
    dashboard.run()
Ejemplo n.º 9
0
class DashBoard(object):

    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('crawler_dashboard')
        self.session.load_document(self.document)

        #self.harvest_source = ColumnDataSource(data=dict())
        #self.domain_relevant_source = ColumnDataSource(data=dict())
        #self.domain_crawled_source = ColumnDataSource(data=dict())
        #self.domain_frontier_source = ColumnDataSource(data=dict())
        #self.handson_source = ColumnDataSource(data=dict())
        #self.termite_source = ColumnDataSource(data=dict())

        self.harvest = Harvest()
        self.domain = Domain()
        #handson = Handson()
        self.termite = Termite()

        self.document.add(self.create_layout())
        self.session.store_document(self.document)

    def render(self):
        self.create_layout()
        self.document.add(self.layout)
        self.update_data()

    def create_layout(self):

        #button = Button(label="Randomize data", type="success")
        #button.on_click(update_data)
        #top_panel = HBox(children=[button, self.harvest.plot, self.harvest.rate_plot])
        top_panel = HBox(children=[self.harvest.plot, self.harvest.rate_plot])
        domains = VBox(children=[self.domain.sort_relevant_plot, self.domain.sort_crawled_plot, self.domain.sort_frontier_plot], width=200)   
        #middle_panel = HBox(children=[domains, handson.plot])
        middle_panel = HBox(children=[domains])
        layout = VBox(children=[top_panel, middle_panel, self.termite.plot])
        self.layout = layout
        return layout

    def update_data(self):

        self.harvest.source = self.harvest.update_source()
        self.domain.sort_relevant_source, self.domain.sort_crawled_source, self.domain.sort_frontier_source = self.domain.update_source()
        self.termite.data, self.termite.source = self.termite.update_source()
        #self.session.store_objects(ds)
        self.session.store_document(self.document)

        
    def run(self, poll_interval=0.5):
        #link = self.session.object_link(self.document.context)
        #print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.update_data()
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
Ejemplo n.º 10
0
class DataTables(object):

    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:", value="All", options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:", value="All", options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:", value="All", options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", title="Manufacturer", editor=SelectEditor(options=manufacturers), formatter=StringFormatter(font_style="bold")),
            TableColumn(field="model",        title="Model",        editor=StringEditor(completions=models)),
            TableColumn(field="displ",        title="Displacement", editor=NumberEditor(step=0.1),              formatter=NumberFormatter(format="0.0")),
            TableColumn(field="year",         title="Year",         editor=IntEditor()),
            TableColumn(field="cyl",          title="Cylinders",    editor=IntEditor()),
            TableColumn(field="trans",        title="Transmission", editor=SelectEditor(options=transmissions)),
            TableColumn(field="drv",          title="Drive",        editor=SelectEditor(options=drives)),
            TableColumn(field="class",        title="Class",        editor=SelectEditor(options=classes)),
            TableColumn(field="cty",          title="City MPG",     editor=IntEditor()),
            TableColumn(field="hwy",          title="Highway MPG",  editor=IntEditor()),
        ]
        data_table = DataTable(source=self.source, columns=columns, editable=True)

        xdr = DataRange1d()
        ydr = DataRange1d()
        plot = Plot(title=None, x_range=xdr, y_range=ydr, plot_width=800, plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty_glyph = Circle(x="index", y="cty", fill_color="#396285", size=8, fill_alpha=0.5, line_alpha=0.5)
        hwy_glyph = Circle(x="index", y="hwy", fill_color="#CE603D", size=8, fill_alpha=0.5, line_alpha=0.5)
        cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph)
        hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph)
        tooltips = [
            ("Manufacturer", "@manufacturer"),
            ("Model", "@model"),
            ("Displacement", "@displ"),
            ("Year", "@year"),
            ("Cylinders", "@cyl"),
            ("Transmission", "@trans"),
            ("Drive", "@drv"),
            ("Class", "@class"),
        ]
        cty_hover_tool = HoverTool(plot=plot, renderers=[cty], tooltips=tooltips + [("City MPG", "@cty")])
        hwy_hover_tool = HoverTool(plot=plot, renderers=[hwy], tooltips=tooltips + [("Highway MPG", "@hwy")])
        select_tool = BoxSelectTool(plot=plot, renderers=[cty, hwy], dimensions=['width'])
        plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool])
        plot.renderers.extend([cty, hwy, ygrid])

        controls = VBox(children=[manufacturer_select, model_select, transmission_select, drive_select, class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, data_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view: view(link)
        print("\npress ctrl-C to exit")
        self.session.poll_document(self.document)
Ejemplo n.º 11
0
class DataTables(object):
    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:",
                                     value="All",
                                     options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:",
                              value="All",
                              options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:",
                                     value="All",
                                     options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:",
                              value="All",
                              options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:",
                              value="All",
                              options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer",
                        header="Manufacturer",
                        type="autocomplete",
                        source=manufacturers),
            TableColumn(field="model",
                        header="Model",
                        type="autocomplete",
                        source=models),
            TableColumn(field="displ",
                        header="Displacement",
                        type="numeric",
                        format="0.00"),
            TableColumn(field="year", header="Year", type="numeric"),
            TableColumn(field="cyl", header="Cylinders", type="numeric"),
            TableColumn(field="trans",
                        header="Transmission",
                        type="dropdown",
                        strict=True,
                        source=transmissions),
            TableColumn(field="drv",
                        header="Drive",
                        type="autocomplete",
                        strict=True,
                        source=drives),
            TableColumn(field="class",
                        header="Class",
                        type="autocomplete",
                        strict=True,
                        source=classes),
            TableColumn(field="cty", header="City MPG", type="numeric"),
            TableColumn(field="hwy", header="Highway MPG", type="numeric"),
        ]
        handson_table = HandsonTable(source=self.source,
                                     columns=columns,
                                     sorting=True)

        xdr = DataRange1d(sources=[self.source.columns("index")])
        #xdr = FactorRange(factors=manufacturers)
        ydr = DataRange1d(
            sources=[self.source.columns("cty"),
                     self.source.columns("hwy")])
        plot = Plot(title=None,
                    data_sources=[self.source],
                    x_range=xdr,
                    y_range=ydr,
                    plot_width=800,
                    plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty = Glyph(data_source=self.source,
                    glyph=Circle(x="index",
                                 y="cty",
                                 fill_color="#396285",
                                 size=8,
                                 fill_alpha=0.5,
                                 line_alpha=0.5))
        hwy = Glyph(data_source=self.source,
                    glyph=Circle(x="index",
                                 y="hwy",
                                 fill_color="#CE603D",
                                 size=8,
                                 fill_alpha=0.5,
                                 line_alpha=0.5))
        select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False)
        plot.tools.append(select_tool)
        overlay = BoxSelectionOverlay(tool=select_tool)
        plot.renderers.extend([cty, hwy, ygrid, overlay])

        controls = VBox(children=[
            manufacturer_select, model_select, transmission_select,
            drive_select, class_select
        ],
                        width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, handson_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view: view(link)
        print("\npress ctrl-C to exit")
        self.session.poll_document(self.document)
Ejemplo n.º 12
0
class DashBoard(object):
    def __init__(self, path, url):
        self.document = Document()
        self.session = Session(name=url, root_url=url)
        #self.session = Session('load_from_config=False')
        self.session.use_doc('crawler_dashboard')
        self.session.load_document(self.document)

        #self.harvest_source = ColumnDataSource(data=dict())
        #self.domain_relevant_source = ColumnDataSource(data=dict())
        #self.domain_crawled_source = ColumnDataSource(data=dict())
        #self.domain_frontier_source = ColumnDataSource(data=dict())
        #self.handson_source = ColumnDataSource(data=dict())
        #self.termite_source = ColumnDataSource(data=dict())
        self.harvest = Harvest(path)
        self.domain = Domain(path)
        #handson = Handson()
        #self.termite = Termite()

        self.document.add(self.create_layout())
        self.session.store_document(self.document)

    def render(self):
        self.create_layout()
        self.document.add(self.layout)
        self.update_data()

    def create_layout(self):

        #button = Button(label="Randomize data", type="success")
        #button.on_click(update_data)
        #top_panel = HBox(children=[button, self.harvest.plot, self.harvest.rate_plot])
        top_panel = HBox(children=[self.harvest.plot, self.harvest.rate_plot])
        domains = VBox(children=[
            self.domain.sort_relevant_plot, self.domain.sort_crawled_plot,
            self.domain.sort_frontier_plot
        ],
                       width=200)
        #middle_panel = HBox(children=[domains, handson.plot])
        middle_panel = HBox(children=[domains])
        layout = VBox(children=[top_panel, middle_panel])
        self.layout = layout
        return layout

    def update_data(self):

        self.harvest.source = self.harvest.update_source()
        self.domain.sort_relevant_source, self.domain.sort_crawled_source, self.domain.sort_frontier_source = self.domain.update_source(
        )
        #self.termite.data, self.termite.source = self.termite.update_source()
        #self.session.store_objects(ds)
        self.session.store_document(self.document)

    def run(self, poll_interval=0.5):
        #link = self.session.object_link(self.document.context)
        #print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.update_data()
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
Ejemplo n.º 13
0
class DataTables(object):

    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:", value="All", options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:", value="All", options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:", value="All", options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:", value="All", options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:", value="All", options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer", header="Manufacturer", type="autocomplete", source=manufacturers),
            TableColumn(field="model", header="Model", type="autocomplete", source=models),
            TableColumn(field="displ", header="Displacement", type="numeric", format="0.00"),
            TableColumn(field="year", header="Year", type="numeric"),
            TableColumn(field="cyl", header="Cylinders", type="numeric"),
            TableColumn(field="trans", header="Transmission", type="dropdown", strict=True, source=transmissions),
            TableColumn(field="drv", header="Drive", type="autocomplete", strict=True, source=drives),
            TableColumn(field="class", header="Class", type="autocomplete", strict=True, source=classes),
            TableColumn(field="cty", header="City MPG", type="numeric"),
            TableColumn(field="hwy", header="Highway MPG", type="numeric"),
        ]
        handson_table = HandsonTable(source=self.source, columns=columns, sorting=True)

        xdr = DataRange1d(sources=[self.source.columns("index")])
        #xdr = FactorRange(factors=manufacturers)
        ydr = DataRange1d(sources=[self.source.columns("cty"), self.source.columns("hwy")])
        plot = Plot(title=None, data_sources=[self.source], x_range=xdr, y_range=ydr, plot_width=800, plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty = Glyph(data_source=self.source, glyph=Circle(x="index", y="cty", fill_color="green"))
        hwy = Glyph(data_source=self.source, glyph=Circle(x="index", y="hwy", fill_color="red"))
        select_tool = BoxSelectTool(renderers=[cty, hwy], select_y=False)
        plot.tools.append(select_tool)
        overlay = BoxSelectionOverlay(tool=select_tool)
        plot.renderers.extend([cty, hwy, ygrid, overlay])

        controls = VBox(children=[manufacturer_select, model_select, transmission_select, drive_select, class_select], width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, handson_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots (press ctrl-C to exit)" % link)

        try:
            while True:
                self.session.load_document(self.document)
                time.sleep(poll_interval)
        except KeyboardInterrupt:
            print()
        except ConnectionError:
            print("Connection to bokeh-server was terminated")
Ejemplo n.º 14
0
class DataTables(object):
    def __init__(self):
        self.document = Document()
        self.session = Session()
        self.session.use_doc('data_tables_server')
        self.session.load_document(self.document)

        self.manufacturer_filter = None
        self.model_filter = None
        self.transmission_filter = None
        self.drive_filter = None
        self.class_filter = None

        self.source = ColumnDataSource()
        self.update_data()

        self.document.add(self.create())
        self.session.store_document(self.document)

    def create(self):
        manufacturers = sorted(mpg["manufacturer"].unique())
        models = sorted(mpg["model"].unique())
        transmissions = sorted(mpg["trans"].unique())
        drives = sorted(mpg["drv"].unique())
        classes = sorted(mpg["class"].unique())

        manufacturer_select = Select(title="Manufacturer:",
                                     value="All",
                                     options=["All"] + manufacturers)
        manufacturer_select.on_change('value', self.on_manufacturer_change)
        model_select = Select(title="Model:",
                              value="All",
                              options=["All"] + models)
        model_select.on_change('value', self.on_model_change)
        transmission_select = Select(title="Transmission:",
                                     value="All",
                                     options=["All"] + transmissions)
        transmission_select.on_change('value', self.on_transmission_change)
        drive_select = Select(title="Drive:",
                              value="All",
                              options=["All"] + drives)
        drive_select.on_change('value', self.on_drive_change)
        class_select = Select(title="Class:",
                              value="All",
                              options=["All"] + classes)
        class_select.on_change('value', self.on_class_change)

        columns = [
            TableColumn(field="manufacturer",
                        title="Manufacturer",
                        editor=SelectEditor(options=manufacturers),
                        formatter=StringFormatter(font_style="bold")),
            TableColumn(field="model",
                        title="Model",
                        editor=StringEditor(completions=models)),
            TableColumn(field="displ",
                        title="Displacement",
                        editor=NumberEditor(step=0.1),
                        formatter=NumberFormatter(format="0.0")),
            TableColumn(field="year", title="Year", editor=IntEditor()),
            TableColumn(field="cyl", title="Cylinders", editor=IntEditor()),
            TableColumn(field="trans",
                        title="Transmission",
                        editor=SelectEditor(options=transmissions)),
            TableColumn(field="drv",
                        title="Drive",
                        editor=SelectEditor(options=drives)),
            TableColumn(field="class",
                        title="Class",
                        editor=SelectEditor(options=classes)),
            TableColumn(field="cty", title="City MPG", editor=IntEditor()),
            TableColumn(field="hwy", title="Highway MPG", editor=IntEditor()),
        ]
        data_table = DataTable(source=self.source,
                               columns=columns,
                               editable=True)

        xdr = DataRange1d()
        ydr = DataRange1d()
        plot = Plot(title=None,
                    x_range=xdr,
                    y_range=ydr,
                    plot_width=800,
                    plot_height=300)
        xaxis = LinearAxis(plot=plot)
        plot.below.append(xaxis)
        yaxis = LinearAxis(plot=plot)
        ygrid = Grid(plot=plot, dimension=1, ticker=yaxis.ticker)
        plot.left.append(yaxis)
        cty_glyph = Circle(x="index",
                           y="cty",
                           fill_color="#396285",
                           size=8,
                           fill_alpha=0.5,
                           line_alpha=0.5)
        hwy_glyph = Circle(x="index",
                           y="hwy",
                           fill_color="#CE603D",
                           size=8,
                           fill_alpha=0.5,
                           line_alpha=0.5)
        cty = GlyphRenderer(data_source=self.source, glyph=cty_glyph)
        hwy = GlyphRenderer(data_source=self.source, glyph=hwy_glyph)
        tooltips = [
            ("Manufacturer", "@manufacturer"),
            ("Model", "@model"),
            ("Displacement", "@displ"),
            ("Year", "@year"),
            ("Cylinders", "@cyl"),
            ("Transmission", "@trans"),
            ("Drive", "@drv"),
            ("Class", "@class"),
        ]
        cty_hover_tool = HoverTool(plot=plot,
                                   renderers=[cty],
                                   tooltips=tooltips + [("City MPG", "@cty")])
        hwy_hover_tool = HoverTool(plot=plot,
                                   renderers=[hwy],
                                   tooltips=tooltips +
                                   [("Highway MPG", "@hwy")])
        select_tool = BoxSelectTool(plot=plot,
                                    renderers=[cty, hwy],
                                    dimensions=['width'])
        plot.tools.extend([cty_hover_tool, hwy_hover_tool, select_tool])
        plot.renderers.extend([cty, hwy, ygrid])

        controls = VBox(children=[
            manufacturer_select, model_select, transmission_select,
            drive_select, class_select
        ],
                        width=200)
        top_panel = HBox(children=[controls, plot])
        layout = VBox(children=[top_panel, data_table])

        return layout

    def on_manufacturer_change(self, obj, attr, _, value):
        self.manufacturer_filter = None if value == "All" else value
        self.update_data()

    def on_model_change(self, obj, attr, _, value):
        self.model_filter = None if value == "All" else value
        self.update_data()

    def on_transmission_change(self, obj, attr, _, value):
        self.transmission_filter = None if value == "All" else value
        self.update_data()

    def on_drive_change(self, obj, attr, _, value):
        self.drive_filter = None if value == "All" else value
        self.update_data()

    def on_class_change(self, obj, attr, _, value):
        self.class_filter = None if value == "All" else value
        self.update_data()

    def update_data(self):
        df = mpg
        if self.manufacturer_filter:
            df = df[df["manufacturer"] == self.manufacturer_filter]
        if self.model_filter:
            df = df[df["model"] == self.model_filter]
        if self.transmission_filter:
            df = df[df["trans"] == self.transmission_filter]
        if self.drive_filter:
            df = df[df["drv"] == self.drive_filter]
        if self.class_filter:
            df = df[df["class"] == self.class_filter]
        self.source.data = ColumnDataSource.from_df(df)
        self.session.store_document(self.document)

    def run(self, do_view=False, poll_interval=0.5):
        link = self.session.object_link(self.document.context)
        print("Please visit %s to see the plots" % link)
        if do_view: view(link)
        print("\npress ctrl-C to exit")
        self.session.poll_document(self.document)
Ejemplo n.º 15
0
class PlottingExtension(SimpleExtension):
    """Base class for extensions doing Bokeh plotting.

    Parameters
    ----------
    document_name : str
        The name of the Bokeh document. Use a different name for each
        experiment if you are storing your plots.
    start_server : bool, optional
        Whether to try and start the Bokeh plotting server. Defaults to
        ``False``. The server started is not persistent i.e. after shutting
        it down you will lose your plots. If you want to store your plots,
        start the server manually using the ``bokeh-server`` command. Also
        see the warning above.
    server_url : str, optional
        Url of the bokeh-server. Ex: when starting the bokeh-server with
        ``bokeh-server --ip 0.0.0.0`` at ``alice``, server_url should be
        ``http://alice:5006``. When not specified the default configured
        by ``bokeh_server`` in ``.blocksrc`` will be used. Defaults to
        ``http://localhost:5006/``.
    clear_document : bool, optional
        Whether or not to clear the contents of the server-side document
        upon creation. If `False`, previously existing plots within the
        document will be kept. Defaults to `True`.

    """
    def __init__(self,
                 document_name,
                 server_url=None,
                 start_server=False,
                 clear_document=True,
                 **kwargs):
        self.document_name = document_name
        self.server_url = (config.bokeh_server
                           if server_url is None else server_url)
        self.start_server = start_server
        self.sub = self._start_server_process() if self.start_server else None
        self.session = Session(root_url=self.server_url)
        self.document = Document()
        self._setup_document(clear_document)
        super(PlottingExtension, self).__init__(**kwargs)

    def _start_server_process(self):
        def preexec_fn():
            """Prevents the server from dying on training interrupt."""
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        # Only memory works with subprocess, need to wait for it to start
        logger.info('Starting plotting server on localhost:5006')
        self.sub = Popen('bokeh-server --ip 0.0.0.0 '
                         '--backend memory'.split(),
                         stdout=PIPE,
                         stderr=PIPE,
                         preexec_fn=preexec_fn)
        time.sleep(2)
        logger.info('Plotting server PID: {}'.format(self.sub.pid))

    def _setup_document(self, clear_document=False):
        self.session.use_doc(self.document_name)
        self.session.load_document(self.document)
        if clear_document:
            self.document.clear()
        self._document_setup_done = True

    def __getstate__(self):
        state = self.__dict__.copy()
        state['sub'] = None
        state.pop('session', None)
        state.pop('_push_thread', None)
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        if self.start_server:
            self._start_server_process()
        self.session = Session(root_url=self.server_url)
        self._document_setup_done = False

    def do(self, which_callback, *args):
        if not self._document_setup_done:
            self._setup_document()

    @property
    def push_thread(self):
        if not hasattr(self, '_push_thread'):
            self._push_thread = PushThread(self.session, self.document)
            self._push_thread.start()
        return self._push_thread

    def store(self, obj):
        self.push_thread.put(obj, PushThread.PUT)

    def push(self, which_callback):
        self.push_thread.put(which_callback, PushThread.PUSH)
Ejemplo n.º 16
0
class Population(object):

    year = 2010
    location = "World"

    def __init__(self):
        from bokeh.models import ColumnDataSource
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population_reveal')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

        # just render at the initialization
        self._render()

    def _render(self):
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        from bokeh.models import (Plot, DataRange1d, LinearAxis, Grid,
                                  Legend, SingleIntervalTicker)
        from bokeh.models.glyphs import Quad

        xdr = DataRange1d(sources=[self.source_pyramid.columns("male"),
                                   self.source_pyramid.columns("female")])
        ydr = DataRange1d(sources=[self.source_pyramid.columns("groups")])

        self.plot = Plot(title="Widgets", x_range=xdr, y_range=ydr,
                         plot_width=600, plot_height=600)

        xaxis = LinearAxis()
        self.plot.add_layout(xaxis, 'below')
        yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5))
        self.plot.add_layout(yaxis, 'left')

        self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

        male_quad = Quad(left="male", right=0, bottom="groups", top="shifted",
                         fill_color="#3B8686")
        male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad)

        female_quad = Quad(left=0, right="female", bottom="groups", top="shifted",
                           fill_color="#CFF09E")
        female_quad_glyph = self.plot.add_glyph(self.source_pyramid, female_quad)

        self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph],
                                                 Female=[female_quad_glyph])))

    def on_year_change(self, obj, attr, old, new):
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        from bokeh.models.widgets import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        year_select = Select(title="Year:", value="2010", options=years)
        location_select = Select(title="Location:", value="World", options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(year_select, location_select)
        self.layout = VBox(controls, self.plot)

    def update_pyramid(self):
        pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()

        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        shifted = groups[1:] + [groups[-1] + 5]

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
Ejemplo n.º 17
0
class Population(object):

    year = 2010
    location = "World"

    def __init__(self):
        from bokeh.document import Document
        from bokeh.session import Session
        from bokeh.models import ColumnDataSource
        from bokeh.sampledata.population import load_population

        self.document = Document()
        self.session = Session()
        self.session.use_doc('population')
        self.session.load_document(self.document)

        self.df = load_population()
        self.source_pyramid = ColumnDataSource(data=dict())

    def render(self):
        self.pyramid_plot()
        self.create_layout()
        self.document.add(self.layout)
        self.update_pyramid()

    def pyramid_plot(self):
        from bokeh.models import (
            Plot, DataRange1d, LinearAxis, Grid, Legend,
            SingleIntervalTicker
        )
        from bokeh.models.glyphs import Quad

        xdr = DataRange1d()
        ydr = DataRange1d()

        self.plot = Plot(title=None, x_range=xdr, y_range=ydr,
                         plot_width=600, plot_height=600)

        xaxis = LinearAxis()
        self.plot.add_layout(xaxis, 'below')
        yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=5))
        self.plot.add_layout(yaxis, 'left')

        self.plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
        self.plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

        male_quad = Quad(left="male", right=0, bottom="groups", top="shifted",
                         fill_color="#3B8686")
        male_quad_glyph = self.plot.add_glyph(self.source_pyramid, male_quad)

        female_quad = Quad(left=0, right="female", bottom="groups", top="shifted",
                           fill_color="#CFF09E")
        female_quad_glyph = self.plot.add_glyph(self.source_pyramid, female_quad)

        self.plot.add_layout(Legend(legends=dict(Male=[male_quad_glyph],
                                                 Female=[female_quad_glyph])))

    def on_year_change(self, obj, attr, old, new):
        self.year = int(new)
        self.update_pyramid()

    def on_location_change(self, obj, attr, old, new):
        self.location = new
        self.update_pyramid()

    def create_layout(self):
        from bokeh.models.widgets import Select, HBox, VBox

        years = list(map(str, sorted(self.df.Year.unique())))
        locations = sorted(self.df.Location.unique())

        year_select = Select(title="Year:", value="2010", options=years)
        location_select = Select(title="Location:", value="World", options=locations)

        year_select.on_change('value', self.on_year_change)
        location_select.on_change('value', self.on_location_change)

        controls = HBox(year_select, location_select)
        self.layout = VBox(controls, self.plot)

    def update_pyramid(self):
        pyramid = self.df[(self.df.Location == self.location) & (self.df.Year == self.year)]

        male = pyramid[pyramid.Sex == "Male"]
        female = pyramid[pyramid.Sex == "Female"]

        total = male.Value.sum() + female.Value.sum()

        male_percent = -male.Value / total
        female_percent = female.Value / total

        groups = male.AgeGrpStart.tolist()
        shifted = groups[1:] + [groups[-1] + 5]

        self.source_pyramid.data = dict(
            groups=groups,
            shifted=shifted,
            male=male_percent,
            female=female_percent,
        )
        self.session.store_document(self.document)
Ejemplo n.º 18
0
class NutchUrlTrails:
    """
    Class for managing URL Trails visualizations
    """

    @staticmethod
    def strip_url(url):
        """
        Make a URL safe for visualization in Bokeh server
        :param url: a URL to be shortened/stripped
        :return: The stripped URL
        """
        # TODO: remove protocol-stripping on next Bokeh release
        stripped_url = url.replace('https://', '').replace('http://', '').replace(':', '_').replace('-', '_')

        if len(stripped_url) <= URL_CHAR_WIDTH:
            return stripped_url
        else:
            return stripped_url[:int(URL_CHAR_WIDTH/2)] + '...' + stripped_url[-int(URL_CHAR_WIDTH/2)-3:]

    @staticmethod
    def jtime_to_datetime(t):
        """
        Convert a Java-format Epoch time stamp into np.datetime64 object
        :param t: Java-format Epoch time stamp (milliseconds)
        :return: A np.datetime64 scalar
        """
        return np.datetime64(datetime.fromtimestamp(t/1000.0))

    def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS):
        """
        Create a NutchUrlTrails instance for visualizing a running Nutch crawl in real-time using Bokeh
        :param name: The name of the crawl (as identified by the queue)
        :param num_urls: The number of URLs to display in the visualization
        :return: A NutchUrLTrails instance
        """
        self.crawl_name = crawl_name
        self.num_urls = num_urls
        self.open_urls = {}
        self.closed_urls = {}
        self.old_segments = None
        self.old_circles = None
        
        self.session = Session()
        self.session.use_doc(self.crawl_name)
        self.document = Document()

        con = Connection()

        exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False)
        queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name)
        self.queue = con.SimpleQueue(name=queue)

    def handle_messages(self):
        """
        Get and parse up to 250 messages from the queue then plot.  Break early if less.
        """

        for i in range(250):
            try:
                m = self.queue.get(block=True, timeout=1)
                self.parse_message(m)
            except Empty:
                break
        self.plot_urls()

    def parse_message(self, message):
        """
        Parse a single message arriving from the queue.  Updates list of open/closed urls.
        :param message: A message from the queue
        """
        print(message.body)
        message = json.loads(message.body)
        url = message["url"]
        if message["eventType"] == "START":
            self.open_urls[url] = NutchUrlTrails.jtime_to_datetime(message["timestamp"])
        elif message["eventType"] == "END":
            if url in self.open_urls:
                self.closed_urls[url] = (self.open_urls[url], NutchUrlTrails.jtime_to_datetime(message["timestamp"]))
                del self.open_urls[url]
            else:
                # TODO: Log mismatched messages instead of just swallowing them
                pass
        else:
            raise Exception("Unexpected message type")

    def plot_urls(self):
        """
        Visualize crawler activity by showing the most recently crawled URLs and the fetch time.
        """

        self.session.load_document(self.document)
        plot = self.document.context.children[0]

        # don't plot if no URLs available
        if not (self.open_urls or self.closed_urls):
            return

        # x0/x0, left and right boundaries of segments, correspond to fetch time
        x0 = []
        x = []
        # y-axis, name of URL being fetched
        urls = []

        # maintain x and URL of circles in a separate list
        circles = []
        circle_urls = []

        current_time = np.datetime64(datetime.now())

        # For open URLs (not completed fetching), draw a segment from start time to now
        for url, start_t in self.open_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(current_time)
            urls.append(url)

        # For closed URLs (completed fetching), draw a segment from start to end time, and a circle as well.
        for url, (start_t, end_t) in self.closed_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(end_t)
            circles.append(end_t)
            urls.append(url)
            circle_urls.append(url)

        x0 = np.asarray(x0)
        x = np.asarray(x)
        circles = np.asarray(circles)

        # sort segments
        sort_index = np.argsort(x0)[::-1]
        x0 = x0[sort_index]
        x = x[sort_index]
        urls = [urls[i] for i in sort_index]

        # sort circles
        if self.closed_urls:
            circle_sort_index = np.argsort(circles)[::-1]
            circles = circles[circle_sort_index]
            circle_urls = [circle_urls[i] for i in circle_sort_index]

        # Filter to latest num_url URLs (ascending order)
        # filter segments
        active_x0 = x0[:self.num_urls]
        active_x = x[:self.num_urls]
        active_urls = urls[:self.num_urls]

        min_x = min(active_x0)
        plot.x_range.start = min_x
        plot.x_range.end = np.datetime64(datetime.now())
        plot.y_range.factors = active_urls

        # make sure these are turned back on!
        # turn y axis grid lines back on
        for r in plot.renderers:
            if type(r) == Grid:
                r.grid_line_color = 'black'
                break

        # turn tickers and their labels back on
        plot.right[0].minor_tick_line_color = 'black'
        plot.right[0].major_tick_line_color = 'black'
        plot.right[0].major_label_text_font_size = '12pt'
        plot.below[0].minor_tick_line_color = 'black'
        plot.below[0].major_tick_line_color = 'black'
        plot.below[0].major_label_text_font_size = '12pt'

        # TODO: Find a more correct way to remove old segment/circle glyphs
        if self.old_circles:
            plot.renderers.pop()
            self.old_circles = None
        if self.old_segments:
            plot.renderers.pop()
            self.old_segments = None

        segment_source = ColumnDataSource(dict(x0=active_x0,
                                               x1=active_x,
                                               urls=active_urls))

        self.old_segments = Segment(x0="x0", y0="urls", x1="x1", y1="urls", line_color="orange", line_width=10)
        plot.add_glyph(segment_source, self.old_segments)

        if self.closed_urls and PLOT_CIRCLES:
            # filter circles (some of these might not be displayed)
            active_circles = circles[:self.num_urls]
            active_circle_urls = circle_urls[:self.num_urls]

            circle_source = ColumnDataSource(dict(x=active_circles, urls=active_circle_urls))

            self.old_circles = Circle(x="x", y="urls", size=12, fill_color="green", line_color="orange", line_width=2)
            plot.add_glyph(circle_source, self.old_circles)

        self.session.store_document(self.document, dirty_only=False)
Ejemplo n.º 19
0
class PlottingExtension(Callback):
    """Base class for extensions doing Bokeh plotting.

    Parameters
    ----------
    document_name : str
        The name of the Bokeh document. Use a different name for each
        experiment if you are storing your plots.

    ##start_server : Removed, as it has a number of flaws inkl. zombi bokeh-server processes!

    server_url : str, optional
        Url of the bokeh-server. Ex: when starting the bokeh-server with
        ``bokeh-server --ip 0.0.0.0`` at ``alice``, server_url should be
        ``http://alice:5006``. Defaults to http://localhost:5006.
    clear_document : bool, optional
        Whether or not to clear the contents of the server-side document
        upon creation. If `False`, previously existing plots within the
        document will be kept. Defaults to `True`.

    """
    def __init__(self, document_name, server_url=None, clear_document=True):
        super(PlottingExtension, self).__init__()
        self.document_name = document_name
        self.server_url = DEFAULT_SRV_URL if server_url is None else server_url
        self.session = Session(root_url=self.server_url)
        self.document = Document()
        self._setup_document(clear_document)

    def _setup_document(self, clear_document=False):
        self.session.use_doc(self.document_name)
        self.session.load_document(self.document)
        if clear_document:
            self.document.clear()
        self._document_setup_done = True

    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop('_sub', None)
        state.pop('session', None)
        state.pop('_push_thread', None)
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.session = Session(root_url=self.server_url)
        self._document_setup_done = False

    def on_callback(self, logs={}):
        if not self._document_setup_done:
            self._setup_document()

    @property
    def push_thread(self):
        if not hasattr(self, '_push_thread'):
            self._push_thread = PushThread(self.session, self.document)
            self._push_thread.start()
        return self._push_thread

    def store_data(self, obj):
        self.push_thread.put(obj, PushThread.PUT)

    def push_document(self, after_training=False):
        self.push_thread.put(after_training, PushThread.PUSH)
Ejemplo n.º 20
0
class NutchUrlTrails:
    """
    Class for managing URL Trails visualizations
    """
    @staticmethod
    def strip_url(url):
        """
        Make a URL safe for visualization in Bokeh server
        :param url: a URL to be shortened/stripped
        :return: The stripped URL
        """
        # TODO: remove protocol-stripping on next Bokeh release
        stripped_url = url.replace('https://',
                                   '').replace('http://', '').replace(
                                       ':', '_').replace('-', '_')

        if len(stripped_url) <= URL_CHAR_WIDTH:
            return stripped_url
        else:
            return stripped_url[:int(URL_CHAR_WIDTH /
                                     2)] + '...' + stripped_url[
                                         -int(URL_CHAR_WIDTH / 2) - 3:]

    @staticmethod
    def jtime_to_datetime(t):
        """
        Convert a Java-format Epoch time stamp into np.datetime64 object
        :param t: Java-format Epoch time stamp (milliseconds)
        :return: A np.datetime64 scalar
        """
        return np.datetime64(datetime.fromtimestamp(t / 1000.0))

    def __init__(self, crawl_name, num_urls=DEFAULT_NUM_URLS):
        """
        Create a NutchUrlTrails instance for visualizing a running Nutch crawl in real-time using Bokeh
        :param name: The name of the crawl (as identified by the queue)
        :param num_urls: The number of URLs to display in the visualization
        :return: A NutchUrLTrails instance
        """
        self.crawl_name = crawl_name
        self.num_urls = num_urls
        self.open_urls = {}
        self.closed_urls = {}
        self.old_segments = None
        self.old_circles = None

        self.session = Session()
        self.session.use_doc(self.crawl_name)
        self.document = Document()

        con = Connection()

        exchange = Exchange(EXCHANGE_NAME, 'direct', durable=False)
        queue = Queue(crawl_name, exchange=exchange, routing_key=crawl_name)
        self.queue = con.SimpleQueue(name=queue)

    def handle_messages(self):
        """
        Get and parse up to 250 messages from the queue then plot.  Break early if less.
        """

        for i in range(250):
            try:
                m = self.queue.get(block=True, timeout=1)
                self.parse_message(m)
            except Empty:
                break
        self.plot_urls()

    def parse_message(self, message):
        """
        Parse a single message arriving from the queue.  Updates list of open/closed urls.
        :param message: A message from the queue
        """
        print(message.body)
        message = json.loads(message.body)
        url = message["url"]
        if message["eventType"] == "START":
            self.open_urls[url] = NutchUrlTrails.jtime_to_datetime(
                message["timestamp"])
        elif message["eventType"] == "END":
            if url in self.open_urls:
                self.closed_urls[url] = (self.open_urls[url],
                                         NutchUrlTrails.jtime_to_datetime(
                                             message["timestamp"]))
                del self.open_urls[url]
            else:
                # TODO: Log mismatched messages instead of just swallowing them
                pass
        else:
            raise Exception("Unexpected message type")

    def plot_urls(self):
        """
        Visualize crawler activity by showing the most recently crawled URLs and the fetch time.
        """

        self.session.load_document(self.document)
        plot = self.document.context.children[0]

        # don't plot if no URLs available
        if not (self.open_urls or self.closed_urls):
            return

        # x0/x0, left and right boundaries of segments, correspond to fetch time
        x0 = []
        x = []
        # y-axis, name of URL being fetched
        urls = []

        # maintain x and URL of circles in a separate list
        circles = []
        circle_urls = []

        current_time = np.datetime64(datetime.now())

        # For open URLs (not completed fetching), draw a segment from start time to now
        for url, start_t in self.open_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(current_time)
            urls.append(url)

        # For closed URLs (completed fetching), draw a segment from start to end time, and a circle as well.
        for url, (start_t, end_t) in self.closed_urls.items():
            url = NutchUrlTrails.strip_url(url)
            x0.append(start_t)
            x.append(end_t)
            circles.append(end_t)
            urls.append(url)
            circle_urls.append(url)

        x0 = np.asarray(x0)
        x = np.asarray(x)
        circles = np.asarray(circles)

        # sort segments
        sort_index = np.argsort(x0)[::-1]
        x0 = x0[sort_index]
        x = x[sort_index]
        urls = [urls[i] for i in sort_index]

        # sort circles
        if self.closed_urls:
            circle_sort_index = np.argsort(circles)[::-1]
            circles = circles[circle_sort_index]
            circle_urls = [circle_urls[i] for i in circle_sort_index]

        # Filter to latest num_url URLs (ascending order)
        # filter segments
        active_x0 = x0[:self.num_urls]
        active_x = x[:self.num_urls]
        active_urls = urls[:self.num_urls]

        min_x = min(active_x0)
        plot.x_range.start = min_x
        plot.x_range.end = np.datetime64(datetime.now())
        plot.y_range.factors = active_urls

        # make sure these are turned back on!
        # turn y axis grid lines back on
        for r in plot.renderers:
            if type(r) == Grid:
                r.grid_line_color = 'black'
                break

        # turn tickers and their labels back on
        plot.right[0].minor_tick_line_color = 'black'
        plot.right[0].major_tick_line_color = 'black'
        plot.right[0].major_label_text_font_size = '12pt'
        plot.below[0].minor_tick_line_color = 'black'
        plot.below[0].major_tick_line_color = 'black'
        plot.below[0].major_label_text_font_size = '12pt'

        # TODO: Find a more correct way to remove old segment/circle glyphs
        if self.old_circles:
            plot.renderers.pop()
            self.old_circles = None
        if self.old_segments:
            plot.renderers.pop()
            self.old_segments = None

        segment_source = ColumnDataSource(
            dict(x0=active_x0, x1=active_x, urls=active_urls))

        self.old_segments = Segment(x0="x0",
                                    y0="urls",
                                    x1="x1",
                                    y1="urls",
                                    line_color="orange",
                                    line_width=10)
        plot.add_glyph(segment_source, self.old_segments)

        if self.closed_urls and PLOT_CIRCLES:
            # filter circles (some of these might not be displayed)
            active_circles = circles[:self.num_urls]
            active_circle_urls = circle_urls[:self.num_urls]

            circle_source = ColumnDataSource(
                dict(x=active_circles, urls=active_circle_urls))

            self.old_circles = Circle(x="x",
                                      y="urls",
                                      size=12,
                                      fill_color="green",
                                      line_color="orange",
                                      line_width=2)
            plot.add_glyph(circle_source, self.old_circles)

        self.session.store_document(self.document, dirty_only=False)
Ejemplo n.º 21
0
import pandas as pd
from bokeh.objects import (Plot, ColumnDataSource, DataRange1d, FactorRange,
                           LinearAxis, CategoricalAxis, Grid, Glyph)
from bokeh.widgetobjects import (DateRangeSlider, HBox, VBox, Paragraph,
                                 Select, VBoxModelForm)
from bokeh.glyphs import Rect
from bokeh.document import Document
from bokeh.session import Session

import employment_data_reader as emp
from employment_utils import get_country_for_byte, get_jobtype_for_byte

document = Document()
session = Session()
session.use_doc('employment_server')
session.load_document(document)

days_of_week = {
    0: "Monday",
    1: "Tuesday",
    2: "Wednesday",
    3: "Thursday",
    4: "Friday",
    5: "Saturday",
    6: "Sunday"
}
bounds_start = datetime.date(2012, 11, 1)
bounds_end = datetime.date(2013, 12, 30)
start = datetime.date(2013, 5, 26)
end = datetime.date(2013, 7, 5)
source_country = ColumnDataSource(data=dict())
Ejemplo n.º 22
0
class PlottingExtension(SimpleExtension):
    """Base class for extensions doing Bokeh plotting.

    Parameters
    ----------
    document_name : str
        The name of the Bokeh document. Use a different name for each
        experiment if you are storing your plots.
    start_server : bool, optional
        Whether to try and start the Bokeh plotting server. Defaults to
        ``False``. The server started is not persistent i.e. after shutting
        it down you will lose your plots. If you want to store your plots,
        start the server manually using the ``bokeh-server`` command. Also
        see the warning above.
    server_url : str, optional
        Url of the bokeh-server. Ex: when starting the bokeh-server with
        ``bokeh-server --ip 0.0.0.0`` at ``alice``, server_url should be
        ``http://alice:5006``. When not specified the default configured
        by ``bokeh_server`` in ``.blocksrc`` will be used. Defaults to
        ``http://localhost:5006/``.
    clear_document : bool, optional
        Whether or not to clear the contents of the server-side document
        upon creation. If `False`, previously existing plots within the
        document will be kept. Defaults to `True`.

    """
    def __init__(self, document_name, server_url=None, start_server=False,
                 clear_document=True, **kwargs):
        self.document_name = document_name
        self.server_url = (config.bokeh_server if server_url is None
                           else server_url)
        self.start_server = start_server
        self.sub = self._start_server_process() if self.start_server else None
        self.session = Session(root_url=self.server_url)
        self.document = Document()
        self._setup_document(clear_document)
        super(PlottingExtension, self).__init__(**kwargs)

    def _start_server_process(self):
        def preexec_fn():
            """Prevents the server from dying on training interrupt."""
            signal.signal(signal.SIGINT, signal.SIG_IGN)
        # Only memory works with subprocess, need to wait for it to start
        logger.info('Starting plotting server on localhost:5006')
        self.sub = Popen('bokeh-server --ip 0.0.0.0 '
                         '--backend memory'.split(),
                         stdout=PIPE, stderr=PIPE, preexec_fn=preexec_fn)
        time.sleep(2)
        logger.info('Plotting server PID: {}'.format(self.sub.pid))

    def _setup_document(self, clear_document=False):
        self.session.use_doc(self.document_name)
        self.session.load_document(self.document)
        if clear_document:
            self.document.clear()
        self._document_setup_done = True

    def __getstate__(self):
        state = self.__dict__.copy()
        state['sub'] = None
        state.pop('session', None)
        state.pop('_push_thread', None)
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        if self.start_server:
           self._start_server_process()
        self.session = Session(root_url=self.server_url)
        self._document_setup_done = False

    def do(self, which_callback, *args):
        if not self._document_setup_done:
            self._setup_document()

    @property
    def push_thread(self):
        if not hasattr(self, '_push_thread'):
            self._push_thread = PushThread(self.session, self.document)
            self._push_thread.start()
        return self._push_thread

    def store(self, obj):
        self.push_thread.put(obj, PushThread.PUT)

    def push(self, which_callback):
        self.push_thread.put(which_callback, PushThread.PUSH)