class ColumnDataSource(DataSource):
    """Data source that maps column names to sequences or arrays."""

    # Column name -> sequence/array of values.
    data = Dict()

    # Column name -> DataRange/FactorRange; a range is created
    # automatically for any field missing from these dicts.
    cont_ranges = Dict()
    discrete_ranges = Dict()

    def add(self, data, name=None):
        """Append *data* as a new column and return the name used.

        When *name* is omitted, the first unused "Series %d" name is
        generated.
        """
        if name is None:
            idx = len(self.data)
            while "Series %d" % idx in self.data:
                idx += 1
            name = "Series %d" % idx
        self.column_names.append(name)
        self.data[name] = data
        return name

    def remove(self, name):
        """Remove the named column, warning if it does not exist."""
        try:
            self.column_names.remove(name)
            del self.data[name]
        except (ValueError, KeyError):
            warnings.warn("Unable to find column '%s' in datasource" % name)
class ObjectArrayDataSource(DataSource):
    """Data source holding rows of data as a list of value tuples."""

    # One tuple of values per row.
    data = List()

    # Column name -> DataRange/FactorRange; absent fields get an
    # automatically created range.
    cont_ranges = Dict()
    discrete_ranges = Dict()
class ColumnDataSource(DataSource):
    """Data source that maps column names to sequences or arrays."""

    # Column name -> sequence/array of values.
    data = Dict()

    # Column name -> DataRange/FactorRange; a range is created
    # automatically for any field missing from these dicts.
    cont_ranges = Dict()
    discrete_ranges = Dict()
class PandasDataSource(DataSource):
    """Server-side-only data holder.

    Stored in the plot server's database with no client-side
    representation; a PandasPlotSource must be created and pointed at
    this object to expose it to clients.
    """

    data = Dict()
class PivotTable(PlotObject):
    """Server-side pivot-table model driven by a remote data source."""

    title = String("Pivot Table")
    description = String("")
    source = Instance(has_ref=True)
    data = Dict()
    fields = List()  # List[{name: String, dtype: String}]
    rows = List()
    columns = List()
    values = List()
    filters = List()
    manual_update = Bool(True)

    def setup_events(self):
        """Recompute the pivot whenever any pivot parameter changes."""
        for attr in ('rows', 'columns', 'values', 'filters'):
            self.on_change(attr, self, 'get_data')
        if not self.fields:
            self.fields = self.source.fields()
        if not self.data:
            self.get_data()

    def get_data(self, obj=None, attrname=None, old=None, new=None):
        """Ask the source to pivot using the current parameters."""
        spec = dict(rows=self.rows,
                    columns=self.columns,
                    values=self.values,
                    filters=self.filters)
        self.data = self.source.pivot(spec)
class FixedTickFormatter(TickFormatter):
    """ Class used to allow custom axis tick labels on a bokeh chart
    Extends bokeh.model.formatters.TickFormatter

    Ticks whose integer value has no entry in ``labels`` are rendered as
    the empty string (see ``doFormat`` in the CoffeeScript below).
    """

    labels = Dict(Int, String, help="""
    A mapping of integer ticks values to their labels.
    """)

    # Client-side implementation: looks each tick up in @labels, falling
    # back to "" for unmapped ticks.
    COFFEESCRIPT = """
import {_} from "underscore"
import {Model} from "model"
import * as p from "core/properties"

export class FixedTickFormatter extends Model
  type: 'FixedTickFormatter'
  @define {
    labels: [ p.Any ]
  }
  doFormat: (ticks) ->
    labels = @labels
    return (labels[tick] ? "" for tick in ticks)
"""

    __implementation__ = CoffeeScript(COFFEESCRIPT)
class StyleableBox(BaseBox):
    """Box layout whose element-level CSS is supplied as a dictionary."""

    __implementation__ = load_component('./styleable_box.coffee')

    # CSS property name -> value applied at the element level.
    css_properties = Dict(String, Any, default=None)
    # Layout direction of the box's children.
    orientation = String(default='vertical')
class DatetimeAxis(LinearAxis):
    """Linear axis specialized for date/time tick labelling."""

    type = String("datetime_axis")
    axis_label = String("date")
    scale = String("time")

    # Tick label layout tuning.
    num_labels = Int(8)
    char_width = Int(10)
    fill_ratio = Float(0.3)

    # strftime-style formats keyed by time resolution.
    formats = Dict({"days": ["%m/%d/%Y"]})
class StatsBox(BaseBox):
    """Box widget rendering a set of labelled display items."""

    __implementation__ = load_component('./stats_box.coffee')

    # Raw CSS applied to the box.
    styles = String(default=None)
    # Label -> value shown in the box.
    display_items = Dict(String, Any, default=None)

    @validation.warning(EMPTY_LAYOUT)
    def _check_empty_layout(self):
        # Intentionally suppress the empty-layout warning for this widget.
        pass
class Legend(PlotObject):
    """Legend annotation attached to a plot."""

    plot = Instance(Plot, has_ref=True)
    annotationspec = Dict(has_ref=True)

    def vm_serialize(self):
        """Serialize, forcing the annotation spec's type to 'legend'."""
        serialized = super(Legend, self).vm_serialize()
        serialized['annotationspec']['type'] = 'legend'
        return serialized
def test_Dict(self):
    """Dict requires key/value types and only validates dicts (or None)."""
    with self.assertRaises(TypeError):
        prop = Dict()

    prop = Dict(String, List(Int))
    self.assertTrue(prop.is_valid(None))
    # Non-dict values of every flavour are rejected.
    for invalid in (False, True, 0, 1, 0.0, 1.0, 1.0 + 1.0j, "", (), []):
        self.assertFalse(prop.is_valid(invalid))
    self.assertTrue(prop.is_valid({}))
    self.assertFalse(prop.is_valid(Foo()))
class ColumnDataSource(DataSource):
    """Data source that maps column names to sequences or arrays."""

    # Column name -> sequence/array of values.
    data = Dict()

    # Column name -> DataRange/FactorRange; a range is created
    # automatically for any field missing from these dicts.
    cont_ranges = Dict()
    discrete_ranges = Dict()

    def __init__(self, *args, **kw):
        """Constructor accepting a single positional dict.

        A lone positional dict argument is treated implicitly as the
        "data" attribute, on top of the basic DataSource/PlotObj
        constructor behavior.
        """
        if len(args) == 1 and "data" not in kw:
            kw["data"] = args[0]
        super(ColumnDataSource, self).__init__(**kw)

    def add(self, data, name=None):
        """Append *data* as a new column and return the name used.

        When *name* is omitted, the first unused "Series %d" name is
        generated.
        """
        if name is None:
            counter = len(self.data)
            while "Series %d" % counter in self.data:
                counter += 1
            name = "Series %d" % counter
        self.column_names.append(name)
        self.data[name] = data
        return name

    def remove(self, name):
        """Remove the named column, warning if it does not exist."""
        try:
            self.column_names.remove(name)
            del self.data[name]
        except (ValueError, KeyError):
            warnings.warn("Unable to find column '%s' in datasource" % name)
class DataTable(PlotObject):
    """Server-side table view with sorting, grouping and paging."""

    source = Instance(has_ref=True)
    sort = List()
    group = List()
    offset = Int(default=0)
    length = Int(default=100)
    maxlength = Int()
    totallength = Int()
    tabledata = Dict()
    filterselected = Bool(default=False)

    def setup_events(self):
        """Refresh the table whenever a view parameter or source changes."""
        for attr in ('sort', 'group', 'length', 'offset', 'filterselected'):
            self.on_change(attr, self, 'get_data')
        for attr in ('selected', 'data', 'computed_columns'):
            self.source.on_change(attr, self, 'get_data')
        if not self.tabledata:
            self.get_data()

    def transform(self):
        """Bundle the current view parameters for the data source."""
        return dict(sort=self.sort,
                    group=self.group,
                    offset=self.offset,
                    length=self.length,
                    filterselected=self.filterselected)

    def setselect(self, select):
        self.source.setselect(select, self.transform())
        self.get_data()

    def select(self, select):
        self.source.select(select, self.transform())
        self.get_data()

    def deselect(self, deselect):
        self.source.deselect(deselect, self.transform())
        self.get_data()

    def get_data(self, obj=None, attrname=None, old=None, new=None):
        """Fetch the current page of data and store it on the model."""
        fetched = self.source.get_data(self.transform())
        self.maxlength = fetched.pop('maxlength')
        self.totallength = fetched.pop('totallength')
        self.tabledata = fetched
class IPythonRemoteData(PlotObject):
    """Client-side proxy for an array served by a remote IPython process.

    Every operation is an HTTP call against ``/array/<varname>`` endpoints
    on ``host:port``. ``selected`` and ``data`` are integer counters that
    are bumped purely to trigger change events on listeners.

    Refactored for consistency with RemoteDataSource: URL construction and
    event triggering are factored into private helpers instead of being
    repeated in every method.
    """

    host = String("localhost")
    port = Int(10020)
    varname = String()
    computed_columns = List()
    metadata = Dict()

    # hack... we're just using these fields right now to trigger events
    selected = Int(0)
    data = Int(0)

    def _url(self, func=None):
        """Build the endpoint URL, optionally with a trailing /<func>."""
        suffix = "/" + func if func is not None else ""
        return "http://%s:%s/array/%s%s" % (
            self.host, self.port, self.varname, suffix)

    def _trigger_events(self):
        # Bump the counter so on_change listeners fire.
        self.selected = self.selected + 1

    def _post_selection(self, endpoint, selection, transform):
        """POST *transform* plus the selection payload, then signal."""
        payload = transform
        payload['selected'] = selection
        requests.post(self._url(endpoint),
                      data=protocol.serialize_json(payload))
        self._trigger_events()

    def setselect(self, select, transform):
        self._post_selection("setselect", select, transform)

    def search(self, search):
        # Note: the search body is posted raw, not JSON-serialized.
        requests.post(self._url("search"), data=search)
        self._trigger_events()

    def select(self, select, transform):
        self._post_selection("select", select, transform)

    def deselect(self, deselect, transform):
        self._post_selection("deselect", deselect, transform)

    def get_data(self, transform):
        """GET the transformed array data; stashes 'metadata' on self."""
        data = requests.get(
            self._url(), data=protocol.serialize_json(transform)).json()
        self.metadata = data.pop('metadata', {})
        return data

    def set_computed_columns(self, computed_columns):
        """Register computed columns remotely and trigger a data event."""
        data = requests.get(
            self._url("computed"),
            data=protocol.serialize_json(computed_columns)).json()
        self.computed_columns = computed_columns
        self.data += 1
        return data
class PandasPivotTable(PlotObject):
    """Paged, sortable, groupable table view over a pandas data source.

    Fix: removed a leftover debug ``print`` of the selection column from
    ``get_data``.
    """

    source = Instance(has_ref=True)
    sort = List()
    group = List()
    offset = Int(default=0)
    length = Int(default=100)
    maxlength = Int()
    totallength = Int()
    # Column name -> number of decimal places used to format floats.
    precision = Dict()
    tabledata = Dict()
    filterselected = Bool(default=False)

    def setup_events(self):
        """Re-fetch data whenever a view parameter or the source changes."""
        for attr in ('sort', 'group', 'length', 'offset', 'precision',
                     'filterselected'):
            self.on_change(attr, self, 'get_data')
        for attr in ('selected', 'data', 'computed_columns'):
            self.source.on_change(attr, self, 'get_data')
        if not self.tabledata:
            self.get_data()

    def format_data(self, jsondata):
        """Inplace manipulation of jsondata: render dates, round floats."""
        precision = self.precision
        for colname, data in jsondata.iteritems():
            # Bookkeeping columns are passed through untouched.
            if colname == '_selected' or colname == '_counts':
                continue
            isdate = bool(self.source.metadata.get(colname, {}).get('date'))
            for idx, val in enumerate(data):
                if isdate:
                    # Source timestamps are in milliseconds.
                    timeobj = time.localtime(val / 1000.0)
                    data[idx] = time.strftime("%Y-%m-%d %H:%M:%S", timeobj)
                if isinstance(val, float):
                    # NOTE(review): assumes date columns never hold floats;
                    # data[idx] would already be a string here otherwise.
                    data[idx] = "%%.%df" % precision.get(colname, 2) % data[idx]

    def transform(self):
        """Current view parameters as a dict for the data source."""
        return dict(
            sort=self.sort,
            group=self.group,
            offset=self.offset,
            length=self.length,
            filterselected=self.filterselected,
        )

    def setselect(self, select):
        self.source.setselect(select, self.transform())
        self.get_data()

    def select(self, select):
        self.source.select(select, self.transform())
        self.get_data()

    def deselect(self, deselect):
        self.source.deselect(deselect, self.transform())
        self.get_data()

    def get_data(self, obj=None, attrname=None, old=None, new=None):
        """Fetch, format and store the current page of table data."""
        data = self.source.get_data(self.transform())
        self.maxlength = data.pop('maxlength')
        self.totallength = data.pop('totallength')
        self.format_data(data['data'])
        self.tabledata = data
class HasIntDictProp(PlotObject):
    # Test fixture: a dict property keyed by integers with arbitrary values.
    foo = Dict(Int, Any)

    def __init__(self, **kwargs):
        # Pass-through constructor; exists only to mirror sibling fixtures.
        super(HasIntDictProp, self).__init__(**kwargs)
class HasStringDictProp(PlotObject):
    # Test fixture: a dict property keyed by strings with arbitrary values.
    foo = Dict(String, Any)

    def __init__(self, **kwargs):
        # Pass-through constructor; exists only to mirror sibling fixtures.
        super(HasStringDictProp, self).__init__(**kwargs)
class V(self.pObjectClass):
    # Exercises Instance(U) references nested at various container depths.
    u1 = Instance(U)                      # direct reference
    u2 = List(Instance(U))                # inside a list
    u3 = Tuple(Int, Instance(U))          # inside a tuple
    u4 = Dict(String, Instance(U))        # as dict values
    u5 = Dict(String, List(Instance(U)))  # list of references as dict values
class AttrSpec(HasProps):
    """A container for assigning attributes to values and retrieving them as
    needed.

    A special function this provides is automatically handling cases where the
    provided iterator is too short compared to the distinct values provided.

    Once created as attr_spec, you can do attr_spec[data_label], where
    data_label must be a one dimensional tuple of values, representing the
    unique group in the data.

    See the :meth:`AttrSpec.setup` method for the primary way to provide an
    existing AttrSpec with data and column values and update all derived
    property values.
    """

    id = Any()
    data = Instance(ColumnDataSource)
    name = String(help='Name of the attribute the spec provides.')

    columns = Either(ColumnLabel, List(ColumnLabel), help="""
        The label or list of column labels that correspond to the columns
        that will be used to find all distinct values (single column) or
        combination of values ( multiple columns) to then assign a unique
        attribute to. If not enough unique attribute values are found, then
        the attribute values will be cycled.
        """)

    default = Any(default=None, help="""
        The default value for the attribute, which is used if no column is
        assigned to the attribute for plotting. If the default value is not
        provided, the first value in the `iterable` property is used.
        """)

    attr_map = Dict(Any, Any, help="""
        Created by the attribute specification when `iterable` and `data`
        are available. The `attr_map` will include a mapping between the
        distinct value(s) found in `columns` and the attribute value that
        has been assigned.
        """)

    iterable = List(Any, default=None, help="""
        The iterable of attribute values to assign to the distinct values
        found in `columns` of `data`.
        """)

    items = List(Any, default=None, help="""
        The attribute specification calculates this list of distinct values
        that are found in `columns` of `data`.
        """)

    sort = Bool(default=True, help="""
        A boolean flag to tell the attribute specification to sort `items`,
        when it is calculated. This affects which value of `iterable` is
        assigned to each distinct value in `items`.
        """)

    ascending = Bool(default=True, help="""
        A boolean flag to tell the attribute specification how to sort
        `items` if the `sort` property is set to `True`. The default setting
        for `ascending` is `True`.
        """)

    def __init__(self, columns=None, df=None, iterable=None, default=None,
                 items=None, **properties):
        """Create a lazy evaluated attribute specification.

        Args:
            columns: a list of column labels
            df(:class:`~pandas.DataFrame`): the data source for the
                attribute spec.
            iterable: an iterable of distinct attribute values
            default: a value to use as the default attribute when no columns
                are passed
            items: the distinct values in columns. If items is provided as
                input, then the values provided are used instead of being
                calculated. This can be used to force a specific order for
                assignment.
            **properties: other properties to pass to parent
                :class:`HasProps`
        """
        properties['columns'] = self._ensure_list(columns)

        if df is not None:
            properties['data'] = ColumnDataSource(df)

        if default is None and iterable is not None:
            # No explicit default: peek at the first value of a copy of the
            # iterable so the original is not consumed.
            default_iter = copy(iterable)
            properties['default'] = next(iter(default_iter))
        elif default is not None:
            properties['default'] = default

        if iterable is not None:
            properties['iterable'] = iterable

        if items is not None:
            properties['items'] = items

        super(AttrSpec, self).__init__(**properties)

    @staticmethod
    def _ensure_list(attr):
        """Always returns a list with the provided value. Returns the value
        if a list."""
        if isinstance(attr, str):
            return [attr]
        elif isinstance(attr, tuple):
            return list(attr)
        else:
            return attr

    @staticmethod
    def _ensure_tuple(attr):
        """Return tuple with the provided value. Returns the value if a
        tuple."""
        if not isinstance(attr, tuple):
            return (attr, )
        else:
            return attr

    def _setup_default(self):
        """Stores the first value of iterable into `default` property."""
        self.default = next(self._setup_iterable())

    def _setup_iterable(self):
        """Default behavior is to copy and cycle the provided iterable."""
        return cycle(copy(self.iterable))

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item."""
        if self.items is None or len(self.items) == 0:
            if self.sort:
                # NOTE(review): DataFrame.sort is the older pandas API; newer
                # pandas uses sort_values — confirm target pandas version.
                df = df.sort(columns=columns, ascending=self.ascending)
            items = df[columns].drop_duplicates()
            self.items = [tuple(x) for x in items.to_records(index=False)]

    def _create_attr_map(self, df, columns):
        """Creates map between unique values and available attributes."""
        self._generate_items(df, columns)
        iterable = self._setup_iterable()

        iter_map = {}
        for item in self.items:
            item = self._ensure_tuple(item)
            iter_map[item] = next(iterable)
        return iter_map

    def set_columns(self, columns):
        """Set columns property and update derived properties as needed."""
        columns = self._ensure_list(columns)
        if all([col in self.data.column_names for col in columns]):
            self.columns = columns
        else:
            # we have input values other than columns
            # assume this is now the iterable at this point
            self.iterable = columns
            self._setup_default()

    def setup(self, data=None, columns=None):
        """Set the data and update derived properties as needed."""
        if data is not None:
            self.data = data

        if columns is not None:
            self.set_columns(columns)

        if self.columns is not None and self.data is not None:
            self.attr_map = self._create_attr_map(self.data.to_df(),
                                                  self.columns)

    def __getitem__(self, item):
        """Lookup the attribute to use for the given unique group label."""
        if not self.columns or not self.data or item is None:
            return self.default
        elif self._ensure_tuple(item) not in self.attr_map.keys():
            # make sure we have attr map
            self.setup()
        return self.attr_map[self._ensure_tuple(item)]
class Namespace(PlotObject):
    """Tracks pandas DataFrames living in the IPython user namespace."""

    # Dataset name -> list of its column names.
    datasets = Dict()
    name = String()

    def __str__(self):
        return "Namespace(name=%r, datasets=%s)" % (
            self.name, sorted(self.datasets.keys()))

    __repr__ = __str__

    def _namespace(self):
        # The live IPython interactive namespace (session globals).
        return get_ipython().user_ns

    def statsmodels(self):
        """Populate namespace with statsmodels' datasets. """
        from statsmodels import datasets

        ns = self._namespace()

        for name, dataset in datasets.__dict__.iteritems():
            if hasattr(dataset, "load_pandas"):
                ns[name] = dataset.load_pandas().data

    def populate(self, to_disk=True):
        """Scan global namespace for pandas' datasets. """
        ns = self._namespace()
        datasets = {}

        # Public (non-underscore) DataFrames only.
        for name, dataset in ns.iteritems():
            if isinstance(dataset, DataFrame) and not name.startswith("_"):
                datasets[name] = list(dataset.columns)

        if datasets == self.datasets:
            return

        self.datasets = datasets
        self.session.store_obj(self)

        if not to_disk:
            return

        # Persist the actual frames so a later session can reload them.
        to_write = dict([(name, ns[name]) for name in datasets.keys()])

        with open(self.filename, "w+") as file:
            pickle.dump(to_write, file, protocol=-1)

    def clean(self):
        """Remove all pandas' datasets from global namespace. """
        ns = self._namespace()

        # Iterate over a copy since we delete from ns while scanning.
        for name, dataset in dict(ns).iteritems():
            if isinstance(dataset, DataFrame) and not name.startswith("_"):
                del ns[name]

    @property
    def filename(self):
        # Pickle file backing this namespace, derived from its name.
        return self.name + ".pickle"

    def load(self):
        """Restore previously pickled frames into the live namespace."""
        ns = self._namespace()

        if os.path.exists(self.filename):
            fname = self.filename
            with open(fname) as f:
                data = pickle.load(f)
            for k, v in data.iteritems():
                ns[k] = v
class HasDictDefault(Model):
    """Test model whose `value` dict defaults to {"hello": 42}."""

    value = Dict(String, Int, default={"hello": 42})
class V(PlotObject):
    # Exercises Instance(U) references nested at various container depths.
    u1 = Instance(U)                      # direct reference
    u2 = List(Instance(U))                # inside a list
    u3 = Tuple(Int, Instance(U))          # inside a tuple
    u4 = Dict(String, Instance(U))        # as dict values
    u5 = Dict(String, List(Instance(U)))  # list of references as dict values
class TaxiApp(HBox):
    """Bokeh server app for exploring NYC taxi pickup/dropoff data.

    Shows abstract-rendered (AR) image plots for pickups and dropoffs
    (plus comparison variants), trip distance/time histograms, and
    date/hour/day-of-week widgets. Selections on any plot are turned into
    filters that are pushed back to the server-side data source.
    """

    extra_generated_classes = [["TaxiApp", "TaxiApp", "HBox"]]
    extra_scripts = ['/bokehjs/static/app/src/js/ar_data_source.js']
    extra_js=['window.ar_data_source.main();']

    # Geographic bounds (xmin, xmax, ymin, ymax) shared by all map plots.
    gbounds = ds.gbounds

    pickup_plot = Instance(Plot)
    pickup_raw_plot_source = Instance(ColumnDataSource)
    pickup_ar_plot_source = Instance(ARDataSource)
    dropoff_plot = Instance(Plot)
    dropoff_raw_plot_source = Instance(ColumnDataSource)
    dropoff_ar_plot_source = Instance(ARDataSource)
    pickup_comparison_plot = Instance(Plot)
    pickup_comparison_raw_plot_source = Instance(ColumnDataSource)
    pickup_comparison_ar_plot_source = Instance(ARDataSource)
    dropoff_comparison_plot = Instance(Plot)
    dropoff_comparison_raw_plot_source = Instance(ColumnDataSource)
    dropoff_comparison_ar_plot_source = Instance(ARDataSource)
    trip_distance_source = Instance(ColumnDataSource)
    trip_time_source = Instance(ColumnDataSource)
    trip_distance_ar_source = Instance(HistogramDataSource)
    trip_time_ar_source = Instance(HistogramDataSource)
    widgets = Instance(VBox)
    date_slider = Instance(DateRangeSlider)

    # Active filters: field name -> [min, max] or list of allowed values.
    filters = Dict(String, Any)

    trip_time_bins = np.linspace(0, 3600, 25)
    trip_distance_bins = np.linspace(0.01, 20, 25)
    distance_histogram = Instance(Plot)
    time_histogram = Instance(Plot)
    hour_selector = Instance(Select)
    day_of_week_selector = Instance(Select)
    regular = Instance(HBox)
    filtered = Instance(HBox)
    images = Instance(VBox)

    def make_trip_distance_histogram(self):
        """Build the trip-distance histogram plot and remember its sources."""
        bins = self.trip_distance_bins
        centers = pd.rolling_mean(bins, 2)[1:]
        figure(title="trip distance in miles",
               title_text_font='12pt',
               plot_width=300,
               plot_height=200,
               x_range=[bins[0], bins[-1]],
               y_range=[0, 1],
               tools="pan,wheel_zoom,box_zoom,select,reset"
        )
        source = HistogramDataSource(
            data_url="/bokeh/taxidata/distancehist/",
        )
        hold()
        plot = rect("centers", "y",
                    np.mean(np.diff(centers)) * 0.7,
                    "counts",
                    source=source)
        self.trip_distance_source = plot.select({'type' : ColumnDataSource})[0]
        self.trip_distance_ar_source = source
        plot.min_border=0
        plot.h_symmetry=False
        plot.v_symmetry=False
        select_tool = _get_select_tool(plot)
        if select_tool:
            # Histogram selections only make sense along x.
            select_tool.dimensions = ['width']
        self.distance_histogram = plot

    def make_trip_time_histogram(self):
        """Build the trip-time histogram plot and remember its sources."""
        bins = self.trip_time_bins
        centers = pd.rolling_mean(bins, 2)[1:]
        figure(title="trip time in secs",
               title_text_font='12pt',
               plot_width=300,
               plot_height=200,
               x_range=[bins[0], bins[-1]],
               y_range=[0, 1],
               tools="pan,wheel_zoom,box_zoom,select,reset"
        )
        source = HistogramDataSource(
            data_url="/bokeh/taxidata/timehist/",
        )
        hold()
        plot = rect("centers", "y",
                    np.mean(np.diff(centers)) * 0.7,
                    "counts",
                    source=source)
        self.trip_time_source = plot.select({'type' : ColumnDataSource})[0]
        self.trip_time_ar_source = source
        plot.min_border=0
        plot.h_symmetry=False
        plot.v_symmetry=False
        select_tool = _get_select_tool(plot)
        if select_tool:
            # Histogram selections only make sense along x.
            select_tool.dimensions = ['width']
        self.time_histogram = plot

    def update_filters(self, obj, attrname, old, new):
        """Translate plot selection geometries into field filters."""
        ##hack - only call this once per req/rep cycle
        from flask import request
        if hasattr(request, 'filters_updated'):
            return
        if not self.trip_time_source.data_geometry:
            self.filters.pop('trip_time_in_secs', None)
        else:
            geom = self.trip_time_source.data_geometry
            lxmin = min(geom['x0'], geom['x1'])
            lxmax = max(geom['x0'], geom['x1'])
            self.filters['trip_time_in_secs'] = [lxmin, lxmax]
        if not self.trip_distance_source.data_geometry:
            self.filters.pop('trip_distance', None)
        else:
            geom = self.trip_distance_source.data_geometry
            lxmin = min(geom['x0'], geom['x1'])
            lxmax = max(geom['x0'], geom['x1'])
            self.filters['trip_distance'] = [lxmin, lxmax]
        if not self.pickup_raw_plot_source.data_geometry:
            self.filters.pop('pickup_latitude', None)
            self.filters.pop('pickup_longitude', None)
        else:
            geom = self.pickup_raw_plot_source.data_geometry
            lxmin = min(geom['x0'], geom['x1'])
            lxmax = max(geom['x0'], geom['x1'])
            lymin = min(geom['y0'], geom['y1'])
            lymax = max(geom['y0'], geom['y1'])
            self.filters['pickup_latitude'] = [lymin, lymax]
            self.filters['pickup_longitude'] = [lxmin, lxmax]
        if not self.dropoff_raw_plot_source.data_geometry:
            self.filters.pop('dropoff_latitude', None)
            self.filters.pop('dropoff_longitude', None)
        else:
            geom = self.dropoff_raw_plot_source.data_geometry
            lxmin = min(geom['x0'], geom['x1'])
            lxmax = max(geom['x0'], geom['x1'])
            lymin = min(geom['y0'], geom['y1'])
            lymax = max(geom['y0'], geom['y1'])
            self.filters['dropoff_latitude'] = [lymin, lymax]
            self.filters['dropoff_longitude'] = [lxmin, lxmax]
        # NOTE(review): disabled (commented-out) blocks that mirrored the
        # above for the comparison-plot sources were removed here; they
        # duplicated the pickup/dropoff filter keys. See VCS history.
        self._dirty = True
        try:
            request.filters_updated = True
        except RuntimeError:
            pass
        self.filter()

    @classmethod
    def create(cls):
        """Construct the full app: four AR image plots, histograms, widgets."""
        gbounds = cls.gbounds
        xmin, xmax, ymin, ymax = gbounds
        app = cls()
        data = ARDataSource(
            data_url="/bokeh/taxidata/pickup/",
            data=dict(
                x=[0], y=[0], dw=[xmax-xmin], dh=[ymax-ymin],
                palette=["Greys-256"]
            )
        )
        app.pickup_ar_plot_source = data
        plot = image(source=data,
                     image="image",
                     x="x", y="y", dw="dw", dh="dh",
                     plot_width=400, plot_height=400,
                     palette='palette',
                     x_range=[xmin, xmax], y_range=[ymin, ymax],
                     tools="pan,wheel_zoom,box_zoom,select,reset",
                     title='pickup'
        )
        plot.title_text_font='12pt'
        app.pickup_plot = plot
        app.pickup_raw_plot_source = plot.select({'type' : ColumnDataSource})[0]
        data = ARDataSource(
            data_url="/bokeh/taxidatavsregular/pickup/",
            data=dict(
                x=[0], y=[0], dw=[xmax-xmin], dh=[ymax-ymin],
                palette=["Greys-256"]
            )
        )
        app.pickup_comparison_ar_plot_source = data
        plot = image(source=data,
                     image="image",
                     x="x", y="y", dw="dw", dh="dh",
                     plot_width=400, plot_height=400,
                     palette='palette',
                     x_range=[xmin, xmax], y_range=[ymin, ymax],
                     tools="pan,wheel_zoom,box_zoom,select,reset",
                     title='pickup comparison plot'
        )
        plot.title_text_font='12pt'
        app.pickup_comparison_plot = plot
        app.pickup_comparison_raw_plot_source = plot.select({'type' : ColumnDataSource})[0]
        data = ARDataSource(
            data_url="/bokeh/taxidatavsregular/dropoff/",
            data=dict(
                x=[0], y=[0], dw=[xmax-xmin], dh=[ymax-ymin],
                palette=["Greys-256"]
            )
        )
        app.dropoff_comparison_ar_plot_source = data
        plot = image(source=data,
                     image="image",
                     x="x", y="y", dw="dw", dh="dh",
                     plot_width=400, plot_height=400,
                     palette='palette',
                     x_range=[xmin, xmax], y_range=[ymin, ymax],
                     tools="pan,wheel_zoom,box_zoom,select,reset",
                     title='dropoff comparison plot'
        )
        plot.title_text_font='12pt'
        app.dropoff_comparison_plot = plot
        app.dropoff_comparison_raw_plot_source = plot.select({'type' : ColumnDataSource})[0]
        data = ARDataSource(
            data_url="/bokeh/taxidata/dropoff/",
            data=dict(
                x=[0], y=[0], dw=[xmax-xmin], dh=[ymax-ymin],
                palette=["Greys-256"]
            )
        )
        app.dropoff_ar_plot_source = data
        plot = image(source=data,
                     image="image",
                     plot_width=400, plot_height=400,
                     x="x", y="y", dw="dw", dh="dh",
                     palette='palette',
                     x_range=[xmin, xmax], y_range=[ymin, ymax],
                     tools="pan,wheel_zoom,box_zoom,reset,select,reset",
                     title='dropoff'
        )
        plot.title_text_font='12pt'
        app.dropoff_plot = plot
        app.dropoff_raw_plot_source = plot.select({'type' : ColumnDataSource})[0]
        app.make_trip_distance_histogram()
        app.make_trip_time_histogram()
        app.widgets = VBoxForm()
        app.day_of_week_selector = Select.create(
            options=["-----", 'Weekday', 'Friday/Saturday/Sunday',
                     'Saturday/Sunday'],
            name='Day Of Week'
        )
        app.date_slider = DateRangeSlider(
            value=(dt.datetime(2012, 1, 1), dt.datetime(2013, 1, 28)),
            bounds=(dt.datetime(2012, 12, 31), dt.datetime(2013, 1, 31)),
            step={'days' : 1},
            range=({'days' : 1},{'days':30}),
            name='period',
            title='period'
        )
        app.hour_selector = Select.create(
            options=["-----", '8am-12pm', '12pm-4pm', '4pm-8pm',
                     '8pm-12am', '12am-4am'],
            name='Hour of the Day'
        )
        title = Paragraph(text="NYC Taxi Cab Data", width=250, height=50)
        app.widgets.children=[title,
                              app.date_slider,
                              Paragraph(width=250, height=10),
                              app.hour_selector,
                              app.day_of_week_selector,
                              Paragraph(width=250, height=10),
                              app.distance_histogram,
                              Paragraph(text="", width=250, height=50),
                              app.time_histogram]
        app.images = VBox()
        app.regular = HBox()
        app.filtered = HBox()
        app.regular.children = [app.pickup_plot, app.dropoff_plot]
        app.filtered.children = [app.pickup_comparison_plot,
                                 app.dropoff_comparison_plot]
        app.images.children = [app.regular]
        app.children = [app.widgets, app.images]
        return app

    def set_images(self):
        # Show the comparison row only when a filter is in effect.
        if self.pickup_ar_plot_source.filter_url:
            self.images.children = [self.regular, self.filtered]
        else:
            self.images.children = [self.regular]

    def filter(self):
        """Compile self.filters into a server query and push it to sources."""
        st = time.time()
        query_dict = {}
        def selector(minval, maxval):
            # Range predicate bound to this filter's values.
            return lambda x : (x >= minval) & (x <= maxval)
        def in1d(data):
            # Membership predicate for categorical filters.
            return lambda x : np.in1d(x, data)
        for k,v in self.filters.items():
            if k in {'pickup_datetime', 'pickup_latitude',
                     'pickup_longitude', 'dropoff_latitude',
                     'dropoff_longitude', 'trip_distance',
                     'trip_time_in_secs', 'hour_of_day',
                     }:
                minval = min(v)
                maxval = max(v)
                query_dict[k] = [selector(minval, maxval)]
            if k in {'day_of_week'}:
                query_dict[k] = [in1d(v)]
        if len(query_dict) == 0:
            # No filters: clear filter URLs everywhere and show base images.
            self.pickup_ar_plot_source.filter_url = None
            self.dropoff_ar_plot_source.filter_url = None
            self.trip_time_ar_source.filter_url = None
            self.trip_distance_ar_source.filter_url = None
            self.pickup_comparison_ar_plot_source.filter_url = None
            self.dropoff_comparison_ar_plot_source.filter_url = None
            self.set_images()
            return
        print query_dict
        obj = ds.query(query_dict)
        self.pickup_ar_plot_source.filter_url = obj.data_url
        self.dropoff_ar_plot_source.filter_url = obj.data_url
        self.trip_time_ar_source.filter_url = obj.data_url
        self.trip_distance_ar_source.filter_url = obj.data_url
        self.pickup_comparison_ar_plot_source.filter_url = obj.data_url
        self.dropoff_comparison_ar_plot_source.filter_url = obj.data_url
        self.set_images()
        ed = time.time()
        print 'FILTERING', ed-st

    def date_slider_change(self, obj, attrname, old, new):
        """Turn the slider's date range into a pickup_datetime filter."""
        minval = min(new)
        maxval = max(new)
        # Slider values may arrive as ISO strings; normalize to int64 ns.
        if isinstance(minval, basestring):
            minval = np.datetime64(minval, 'ns').astype('int64')
        if isinstance(maxval, basestring):
            maxval = np.datetime64(maxval, 'ns').astype('int64')
        self.filters['pickup_datetime'] = [minval, maxval]
        self._dirty = True
        self.filter()

    def hour_change(self, obj, attrname, old, new):
        """Map the hour-selector label to an hour_of_day range filter."""
        if new == "8am-12pm":
            self.filters['hour_of_day'] = [8,12]
        elif new == "12pm-4pm":
            self.filters['hour_of_day'] = [12,16]
        elif new == "4pm-8pm":
            self.filters['hour_of_day'] = [16,20]
        elif new == "8pm-12am":
            self.filters['hour_of_day'] = [20,24]
        elif new == "12am-4am":
            self.filters['hour_of_day'] = [0,4]
        elif new == "4am-8am":
            self.filters['hour_of_day'] = [4,8]
        else:
            self.filters.pop('hour_of_day')
        self._dirty = True
        self.filter()

    def day_of_week_change(self, obj, attrname, old, new):
        """Map the day-of-week label to a day_of_week membership filter."""
        # Day names -> weekday indices (Monday=0 ... Sunday=6).
        mapping = dict(
            Monday=0, Tuesday=1, Wednesday=2, Thursday=3,
            Friday=4, Saturday=5, Sunday=6
        )
        if new == 'Weekday':
            self.filters['day_of_week'] = [0,1,2,3,4]
        elif new == 'Friday/Saturday/Sunday':
            self.filters['day_of_week'] = [4,5,6]
        elif new == 'Saturday/Sunday':
            self.filters['day_of_week'] = [5,6]
        else:
            self.filters.pop('day_of_week')
        self._dirty = True
        self.filter()

    def setup_events(self):
        """Wire widget and selection-geometry changes to filter updates."""
        if self.hour_selector:
            self.hour_selector.on_change('value', self, 'hour_change')
        if self.day_of_week_selector:
            self.day_of_week_selector.on_change('value', self,
                                                'day_of_week_change')
        if self.pickup_raw_plot_source:
            self.pickup_raw_plot_source.on_change('data_geometry', self,
                                                  'update_filters')
        if self.dropoff_raw_plot_source:
            self.dropoff_raw_plot_source.on_change('data_geometry', self,
                                                   'update_filters')
        if self.pickup_comparison_raw_plot_source:
            self.pickup_comparison_raw_plot_source.on_change(
                'data_geometry', self, 'update_filters')
        if self.dropoff_comparison_raw_plot_source:
            self.dropoff_comparison_raw_plot_source.on_change(
                'data_geometry', self, 'update_filters')
        if self.trip_distance_source:
            self.trip_distance_source.on_change('data_geometry', self,
                                                'update_filters')
        if self.trip_time_source:
            self.trip_time_source.on_change('data_geometry', self,
                                            'update_filters')
class RemoteDataSource(PlotObject):
    """Client-side handle for an array hosted by a remote data server.

    Operations are issued as HTTP requests against
    ``http://<host>:<port>/array/<varname>[/<op>]``.  The ``selected`` and
    ``data`` integer properties carry no payload; they are merely bumped so
    that on_change listeners fire after a server round-trip.
    """
    host = String("localhost")
    port = Int(10020)
    varname = String()
    computed_columns = List()
    metadata = Dict()
    # hack... we're just using this field right now to trigger events
    selected = Int(0)
    data = Int(0)

    def _url(self, func=None):
        # Compose the endpoint URL; `func` is an optional operation suffix.
        suffix = "/" + func if func is not None else ""
        return "http://%s:%s/array/%s%s" % (self.host, self.port,
                                            self.varname, suffix)

    def _is_ok(self, response):
        # Raise an HTTPError if the server reported a failure status.
        response.raise_for_status()

    def _trigger_events(self):
        # Bump the dummy counter so registered listeners are notified.
        self.selected = self.selected + 1

    def setselect(self, select, transform):
        """POST a 'setselect' operation; `select` is added to `transform`."""
        transform['selected'] = select
        requests.post(self._url("setselect"),
                      data=protocol.serialize_json(transform))
        self._trigger_events()

    def search(self, search):
        """POST a raw search payload to the server."""
        requests.post(self._url("search"), data=search)
        self._trigger_events()

    def select(self, select, transform):
        """POST a 'select' operation; `select` is added to `transform`."""
        transform['selected'] = select
        requests.post(self._url("select"),
                      data=protocol.serialize_json(transform))
        self._trigger_events()

    def deselect(self, deselect, transform):
        """POST a deselection; `deselect` is added to `transform`.

        NOTE(review): this posts to the 'selected' endpoint, unlike the
        'select'/'setselect' pattern above — confirm against the server API.
        """
        transform['selected'] = deselect
        requests.post(self._url("selected"),
                      data=protocol.serialize_json(transform))
        self._trigger_events()

    def pivot(self, transform):
        """POST a pivot request and return the decoded JSON result."""
        response = requests.post(self._url("pivot"),
                                 data=protocol.serialize_json(transform))
        self._is_ok(response)
        result = response.json()
        self._trigger_events()
        return result

    def fields(self):
        """Fetch the list of fields available on the remote array."""
        response = requests.get(self._url("fields"),
                                data=protocol.serialize_json({}))
        self._is_ok(response)
        result = response.json()
        self._trigger_events()
        return result

    def get_data(self, transform):
        """Fetch data for `transform`; stashes 'metadata' on the instance."""
        response = requests.get(self._url(),
                                data=protocol.serialize_json(transform))
        self._is_ok(response)
        result = response.json()
        self.metadata = result.pop('metadata', {})
        return result

    def set_computed_columns(self, computed_columns):
        """Register computed columns server-side and return its response."""
        response = requests.get(self._url("computed"),
                                data=protocol.serialize_json(computed_columns))
        self._is_ok(response)
        result = response.json()
        self.computed_columns = computed_columns
        self.data += 1  # notify listeners that the data changed
        return result
class AttrSpec(HasProps):
    """A container for assigning attributes to values and retrieving them as needed.

    A special function this provides is automatically handling cases where the
    provided iterator is too short compared to the distinct values provided.

    Once created as attr_spec, you can do attr_spec[data_label], where
    data_label must be a one dimensional tuple of values, representing the
    unique group in the data.
    """
    id = Any()
    data = Instance(ColumnDataSource)
    name = String(help='Name of the attribute the spec provides.')
    # Column(s) whose unique value combinations identify each group.
    columns = Either(ColumnLabel, List(ColumnLabel))
    # Attribute returned for unknown group labels (and before setup).
    default = Any(default=None)
    # Maps each group tuple to the attribute value assigned to it.
    attr_map = Dict(Any, Any)
    # Attribute values that are cycled over the groups, in order.
    iterable = List(Any, default=None)
    # Ordered list of the unique group tuples found in the data.
    items = List(Any)

    def __init__(self, columns=None, df=None, iterable=None, default=None,
                 **properties):
        """Create a spec from the grouping columns and attribute iterable.

        Args:
            columns: column label or list of labels identifying groups.
            df: optional DataFrame; wrapped into a ColumnDataSource.
            iterable: attribute values to cycle over the groups.
            default: attribute used for unknown groups; defaults to the
                first value of `iterable` when not given.
            **properties: forwarded to HasProps.
        """
        properties['columns'] = self._ensure_list(columns)
        if df is not None:
            properties['data'] = ColumnDataSource(df)
        if default is None and iterable is not None:
            # No explicit default: peek at the first attribute value,
            # consuming from a copy so the spec's own cycle is unaffected.
            default_iter = copy(iterable)
            properties['default'] = next(iter(default_iter))
        elif default is not None:
            properties['default'] = default
        if iterable is not None:
            properties['iterable'] = iterable
        super(AttrSpec, self).__init__(**properties)

    @staticmethod
    def _ensure_list(attr):
        """Normalize a scalar/tuple column spec into a list (or pass through)."""
        if isinstance(attr, str):
            return [attr]
        elif isinstance(attr, tuple):
            return list(attr)
        else:
            return attr

    @staticmethod
    def _ensure_tuple(attr):
        """Wrap a non-tuple group label into a 1-tuple."""
        if not isinstance(attr, tuple):
            return (attr, )
        else:
            return attr

    def _setup_default(self):
        """Set the default value if not already assigned."""
        self.default = next(self._setup_iterable())

    def _setup_iterable(self):
        """Default behavior is to copy and cycle the provided iterable."""
        return cycle(copy(self.iterable))

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item."""
        # FIX: `DataFrame.sort` was deprecated in pandas 0.17 and removed in
        # 0.20; prefer `sort_values`, falling back for very old pandas.
        if hasattr(df, 'sort_values'):
            df = df.sort_values(by=columns)
        else:
            df = df.sort(columns=columns)
        items = df[columns].drop_duplicates()
        self.items = [tuple(x) for x in items.to_records(index=False)]

    def _create_attr_map(self, df, columns):
        """Creates map between unique values and available attributes."""
        self._generate_items(df, columns)
        iterable = self._setup_iterable()
        iter_map = {}
        for item in self.items:
            item = self._ensure_tuple(item)
            iter_map[item] = next(iterable)
        return iter_map

    def set_columns(self, columns):
        """Assign grouping columns, or treat unknown names as the iterable."""
        columns = self._ensure_list(columns)
        if all([col in self.data.column_names for col in columns]):
            self.columns = columns
        else:
            # we have input values other than columns
            # assume this is now the iterable at this point
            self.iterable = columns
            self._setup_default()

    def setup(self, data=None, columns=None):
        """(Re)build the attribute map from the current data and columns."""
        if data is not None:
            self.data = data
        if columns is not None:
            self.set_columns(columns)
        if self.columns is not None and self.data is not None:
            self.attr_map = self._create_attr_map(self.data.to_df(), self.columns)

    def __getitem__(self, item):
        """Lookup the attribute to use for the given unique group label.

        Returns `default` when the spec is not configured or `item` is None;
        raises KeyError when a configured spec has no entry for `item`.
        """
        if not self.columns or not self.data or item is None:
            return self.default
        elif self._ensure_tuple(item) not in self.attr_map:
            # make sure we have attr map
            self.setup()
        return self.attr_map[self._ensure_tuple(item)]