Beispiel #1
0
class ColumnDataSource(DataSource):
    """Data source that keeps its columns in a name -> values mapping."""

    # Column name -> sequence or array holding that column's values.
    data = Dict()

    # Field/column name -> DataRange or FactorRange object. A range is
    # created automatically for any field that has no entry here.
    cont_ranges = Dict()
    discrete_ranges = Dict()

    def add(self, data, name=None):
        """Store ``data`` as a new column and return the column name used.

        When ``name`` is None a "Series <n>" name is generated: n starts at
        the current number of entries in ``self.data`` and is increased
        until the resulting name is not already a key of ``self.data``.
        """
        if name is None:
            index = len(self.data)
            name = "Series %d" % index
            while name in self.data:
                index += 1
                name = "Series %d" % index
        self.column_names.append(name)
        self.data[name] = data
        return name

    def remove(self, name):
        """Drop the column ``name``.

        A warning is issued (nothing is raised) when the name is missing
        from ``column_names`` or from ``data``.
        """
        try:
            self.column_names.remove(name)
            del self.data[name]
        except (ValueError, KeyError):
            message = "Unable to find column '%s' in datasource" % name
            warnings.warn(message)
Beispiel #2
0
class ObjectArrayDataSource(DataSource):
    """Data source whose ``data`` is a list of tuples of values."""

    # List of tuples of values
    data = List()

    # Maps field/column name to a DataRange or FactorRange object. If the
    # field is not in the dict, then a range is created automatically.
    cont_ranges = Dict()
    discrete_ranges = Dict()
Beispiel #3
0
class ColumnDataSource(DataSource):
    """Data source that keeps its columns in a name -> values mapping."""

    # Maps names of columns to sequences or arrays
    data = Dict()

    # Maps field/column name to a DataRange or FactorRange object. If the
    # field is not in the dict, then a range is created automatically.
    cont_ranges = Dict()
    discrete_ranges = Dict()
Beispiel #4
0
class PandasDataSource(DataSource):
    """ Represents serverside data.  This gets stored into the plot server's
    database, but it does not have any client side representation.  Instead,
    a PandasPlotSource needs to be created and pointed at it.
    """

    # Dict-typed property; the expected keys are not defined in this block.
    data = Dict()
Beispiel #5
0
class PivotTable(PlotObject):
    """Plot object holding the result of pivoting ``source``.

    ``data`` is refreshed from ``source.pivot(...)`` whenever ``rows``,
    ``columns``, ``values`` or ``filters`` change.
    """

    title = String("Pivot Table")
    description = String("")
    source = Instance(has_ref=True)
    data = Dict()
    fields = List()  # List[{name: String, dtype: String}]
    rows = List()
    columns = List()
    values = List()
    filters = List()
    manual_update = Bool(True)

    def setup_events(self):
        """Register the change callbacks and fill ``fields``/``data`` if empty."""
        # Every attribute that shapes the pivot triggers a data refresh.
        for watched in ('rows', 'columns', 'values', 'filters'):
            self.on_change(watched, self, 'get_data')

        if not self.fields:
            self.fields = self.source.fields()

        if not self.data:
            self.get_data()

    def get_data(self, obj=None, attrname=None, old=None, new=None):
        """Recompute ``data`` from the source.

        The parameters follow the change-callback signature and are not
        used; the pivot is always built from the current attribute values.
        """
        pivot_spec = {
            'rows': self.rows,
            'columns': self.columns,
            'values': self.values,
            'filters': self.filters,
        }
        self.data = self.source.pivot(pivot_spec)
Beispiel #6
0
class FixedTickFormatter(TickFormatter):
    """
    Tick formatter that allows custom axis tick labels on a bokeh chart.

    Extends TickFormatter. The CoffeeScript implementation below looks each
    tick up in ``labels`` and yields an empty string for ticks that have no
    entry.
    """
    # Integer tick value -> label text.
    labels = Dict(Int,
                  String,
                  help="""
            A mapping of integer ticks values to their labels.
            """)
    # CoffeeScript source of the model; it is passed to CoffeeScript(...)
    # below to form the implementation.
    COFFEESCRIPT = """
        import {_} from "underscore"
        import {Model} from "model"
        import * as p from "core/properties"
        export class FixedTickFormatter extends Model
          type: 'FixedTickFormatter'
          @define {
            labels: [ p.Any ]
          }
          doFormat: (ticks) ->
                labels = @labels
                return (labels[tick] ? "" for tick in ticks)
    """

    __implementation__ = CoffeeScript(COFFEESCRIPT)
Beispiel #7
0
class StyleableBox(BaseBox):
    '''
    Styleable box that provides element-level css_properties as a dictionary.
    '''
    # Implementation obtained via load_component from the CoffeeScript file
    # named here.
    __implementation__ = load_component('./styleable_box.coffee')
    # String-keyed mapping of CSS properties; defaults to None.
    css_properties = Dict(String, Any, default=None)
    # Defaults to 'vertical'.
    orientation = String(default='vertical')
Beispiel #8
0
class DatetimeAxis(LinearAxis):
    """LinearAxis subclass whose property defaults describe a date/time axis."""
    type = String("datetime_axis")
    axis_label = String("date")
    scale = String("time")
    # NOTE(review): the three layout values below are presumably consumed by
    # the client-side axis renderer; their units are not visible here.
    num_labels = Int(8)
    char_width = Int(10)
    fill_ratio = Float(0.3)
    # Maps a time-resolution name to a list of strftime-style format strings.
    formats = Dict({"days": ["%m/%d/%Y"]})
Beispiel #9
0
class StatsBox(BaseBox):
    """BaseBox subclass with a ``styles`` string and a ``display_items`` dict."""
    # Implementation obtained via load_component from the CoffeeScript file
    # named here.
    __implementation__ = load_component('./stats_box.coffee')
    # Both properties default to None.
    styles = String(default=None)
    display_items = Dict(String, Any, default=None)

    @validation.warning(EMPTY_LAYOUT)
    def _check_empty_layout(self):
        # Intentionally empty. NOTE(review): presumably overrides the
        # base-class check so that no EMPTY_LAYOUT warning is reported for
        # this box -- confirm against BaseBox.
        pass
Beispiel #10
0
class Legend(PlotObject):
    """Plot object whose serialized annotation spec is tagged as a legend."""

    plot = Instance(Plot, has_ref=True)
    annotationspec = Dict(has_ref=True)

    def vm_serialize(self):
        """Serialize through the parent class and force the spec type.

        Returns the parent's serialization result with
        ``result['annotationspec']['type']`` set to ``'legend'``.
        """
        serialized = super(Legend, self).vm_serialize()
        # The annotation spec must always carry its type.
        serialized['annotationspec']['type'] = 'legend'
        return serialized
Beispiel #11
0
    def test_Dict(self):
        """Dict needs type arguments and accepts only None and dict values."""
        # Constructing Dict without key/value types must raise.
        with self.assertRaises(TypeError):
            Dict()

        prop = Dict(String, List(Int))

        # (candidate value, whether is_valid should accept it), listed in
        # the order in which the checks are performed.
        expectations = [
            (None, True),
            (False, False),
            (True, False),
            (0, False),
            (1, False),
            (0.0, False),
            (1.0, False),
            (1.0 + 1.0j, False),
            ("", False),
            ((), False),
            ([], False),
            ({}, True),
            (Foo(), False),
        ]
        for candidate, accepted in expectations:
            verdict = prop.is_valid(candidate)
            if accepted:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)
Beispiel #12
0
class ColumnDataSource(DataSource):
    """Data source that keeps its columns in a name -> values mapping."""

    # Column name -> sequence or array holding that column's values.
    data = Dict()

    # Field/column name -> DataRange or FactorRange object. A range is
    # created automatically for any field that has no entry here.
    cont_ranges = Dict()
    discrete_ranges = Dict()

    def __init__(self, *args, **kw):
        """Accept the column data as a lone positional argument.

        When exactly one positional argument is given and no ``data``
        keyword is present, that argument becomes the ``data`` attribute.
        Positional arguments are otherwise not forwarded to the parent
        constructor; only the keyword arguments are.
        """
        if len(args) == 1:
            # An explicit data= keyword wins over the positional value.
            kw.setdefault("data", args[0])
        super(ColumnDataSource, self).__init__(**kw)

    def add(self, data, name=None):
        """Store ``data`` as a new column and return the column name used.

        When ``name`` is None a "Series <n>" name is generated: n starts at
        the current number of entries in ``self.data`` and is increased
        until the resulting name is not already a key of ``self.data``.
        """
        if name is None:
            index = len(self.data)
            name = "Series %d" % index
            while name in self.data:
                index += 1
                name = "Series %d" % index
        self.column_names.append(name)
        self.data[name] = data
        return name

    def remove(self, name):
        """Drop the column ``name``.

        A warning is issued (nothing is raised) when the name is missing
        from ``column_names`` or from ``data``.
        """
        try:
            self.column_names.remove(name)
            del self.data[name]
        except (ValueError, KeyError):
            message = "Unable to find column '%s' in datasource" % name
            warnings.warn(message)
Beispiel #13
0
class DataTable(PlotObject):
    """Table view over ``source``.

    ``tabledata`` is re-fetched from the source whenever one of the view
    parameters or one of the watched source attributes changes.
    """

    source = Instance(has_ref=True)
    sort = List()
    group = List()
    offset = Int(default=0)
    length = Int(default=100)
    maxlength = Int()
    totallength = Int()
    tabledata = Dict()
    filterselected = Bool(default=False)

    def setup_events(self):
        """Register the refresh callbacks and load the data if it is empty."""
        own_attrs = ('sort', 'group', 'length', 'offset', 'filterselected')
        for attr in own_attrs:
            self.on_change(attr, self, 'get_data')

        source_attrs = ('selected', 'data', 'computed_columns')
        for attr in source_attrs:
            self.source.on_change(attr, self, 'get_data')

        if not self.tabledata:
            self.get_data()

    def transform(self):
        """Return a new dict describing the current view parameters."""
        return {
            'sort': self.sort,
            'group': self.group,
            'offset': self.offset,
            'length': self.length,
            'filterselected': self.filterselected,
        }

    def setselect(self, select):
        """Forward ``select`` to ``source.setselect`` and refresh the table."""
        self.source.setselect(select, self.transform())
        self.get_data()

    def select(self, select):
        """Forward ``select`` to ``source.select`` and refresh the table."""
        self.source.select(select, self.transform())
        self.get_data()

    def deselect(self, deselect):
        """Forward ``deselect`` to ``source.deselect`` and refresh the table."""
        self.source.deselect(deselect, self.transform())
        self.get_data()

    def get_data(self, obj=None, attrname=None, old=None, new=None):
        """Fetch the table contents from the source.

        The parameters follow the change-callback signature and are not
        used. 'maxlength' and 'totallength' are popped from the fetched
        mapping into their own properties; the rest becomes ``tabledata``.
        """
        payload = self.source.get_data(self.transform())
        self.maxlength = payload.pop('maxlength')
        self.totallength = payload.pop('totallength')
        self.tabledata = payload
Beispiel #14
0
class IPythonRemoteData(PlotObject):
    """Client for a remote array reachable over HTTP.

    All requests go to ``http://<host>:<port>/array/<varname>`` or to a
    sub-path of it.
    """

    host = String("localhost")
    port = Int(10020)
    varname = String()
    computed_columns = List()
    metadata = Dict()

    #hack... we're just using this field right now to trigger events
    selected = Int(0)
    data = Int(0)

    def _url(self, action=None):
        """Return the array URL, optionally extended with ``/<action>``."""
        base = "http://%s:%s/array/%s" % (self.host, self.port, self.varname)
        if action is None:
            return base
        return "%s/%s" % (base, action)

    def _post_selection(self, action, selection, transform):
        """POST ``transform`` plus the selection to the ``action`` endpoint."""
        # Work on a copy so the caller's transform dict is left untouched.
        payload = dict(transform)
        payload['selected'] = selection
        requests.post(self._url(action),
                      data=protocol.serialize_json(payload))
        # Bump the counter so that change listeners are triggered.
        self.selected = self.selected + 1

    def setselect(self, select, transform):
        """Send ``select`` with ``transform`` to the 'setselect' endpoint."""
        self._post_selection('setselect', select, transform)

    def search(self, search):
        """POST ``search`` as-is to the 'search' endpoint."""
        requests.post(self._url('search'), data=search)
        self.selected = self.selected + 1

    def select(self, select, transform):
        """Send ``select`` with ``transform`` to the 'select' endpoint."""
        self._post_selection('select', select, transform)

    def deselect(self, deselect, transform):
        """Send ``deselect`` with ``transform`` to the 'deselect' endpoint."""
        self._post_selection('deselect', deselect, transform)

    def get_data(self, transform):
        """Fetch the array data for ``transform``.

        The 'metadata' entry of the decoded JSON response (``{}`` when
        absent) is stored in ``self.metadata``; the remainder of the
        response is returned.
        """
        response = requests.get(self._url(),
                                data=protocol.serialize_json(transform))
        data = response.json()
        self.metadata = data.pop('metadata', {})
        return data

    def set_computed_columns(self, computed_columns):
        """Send ``computed_columns`` to the 'computed' endpoint.

        Stores the columns on this object, bumps the ``data`` counter so
        that change listeners are triggered, and returns the decoded JSON
        response.
        """
        response = requests.get(
            self._url('computed'),
            data=protocol.serialize_json(computed_columns))
        data = response.json()
        self.computed_columns = computed_columns
        self.data += 1
        return data
Beispiel #15
0
class PandasPivotTable(PlotObject):
    """Table view over ``source`` with per-column value formatting.

    ``tabledata`` is re-fetched from the source whenever one of the view
    parameters or one of the watched source attributes changes.
    """

    source = Instance(has_ref=True)
    sort = List()
    group = List()
    offset = Int(default=0)
    length = Int(default=100)
    maxlength = Int()
    totallength = Int()
    # Column name -> number of decimals used when formatting floats.
    precision = Dict()
    tabledata = Dict()
    filterselected = Bool(default=False)

    def setup_events(self):
        """Register the refresh callbacks and load the data if it is empty."""
        self.on_change('sort', self, 'get_data')
        self.on_change('group', self, 'get_data')
        self.on_change('length', self, 'get_data')
        self.on_change('offset', self, 'get_data')
        self.on_change('precision', self, 'get_data')
        self.on_change('filterselected', self, 'get_data')
        self.source.on_change('selected', self, 'get_data')
        self.source.on_change('data', self, 'get_data')
        self.source.on_change('computed_columns', self, 'get_data')
        if not self.tabledata:
            self.get_data()

    def format_data(self, jsondata):
        """Format the column values of ``jsondata`` in place.

        Columns flagged with 'date' in the source metadata are treated as
        millisecond timestamps and rendered as local
        "YYYY-MM-DD HH:MM:SS" strings. In every other column, float values
        are rendered with ``precision[colname]`` decimals (2 when unset).
        The '_selected' and '_counts' columns are left untouched.
        """
        precision = self.precision
        # items() works on Python 2 and 3; only the column lists are
        # mutated, never the mapping itself.
        for colname, data in jsondata.items():
            if colname in ('_selected', '_counts'):
                continue
            isdate = bool(self.source.metadata.get(colname, {}).get('date'))
            for idx, val in enumerate(data):
                if isdate:
                    timeobj = time.localtime(val / 1000.0)
                    data[idx] = time.strftime("%Y-%m-%d %H:%M:%S", timeobj)
                elif isinstance(val, float):
                    # elif: a float timestamp that was just turned into a
                    # date string must not be float-formatted again.
                    data[idx] = "%%.%df" % precision.get(colname, 2) % val

    def transform(self):
        """Return a new dict describing the current view parameters."""
        return dict(
            sort=self.sort,
            group=self.group,
            offset=self.offset,
            length=self.length,
            filterselected=self.filterselected,
        )

    def setselect(self, select):
        """Forward ``select`` to ``source.setselect`` and refresh the table."""
        self.source.setselect(select, self.transform())
        self.get_data()

    def select(self, select):
        """Forward ``select`` to ``source.select`` and refresh the table."""
        self.source.select(select, self.transform())
        self.get_data()

    def deselect(self, deselect):
        """Forward ``deselect`` to ``source.deselect`` and refresh the table."""
        self.source.deselect(deselect, self.transform())
        self.get_data()

    def get_data(self, obj=None, attrname=None, old=None, new=None):
        """Fetch, format and store the table contents.

        The parameters follow the change-callback signature and are not
        used. 'maxlength' and 'totallength' are popped from the fetched
        mapping into their own properties, the 'data' entry is formatted in
        place, and the mapping becomes ``tabledata``.
        """
        data = self.source.get_data(self.transform())
        self.maxlength = data.pop('maxlength')
        self.totallength = data.pop('totallength')
        self.format_data(data['data'])
        self.tabledata = data
Beispiel #16
0
class HasIntDictProp(PlotObject):
    """PlotObject with a single property ``foo``: a Dict with Int keys."""
    foo = Dict(Int, Any)

    def __init__(self, **kwargs):
        # Forwards the keyword arguments unchanged to the parent constructor.
        super(HasIntDictProp, self).__init__(**kwargs)
Beispiel #17
0
class HasStringDictProp(PlotObject):
    """PlotObject with a single property ``foo``: a Dict with String keys."""
    foo = Dict(String, Any)

    def __init__(self, **kwargs):
        # Forwards the keyword arguments unchanged to the parent constructor.
        super(HasStringDictProp, self).__init__(**kwargs)
Beispiel #18
0
 class V(self.pObjectClass):
     # Each property holds U instances at a different level of nesting:
     # directly, in a list, in a tuple, as dict values, and in lists that
     # are dict values.
     u1 = Instance(U)
     u2 = List(Instance(U))
     u3 = Tuple(Int, Instance(U))
     u4 = Dict(String, Instance(U))
     u5 = Dict(String, List(Instance(U)))
Beispiel #19
0
class AttrSpec(HasProps):
    """A container for assigning attributes to values and retrieving them as needed.

    A special function this provides is automatically handling cases where the provided
    iterator is too short compared to the distinct values provided.

    Once created as attr_spec, you can do attr_spec[data_label], where data_label must
    be a one dimensional tuple of values, representing the unique group in the data.

    See the :meth:`AttrSpec.setup` method for the primary way to provide an existing
    AttrSpec with data and column values and update all derived property values.
    """

    id = Any()
    data = Instance(ColumnDataSource)
    name = String(help='Name of the attribute the spec provides.')

    columns = Either(ColumnLabel,
                     List(ColumnLabel),
                     help="""
        The label or list of column labels that correspond to the columns that will be
        used to find all distinct values (single column) or combination of values (
        multiple columns) to then assign a unique attribute to. If not enough unique
        attribute values are found, then the attribute values will be cycled.
        """)

    default = Any(default=None,
                  help="""
        The default value for the attribute, which is used if no column is assigned to
        the attribute for plotting. If the default value is not provided, the first
        value in the `iterable` property is used.
        """)

    attr_map = Dict(Any,
                    Any,
                    help="""
        Created by the attribute specification when `iterable` and `data` are
        available. The `attr_map` will include a mapping between the distinct value(s)
        found in `columns` and the attribute value that has been assigned.
        """)

    iterable = List(Any,
                    default=None,
                    help="""
        The iterable of attribute values to assign to the distinct values found in
        `columns` of `data`.
        """)

    items = List(Any,
                 default=None,
                 help="""
        The attribute specification calculates this list of distinct values that are
        found in `columns` of `data`.
        """)

    sort = Bool(default=True,
                help="""
        A boolean flag to tell the attribute specification to sort `items`, when it is
        calculated. This affects which value of `iterable` is assigned to each distinct
        value in `items`.
        """)

    ascending = Bool(default=True,
                     help="""
        A boolean flag to tell the attribute specification how to sort `items` if the
        `sort` property is set to `True`. The default setting for `ascending` is `True`.
        """)

    def __init__(self,
                 columns=None,
                 df=None,
                 iterable=None,
                 default=None,
                 items=None,
                 **properties):
        """Create a lazy evaluated attribute specification.

        Args:
            columns: a list of column labels
            df(:class:`~pandas.DataFrame`): the data source for the attribute spec.
            iterable: an iterable of distinct attribute values
            default: a value to use as the default attribute when no columns are passed
            items: the distinct values in columns. If items is provided as input,
                then the values provided are used instead of being calculated. This can
                be used to force a specific order for assignment.
            **properties: other properties to pass to parent :class:`HasProps`
        """
        properties['columns'] = self._ensure_list(columns)

        if df is not None:
            properties['data'] = ColumnDataSource(df)

        if default is None and iterable is not None:
            # No explicit default: use the first value of the iterable. The
            # value is read from a copy of the iterable.
            default_iter = copy(iterable)
            properties['default'] = next(iter(default_iter))
        elif default is not None:
            properties['default'] = default

        if iterable is not None:
            properties['iterable'] = iterable

        if items is not None:
            properties['items'] = items

        super(AttrSpec, self).__init__(**properties)

    @staticmethod
    def _ensure_list(attr):
        """Wrap a string in a list and convert a tuple to a list.

        Any other value (an existing list, None, ...) is returned unchanged.
        """
        if isinstance(attr, str):
            return [attr]
        elif isinstance(attr, tuple):
            return list(attr)
        else:
            return attr

    @staticmethod
    def _ensure_tuple(attr):
        """Return tuple with the provided value. Returns the value if a tuple."""
        if not isinstance(attr, tuple):
            return (attr, )
        else:
            return attr

    def _setup_default(self):
        """Stores the first value of iterable into `default` property."""
        self.default = next(self._setup_iterable())

    def _setup_iterable(self):
        """Default behavior is to copy and cycle the provided iterable."""
        return cycle(copy(self.iterable))

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item.

        Nothing is computed when `items` already holds values. Otherwise the
        distinct rows of `df[columns]` are stored in `items`, after sorting
        `df` by `columns` when the `sort` property is set.
        """
        if self.items is None or len(self.items) == 0:
            if self.sort:
                # DataFrame.sort no longer exists in current pandas; prefer
                # sort_values and fall back to sort only on releases that
                # predate it.
                if hasattr(df, 'sort_values'):
                    df = df.sort_values(by=columns, ascending=self.ascending)
                else:
                    df = df.sort(columns=columns, ascending=self.ascending)
            items = df[columns].drop_duplicates()
            self.items = [tuple(x) for x in items.to_records(index=False)]

    def _create_attr_map(self, df, columns):
        """Creates map between unique values and available attributes."""

        self._generate_items(df, columns)
        iterable = self._setup_iterable()

        iter_map = {}
        for item in self.items:
            item = self._ensure_tuple(item)
            # The iterable cycles, so it never runs out of attribute values.
            iter_map[item] = next(iterable)
        return iter_map

    def set_columns(self, columns):
        """Set columns property and update derived properties as needed."""
        columns = self._ensure_list(columns)
        if all(col in self.data.column_names for col in columns):
            self.columns = columns
        else:
            # we have input values other than columns
            # assume this is now the iterable at this point
            self.iterable = columns
            self._setup_default()

    def setup(self, data=None, columns=None):
        """Set the data and update derived properties as needed.

        `columns` is only applied when `data` is provided as well.
        """
        if data is not None:
            self.data = data

            if columns is not None:
                self.set_columns(columns)

        if self.columns is not None and self.data is not None:
            self.attr_map = self._create_attr_map(self.data.to_df(),
                                                  self.columns)

    def __getitem__(self, item):
        """Lookup the attribute to use for the given unique group label.

        Returns `default` when no columns or data are set or when `item` is
        None. Raises KeyError if the label is still unknown after the
        attribute map has been rebuilt.
        """
        if not self.columns or not self.data or item is None:
            return self.default

        key = self._ensure_tuple(item)
        if key not in self.attr_map:
            # make sure we have attr map
            self.setup()

        return self.attr_map[key]
Beispiel #20
0
class Namespace(PlotObject):
    """Tracks the pandas DataFrames found in the IPython user namespace.

    ``datasets`` maps each DataFrame's variable name to the list of its
    column labels. The frames themselves can be pickled to, and restored
    from, the file ``<name>.pickle``.
    """

    datasets = Dict()
    name = String()

    def __str__(self):
        return "Namespace(name=%r, datasets=%s)" % (
            self.name, sorted(self.datasets))

    __repr__ = __str__

    def _namespace(self):
        """Return the user namespace dict of the running IPython shell."""
        return get_ipython().user_ns

    def statsmodels(self):
        """Populate namespace with statsmodels' datasets. """
        from statsmodels import datasets

        ns = self._namespace()

        # Iterate over a snapshot so the module dict may change while the
        # datasets are being loaded.
        for name, dataset in list(datasets.__dict__.items()):
            if hasattr(dataset, "load_pandas"):
                ns[name] = dataset.load_pandas().data

    def populate(self, to_disk=True):
        """Scan global namespace for pandas' datasets.

        Names starting with an underscore are ignored. Nothing is stored or
        written when the set of datasets is unchanged. Otherwise
        ``datasets`` is updated and this object is stored in the session;
        when ``to_disk`` is true the frames are also pickled to
        ``self.filename``.
        """
        ns = self._namespace()
        datasets = {}

        for name, dataset in list(ns.items()):
            if isinstance(dataset, DataFrame) and not name.startswith("_"):
                datasets[name] = list(dataset.columns)

        if datasets == self.datasets:
            return

        self.datasets = datasets
        self.session.store_obj(self)

        if not to_disk:
            return

        to_write = dict((name, ns[name]) for name in datasets)

        # Pickle data is binary (protocol=-1 selects the highest, binary
        # protocol), so the file must be opened in binary mode.
        with open(self.filename, "wb") as f:
            pickle.dump(to_write, f, protocol=-1)

    def clean(self):
        """Remove all pandas' datasets from global namespace. """
        ns = self._namespace()

        # Iterate over a copy because entries are deleted from ns.
        for name, dataset in dict(ns).items():
            if isinstance(dataset, DataFrame) and not name.startswith("_"):
                del ns[name]

    @property
    def filename(self):
        """Name of the pickle file backing this namespace."""
        return self.name + ".pickle"

    def load(self):
        """Restore the pickled datasets into the user namespace.

        Does nothing when the pickle file does not exist.
        """
        ns = self._namespace()
        if os.path.exists(self.filename):
            fname = self.filename
            # SECURITY: unpickling executes code embedded in the file; only
            # load files written by populate().
            with open(fname, "rb") as f:
                data = pickle.load(f)
            for k, v in data.items():
                ns[k] = v
Beispiel #21
0
 class HasDictDefault(Model):
     # Dict property (String keys, Int values) whose default is a non-empty
     # dict: {'hello': 42}.
     value = Dict(String, Int, default=dict(hello=42))
Beispiel #22
0
 class V(PlotObject):
     # Each property holds U instances at a different level of nesting:
     # directly, in a list, in a tuple, as dict values, and in lists that
     # are dict values.
     u1 = Instance(U)
     u2 = List(Instance(U))
     u3 = Tuple(Int, Instance(U))
     u4 = Dict(String, Instance(U))
     u5 = Dict(String, List(Instance(U)))
Beispiel #23
0
class TaxiApp(HBox):
    # HBox subclass that declares the plots, data sources and widgets of the
    # taxi pickup/dropoff views.
    extra_generated_classes = [["TaxiApp", "TaxiApp", "HBox"]]
    extra_scripts = ['/bokehjs/static/app/src/js/ar_data_source.js']
    extra_js=['window.ar_data_source.main();']
    # Unpacked elsewhere in this class as (xmin, xmax, ymin, ymax).
    gbounds = ds.gbounds

    # Each view has a plot, the ColumnDataSource selected from that plot,
    # and the ARDataSource that feeds it.
    pickup_plot = Instance(Plot)
    pickup_raw_plot_source = Instance(ColumnDataSource)
    pickup_ar_plot_source = Instance(ARDataSource)

    dropoff_plot = Instance(Plot)
    dropoff_raw_plot_source = Instance(ColumnDataSource)
    dropoff_ar_plot_source = Instance(ARDataSource)

    pickup_comparison_plot = Instance(Plot)
    pickup_comparison_raw_plot_source = Instance(ColumnDataSource)
    pickup_comparison_ar_plot_source = Instance(ARDataSource)

    dropoff_comparison_plot = Instance(Plot)
    dropoff_comparison_raw_plot_source = Instance(ColumnDataSource)
    dropoff_comparison_ar_plot_source = Instance(ARDataSource)

    # Sources behind the two histograms.
    trip_distance_source = Instance(ColumnDataSource)
    trip_time_source = Instance(ColumnDataSource)
    trip_distance_ar_source = Instance(HistogramDataSource)
    trip_time_ar_source = Instance(HistogramDataSource)

    widgets = Instance(VBox)
    date_slider = Instance(DateRangeSlider)
    # Filter name -> [min, max] range, maintained by update_filters.
    filters = Dict(String, Any)
    # Histogram bin edges: 25 evenly spaced values each.
    trip_time_bins = np.linspace(0, 3600, 25)
    trip_distance_bins = np.linspace(0.01, 20, 25)
    distance_histogram = Instance(Plot)
    time_histogram = Instance(Plot)
    hour_selector = Instance(Select)
    day_of_week_selector = Instance(Select)

    regular = Instance(HBox)
    filtered = Instance(HBox)
    images = Instance(VBox)

    def make_trip_distance_histogram(self):
        """Build the trip-distance histogram and store it on the app.

        Sets ``distance_histogram`` to the new plot,
        ``trip_distance_ar_source`` to the HistogramDataSource feeding it,
        and ``trip_distance_source`` to the first ColumnDataSource selected
        from the plot.
        """
        bins = self.trip_distance_bins
        # Midpoints of adjacent bin edges. Computed with numpy because
        # pd.rolling_mean is no longer available in current pandas.
        edges = np.asarray(bins, dtype=float)
        centers = (edges[:-1] + edges[1:]) / 2.0
        figure(title="trip distance in miles",
               title_text_font='12pt',
               plot_width=300,
               plot_height=200,
               x_range=[bins[0], bins[-1]],
               y_range=[0, 1],
               tools="pan,wheel_zoom,box_zoom,select,reset"
        )
        source = HistogramDataSource(
            data_url="/bokeh/taxidata/distancehist/",
        )
        hold()
        # Bars are 70% as wide as the mean spacing between bin centers.
        plot = rect("centers", "y", np.mean(np.diff(centers)) * 0.7, "counts",
                    source=source)
        self.trip_distance_source = plot.select({'type' : ColumnDataSource})[0]
        self.trip_distance_ar_source = source
        plot.min_border=0
        plot.h_symmetry=False
        plot.v_symmetry=False
        select_tool = _get_select_tool(plot)
        if select_tool:
            # Restrict the selection tool to the 'width' dimension.
            select_tool.dimensions = ['width']
        self.distance_histogram = plot

    def make_trip_time_histogram(self):
        """Build the trip-time histogram and store it on the app.

        Sets ``time_histogram`` to the new plot, ``trip_time_ar_source`` to
        the HistogramDataSource feeding it, and ``trip_time_source`` to the
        first ColumnDataSource selected from the plot.
        """
        bins = self.trip_time_bins
        # Midpoints of adjacent bin edges. Computed with numpy because
        # pd.rolling_mean is no longer available in current pandas.
        edges = np.asarray(bins, dtype=float)
        centers = (edges[:-1] + edges[1:]) / 2.0
        figure(title="trip time in secs",
               title_text_font='12pt',
               plot_width=300,
               plot_height=200,
               x_range=[bins[0], bins[-1]],
               y_range=[0, 1],
               tools="pan,wheel_zoom,box_zoom,select,reset"
        )
        source = HistogramDataSource(
            data_url="/bokeh/taxidata/timehist/",
        )
        hold()
        # Bars are 70% as wide as the mean spacing between bin centers.
        plot = rect("centers", "y", np.mean(np.diff(centers)) * 0.7, "counts",
                    source=source)
        self.trip_time_source = plot.select({'type' : ColumnDataSource})[0]
        self.trip_time_ar_source = source
        plot.min_border=0
        plot.h_symmetry=False
        plot.v_symmetry=False
        select_tool = _get_select_tool(plot)
        if select_tool:
            # Restrict the selection tool to the 'width' dimension.
            select_tool.dimensions = ['width']
        self.time_histogram = plot

    def update_filters(self, obj, attrname, old, new):
        """Rebuild ``self.filters`` from the sources' selection geometries.

        The parameters follow the change-callback signature and are not
        used. For every watched source, a non-empty ``data_geometry`` is
        turned into [min, max] ranges stored under the matching filter
        keys; an empty geometry removes those keys. Afterwards the object
        is marked dirty and ``self.filter()`` is called.
        """
        ##hack - only call this once per req/rep cycle
        from flask import request
        if hasattr(request, 'filters_updated'):
            return

        def span(geom, first_key, second_key):
            # [min, max] of the two geometry coordinates.
            first, second = geom[first_key], geom[second_key]
            return [min(first, second), max(first, second)]

        # Histogram selections only constrain the x extent.
        range_sources = (
            (self.trip_time_source, 'trip_time_in_secs'),
            (self.trip_distance_source, 'trip_distance'),
        )
        for source, key in range_sources:
            geom = source.data_geometry
            if geom:
                self.filters[key] = span(geom, 'x0', 'x1')
            else:
                self.filters.pop(key, None)

        # Selections on the pickup/dropoff plots constrain both axes:
        # x is stored as longitude, y as latitude.
        box_sources = (
            (self.pickup_raw_plot_source,
             'pickup_latitude', 'pickup_longitude'),
            (self.dropoff_raw_plot_source,
             'dropoff_latitude', 'dropoff_longitude'),
        )
        for source, lat_key, lon_key in box_sources:
            geom = source.data_geometry
            if geom:
                # Compute both ranges before storing either of them.
                lon_range = span(geom, 'x0', 'x1')
                lat_range = span(geom, 'y0', 'y1')
                self.filters[lat_key] = lat_range
                self.filters[lon_key] = lon_range
            else:
                self.filters.pop(lat_key, None)
                self.filters.pop(lon_key, None)

        # The comparison plots' selections are not folded into the filters;
        # only the four sources above are consulted.

        self._dirty = True
        try:
            request.filters_updated = True
        except RuntimeError:
            # Deliberately ignored. NOTE(review): presumably raised when no
            # flask request is active -- confirm.
            pass
        self.filter()

    @classmethod
    def create(cls):
        """Build a new app instance with its plots, widgets and layout.

        Reads ``cls.gbounds`` as ``(xmin, xmax, ymin, ymax)`` and uses it both
        as the extent of every image glyph and as the initial plot ranges.
        Four image plots are created, each backed by its own ``ARDataSource``:
        pickup, pickup comparison, dropoff comparison and dropoff.  For each
        one the AR source, the plot and the plot's ``ColumnDataSource`` are
        stored on the app.

        The widget column holds a title, the date slider, the two selectors
        and the two histograms.  Only the "regular" row of plots is placed in
        the image column here; the "filtered" row is created but not added.

        Returns the populated app instance.
        """
        xmin, xmax, ymin, ymax = cls.gbounds
        app = cls()

        def make_image_plot(data_url, title):
            """Create one image plot; return (ar_source, plot, raw_source)."""
            source = ARDataSource(
                data_url=data_url,
                data=dict(
                    x=[0], y=[0], dw=[xmax-xmin], dh=[ymax-ymin],
                    palette=["Greys-256"]
                )
            )
            plot = image(source=source,
                         image="image",
                         x="x",
                         y="y",
                         dw="dw",
                         dh="dh",
                         plot_width=400,
                         plot_height=400,
                         palette='palette',
                         x_range=[xmin, xmax], y_range=[ymin, ymax],
                         # identical tool set for all four plots
                         tools="pan,wheel_zoom,box_zoom,select,reset",
                         title=title
            )
            # NOTE(review): '12pt' looks like a font size rather than a font
            # name -- confirm whether title_text_font_size was intended.
            plot.title_text_font = '12pt'
            raw_source = plot.select({'type' : ColumnDataSource})[0]
            return source, plot, raw_source

        # The plots are created in the same order as before: pickup, pickup
        # comparison, dropoff comparison, dropoff.
        (app.pickup_ar_plot_source,
         app.pickup_plot,
         app.pickup_raw_plot_source) = make_image_plot(
            "/bokeh/taxidata/pickup/", 'pickup')

        (app.pickup_comparison_ar_plot_source,
         app.pickup_comparison_plot,
         app.pickup_comparison_raw_plot_source) = make_image_plot(
            "/bokeh/taxidatavsregular/pickup/", 'pickup comparison plot')

        (app.dropoff_comparison_ar_plot_source,
         app.dropoff_comparison_plot,
         app.dropoff_comparison_raw_plot_source) = make_image_plot(
            "/bokeh/taxidatavsregular/dropoff/", 'dropoff comparison plot')

        (app.dropoff_ar_plot_source,
         app.dropoff_plot,
         app.dropoff_raw_plot_source) = make_image_plot(
            "/bokeh/taxidata/dropoff/", 'dropoff')

        # presumably these set app.distance_histogram / app.time_histogram,
        # which are placed in the widget column below -- verify in the helpers
        app.make_trip_distance_histogram()
        app.make_trip_time_histogram()

        app.widgets = VBoxForm()
        app.day_of_week_selector = Select.create(
            options=["-----", 'Weekday', 'Friday/Saturday/Sunday', 'Saturday/Sunday'],
            name='Day Of Week'
        )
        # NOTE(review): the initial value starts at 2012-01-01, which is
        # before the lower bound (2012-12-31) and spans more than the 30-day
        # maximum range -- confirm whether 2013-01-01 was intended.
        app.date_slider = DateRangeSlider(value=(dt.datetime(2012, 1, 1),
                                                 dt.datetime(2013, 1, 28)),
                                          bounds=(dt.datetime(2012, 12, 31),
                                                  dt.datetime(2013, 1, 31)),
                                          step={'days' : 1},
                                          range=({'days' : 1},{'days':30}),
                                          name='period',
                                          title='period'
        )
        # "-----" means "no filter"; the remaining labels are exactly the
        # ones hour_change() knows how to translate into an hour range.
        app.hour_selector = Select.create(options=["-----",
                                                   '8am-12pm',
                                                   '12pm-4pm',
                                                   '4pm-8pm',
                                                   '8pm-12am',
                                                   '12am-4am',
                                                   '4am-8am'],
                                          name='Hour of the Day'
        )
        title = Paragraph(text="NYC Taxi Cab Data", width=250, height=50)
        # The text-less Paragraph objects only act as vertical spacers.
        app.widgets.children=[title, app.date_slider,
                              Paragraph(width=250, height=10),
                              app.hour_selector,
                              app.day_of_week_selector,
                              Paragraph(width=250, height=10),
                              app.distance_histogram,
                              Paragraph(text="",
                                        width=250, height=50),
                              app.time_histogram]
        app.images = VBox()
        app.regular = HBox()
        app.filtered = HBox()
        app.regular.children = [app.pickup_plot, app.dropoff_plot]
        app.filtered.children = [app.pickup_comparison_plot,
                                  app.dropoff_comparison_plot]
        # Only the unfiltered row is shown initially.
        app.images.children = [app.regular]
        app.children = [app.widgets, app.images]
        return app

    def set_images(self):
        """Choose which rows of plots are shown in ``self.images``.

        The regular row is always shown; the filtered (comparison) row is
        appended only while the pickup AR source has a truthy ``filter_url``.
        """
        rows = [self.regular]
        if self.pickup_ar_plot_source.filter_url:
            rows.append(self.filtered)
        self.images.children = rows

    def filter(self):
        """Turn ``self.filters`` into a query and point the AR sources at it.

        Range-style filters are reduced to ``[min, max]`` and become an
        inclusive range predicate; ``day_of_week`` becomes a membership
        predicate.  Filter keys outside those two groups are ignored.

        When no predicate results, ``filter_url`` is cleared (set to ``None``)
        on all six AR sources.  Otherwise the predicates are passed to
        ``ds.query`` and the returned object's ``data_url`` is stored as
        ``filter_url`` on all six AR sources.  ``set_images`` is called in
        both cases.  The query and the elapsed time are printed only in the
        non-empty case.
        """
        started = time.time()

        def selector(minval, maxval):
            # inclusive range test; & (not `and`) so it works elementwise
            return lambda x : (x >= minval) & (x <= maxval)

        def in1d(data):
            return lambda x : np.in1d(x, data)

        def point_sources_at(url):
            # every AR-backed source has to look at the same filtered data
            for source in (self.pickup_ar_plot_source,
                           self.dropoff_ar_plot_source,
                           self.trip_time_ar_source,
                           self.trip_distance_ar_source,
                           self.pickup_comparison_ar_plot_source,
                           self.dropoff_comparison_ar_plot_source):
                source.filter_url = url

        range_fields = {'pickup_datetime', 'pickup_latitude',
                        'pickup_longitude',
                        'dropoff_latitude', 'dropoff_longitude',
                        'trip_distance', 'trip_time_in_secs',
                        'hour_of_day'}

        query_dict = {}
        for k, v in self.filters.items():
            if k in range_fields:
                query_dict[k] = [selector(min(v), max(v))]
            elif k == 'day_of_week':
                query_dict[k] = [in1d(v)]

        if not query_dict:
            point_sources_at(None)
            self.set_images()
            return

        # Single-argument print() and %-formatting produce the same output
        # under the Python 2 print statement and the Python 3 print function.
        print(query_dict)
        obj = ds.query(query_dict)
        point_sources_at(obj.data_url)
        self.set_images()
        print('FILTERING %s' % (time.time() - started))



    def date_slider_change(self, obj, attrname, old, new):
        """Change handler for the date slider: update the pickup date filter.

        ``new`` is the slider value; its minimum and maximum become the
        ``pickup_datetime`` filter.  A bound that arrives as a string is
        converted to an int64 count of nanoseconds via ``np.datetime64``;
        any other value is stored unchanged.  ``obj``, ``attrname`` and
        ``old`` are not used.  The filter is re-applied afterwards.
        """
        try:
            string_types = basestring  # Python 2: matches str and unicode
        except NameError:
            string_types = str  # Python 3 has no basestring

        def as_ns(value):
            if isinstance(value, string_types):
                return np.datetime64(value, 'ns').astype('int64')
            return value

        self.filters['pickup_datetime'] = [as_ns(min(new)), as_ns(max(new))]
        self._dirty = True
        self.filter()

    def hour_change(self, obj, attrname, old, new):
        """Change handler for the hour selector: update the hour filter.

        ``new`` is the selected label.  A known label sets the
        ``hour_of_day`` filter to the matching ``[start, end]`` pair; any
        other label (such as the "-----" placeholder) removes the filter.
        ``obj``, ``attrname`` and ``old`` are not used.  The filter is
        re-applied afterwards.
        """
        # built per call so each filter entry gets its own fresh list
        hour_ranges = {
            "8am-12pm": [8, 12],
            "12pm-4pm": [12, 16],
            "4pm-8pm": [16, 20],
            "8pm-12am": [20, 24],
            "12am-4am": [0, 4],
            "4am-8am": [4, 8],
        }
        hours = hour_ranges.get(new)
        if hours is None:
            # default avoids a KeyError when no hour filter is active yet
            self.filters.pop('hour_of_day', None)
        else:
            self.filters['hour_of_day'] = hours
        self._dirty = True
        self.filter()

    def day_of_week_change(self, obj, attrname, old, new):
        """Change handler for the day selector: update the day-of-week filter.

        ``new`` is the selected label.  A known label sets the
        ``day_of_week`` filter to a list of day numbers, where Monday is 0
        and Sunday is 6; any other label (such as the "-----" placeholder)
        removes the filter.  ``obj``, ``attrname`` and ``old`` are not used.
        The filter is re-applied afterwards.
        """
        # built per call so each filter entry gets its own fresh list
        day_groups = {
            'Weekday': [0, 1, 2, 3, 4],
            'Friday/Saturday/Sunday': [4, 5, 6],
            'Saturday/Sunday': [5, 6],
        }
        days = day_groups.get(new)
        if days is None:
            # default avoids a KeyError when no day filter is active yet
            self.filters.pop('day_of_week', None)
        else:
            self.filters['day_of_week'] = days
        self._dirty = True
        self.filter()
    def setup_events(self):
        """Register this object's change handlers on its widgets and sources.

        Each entry below names an attribute of ``self``, the property to
        watch on it, and the handler method to call.  Attributes whose value
        is falsy are skipped.
        """
        bindings = (
            ('hour_selector', 'value', 'hour_change'),
            ('day_of_week_selector', 'value', 'day_of_week_change'),
            ('pickup_raw_plot_source', 'data_geometry', 'update_filters'),
            ('dropoff_raw_plot_source', 'data_geometry', 'update_filters'),
            ('pickup_comparison_raw_plot_source', 'data_geometry',
             'update_filters'),
            ('dropoff_comparison_raw_plot_source', 'data_geometry',
             'update_filters'),
            ('trip_distance_source', 'data_geometry', 'update_filters'),
            ('trip_time_source', 'data_geometry', 'update_filters'),
        )
        for attr, watched, handler in bindings:
            target = getattr(self, attr)
            if target:
                target.on_change(watched, self, handler)
Beispiel #24
0
class RemoteDataSource(PlotObject):
    """Client-side handle for an array served over HTTP.

    Every method talks to ``http://<host>:<port>/array/<varname>[/<func>]``
    using ``requests``, with JSON produced by ``protocol.serialize_json``.
    """
    host = String("localhost")
    port = Int(10020)
    varname = String()
    computed_columns = List()
    metadata = Dict()

    #hack... we're just using this field right now to trigger events
    selected = Int(0)
    data = Int(0)

    # from IPython.kernel import KernelManager
    # kernel = KernelManager(connection_file="kernel-1.json")
    # kernel.load_connection_file()
    # client = kernel.client()
    # client.start_channels()
    # client.shell_channel.execute("x = 1", store_history=False)

    def _url(self, func=None):
        """Return the endpoint URL, with ``/<func>`` appended when given."""
        suffix = "" if func is None else "/" + func
        return "http://%s:%s/array/%s%s" % \
            (self.host, self.port, self.varname, suffix)

    def _is_ok(self, response):
        """Raise (via ``raise_for_status``) if the response is an HTTP error."""
        response.raise_for_status()

    def _trigger_events(self):
        """Bump ``selected`` so that listeners on that property fire."""
        self.selected = self.selected + 1

    def _post_selection(self, func, selected, transform):
        """POST ``transform`` plus a ``selected`` entry to endpoint ``func``.

        A shallow copy of ``transform`` is sent, so the caller's dict is
        left untouched.  The response is not inspected.
        """
        payload = dict(transform)
        payload['selected'] = selected
        requests.post(self._url(func), data=protocol.serialize_json(payload))
        self._trigger_events()

    def setselect(self, select, transform):
        """POST ``select`` and ``transform`` to the ``setselect`` endpoint."""
        self._post_selection("setselect", select, transform)

    def search(self, search):
        """POST ``search`` as the request body to the ``search`` endpoint."""
        requests.post(self._url("search"), data=search)
        self._trigger_events()

    def select(self, select, transform):
        """POST ``select`` and ``transform`` to the ``select`` endpoint."""
        self._post_selection("select", select, transform)

    def deselect(self, deselect, transform):
        """POST ``deselect`` and ``transform`` to the ``selected`` endpoint."""
        # NOTE(review): this posts to "selected", not "deselect" -- confirm
        # against the server's routes that this is the intended endpoint.
        self._post_selection("selected", deselect, transform)

    def pivot(self, transform):
        """POST ``transform`` to ``pivot`` and return the decoded JSON reply.

        Raises through ``_is_ok`` when the server answers with an HTTP error.
        """
        payload = protocol.serialize_json(transform)
        response = requests.post(self._url("pivot"), data=payload)
        self._is_ok(response)
        data = response.json()
        self._trigger_events()
        return data

    def fields(self):
        """GET the ``fields`` endpoint and return the decoded JSON reply.

        Raises through ``_is_ok`` when the server answers with an HTTP error.
        """
        payload = protocol.serialize_json({})
        response = requests.get(self._url("fields"), data=payload)
        self._is_ok(response)
        data = response.json()
        self._trigger_events()
        return data

    def get_data(self, transform):
        """GET the array with ``transform`` applied; return the decoded reply.

        The reply's ``metadata`` entry (``{}`` when absent) is removed from
        the returned dict and stored on ``self.metadata``.  Raises through
        ``_is_ok`` when the server answers with an HTTP error.
        """
        payload = protocol.serialize_json(transform)
        response = requests.get(self._url(), data=payload)
        self._is_ok(response)
        data = response.json()
        self.metadata = data.pop('metadata', {})
        return data

    def set_computed_columns(self, computed_columns):
        """Send ``computed_columns`` to ``computed`` and record them locally.

        Returns the decoded JSON reply.  Raises through ``_is_ok`` when the
        server answers with an HTTP error; in that case neither
        ``computed_columns`` nor ``data`` is modified.
        """
        payload = protocol.serialize_json(computed_columns)
        response = requests.get(self._url("computed"), data=payload)
        self._is_ok(response)
        data = response.json()
        self.computed_columns = computed_columns
        # `data` is only a counter; bumping it fires its change listeners
        self.data += 1
        return data
Beispiel #25
0
class AttrSpec(HasProps):
    """A container for assigning attributes to values and retrieving them as needed.

    A special function this provides is automatically handling cases where the provided
    iterator is too short compared to the distinct values provided.

    Once created as attr_spec, you can do attr_spec[data_label], where data_label must
    be a one dimensional tuple of values, representing the unique group in the data.
    """

    id = Any()
    data = Instance(ColumnDataSource)
    name = String(help='Name of the attribute the spec provides.')
    columns = Either(ColumnLabel, List(ColumnLabel))

    default = Any(default=None)
    attr_map = Dict(Any, Any)
    iterable = List(Any, default=None)
    items = List(Any)

    def __init__(self,
                 columns=None,
                 df=None,
                 iterable=None,
                 default=None,
                 **properties):
        """Create the spec.

        Args:
            columns: column label, or list/tuple of labels, whose distinct
                values are mapped to attributes.
            df: optional data; when given it is wrapped in a
                ``ColumnDataSource`` and stored as ``data``.
            iterable: optional attribute values to hand out.
            default: attribute returned when no lookup is possible.  When
                omitted and ``iterable`` is given, the first value of
                ``iterable`` is used.
            **properties: forwarded to the ``HasProps`` constructor.
        """

        properties['columns'] = self._ensure_list(columns)

        if df is not None:
            properties['data'] = ColumnDataSource(df)

        if default is None and iterable is not None:
            # peek at a copy so the caller's iterable is left as it was
            default_iter = copy(iterable)
            properties['default'] = next(iter(default_iter))
        elif default is not None:
            properties['default'] = default

        if iterable is not None:
            properties['iterable'] = iterable

        super(AttrSpec, self).__init__(**properties)

    @staticmethod
    def _ensure_list(attr):
        """Wrap a string in a list, convert a tuple to a list, else pass through."""
        if isinstance(attr, str):
            return [attr]
        elif isinstance(attr, tuple):
            return list(attr)
        else:
            return attr

    @staticmethod
    def _ensure_tuple(attr):
        """Return ``attr`` unchanged if it is a tuple, else as a 1-tuple."""
        if not isinstance(attr, tuple):
            return (attr, )
        else:
            return attr

    def _setup_default(self):
        """Set ``default`` to the first value of the current iterable."""
        self.default = next(self._setup_iterable())

    def _setup_iterable(self):
        """Default behavior is to copy and cycle the provided iterable."""
        return cycle(copy(self.iterable))

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item."""
        # DataFrame.sort(columns=...) is the legacy pandas spelling and is
        # missing from current releases; prefer sort_values when it exists
        # and fall back to sort() for old pandas versions.
        if hasattr(df, 'sort_values'):
            df = df.sort_values(by=columns)
        else:
            df = df.sort(columns=columns)
        items = df[columns].drop_duplicates()
        self.items = [tuple(x) for x in items.to_records(index=False)]

    def _create_attr_map(self, df, columns):
        """Creates map between unique values and available attributes."""

        self._generate_items(df, columns)
        iterable = self._setup_iterable()

        # items are visited in sorted order, so the n-th distinct item gets
        # the n-th attribute (wrapping around because the iterable cycles)
        iter_map = {}
        for item in self.items:
            item = self._ensure_tuple(item)
            iter_map[item] = next(iterable)
        return iter_map

    def set_columns(self, columns):
        """Store ``columns``, or treat them as attribute values.

        If every entry is a column name of ``self.data`` the entries become
        the spec's columns.  Otherwise they are taken to be the iterable of
        attribute values, and the default is reset to its first value.
        """
        columns = self._ensure_list(columns)
        if all(col in self.data.column_names for col in columns):
            self.columns = columns
        else:
            # we have input values other than columns
            # assume this is now the iterable at this point
            self.iterable = columns
            self._setup_default()

    def setup(self, data=None, columns=None):
        """Optionally replace data/columns, then rebuild the attribute map.

        ``columns`` is only applied when ``data`` is also given.  The map is
        rebuilt only when both columns and data are set on the spec.
        """
        if data is not None:
            self.data = data

            if columns is not None:
                self.set_columns(columns)

        if self.columns is not None and self.data is not None:
            self.attr_map = self._create_attr_map(self.data.to_df(),
                                                  self.columns)

    def __getitem__(self, item):
        """Lookup the attribute to use for the given unique group label."""

        if not self.columns or not self.data or item is None:
            return self.default

        key = self._ensure_tuple(item)
        if key not in self.attr_map:
            # make sure we have attr map
            self.setup()

        # still raises KeyError if the label is absent after the rebuild
        return self.attr_map[key]