Example #1
class VertexAttribute(traitlets.HasTraits):
    name = traitlets.CUnicode("attr")
    id = traitlets.CInt(-1)
    data = traittypes.Array(None, allow_none=True)
    each = traitlets.CInt(-1)
    opengl_type = traitlets.CInt(GL.GL_FLOAT)
    divisor = traitlets.CInt(0)

    @traitlets.default("id")
    def _id_default(self):
        return GL.glGenBuffers(1)

    @contextmanager
    def bind(self, program=None):
        loc = -1
        if program is not None:
            loc = GL.glGetAttribLocation(program.program, self.name)
            if loc >= 0:
                GL.glVertexAttribDivisor(loc, self.divisor)
                _ = GL.glEnableVertexAttribArray(loc)
        _ = GL.glBindBuffer(GL.GL_ARRAY_BUFFER, self.id)
        if loc >= 0:
            GL.glVertexAttribPointer(loc, self.each, self.opengl_type, False, 0, None)
        yield
        if loc >= 0:
            GL.glDisableVertexAttribArray(loc)
        GL.glBindBuffer(GL.GL_ARRAY_BUFFER, 0)

    @traitlets.observe("data")
    def _set_data(self, change):
        arr = change["new"]
        self.each = arr.shape[-1]
        self.opengl_type = np_to_gl[arr.dtype.name]
        with self.bind():
            GL.glBufferData(GL.GL_ARRAY_BUFFER, arr.nbytes, arr, GL.GL_STATIC_DRAW)
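A usage sketch for the class above, assuming a live OpenGL context and the module's GL/np imports; shader_program is a hypothetical compiled program object. Assigning data triggers the upload via the data observer:

attr = VertexAttribute(name="model_vertex")
attr.data = np.ones((12, 4), dtype="f4")  # observer sets each/opengl_type and uploads
with attr.bind(program=shader_program):   # binds the buffer and attribute pointer
    GL.glDrawArrays(GL.GL_TRIANGLES, 0, attr.data.shape[0])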
Example #2
class OrthographicRayBlaster(RayBlaster):
    center = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    forward = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    up = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    east = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    width = traitlets.CFloat(1.0)
    height = traitlets.CFloat(1.0)
    nx = traitlets.CInt(512)
    ny = traitlets.CInt(512)

    @traitlets.default("east")
    def _default_east(self):
        return np.cross(self.forward, self.up)

    def __init__(self, *args, **kwargs):
        super(OrthographicRayBlaster, self).__init__(*args, **kwargs)

        # here origin is not the center, but the bottom left
        self._directions = np.zeros((self.nx, self.ny, 3), dtype="f4")
        self._directions[:] = self.forward[None, None, :]
        self.directions = self._directions.view().reshape(
            (self.nx * self.ny, 3))

        self._origins = np.zeros((self.nx, self.ny, 3), dtype="f4")
        offset_x, offset_y = np.mgrid[
            -self.width / 2:self.width / 2:self.nx * 1j,
            -self.height / 2:self.height / 2:self.ny * 1j]
        self._origins[:] = (self.center + offset_x[..., None] * self.east +
                            offset_y[..., None] * self.up)
        self.origins = self._origins.view().reshape((self.nx * self.ny, 3))
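The origin grid above can be reproduced with plain numpy; a self-contained sketch with hypothetical camera vectors:

import numpy as np

nx, ny, width, height = 4, 3, 1.0, 1.0
center = np.array([0.0, 0.0, 0.0], dtype="f4")
east = np.array([1.0, 0.0, 0.0], dtype="f4")
up = np.array([0.0, 1.0, 0.0], dtype="f4")

# nx x ny offsets spanning [-width/2, width/2] x [-height/2, height/2]
offset_x, offset_y = np.mgrid[-width / 2:width / 2:nx * 1j,
                              -height / 2:height / 2:ny * 1j]
origins = center + offset_x[..., None] * east + offset_y[..., None] * up
print(origins.reshape(nx * ny, 3))  # one ray origin per pixel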
Example #3
class VertexArray(traitlets.HasTraits):
    name = traitlets.CUnicode("vertex")
    id = traitlets.CInt(-1)
    indices = traittypes.Array(None, allow_none=True)
    index_id = traitlets.CInt(-1)
    attributes = traitlets.List(trait=traitlets.Instance(VertexAttribute))
    each = traitlets.CInt(-1)

    @traitlets.default("id")
    def _id_default(self):
        return GL.glGenVertexArrays(1)

    @contextmanager
    def bind(self, program=None):
        GL.glBindVertexArray(self.id)
        if self.index_id != -1:
            GL.glBindBuffer(GL.GL_ELEMENT_ARRAY_BUFFER, self.index_id)
        # We only bind the attributes if we have a program too
        if program is None:
            attrs = []
        else:
            attrs = self.attributes
        with ExitStack() as stack:
            for attribute in attrs:
                stack.enter_context(attribute.bind(program))
            yield
        if self.index_id != -1:
            GL.glBindBuffer(GL.GL_ELEMENT_ARRAY_BUFFER, 0)
        GL.glBindVertexArray(0)

    @traitlets.observe("indices")
    def _set_indices(self, change):
        arr = change["new"]
        self.index_id = GL.glGenBuffers(1)
        GL.glBindBuffer(GL.GL_ELEMENT_ARRAY_BUFFER, self.index_id)
        GL.glBufferData(GL.GL_ELEMENT_ARRAY_BUFFER, arr.nbytes, arr, GL.GL_STATIC_DRAW)
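The ExitStack pattern in bind generalizes to any list of context managers; a minimal self-contained sketch with dummy binders:

from contextlib import ExitStack, contextmanager

@contextmanager
def binder(name):
    print(f"bind {name}")
    yield
    print(f"unbind {name}")

# All binders are entered up front and unwound in reverse order on exit.
with ExitStack() as stack:
    for b in (binder("a"), binder("b")):
        stack.enter_context(b)
    print("draw")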
Example #4
    class VaexBinMapper(traitlets.HasTraits):
        max_bins = traitlets.CInt(255)
        random_state = traitlets.Any()
        subsample = traitlets.CInt(int(1e5))

        def fit(self, dataframe_wrapper):
            dataframe = dataframe_wrapper.ds
            self.bin_thresholds_ = []
            for feature in features:  # `features` is captured from the enclosing scope
                X = dataframe[feature].values.reshape((-1, 1)).astype(np.float32)
                midpoints = pygbm.binning.find_binning_thresholds(
                    X, self.max_bins, subsample=self.subsample,
                    random_state=self.random_state)[0]
                self.bin_thresholds_.append(midpoints)
            return self

        def transform(self, dataframe_wrapper):
            dataframe = dataframe_wrapper.ds
            N = len(dataframe)
            M = len(features)
            # fortran order so 1 column is contiguous in memory
            binned = np.zeros((N, M), dtype=np.uint8, order='F')
            for m, feature in enumerate(features):
                X = dataframe[feature].values.reshape((-1, 1)).astype(np.float32)
                binned1 = pygbm.binning.map_to_bins(X, binning_thresholds=self.bin_thresholds_)
                assert binned1.shape[1] == 1
                binned[:,m] = binned1[:,0]
            return binned

        def fit_transform(self, X):
            self.fit(X)
            return self.transform(X)
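Conceptually, mapping values to bins against sorted thresholds is a searchsorted; a numpy-only approximation of what map_to_bins does for a single feature (a sketch, not pygbm's implementation):

import numpy as np

thresholds = np.array([0.5, 1.5, 2.5], dtype=np.float32)  # sorted bin midpoints
X = np.array([0.1, 0.7, 2.0, 3.0], dtype=np.float32)
binned = np.searchsorted(thresholds, X).astype(np.uint8)
print(binned)  # [0 1 2 3]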
Example #5
class VizHistogramState(VizBaseState):
    x_expression = traitlets.Unicode()
    x_slice = traitlets.CInt(None, allow_none=True)
    type = traitlets.CaselessStrEnum(['count', 'min', 'max', 'mean'], default_value='count')
    aux = traitlets.Unicode(None, allow_none=True)
    groupby = traitlets.Unicode(None, allow_none=True)
    groupby_normalize = traitlets.Bool(False, allow_none=True)
    x_min = traitlets.CFloat(None, allow_none=True)
    x_max = traitlets.CFloat(None, allow_none=True)
    grid = traitlets.Any().tag(**serialize_numpy)
    grid_sliced = traitlets.Any().tag(**serialize_numpy)
    x_centers = traitlets.Any().tag(**serialize_numpy)
    x_shape = traitlets.CInt(None, allow_none=True)
    #centers = traitlets.Any()
    
    def __init__(self, ds, **kwargs):
        super(VizHistogramState, self).__init__(ds, **kwargs)
        self.observe(lambda x: self.signal_slice.emit(self), ['x_slice'])
        self.observe(lambda x: self.calculate_limits(), ['x_expression', 'type', 'aux'])
        # no need for recompute
        # self.observe(lambda x: self.calculate_grid(), ['groupby', 'shape', 'groupby_normalize'])
        
        self.observe(lambda x: self._update_grid(), ['x_min', 'x_max', 'shape'])
        if self.x_min is None and self.x_max is None:
            self.calculate_limits()
        else:
            self._calculate_centers()

    def bin_parameters(self):
        yield self.x_expression, self.x_shape or self.shape, (self.x_min, self.x_max), self.x_slice

    def state_get(self):
        state = {}
        for name in self.trait_names():
            serializer = self.trait_metadata(name, 'serialize', ident)
            value = serializer(getattr(self, name))
            state[name] = value
        return state

    def state_set(self, state):
        for name in self.trait_names():
            if name in state:
                deserializer = self.trait_metadata(name, 'deserialize', ident)
                value = deserializer(state[name])
                setattr(self, name, value)
                                                                      
    def calculate_limits(self):
        self._calculate_limits('x', 'x_expression')
        self.signal_regrid.emit(None) # TODO this is also called in the ctor, unnec work
    
    def limits_changed(self, change):
        self.signal_regrid.emit(None) # TODO this is also called in the ctor, unnec work

    @vaex.jupyter.debounced()
    def _update_grid(self):
        self._calculate_centers()
        self.signal_regrid.emit(None)
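A minimal, self-contained sketch of the observe wiring used above, with plain traitlets and none of the vaex machinery:

import traitlets

class Limits(traitlets.HasTraits):
    x_min = traitlets.CFloat(None, allow_none=True)
    x_max = traitlets.CFloat(None, allow_none=True)

limits = Limits()
limits.observe(lambda change: print(f"{change['name']} -> {change['new']}"),
               ['x_min', 'x_max'])
limits.x_min = 0.5  # prints: x_min -> 0.5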
Example #6
class VolumeRendererThree(widgets.DOMWidget):
    """Widget class representing a volume (rendering) using three.js"""
    _view_name = Unicode('VolumeRendererThreeView').tag(sync=True)
    _view_module = Unicode('ipyvolume').tag(sync=True)
    _model_name = Unicode('VolumeRendererThreeModel').tag(sync=True)
    _model_module = Unicode('ipyvolume').tag(sync=True)

    data = Array(default_value=None,
                 allow_none=True).tag(sync=True,
                                      **array_cube_png_serialization)
    data_min = traitlets.CFloat().tag(sync=True)
    data_max = traitlets.CFloat().tag(sync=True)
    tf = traitlets.Instance(TransferFunction, allow_none=True).tag(
        sync=True, **ipywidgets.widget_serialization)
    angle1 = traitlets.Float(0.1).tag(sync=True)
    angle2 = traitlets.Float(0.2).tag(sync=True)

    scatters = traitlets.List(traitlets.Instance(Scatter), [],
                              allow_none=False).tag(
                                  sync=True, **ipywidgets.widget_serialization)

    animation = traitlets.Float(1000.0).tag(sync=True)

    ambient_coefficient = traitlets.Float(0.5).tag(sync=True)
    diffuse_coefficient = traitlets.Float(0.8).tag(sync=True)
    specular_coefficient = traitlets.Float(0.5).tag(sync=True)
    specular_exponent = traitlets.Float(5).tag(sync=True)
    stereo = traitlets.Bool(False).tag(sync=True)
    fullscreen = traitlets.Bool(False).tag(sync=True)

    width = traitlets.CInt(500).tag(sync=True)
    height = traitlets.CInt(400).tag(sync=True)
    downscale = traitlets.CInt(1).tag(sync=True)
    show = traitlets.Unicode("Volume").tag(sync=True)  # for debugging

    xlim = traitlets.List(traitlets.CFloat,
                          default_value=[0, 1],
                          minlen=2,
                          maxlen=2).tag(sync=True)
    ylim = traitlets.List(traitlets.CFloat,
                          default_value=[0, 1],
                          minlen=2,
                          maxlen=2).tag(sync=True)
    zlim = traitlets.List(traitlets.CFloat,
                          default_value=[0, 1],
                          minlen=2,
                          maxlen=2).tag(sync=True)

    style = traitlets.Dict(default_value=default_style).tag(sync=True)
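The minlen/maxlen constraints on the limit traits validate on assignment; a small standalone sketch:

import traitlets

class Box(traitlets.HasTraits):
    xlim = traitlets.List(traitlets.CFloat(), default_value=[0, 1],
                          minlen=2, maxlen=2)

box = Box()
box.xlim = [0.0, 2.5]  # valid: exactly two floats
try:
    box.xlim = [0.0]   # too short: raises TraitError
except traitlets.TraitError as err:
    print(err)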
Example #7
class PixelizeEffect(CameraEffect):
    pixel_size = traitlets.CInt(4)

    @traitlets.default("func")
    def _default_func(self):
        # pixel_size is passed in as an argument so numba compiles the
        # kernel once instead of re-JITting for every new pixel_size.
        @numba.jit(nopython=True)
        def func(input_arr, output_arr, pixel_size):
            for i in range(input_arr.shape[0]):
                for j in range(input_arr.shape[1]):
                    for k in range(input_arr.shape[2]):
                        # sample the top-left pixel of each block; writing to
                        # 2 - k also reverses the channel order (BGR <-> RGB)
                        output_arr[i, j, 2 - k] = input_arr[(i // pixel_size) * pixel_size,
                                                            (j // pixel_size) * pixel_size,
                                                            k]

        return func

    def call_func(self, input_arr, output_arr):
        return self.func(input_arr, output_arr, self.pixel_size)

    def handle_key(self, symbol, modifiers):
        if symbol == pyglet.window.key.A:
            self.pixel_size += 1
        elif symbol == pyglet.window.key.S:
            self.pixel_size = max(self.pixel_size - 1, 1)
        else:
            return False
        print(f"Pixel size is now {self.pixel_size}")
        return True
Example #8
class HexViewer(ipywidgets.DOMWidget):
    _model_name = traitlets.Unicode("HexViewerModel").tag(sync=True)
    _model_module = traitlets.Unicode("jupyterlab_kaitai").tag(sync=True)
    _model_module_version = traitlets.Unicode(EXTENSION_VERSION).tag(sync=True)
    _view_name = traitlets.Unicode("HexViewerView").tag(sync=True)
    _view_module = traitlets.Unicode("jupyterlab_kaitai").tag(sync=True)
    _view_module_version = traitlets.Unicode(EXTENSION_VERSION).tag(sync=True)

    buffer = traitlets.Bytes(allow_none=False).tag(sync=True,
                                                   **bytes_serialization)
    selectionStart = traitlets.CInt().tag(sync=True)
    selectionEnd = traitlets.CInt().tag(sync=True)

    def __init__(self, buffer=None, *args, **kwargs):
        kwargs["buffer"] = buffer if buffer is not None else b""
        super().__init__(*args, **kwargs)
Example #9
class CurveCollection(CurveData):
    name = "curve_collection"
    data = Array()
    n_vertices = traitlets.CInt()

    def add_curve(self, curve):

        # curve is an ndarray of points
        assert curve.shape[0] > 1  # a curve needs at least 2 points
        assert curve.shape[1] == 3  # each point must have exactly 3 components

        # double up the indices to use GL_LINES
        index_range = np.arange(0, curve.shape[0])
        line_indices = np.column_stack([index_range,
                                        index_range]).ravel()[1:-1]
        data = curve[line_indices]
        data = np.column_stack([data, np.ones((data.shape[0], ))])

        if self.data.shape:
            self.data = np.concatenate([self.data, data])
        else:
            self.data = data

    def add_data(self):

        self.n_vertices = self.data.shape[0]

        self.vertex_array.attributes.append(
            VertexAttribute(name="model_vertex", data=self.data.astype("f4")))

        self.vertex_array.indices = np.arange(0,
                                              self.n_vertices).astype("uint32")
        self.size = self.n_vertices
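The index-doubling trick in add_curve turns a point sequence into GL_LINES segment pairs; a numpy-only sketch:

import numpy as np

index_range = np.arange(4)
# [0 0 1 1 2 2 3 3] minus first/last -> [0 1 1 2 2 3]: segments (0,1), (1,2), (2,3)
line_indices = np.column_stack([index_range, index_range]).ravel()[1:-1]
print(line_indices)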
Example #10
class ParticlePositions(SceneData):
    name = "particle_positions"
    data_source = traitlets.Instance(YTDataContainer)
    particle_type = traitlets.Unicode("all")
    radius_field = traitlets.Unicode(None, allow_none=True)
    color_field = traitlets.Unicode(None, allow_none=True)
    position_field = traitlets.Unicode("particle_position")
    size = traitlets.CInt(-1)

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        model_vertex = np.array([[-1, 1], [-1, -1], [1, 1], [1, -1]],
                                order="F",
                                dtype="f4")
        va = VertexArray(name="particle_positions")
        va.attributes.append(
            VertexAttribute(name="model_vertex", data=model_vertex, divisor=0))
        for attr in ("position_field", "radius_field", "color_field"):
            if getattr(self, attr) is None:
                continue
            field = self.data_source[self.particle_type, getattr(self, attr)]
            if field.units.dimensions is length:
                field.convert_to_units("unitary")
            field = field.astype("f4").d
            if field.ndim == 1:
                field.shape = (field.size, 1)
            else:
                self.size = field.shape[0]  # for positions
            print(f"Setting {attr} to a field of shape {field.shape}")
            va.attributes.append(
                VertexAttribute(name=attr, data=field, divisor=1))
        print(f"Size is now: {self.size}")
        return va
Example #11
class Image(bqplot.marks.Mark):
    src = bqplot.marks.Unicode().tag(sync=True)
    x = bqplot.marks.Float().tag(sync=True)
    y = bqplot.marks.Float().tag(sync=True)
    view_count = traitlets.CInt(0).tag(sync=True)
    width = bqplot.marks.Float().tag(sync=True)
    height = bqplot.marks.Float().tag(sync=True)
    preserve_aspect_ratio = bqplot.marks.Unicode('').tag(sync=True)
    _model_module = bqplot.marks.Unicode('vaex.ext.bqplot').tag(sync=True)
    _view_module = bqplot.marks.Unicode('vaex.ext.bqplot').tag(sync=True)

    _view_name = bqplot.marks.Unicode('Image').tag(sync=True)
    _model_name = bqplot.marks.Unicode('ImageModel').tag(sync=True)
    scales_metadata = bqplot.marks.Dict({
        'x': {
            'orientation': 'horizontal',
            'dimension': 'x'
        },
        'y': {
            'orientation': 'vertical',
            'dimension': 'y'
        },
    }).tag(sync=True)

    def __init__(self, **kwargs):
        self._drag_end_handlers = bqplot.marks.CallbackDispatcher()
        super(Image, self).__init__(**kwargs)
Example #12
class CurveData(SceneData):
    name = "curve_data"
    data = Array()
    n_vertices = traitlets.CInt()

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        va = VertexArray(name="vertices")
        return va

    def add_data(self, curve):

        # curve is an ndarray of points
        assert curve.shape[0] > 1  # a curve needs at least 2 points
        assert curve.shape[1] == 3  # each point must have exactly 3 components

        # add the singleton 4th dim
        data = np.ones((curve.shape[0], 4))
        data[:, 0:3] = curve

        self.n_vertices = curve.shape[0]
        self.data = data

        self.vertex_array.attributes.append(
            VertexAttribute(name="model_vertex", data=data.astype("f4")))

        self.vertex_array.indices = np.arange(0,
                                              self.n_vertices).astype("uint32")
        self.size = self.n_vertices
Example #13
class AnimationWidget(widgets.DOMWidget):
    """
    A widget that periodic increment a value

    :param value: A float between 0 and 1
    :param run: boolean with the state of the timer. True, the timer is enable

    Produces the following signal. A sampling rate, the value is interpolated
    with the equation val = 1/Period * t
    1- ^  ____
       | /
       |/
    0- |----->
          |
        period 
    """
    _view_name = traitlets.Unicode('AnimationView').tag(sync=True)
    _model_name = traitlets.Unicode('AnimationModel').tag(sync=True)
    _view_module = traitlets.Unicode('animation-widget').tag(sync=True)
    _model_module = traitlets.Unicode('animation-widget').tag(sync=True)
    # Signal value
    value = traitlets.CFloat(0.0).tag(sync=True)
    # Boolean timer is active
    run = traitlets.CBool(False).tag(sync=True)
    # Signal period (in ms)
    period = traitlets.CFloat(5000).tag(sync=True)
    # Number of samples in period
    nbsamples = traitlets.CInt(100).tag(sync=True)
    # Loop
    loop = traitlets.CBool(False).tag(sync=True)
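A quick check of the ramp the docstring describes, sampling val = t / period over one period:

import numpy as np

period_ms, nbsamples = 5000.0, 100
t = np.linspace(0.0, period_ms, nbsamples)
value = t / period_ms  # rises linearly from 0 to 1 over one period
print(value[0], value[-1])  # 0.0 1.0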
Example #14
class Size(Shelf):
    # TODO: min for value
    # TODO: supported role
    # TODO: supported mark types
    shelf_name = 'size'
    scale = T.Instance(Scale, default_value=None, allow_none=True)
    legend = T.Bool(True)
    value = T.CInt(30)
Example #15
class PyGBMModel(state.HasState):

    features = traitlets.List(traitlets.Unicode())
    num_round = traitlets.CInt()
    param = traitlets.Dict()
    prediction_name = traitlets.Unicode(default_value='pygbm_prediction')
    learning_rate = traitlets.Float(0.1)
    max_iter = traitlets.Int(10)
    max_bins = traitlets.Int(255)
    max_leaf_nodes = traitlets.Int(31)
    random_state = traitlets.Int(0)
    verbose = traitlets.Int(1)

    def fit(self, dataframe, label):
        self.pygbm_model = pygbm.GradientBoostingMachine(
            learning_rate=self.learning_rate,
            max_iter=self.max_iter,
            max_bins=self.max_bins,
            max_leaf_nodes=self.max_leaf_nodes,
            random_state=self.random_state,
            scoring=None,
            verbose=self.verbose,
            validation_split=None)
        if not hasattr(label, 'values'):
            label = dataframe[label]
        y = label.values.astype(np.float32)
        with using_vaex(self.features):
            dsw = DataFrameWrapper(dataframe)
            self.pygbm_model.fit(dsw, y)

    def predict(self, dataframe):
        data = np.vstack([dataframe[k].values for k in self.features]).T
        return self.pygbm_model.predict(data)

    def __call__(self, *args):
        data = np.vstack([arg.astype(np.float32) for arg in args]).T.copy()
        return self.pygbm_model.predict(data)

    def transform(self, dataframe):
        copy = dataframe.copy()
        lazy_function = copy.add_function('pygbm_prediction_function', self)
        expression = lazy_function(*self.features)
        copy.add_virtual_column(self.prediction_name, expression, unique=False)
        return copy

    def state_get(self):
        return dict(tree_state=base64.encodebytes(
            pickle.dumps(self.pygbm_model)).decode('ascii'),
                    substate=super(PyGBMModel, self).state_get())

    def state_set(self, state, trusted=True):
        super(PyGBMModel, self).state_set(state['substate'])
        if trusted is False:
            raise ValueError(
                "Will not unpickle data when source is not trusted")
        self.pygbm_model = pickle.loads(
            base64.decodebytes(state['tree_state'].encode('ascii')))
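The state_get/state_set pair above serializes the fitted model through pickle and base64; the round trip in isolation, with a stand-in object instead of a pygbm model:

import base64
import pickle

model = {"trees": [1, 2, 3]}  # stand-in for a fitted pygbm model
encoded = base64.encodebytes(pickle.dumps(model)).decode('ascii')
restored = pickle.loads(base64.decodebytes(encoded.encode('ascii')))
assert restored == model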
Example #16
class Figure(widgets.DOMWidget):
    """Widget class representing a volume (rendering) using three.js"""
    _view_name = Unicode('FigureView').tag(sync=True)
    _view_module = Unicode('ipyvolume').tag(sync=True)
    _model_name = Unicode('FigureModel').tag(sync=True)
    _model_module = Unicode('ipyvolume').tag(sync=True)

    volume_data = Array(default_value=None, allow_none=True).tag(sync=True, **create_array_cube_png_serialization('volume_data'))
    data_min = traitlets.CFloat().tag(sync=True)
    data_max = traitlets.CFloat().tag(sync=True)
    tf = traitlets.Instance(TransferFunction, allow_none=True).tag(sync=True, **ipywidgets.widget_serialization)
    angle1 = traitlets.Float(0.1).tag(sync=True)
    angle2 = traitlets.Float(0.2).tag(sync=True)

    scatters = traitlets.List(traitlets.Instance(Scatter), [], allow_none=False).tag(sync=True, **ipywidgets.widget_serialization)

    animation = traitlets.Float(1000.0).tag(sync=True)
    animation_exponent = traitlets.Float(.5).tag(sync=True)

    ambient_coefficient = traitlets.Float(0.5).tag(sync=True)
    diffuse_coefficient = traitlets.Float(0.8).tag(sync=True)
    specular_coefficient = traitlets.Float(0.5).tag(sync=True)
    specular_exponent = traitlets.Float(5).tag(sync=True)
    stereo = traitlets.Bool(False).tag(sync=True)
    screen_capture_enabled = traitlets.Bool(False).tag(sync=True)
    screen_capture_mime_type = traitlets.Unicode(default_value='image/png').tag(sync=True)
    screen_capture_data = traitlets.Unicode(default_value=None, allow_none=True).tag(sync=True)
    fullscreen = traitlets.Bool(False).tag(sync=True)

    camera_control = traitlets.Unicode(default_value='trackball').tag(sync=True)

    width = traitlets.CInt(500).tag(sync=True)
    height = traitlets.CInt(400).tag(sync=True)
    downscale = traitlets.CInt(1).tag(sync=True)
    show = traitlets.Unicode("Volume").tag(sync=True) # for debugging

    xlim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)
    ylim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)
    zlim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)

    xlabel = traitlets.Unicode("x").tag(sync=True)
    ylabel = traitlets.Unicode("y").tag(sync=True)
    zlabel = traitlets.Unicode("z").tag(sync=True)

    style = traitlets.Dict(default_value=ipyvolume.style.default).tag(sync=True)
Example #17
class FacetCoordinate(Shelf):
    # TODO: supported types for aggregate
    # TODO: min and max for padding
    # TODO: min for height

    aggregate = T.Enum(['count'], default_value=None, allow_none=True)
    padding = T.CFloat(0.1)
    axis = T.Instance(Axis, default_value=None, allow_none=True)
    height = T.CInt(150)
Example #18
class VizBaseState(vaex.ml.state.HasState):
    shape = traitlets.CInt(64)

    def __init__(self, ds, **kwargs):
        super(VizBaseState, self).__init__(**kwargs)
        self.ds = ds
        self.signal_slice = vaex.events.Signal()
        self.signal_regrid = vaex.events.Signal()

    def state_get(self):
        state = {}
        for name in self.trait_names():
            serializer = self.trait_metadata(name, 'serialize', ident)
            value = serializer(getattr(self, name))
            state[name] = value
        return state

    def state_set(self, state):
        for name in self.trait_names():
            if name in state:
                deserializer = self.trait_metadata(name, 'deserialize', ident)
                value = deserializer(state[name])
                setattr(self, name, value)

    def _calculate_limits(self, attr='x', expression='x_expression'):
        expression = getattr(self, expression)
        categorical = self.ds.is_category(expression)
        if categorical:
            N = self.ds.category_count(expression)
            min, max = -0.5, N - 0.5
            centers = np.arange(N)
            setattr(self, attr + '_shape', N)
        else:
            min, max = self.ds.minmax(expression)
            centers = self.ds.bin_centers(expression, [min, max],
                                          shape=getattr(self, attr + '_shape')
                                          or self.shape)
        setattr(self, attr + '_min', min)
        setattr(self, attr + '_max', max)
        setattr(self, attr + '_centers', centers)

    def _calculate_centers(self, attr='x', expression='x_expression'):
        expression = getattr(self, expression)
        categorical = self.ds.is_category(expression)
        min, max = getattr(self, attr + '_min'), getattr(self, attr + '_max')
        if min is None or max is None:
            return  # special condition that can occur during testing, since debounced does not work
        if categorical:
            N = self.ds.category_count(expression)
            centers = np.arange(N)
            setattr(self, attr + '_shape', N)
        else:
            # print(expression, [min, max], getattr(self, attr + '_shape') or self.shape)
            centers = self.ds.bin_centers(expression, [min, max],
                                          shape=getattr(self, attr + '_shape')
                                          or self.shape)
        setattr(self, attr + '_centers', centers)
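For the non-categorical branch, bin centers are the midpoints of equally spaced edges over [min, max]; a numpy sketch of the assumed behavior of ds.bin_centers, shown for intuition only:

import numpy as np

vmin, vmax, shape = 0.0, 1.0, 4
edges = np.linspace(vmin, vmax, shape + 1)
centers = (edges[:-1] + edges[1:]) / 2
print(centers)  # [0.125 0.375 0.625 0.875]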
Example #19
class VizBase2dState(VizBaseState):
    x_expression = traitlets.Unicode()
    y_expression = traitlets.Unicode()
    x_slice = traitlets.CInt(None, allow_none=True)
    y_slice = traitlets.CInt(None, allow_none=True)
    type = traitlets.CaselessStrEnum(['count', 'min', 'max', 'mean'],
                                     default_value='count')
    aux = traitlets.Unicode(None, allow_none=True)
    groupby = traitlets.Unicode(None, allow_none=True)
    x_shape = traitlets.CInt(None, allow_none=True)
    y_shape = traitlets.CInt(None, allow_none=True)

    x_min = traitlets.CFloat()
    x_max = traitlets.CFloat()
    y_min = traitlets.CFloat()
    y_max = traitlets.CFloat()

    def __init__(self, ds, **kwargs):
        super(VizBase2dState, self).__init__(ds, **kwargs)
        self.observe(lambda x: self.calculate_limits(),
                     ['x_expression', 'y_expression', 'type', 'aux'])
        self.observe(lambda x: self.signal_slice.emit(self),
                     ['x_slice', 'y_slice'])
        # no need for recompute
        #self.observe(lambda x: self.calculate_grid(), ['groupby', 'shape', 'groupby_normalize'])
        self.observe(self.limits_changed, ['x_min', 'x_max', 'y_min', 'y_max'])
        self.calculate_limits()

    def bin_parameters(self):
        yield self.x_expression, self.x_shape or self.shape, (
            self.x_min, self.x_max), self.x_slice
        yield self.y_expression, self.y_shape or self.shape, (
            self.y_min, self.y_max), self.y_slice

    def calculate_limits(self):
        self._calculate_limits('x', 'x_expression')
        self._calculate_limits('y', 'y_expression')
        self.signal_regrid.emit(self)

    def limits_changed(self, change):
        self._calculate_centers()
        self.signal_regrid.emit(self)
Example #20
class CameraPipe(traitlets.HasTraits):
    input_device = traitlets.Unicode("/dev/video0")
    output_device = traitlets.Unicode("/dev/video1")
    width = traitlets.CInt(640)
    height = traitlets.CInt(480)
    fps = traitlets.CInt(30)
    effects = traitlets.List()
    cam_in = traitlets.Instance(cv2.VideoCapture)
    cam_out = traitlets.Instance(pyfakewebcam.FakeWebcam, allow_none=True)

    def __init__(self, *args, **kwargs):
        super(CameraPipe, self).__init__(*args, **kwargs)
        # We don't manage these arrays with traitlets.
        # Pyglet does a lot of checking and byte-joining with arrays based on
        # pitch, so we hold on to things here and then read in an upside-down
        # way.
        self.display_array = np.zeros((self.height, self.width, 3), dtype="u1", order="C")
        self.output_arr1 = np.flipud(self.display_array)
        self.output_arr2 = np.zeros((self.height, self.width, 3), dtype="u1", order="C")

    @traitlets.default("cam_in")
    def _default_cam_in(self):
        in_cam = cv2.VideoCapture(self.input_device)
        in_cam.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
        in_cam.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)
        in_cam.set(cv2.CAP_PROP_FPS, self.fps)
        return in_cam

    @traitlets.default("cam_out")
    def _default_cam_out(self):
        return pyfakewebcam.FakeWebcam(self.output_device, self.width, self.height)

    def next_frame(self, dt=0.0):
        _, self.output_arr1[:] = self.cam_in.read()
        for effect in self.effects:
            effect(self.output_arr1, self.output_arr2)
            self.output_arr1[:] = self.output_arr2[:]
        if self.cam_out is not None:
            self.cam_out.schedule_frame(self.output_arr1)
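The flipud trick in __init__ writes frames upside-down through a view without copying; a numpy-only sketch:

import numpy as np

display = np.zeros((2, 3), dtype="u1")
flipped = np.flipud(display)  # a view sharing memory with display
flipped[0, :] = 255           # write the view's top row
print(display)                # the change lands in display's bottom row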
Example #21
class MeshData(SceneData):
    name = "mesh"
    data_source = traitlets.Instance(YTDataContainer)
    texture_objects = traitlets.Dict(trait=traitlets.Instance(Texture3D))
    blocks = traitlets.Dict(default_value=())
    scale = traitlets.Bool(False)
    size = traitlets.CInt(-1)

    def get_mesh_data(self, data_source, field):
        """

        This reads the mesh data into a form that can be fed in to OpenGL.

        """

        # get mesh information
        try:
            ftype, fname = field
            mesh_id = int(ftype[-1])
        except ValueError:
            mesh_id = 0

        mesh = data_source.ds.index.meshes[mesh_id - 1]
        offset = mesh._index_offset
        vertices = mesh.connectivity_coords
        indices = mesh.connectivity_indices - offset

        data = data_source[field]

        return triangulate_mesh(vertices, data, indices)

    def add_data(self, field):
        v, d, i = self.get_mesh_data(self.data_source, field)
        v.shape = (v.size // 3, 3)
        v = np.concatenate([v, np.ones((v.shape[0], 1))], axis=-1).astype("f4")
        d.shape = (d.size, 1)
        i.shape = (i.size, 1)
        i = i.astype("uint32")
        # d[:] = np.mgrid[0.0:1.0:1j*d.size].astype("f4")[:,None]
        self.vertex_array.attributes.append(
            VertexAttribute(name="model_vertex", data=v)
        )
        self.vertex_array.attributes.append(
            VertexAttribute(name="vertex_data", data=d.astype("f4"))
        )
        self.vertex_array.indices = i
        self.size = i.size

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        return VertexArray(name="mesh_info", each=0)
Example #22
class CycleTransformer(Transformer):
    '''A strategy for transforming cyclical features (e.g. angles, time).

    Think of each feature as an angle on a unit circle in polar coordinates,
    and then obtain the x and y coordinate projections,
    or the cos and sin components respectively.

    Suitable for a variety of machine learning tasks.
    It preserves the cyclical continuity of the feature.
    Inspired by: http://blog.davidkaleko.com/feature-engineering-cyclical-features.html
    >>> df = vaex.from_arrays(days=[0, 1, 2, 3, 4, 5, 6])
    >>> cyctrans = vaex.ml.CycleTransformer(n=7, features=['days'])
    >>> cyctrans.fit_transform(df)
      #    days     days_x     days_y
      0       0   1          0
      1       1   0.62349    0.781831
      2       2  -0.222521   0.974928
      3       3  -0.900969   0.433884
      4       4  -0.900969  -0.433884
      5       5  -0.222521  -0.974928
      6       6   0.62349   -0.781831
    '''
    n = traitlets.CInt(allow_none=False, help='The number of elements in one cycle.')
    prefix_x = traitlets.Unicode(default_value="", help='Prefix for the x-component of the transformed features.').tag(ui='Text')
    prefix_y = traitlets.Unicode(default_value="", help='Prefix for the y-component of the transformed features.').tag(ui='Text')
    suffix_x = traitlets.Unicode(default_value="_x", help='Suffix for the x-component of the transformed features.').tag(ui='Text')
    suffix_y = traitlets.Unicode(default_value="_y", help='Suffix for the y-component of the transformed features.').tag(ui='Text')

    def fit(self, df):
        '''
        Fit a CycleTransformer to the DataFrame.

        This is a dummy method, as it is not needed for the transformation to be applied.

        :param df: A vaex DataFrame.
        '''
        pass

    def transform(self, df):
        '''
        Transform a DataFrame with a CycleTransformer.

        :param df: A vaex DataFrame.
        '''
        copy = df.copy()
        for feature in self.features:
            name_x = self.prefix_x + feature + self.suffix_x
            copy[name_x] = np.cos(2 * np.pi * copy[feature] / self.n)
            name_y = self.prefix_y + feature + self.suffix_y
            copy[name_y] = np.sin(2 * np.pi * copy[feature] / self.n)

        return copy
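The cos/sin projection in transform works with plain numpy; a standalone sketch matching the docstring's 7-day cycle:

import numpy as np

n = 7
days = np.arange(n)
days_x = np.cos(2 * np.pi * days / n)
days_y = np.sin(2 * np.pi * days / n)
# day 0 and day 7 map to the same point, so the encoding stays continuous
print(days_x[1], days_y[1])  # ~0.6235 ~0.7818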
Example #23
class ColorOffsetEffect(CameraEffect):
    red_offset = traitlets.CInt(0)
    green_offset = traitlets.CInt(0)
    blue_offset = traitlets.CInt(0)

    @traitlets.default("func")
    def _default_func(self):
        @numba.jit(nopython=True)
        def func(input_arr, output_arr, red_offset, green_offset, blue_offset):
            for i in range(input_arr.shape[0]):
                for j in range(input_arr.shape[1]):
                    # red
                    i1 = (i + red_offset) % input_arr.shape[0]
                    output_arr[i, j, 0] = input_arr[i1, j, 0]
                    # green
                    i1 = (i + green_offset) % input_arr.shape[0]
                    output_arr[i, j, 1] = input_arr[i1, j, 1]
                    # blue
                    i1 = (i + blue_offset) % input_arr.shape[0]
                    output_arr[i, j, 2] = input_arr[i1, j, 2]
        return func

    def call_func(self, input_arr, output_arr):
        return self.func(input_arr, output_arr, self.red_offset, self.green_offset, self.blue_offset)

    def handle_key(self, symbol, modifiers):
        sign = 1
        if modifiers & pyglet.window.key.MOD_SHIFT:
            sign = -1
        if symbol == pyglet.window.key.R:
            self.red_offset = max(0, sign + self.red_offset)
        elif symbol == pyglet.window.key.G:
            self.green_offset = max(0, sign + self.green_offset)
        elif symbol == pyglet.window.key.B:
            self.blue_offset = max(0, sign + self.blue_offset)
        else:
            return False
        return True
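The per-channel row offset can be expressed without numba as one np.roll per channel; a minimal sketch of the kernel's effect, not the numba code itself:

import numpy as np

frame = np.arange(4 * 2 * 3, dtype="u1").reshape(4, 2, 3)
offsets = (1, 0, 2)  # red, green, blue row offsets
out = np.empty_like(frame)
for c, off in enumerate(offsets):
    # output row i takes input row (i + off) % height, as in the kernel
    out[..., c] = np.roll(frame[..., c], -off, axis=0)
print(out[0, 0])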
Example #24
class TextCharacters(SceneData):
    characters = traitlets.Dict(value_trait=traitlets.Instance(Character))
    name = "text_overlay"
    font = FontTrait("DejaVu Sans")
    font_size = traitlets.CInt(32)

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        return VertexArray(name="char_info", each=6)

    def build_textures(self):
        # This doesn't check if the textures have already been built
        self.font.set_size(self.font_size, 200)
        chars = [ord(_) for _ in string.printable]
        tex_ids = GL.glGenTextures(len(chars))
        vert = []
        for i, (tex_id, char_code) in enumerate(zip(tex_ids, chars)):
            self.font.clear()
            self.font.set_text(chr(char_code), flags=LOAD_FORCE_AUTOHINT)
            self.font.draw_glyphs_to_bitmap(antialiased=True)
            glyph = self.font.load_char(char_code)
            x0, y0, x1, y1 = glyph.bbox
            bitmap = self.font.get_image().astype(">f4") / 255.0
            dx = 1.0 / bitmap.shape[0]
            dy = 1.0 / bitmap.shape[1]
            triangles = np.array(
                [
                    [x0, y1, 0.0 + dx / 2.0, 0.0 + dy / 2.0],
                    [x0, y0, 0.0 + dx / 2.0, 1.0 - dy / 2.0],
                    [x1, y0, 1.0 - dx / 2.0, 1.0 - dy / 2.0],
                    [x0, y1, 0.0 + dx / 2.0, 0.0 + dy / 2.0],
                    [x1, y0, 1.0 - dx / 2.0, 1.0 - dy / 2.0],
                    [x1, y1, 1.0 - dx / 2.0, 0.0 + dy / 2.0],
                ],
                dtype="<f4",
            )
            vert.append(triangles)
            texture = Texture2D(texture_name=tex_id,
                                data=bitmap,
                                boundary_x="clamp",
                                boundary_y="clamp")
            # I can't find information as to why horiAdvance is a
            # factor of 8 larger than the other factors.  I assume it
            # is referenced somewhere, but I cannot find it.
            self.characters[chr(char_code)] = Character(
                texture, i, glyph.horiAdvance / 8.0, glyph.vertAdvance)
        vert = np.concatenate(vert)
        self.vertex_array.attributes.append(
            VertexAttribute(name="quad_vertex", data=vert.astype("<f4")))
Example #25
class LineData(SceneData):
    name = "line_data"
    n_vertices = traitlets.CInt()

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        return VertexArray(name="vertices", each=6)

    def add_data(self, lines):
        assert lines.shape[1] == 4
        x_coord = np.mgrid[0.0:1.0:lines.shape[0] * 1j].astype("f4")
        x_coord = x_coord.reshape((-1, 1))
        self.n_vertices = lines.shape[0]
        self.vertex_array.attributes.append(
            VertexAttribute(name="rgba_values", data=lines))
        self.vertex_array.attributes.append(
            VertexAttribute(name="x_coord", data=x_coord))
Example #26
class Texture(traitlets.HasTraits):
    texture_name = traitlets.CInt(-1)
    data = traittypes.Array(None, allow_none=True)
    channels = GLValue("r32f")
    min_filter = GLValue("linear")
    mag_filter = GLValue("linear")

    @traitlets.default("texture_name")
    def _default_texture_name(self):
        return GL.glGenTextures(1)

    @contextmanager
    def bind(self, target=0):
        _ = GL.glActiveTexture(TEX_TARGETS[target])
        _ = GL.glBindTexture(self.dim_enum, self.texture_name)
        yield
        _ = GL.glActiveTexture(TEX_TARGETS[target])
        GL.glBindTexture(self.dim_enum, 0)
Example #27
class XGBoostModel(state.HasState):
    '''The XGBoost algorithm.

    XGBoost is an optimized distributed gradient boosting library designed to be
    highly efficient, flexible and portable. It implements machine learning
    algorithms under the Gradient Boosting framework. XGBoost provides parallel
    tree boosting (also known as GBDT or GBM) that solves many data science
    problems in a fast and accurate way.
    (https://github.com/dmlc/xgboost)

    Example:

    >>> import vaex
    >>> import vaex.ml.xgboost
    >>> df = vaex.ml.datasets.load_iris()
    >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
    >>> df_train, df_test = vaex.ml.train_test_split(df)
    >>> params = {
        'max_depth': 5,
        'learning_rate': 0.1,
        'objective': 'multi:softmax',
        'num_class': 3,
        'subsample': 0.80,
        'colsample_bytree': 0.80,
        'silent': 1}
    >>> booster = vaex.ml.xgboost.XGBoostModel(features=features, num_boost_round=100, params=params)
    >>> booster.fit(df_train, 'class_')
    >>> df_train = booster.transform(df_train)
    >>> df_train.head(3)
    #    sepal_length    sepal_width    petal_length    petal_width    class_    xgboost_prediction
    0             5.4            3               4.5            1.5         1                     1
    1             4.8            3.4             1.6            0.2         0                     0
    2             6.9            3.1             4.9            1.5         1                     1
    >>> df_test = booster.transform(df_test)
    >>> df_test.head(3)
    #    sepal_length    sepal_width    petal_length    petal_width    class_    xgboost_prediction
    0             5.9            3               4.2            1.5         1                     1
    1             6.1            3               4.6            1.4         1                     1
    2             6.6            2.9             4.6            1.3         1                     1
    '''

    features = traitlets.List(
        traitlets.Unicode(),
        help='List of features to use when fitting the XGBoostModel.')
    num_boost_round = traitlets.CInt(help='Number of boosting iterations.')
    params = traitlets.Dict(
        help='A dictionary of parameters to be passed on to the XGBoost model.'
    )
    prediction_name = traitlets.Unicode(
        default_value='xgboost_prediction',
        help='The name of the virtual column housing the predictions.')

    def __call__(self, *args):
        data2d = np.vstack([arg.astype(np.float64) for arg in args]).T.copy()
        dmatrix = xgboost.DMatrix(data2d)
        return self.booster.predict(dmatrix)

    def transform(self, df):
        '''Transform a DataFrame such that it contains the predictions of the XGBoostModel in form of a virtual column.

        :param df: A vaex DataFrame. It should have the same columns as the DataFrame used to train the model.

        :return copy: A shallow copy of the DataFrame that includes the XGBoostModel prediction as a virtual column.
        :rtype: DataFrame
        '''
        copy = df.copy()
        lazy_function = copy.add_function('xgboost_prediction_function', self)
        expression = lazy_function(*self.features)
        copy.add_virtual_column(self.prediction_name, expression, unique=False)
        return copy

    def fit(self,
            df,
            target,
            evals=(),
            early_stopping_rounds=None,
            evals_result=None,
            verbose_eval=False,
            **kwargs):
        '''Fit the XGBoost model given a DataFrame.
        This method accepts all key word arguments for the xgboost.train method.

        :param df: A vaex DataFrame containing the training features.
        :param target: The column name of the target variable.
        :param evals: A list of pairs (DataFrame, string).
        List of items to be evaluated during training, this allows user to watch performance on the validation set.
        :param int early_stopping_rounds: Activates early stopping.
        Validation error needs to decrease at least every *early_stopping_rounds* round(s) to continue training.
        Requires at least one item in *evals*. If there's more than one, will use the last. Returns the model
        from the last iteration (not the best one).
        :param dict evals_result: A dictionary storing the evaluation results of all the items in *evals*.
        :param bool verbose_eval: Requires at least one item in *evals*.
        If *verbose_eval* is True then the evaluation metric on the validation set is printed at each boosting stage.
        '''
        data = df[self.features].values
        target_data = df.evaluate(target)
        dtrain = xgboost.DMatrix(data, target_data)
        if evals is not None:
            evals = [list(elem) for elem in evals]
            for item in evals:
                data = item[0][self.features].values
                target_data = item[0].evaluate(target)
                item[0] = xgboost.DMatrix(data, target_data)
        else:
            evals = ()

        # This does the actual training / fitting of the xgboost model
        self.booster = xgboost.train(
            params=self.params,
            dtrain=dtrain,
            num_boost_round=self.num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            evals_result=evals_result,
            verbose_eval=verbose_eval,
            **kwargs)

    def predict(self, df, **kwargs):
        '''Provided a vaex DataFrame, get an in-memory numpy array with the predictions from the XGBoost model.
        This method accepts the key word arguments of the predict method from XGBoost.

        :returns: A in-memory numpy array containing the XGBoostModel predictions.
        :rtype: numpy.array
        '''
        data = df[self.features].values
        dmatrix = xgboost.DMatrix(data)
        return self.booster.predict(dmatrix, **kwargs)

    def state_get(self):
        filename = tempfile.mktemp()
        self.booster.save_model(filename)
        with open(filename, 'rb') as f:
            data = f.read()
        return dict(tree_state=base64.encodebytes(data).decode('ascii'),
                    substate=super(XGBoostModel, self).state_get())

    def state_set(self, state):
        super(XGBoostModel, self).state_set(state['substate'])
        data = base64.decodebytes(state['tree_state'].encode('ascii'))
        filename = tempfile.mktemp()
        with open(filename, 'wb') as f:
            f.write(data)
        self.booster = xgboost.Booster(model_file=filename)
Example #28
class CatBoostModel(state.HasState):
    '''The CatBoost algorithm.

    This class provides an interface to the CatBoost algorithm.
    CatBoost is a fast, scalable, high performance Gradient Boosting on
    Decision Trees library, used for ranking, classification, regression and
    other machine learning tasks. For more information please visit
    https://github.com/catboost/catboost

    Example:

    >>> import vaex
    >>> import vaex.ml.catboost
    >>> df = vaex.ml.datasets.load_iris()
    >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
    >>> df_train, df_test = vaex.ml.train_test_split(df)
    >>> params = {
        'leaf_estimation_method': 'Gradient',
        'learning_rate': 0.1,
        'max_depth': 3,
        'bootstrap_type': 'Bernoulli',
        'objective': 'MultiClass',
        'eval_metric': 'MultiClass',
        'subsample': 0.8,
        'random_state': 42,
        'verbose': 0}
    >>> booster = vaex.ml.catboost.CatBoostModel(features=features, num_boost_round=100, params=params)
    >>> booster.fit(df_train, 'class_')
    >>> df_train = booster.transform(df_train)
    >>> df_train.head(3)
    #    sepal_length    sepal_width    petal_length    petal_width    class_  catboost_prediction
    0             5.4            3               4.5            1.5         1  [0.00615039 0.98024259 0.01360702]
    1             4.8            3.4             1.6            0.2         0  [0.99034267 0.00526382 0.0043935 ]
    2             6.9            3.1             4.9            1.5         1  [0.00688241 0.95190908 0.04120851]
    >>> df_test = booster.transform(df_test)
    >>> df_test.head(3)
    #    sepal_length    sepal_width    petal_length    petal_width    class_  catboost_prediction
    0             5.9            3               4.2            1.5         1  [0.00464228 0.98883351 0.00652421]
    1             6.1            3               4.6            1.4         1  [0.00350424 0.9882139  0.00828186]
    2             6.6            2.9             4.6            1.3         1  [0.00325705 0.98891631 0.00782664]
    '''

    features = traitlets.List(
        traitlets.Unicode(),
        help='List of features to use when fitting the CatBoostModel.')
    num_boost_round = traitlets.CInt(default_value=None,
                                     allow_none=True,
                                     help='Number of boosting iterations.')
    params = traitlets.Dict(
        help=
        'A dictionary of parameters to be passed on to the CatBoostModel model.'
    )
    pool_params = traitlets.Dict(
        default_value={},
        help=
        'A dictionary of parameters to be passed to the Pool data object construction'
    )
    prediction_name = traitlets.Unicode(
        default_value='catboost_prediction',
        help='The name of the virtual column housing the predictions.')
    prediction_type = traitlets.Enum(
        values=['Probability', 'Class', 'RawFormulaVal'],
        default_value='Probability',
        help=
        'The form of the predictions. Can be "RawFormulaVal", "Probability" or "Class".'
    )

    def __call__(self, *args):
        data2d = np.vstack([arg.astype(np.float64) for arg in args]).T.copy()
        dmatrix = catboost.Pool(data2d, **self.pool_params)
        return self.booster.predict(dmatrix,
                                    prediction_type=self.prediction_type)

    def transform(self, df):
        '''Transform a DataFrame such that it contains the predictions of the CatBoostModel in form of a virtual column.

        :param df: A vaex DataFrame. It should have the same columns as the DataFrame used to train the model.

        :return copy: A shallow copy of the DataFrame that includes the CatBoostModel prediction as a virtual column.
        :rtype: DataFrame
        '''
        copy = df.copy()
        lazy_function = copy.add_function('catboost_prediction_function', self)
        expression = lazy_function(*self.features)
        copy.add_virtual_column(self.prediction_name, expression, unique=False)
        return copy

    def fit(self,
            df,
            target,
            evals=None,
            early_stopping_rounds=None,
            verbose_eval=None,
            plot=False,
            **kwargs):
        '''Fit the CatBoostModel model given a DataFrame.
        This method accepts all key word arguments for the catboost.train method.

        :param df: A vaex DataFrame containing the training features.
        :param target: The column name of the target variable.
        :param evals: A list of DataFrames to be evaluated during training.
        This allows user to watch performance on the validation sets.
        :param int early_stopping_rounds: Activates early stopping.
        :param bool verbose_eval: Requires at least one item in *evals*.
        If *verbose_eval* is True then the evaluation metric on the validation set is printed at each boosting stage.
        :param bool plot: if True, display an interactive widget in the Jupyter
        notebook of how the train and validation sets score on each boosting iteration.
        '''
        data = df[self.features].values
        target_data = df.evaluate(target)
        dtrain = catboost.Pool(data=data,
                               label=target_data,
                               **self.pool_params)
        if evals is not None:
            for i, item in enumerate(evals):
                data = item[self.features].values
                target_data = item.evaluate(target)
                evals[i] = catboost.Pool(data=data,
                                         label=target_data,
                                         **self.pool_params)

        # This does the actual training/fitting of the catboost model
        self.booster = catboost.train(
            params=self.params,
            dtrain=dtrain,
            num_boost_round=self.num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose_eval,
            plot=plot,
            **kwargs)

    def predict(self, df, **kwargs):
        '''Provided a vaex DataFrame, get an in-memory numpy array with the predictions from the CatBoostModel model.
        This method accepts the key word arguments of the predict method from catboost.

        :param df: a vaex DataFrame

        :returns: A in-memory numpy array containing the CatBoostModel predictions.
        :rtype: numpy.array
        '''
        data = df[self.features].values
        dmatrix = catboost.Pool(data, **self.pool_params)
        return self.booster.predict(dmatrix,
                                    prediction_type=self.prediction_type,
                                    **kwargs)

    def state_get(self):
        filename = tempfile.mktemp()
        self.booster.save_model(filename)
        with open(filename, 'rb') as f:
            data = f.read()
        return dict(tree_state=base64.encodebytes(data).decode('ascii'),
                    substate=super(CatBoostModel, self).state_get())

    def state_set(self, state, trusted=True):
        super(CatBoostModel, self).state_set(state['substate'])
        data = base64.decodebytes(state['tree_state'].encode('ascii'))
        filename = tempfile.mktemp()
        with open(filename, 'wb') as f:
            f.write(data)
        self.booster = catboost.CatBoost().load_model(fname=filename)
Example #29
class Plot2dSliced(PlotBase):
    z = traitlets.Unicode(allow_none=False).tag(sync=True)
    z_slice = traitlets.CInt(default_value=0).tag(sync=True)  # TODO: do linking at python side
    z_shape = traitlets.CInt(default_value=10).tag(sync=True)
    z_relative = traitlets.CBool(False).tag(sync=True)
    z_min = traitlets.CFloat(default_value=None, allow_none=True).tag(sync=True)
    z_max = traitlets.CFloat(default_value=None, allow_none=True).tag(sync=True)

    def __init__(self, **kwargs):
        self.z_min_extreme, self.z_max_extreme = kwargs["dataset"].minmax(
            kwargs["z"])
        super(Plot2dSliced, self).__init__(**kwargs)
        self.create_tools()

    def get_limits(self, limits):
        limits = self.dataset.limits(self.get_binby(), limits)
        limits = list([list(k) for k in limits])
        if self.z_min is None:
            self.z_min = limits[2][0]
        if self.z_max is None:
            self.z_max = limits[2][1]
        limits[2][0] = self.z_min
        limits[2][1] = self.z_max
        return limits

    def select_rectangle(self, x1, y1, x2, y2, mode="replace"):
        dz = self.z_max - self.z_min
        z1 = self.z_min + dz * self.z_slice / self.z_shape
        z2 = self.z_min + dz * (self.z_slice + 1) / self.z_shape
        spaces = [self.x, self.y, self.z]
        limits = [[x1, x2], [y1, y2], [z1, z2]]
        self.dataset.select_box(spaces, limits=limits, mode=mode)

    def select_lasso(self, x, y, mode="replace"):
        raise NotImplementedError("todo")

    def get_grid(self):
        zslice = self.grid[..., self.z_slice]
        if self.z_relative:
            with np.errstate(divide='ignore', invalid='ignore'):
                zslice = zslice / self.grid.sum(axis=-1)
        return zslice
        #return self.grid[...,self.z_slice]

    def get_vgrids(self):
        def zsliced(grid):
            return grid[..., self.z_slice] if grid is not None else None

        return [
            zsliced(grid) for grid in super(Plot2dSliced, self).get_vgrids()
        ]

    def create_tools(self):
        #super(Plot2dSliced, self).create_tools()
        self.z_slice_slider = widgets.IntSlider(value=self.z_slice,
                                                min=0,
                                                max=self.z_shape - 1)
        #self.add_control_widget(self.z_slice_slider)
        self.z_slice_slider.observe(self._z_slice_changed, "value")
        self.observe(self._z_slice_changed, "z_slice")

        dz = self.z_max_extreme - self.z_min_extreme

        self.z_range_slider = widgets.FloatRangeSlider(
            min=min(self.z_min, self.z_min_extreme),
            value=[self.z_min, self.z_max],
            max=max(self.z_max, self.z_max_extreme),
            step=dz / 1000)
        self.z_range_slider.observe(self._z_range_changed_, names=["value"])
        #self.observe(self.z_range_slider, "z_min")

        self.z_control = widgets.VBox(
            [self.z_slice_slider, self.z_range_slider])
        self.add_control_widget(self.z_control)

    def _z_range_changed_(self, changes, **kwargs):
        #print("changes1", changes, repr(changes), kwargs)
        self.limits[2][0], self.limits[2][1] =\
            self.z_min, self.z_max = self.z_range_slider.value = changes["new"]
        self.update_grid()

    def _z_slice_changed(self, changes):
        # called for changes to both the slider and the z_slice trait;
        # assigning both keeps them in sync (a no-op for the side that fired)
        self.z_slice = self.z_slice_slider.value = changes["new"]
        self._update_image()

    def get_shape(self):
        return vaex.dataset._expand_shape(self.shape, 2) + (self.z_shape, )

    def get_vshape(self):
        return vaex.dataset._expand_shape(self.vshape, 2) + (self.z_shape, )

    def get_binby(self):
        return [self.x, self.y, self.z]
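
# A minimal usage sketch (added; not part of the original example): the
# arithmetic in select_rectangle maps a slice index to its z interval.
# The helper below is hypothetical and only mirrors the traits on Plot2dSliced.
def z_slice_bounds(z_min, z_max, z_shape, z_slice):
    dz = z_max - z_min
    z1 = z_min + dz * z_slice / z_shape
    z2 = z_min + dz * (z_slice + 1) / z_shape
    return z1, z2

# slicing [0, 10) into 10 equal bins: slice 3 covers [3.0, 4.0)
assert z_slice_bounds(0.0, 10.0, 10, 3) == (3.0, 4.0)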
Beispiel #30
0
class RasterioBase(LoadFileMixin, BaseFileSource):
    """Create a DataSource using rasterio.

    Attributes
    ----------
    source : str, :class:`io.BytesIO`
        Path to the data source
    dataset : :class:`rasterio.io.DatasetReader`
        A reference to the datasource opened by rasterio
    coordinates : :class:`podpac.Coordinates`
        {coordinates}
    band : int
        The 'band' or index for the variable being accessed in files such as GeoTIFFs. Use None for all bands.
    crs : str, optional
        The coordinate reference system. Normally this will come directly from the file, but this allows users to
        specify the crs in case this information is missing from the file.
    read_from_source : bool, optional
        Default is False. If True, the file will be read using rasterio.open(self.source) instead of being
        automatically parsed to handle ftp, s3, in-memory files, etc.
    """

    # dataset = tl.Instance(rasterio.DatasetReader).tag(readonly=True)
    band = tl.CInt(allow_none=True).tag(attr=True)
    crs = tl.Unicode(allow_none=True, default_value=None).tag(attr=True)
    driver = tl.Unicode(allow_none=True, default_value=None)
    read_from_source = tl.Bool(False).tag(attr=True)

    @cached_property
    def dataset(self):
        if re.match(".*:.*:.*", self.source):
            # i.e. the user supplied a non-file-looking string such as
            # 'HDF4_EOS:EOS_GRID:"MOD13Q1.A2013033.h08v05.006.2015256072248.hdf":MODIS_Grid_16DAY_250m_500m_VI:"250m 16 days NDVI"'
            # This also covers many subdatasets exposed by GDAL raster drivers;
            # see https://gdal.org/drivers/raster/index.html
            self.set_trait("read_from_source", True)
            return rasterio.open(self.source)
        else:
            return super(RasterioBase, self).dataset

    @tl.default("band")
    def _band_default(self):
        if self.outputs is not None and self.output is not None:
            return self.outputs.index(self.output)
        elif self.outputs is None:
            return 1
        else:
            return None  # All bands

    # -------------------------------------------------------------------------
    # public api methods
    # -------------------------------------------------------------------------

    @cached_property
    def nan_vals(self):
        return list(self.dataset.nodatavals)

    def open_dataset(self, fp, **kwargs):
        if self.read_from_source:
            return rasterio.open(self.source)

        # the MemoryFile must outlive the returned dataset (closing it would
        # invalidate the open dataset), so it is deliberately not used as a
        # context manager here
        mf = rasterio.MemoryFile()
        mf.write(fp.read())
        return mf.open(driver=self.driver)

    def close_dataset(self):
        """Closes the file for the datasource"""
        self.dataset.close()

    @common_doc(COMMON_DATA_DOC)
    def get_coordinates(self):
        """{get_coordinates}

        The default implementation tries to find the lat/lon coordinates based on dataset.transform
        (the affine transform). It cannot determine the alt or time dimensions, so child classes may
        have to overload this method.
        """

        # the affine transform defines the pixel grid, including any rotation
        affine = self.dataset.transform
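        # worked example (added, hypothetical values): a north-up raster with
        # 0.25-degree pixels and upper-left corner at (-180, 90) has
        # affine.to_gdal() == (-180.0, 0.25, 0.0, 90.0, 0.0, -0.25),
        # i.e. (x_ul, x_res, row_rot, y_ul, col_rot, -y_res)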

        if self.crs is not None:
            crs = self.crs
        elif isinstance(self.dataset.crs,
                        (rasterio.crs.CRS, dict)) and "init" in self.dataset.crs:
            crs = self.dataset.crs["init"].upper()
        else:
            try:
                crs = pyproj.CRS(self.dataset.crs).to_wkt()
            except pyproj.exceptions.CRSError:
                raise RuntimeError("Unexpected rasterio crs '%s'" %
                                   self.dataset.crs)

        return Coordinates.from_geotransform(affine.to_gdal(),
                                             self.dataset.shape, crs)

    @common_doc(COMMON_DATA_DOC)
    def get_data(self, coordinates, coordinates_index):
        """{get_data}"""
        data = self.create_output_array(coordinates)
        slc = coordinates_index

        # read data within coordinates_index window
        window = ((slc[0].start, slc[0].stop), (slc[1].start, slc[1].stop))
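        # e.g. coordinates_index = (slice(0, 256), slice(128, 384)) selects
        # the raster window ((0, 256), (128, 384))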

        if self.outputs is not None:  # read all the bands
            raster_data = self.dataset.read(out_shape=(len(self.outputs), ) +
                                            tuple(coordinates.shape),
                                            window=window)
            raster_data = np.moveaxis(raster_data, 0, 2)
        else:  # read the requested band
            raster_data = self.dataset.read(self.band,
                                            out_shape=tuple(coordinates.shape),
                                            window=window)

        # set raster data to output array
        data.data.ravel()[:] = raster_data.ravel()
        return data

    # -------------------------------------------------------------------------
    # additional methods and properties
    # -------------------------------------------------------------------------

    @property
    def tags(self):
        return self.dataset.tags()

    @property
    def subdatasets(self):
        return self.dataset.subdatasets

    @property
    def band_count(self):
        """The number of bands"""

        return self.dataset.count

    @cached_property
    def band_descriptions(self):
        """A description of each band contained in dataset.tags

        Returns
        -------
        OrderedDict
            Dictionary of band_number: band_description pairs. The band_description values are a dictionary, each
            containing a number of keys -- depending on the metadata
        """

        return OrderedDict(
            (i, self.dataset.tags(i + 1)) for i in range(self.band_count))

    @cached_property
    def band_keys(self):
        """An alternative view of band_descriptions based on the keys present in the metadata

        Returns
        -------
        dict
            Dictionary of metadata keys, where the values are the value of the key for each band.
            For example, band_keys['TIME'] = ['2015', '2016', '2017'] for a dataset with three bands.
        """

        # all metadata keys that appear in any band's description
        keys = {k for i in range(self.band_count) for k in self.band_descriptions[i]}
        return {
            k: [self.band_descriptions[i].get(k) for i in range(self.band_count)]
            for k in keys
        }

    def get_band_numbers(self, key, value):
        """Return the bands that have a key equal to a specified value.

        Parameters
        ----------
        key : str / list
            Key present in the metadata of the band. Can be a single key, or a list of keys.
        value : str / list
            Value of the key that should be returned. Can be a single value, or a list of values

        Returns
        -------
        np.ndarray
            An array of band numbers that match the criteria
        """
        if not hasattr(key, "__iter__") or isinstance(key, string_types):
            key = [key]

        if not hasattr(value, "__iter__") or isinstance(value, string_types):
            value = [value]

        match = np.ones(self.band_count, bool)
        for k, v in zip(key, value):
            match = match & (np.array(self.band_keys[k]) == v)
        matches = np.where(match)[0] + 1  # band numbers are 1-based

        return matches
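
# A minimal usage sketch (added; not part of the original example),
# illustrating the band-matching logic of get_band_numbers with a plain dict
# standing in for a dataset's per-band tags; all names here are hypothetical.
import numpy as np

band_keys = {"TIME": ["2015", "2016", "2017"]}  # as in the band_keys docstring

match = np.ones(3, bool)
match = match & (np.array(band_keys["TIME"]) == "2016")
print(np.where(match)[0] + 1)  # -> [2]; band numbers are 1-based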