class VertexAttribute(traitlets.HasTraits):
    name = traitlets.CUnicode("attr")
    id = traitlets.CInt(-1)
    data = traittypes.Array(None, allow_none=True)
    each = traitlets.CInt(-1)
    opengl_type = traitlets.CInt(GL.GL_FLOAT)
    divisor = traitlets.CInt(0)

    @traitlets.default("id")
    def _id_default(self):
        return GL.glGenBuffers(1)

    @contextmanager
    def bind(self, program=None):
        loc = -1
        if program is not None:
            loc = GL.glGetAttribLocation(program.program, self.name)
            if loc >= 0:
                GL.glVertexAttribDivisor(loc, self.divisor)
                _ = GL.glEnableVertexAttribArray(loc)
        _ = GL.glBindBuffer(GL.GL_ARRAY_BUFFER, self.id)
        if loc >= 0:
            GL.glVertexAttribPointer(loc, self.each, self.opengl_type, False, 0, None)
        yield
        if loc >= 0:
            GL.glDisableVertexAttribArray(loc)
        GL.glBindBuffer(GL.GL_ARRAY_BUFFER, 0)

    @traitlets.observe("data")
    def _set_data(self, change):
        arr = change["new"]
        self.each = arr.shape[-1]
        self.opengl_type = np_to_gl[arr.dtype.name]
        with self.bind():
            GL.glBufferData(GL.GL_ARRAY_BUFFER, arr.nbytes, arr, GL.GL_STATIC_DRAW)
class OrthographicRayBlaster(RayBlaster):
    center = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    forward = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    up = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    east = traittypes.Array().valid(check_dtype("f4"), check_shape(3))
    width = traitlets.CFloat(1.0)
    height = traitlets.CFloat(1.0)
    nx = traitlets.CInt(512)
    ny = traitlets.CInt(512)

    @traitlets.default("east")
    def _default_east(self):
        return np.cross(self.forward, self.up)

    def __init__(self, *args, **kwargs):
        super(OrthographicRayBlaster, self).__init__(*args, **kwargs)

        # here origin is not the center, but the bottom left
        self._directions = np.zeros((self.nx, self.ny, 3), dtype="f4")
        self._directions[:] = self.forward[None, None, :]
        self.directions = self._directions.view().reshape((self.nx * self.ny, 3))

        self._origins = np.zeros((self.nx, self.ny, 3), dtype="f4")
        offset_x, offset_y = np.mgrid[
            -self.width / 2 : self.width / 2 : self.nx * 1j,
            -self.height / 2 : self.height / 2 : self.ny * 1j,
        ]
        self._origins[:] = (
            self.center
            + offset_x[..., None] * self.east
            + offset_y[..., None] * self.up
        )
        self.origins = self._origins.view().reshape((self.nx * self.ny, 3))
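# A minimal usage sketch for OrthographicRayBlaster above -- hedged, assuming
# only the traits defined in the class. The validators require center/forward/up
# to be float32 arrays of shape (3,).
blaster = OrthographicRayBlaster(
    center=np.array([0.0, 0.0, -1.0], dtype="f4"),
    forward=np.array([0.0, 0.0, 1.0], dtype="f4"),
    up=np.array([0.0, 1.0, 0.0], dtype="f4"),
    width=2.0, height=2.0, nx=256, ny=256,
)
# origins and directions are flat (nx * ny, 3) float32 views
assert blaster.origins.shape == (256 * 256, 3)
assert blaster.directions.shape == (256 * 256, 3)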
class VertexArray(traitlets.HasTraits):
    name = traitlets.CUnicode("vertex")
    id = traitlets.CInt(-1)
    indices = traittypes.Array(None, allow_none=True)
    index_id = traitlets.CInt(-1)
    attributes = traitlets.List(trait=traitlets.Instance(VertexAttribute))
    each = traitlets.CInt(-1)

    @traitlets.default("id")
    def _id_default(self):
        return GL.glGenVertexArrays(1)

    @contextmanager
    def bind(self, program=None):
        GL.glBindVertexArray(self.id)
        if self.index_id != -1:
            GL.glBindBuffer(GL.GL_ELEMENT_ARRAY_BUFFER, self.index_id)
        # We only bind the attributes if we have a program too
        if program is None:
            attrs = []
        else:
            attrs = self.attributes
        with ExitStack() as stack:
            _ = [stack.enter_context(_.bind(program)) for _ in attrs]
            yield
        if self.index_id != -1:
            GL.glBindBuffer(GL.GL_ELEMENT_ARRAY_BUFFER, 0)
        GL.glBindVertexArray(0)

    @traitlets.observe("indices")
    def _set_indices(self, change):
        arr = change["new"]
        self.index_id = GL.glGenBuffers(1)
        GL.glBindBuffer(GL.GL_ELEMENT_ARRAY_BUFFER, self.index_id)
        GL.glBufferData(GL.GL_ELEMENT_ARRAY_BUFFER, arr.nbytes, arr, GL.GL_STATIC_DRAW)
class VaexBinMapper(traitlets.HasTraits):
    max_bins = traitlets.CInt(255)
    random_state = traitlets.Any()
    subsample = traitlets.CInt(int(1e5))

    def fit(self, dataframe_wrapper):
        # `features` is the module-level feature list installed by the
        # using_vaex() context manager (see PyGBMModel.fit below)
        dataframe = dataframe_wrapper.ds
        self.bin_thresholds_ = []
        for feature in features:
            X = dataframe[feature].values.reshape((-1, 1)).astype(np.float32)
            midpoints = pygbm.binning.find_binning_thresholds(
                X, self.max_bins, subsample=self.subsample,
                random_state=self.random_state)[0]
            self.bin_thresholds_.append(midpoints)

    def transform(self, dataframe_wrapper):
        dataframe = dataframe_wrapper.ds
        N = len(dataframe)
        M = len(features)
        # fortran order so 1 column is contiguous in memory
        binned = np.zeros((N, M), dtype=np.uint8, order='F')
        for m, feature in enumerate(features):
            X = dataframe[feature].values.reshape((-1, 1)).astype(np.float32)
            # bin this single column with its own thresholds
            binned1 = pygbm.binning.map_to_bins(X, binning_thresholds=[self.bin_thresholds_[m]])
            assert binned1.shape[1] == 1
            binned[:, m] = binned1[:, 0]
        return binned

    def fit_transform(self, dataframe_wrapper):
        self.fit(dataframe_wrapper)
        return self.transform(dataframe_wrapper)
class VizHistogramState(VizBaseState):
    x_expression = traitlets.Unicode()
    x_slice = traitlets.CInt(None, allow_none=True)
    type = traitlets.CaselessStrEnum(['count', 'min', 'max', 'mean'], default_value='count')
    aux = traitlets.Unicode(None, allow_none=True)
    groupby = traitlets.Unicode(None, allow_none=True)
    groupby_normalize = traitlets.Bool(False, allow_none=True)
    x_min = traitlets.CFloat(None, allow_none=True)
    x_max = traitlets.CFloat(None, allow_none=True)
    grid = traitlets.Any().tag(**serialize_numpy)
    grid_sliced = traitlets.Any().tag(**serialize_numpy)
    x_centers = traitlets.Any().tag(**serialize_numpy)
    x_shape = traitlets.CInt(None, allow_none=True)
    # centers = traitlets.Any()

    def __init__(self, ds, **kwargs):
        super(VizHistogramState, self).__init__(ds, **kwargs)
        self.observe(lambda x: self.signal_slice.emit(self), ['x_slice'])
        self.observe(lambda x: self.calculate_limits(), ['x_expression', 'type', 'aux'])
        # no need for a recompute on these:
        # self.observe(lambda x: self.calculate_grid(), ['groupby', 'shape', 'groupby_normalize'])
        self.observe(lambda x: self._update_grid(), ['x_min', 'x_max', 'shape'])
        if self.x_min is None and self.x_max is None:
            self.calculate_limits()
        else:
            self._calculate_centers()

    def bin_parameters(self):
        yield self.x_expression, self.x_shape or self.shape, (self.x_min, self.x_max), self.x_slice

    def state_get(self):
        state = {}
        for name in self.trait_names():
            serializer = self.trait_metadata(name, 'serialize', ident)
            value = serializer(getattr(self, name))
            state[name] = value
        return state

    def state_set(self, state):
        for name in self.trait_names():
            if name in state:
                deserializer = self.trait_metadata(name, 'deserialize', ident)
                value = deserializer(state[name])
                setattr(self, name, value)

    def calculate_limits(self):
        self._calculate_limits('x', 'x_expression')
        # TODO: this is also called in the ctor, which is unnecessary work
        self.signal_regrid.emit(None)

    def limits_changed(self, change):
        # TODO: this is also called in the ctor, which is unnecessary work
        self.signal_regrid.emit(None)

    @vaex.jupyter.debounced()
    def _update_grid(self):
        self._calculate_centers()
        self.signal_regrid.emit(None)
class VolumeRendererThree(widgets.DOMWidget):
    """Widget class representing a volume (rendering) using three.js"""
    _view_name = Unicode('VolumeRendererThreeView').tag(sync=True)
    _view_module = Unicode('ipyvolume').tag(sync=True)
    _model_name = Unicode('VolumeRendererThreeModel').tag(sync=True)
    _model_module = Unicode('ipyvolume').tag(sync=True)

    data = Array(default_value=None, allow_none=True).tag(sync=True, **array_cube_png_serialization)
    data_min = traitlets.CFloat().tag(sync=True)
    data_max = traitlets.CFloat().tag(sync=True)
    tf = traitlets.Instance(TransferFunction, allow_none=True).tag(sync=True, **ipywidgets.widget_serialization)
    angle1 = traitlets.Float(0.1).tag(sync=True)
    angle2 = traitlets.Float(0.2).tag(sync=True)

    scatters = traitlets.List(traitlets.Instance(Scatter), [], allow_none=False).tag(sync=True, **ipywidgets.widget_serialization)

    animation = traitlets.Float(1000.0).tag(sync=True)

    ambient_coefficient = traitlets.Float(0.5).tag(sync=True)
    diffuse_coefficient = traitlets.Float(0.8).tag(sync=True)
    specular_coefficient = traitlets.Float(0.5).tag(sync=True)
    specular_exponent = traitlets.Float(5).tag(sync=True)
    stereo = traitlets.Bool(False).tag(sync=True)
    fullscreen = traitlets.Bool(False).tag(sync=True)

    width = traitlets.CInt(500).tag(sync=True)
    height = traitlets.CInt(400).tag(sync=True)
    downscale = traitlets.CInt(1).tag(sync=True)
    show = traitlets.Unicode("Volume").tag(sync=True)  # for debugging

    xlim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)
    ylim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)
    zlim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)

    style = traitlets.Dict(default_value=default_style).tag(sync=True)
class PixelizeEffect(CameraEffect):
    pixel_size = traitlets.CInt(4)

    @traitlets.default("func")
    def _default_func(self):
        # By the way, this *way* over-JITs things -- need to turn pixel_size
        # into an argument.
        @numba.jit(nopython=True)
        def func(input_arr, output_arr, pixel_size):
            for i in range(input_arr.shape[0]):
                for j in range(input_arr.shape[1]):
                    for k in range(input_arr.shape[2]):
                        # reverse the channel order while sampling the
                        # top-left pixel of each pixel_size block
                        output_arr[i, j, 2 - k] = input_arr[
                            (i // pixel_size) * pixel_size,
                            (j // pixel_size) * pixel_size,
                            k,
                        ]
        return func

    def call_func(self, input_arr, output_arr):
        return self.func(input_arr, output_arr, self.pixel_size)

    def handle_key(self, symbol, modifiers):
        if symbol == pyglet.window.key.A:
            self.pixel_size += 1
        elif symbol == pyglet.window.key.S:
            self.pixel_size = max(self.pixel_size - 1, 1)
        else:
            return False
        print(f"Pixel size is now {self.pixel_size}")
        return True
class HexViewer(ipywidgets.DOMWidget):
    _model_name = traitlets.Unicode("HexViewerModel").tag(sync=True)
    _model_module = traitlets.Unicode("jupyterlab_kaitai").tag(sync=True)
    _model_module_version = traitlets.Unicode(EXTENSION_VERSION).tag(sync=True)
    _view_name = traitlets.Unicode("HexViewerView").tag(sync=True)
    _view_module = traitlets.Unicode("jupyterlab_kaitai").tag(sync=True)
    _view_module_version = traitlets.Unicode(EXTENSION_VERSION).tag(sync=True)
    buffer = traitlets.Bytes(allow_none=False).tag(sync=True, **bytes_serialization)
    selectionStart = traitlets.CInt().tag(sync=True)
    selectionEnd = traitlets.CInt().tag(sync=True)

    def __init__(self, buffer=None, *args, **kwargs):
        kwargs["buffer"] = buffer if buffer is not None else b""
        super().__init__(*args, **kwargs)
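# Hedged usage sketch for HexViewer in a notebook. It assumes the
# jupyterlab_kaitai frontend extension is installed; the file name below is
# purely illustrative.
with open("sample.bin", "rb") as f:
    viewer = HexViewer(buffer=f.read())
viewer.selectionStart = 0
viewer.selectionEnd = 15  # highlight the first 16 bytes
viewer  # display the widget as the last expression in a notebook cell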
class CurveCollection(CurveData):
    name = "curve_collection"
    data = Array()
    n_vertices = traitlets.CInt()

    def add_curve(self, curve):
        # curve is an ndarray of points
        assert curve.shape[0] > 1  # a curve needs at least 2 points
        assert curve.shape[1] == 3  # each point must have 3 components (x, y, z)
        # double up the indices so segments can be drawn with GL_LINES
        index_range = np.arange(0, curve.shape[0])
        line_indices = np.column_stack([index_range, index_range]).ravel()[1:-1]
        data = curve[line_indices]
        # append the homogeneous 4th coordinate
        data = np.column_stack([data, np.ones((data.shape[0],))])
        if self.data.shape:
            self.data = np.concatenate([self.data, data])
        else:
            self.data = data

    def add_data(self):
        self.n_vertices = self.data.shape[0]
        self.vertex_array.attributes.append(
            VertexAttribute(name="model_vertex", data=self.data.astype("f4"))
        )
        self.vertex_array.indices = np.arange(0, self.n_vertices).astype("uint32")
        self.size = self.n_vertices
class ParticlePositions(SceneData):
    name = "particle_positions"
    data_source = traitlets.Instance(YTDataContainer)
    particle_type = traitlets.Unicode("all")
    radius_field = traitlets.Unicode(None, allow_none=True)
    color_field = traitlets.Unicode(None, allow_none=True)
    position_field = traitlets.Unicode("particle_position")
    size = traitlets.CInt(-1)

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        model_vertex = np.array(
            [[-1, 1], [-1, -1], [1, 1], [1, -1]], order="F", dtype="f4"
        )
        va = VertexArray(name="particle_positions")
        va.attributes.append(
            VertexAttribute(name="model_vertex", data=model_vertex, divisor=0)
        )
        for attr in ("position_field", "radius_field", "color_field"):
            if getattr(self, attr) is None:
                continue
            field = self.data_source[self.particle_type, getattr(self, attr)]
            if field.units.dimensions is length:
                field.convert_to_units("unitary")
            field = field.astype("f4").d
            if field.ndim == 1:
                field.shape = (field.size, 1)
            else:
                self.size = field.shape[0]  # for positions
            print(f"Setting {attr} to a field of shape {field.shape}")
            va.attributes.append(VertexAttribute(name=attr, data=field, divisor=1))
        print(f"Size is now: {self.size}")
        return va
class Image(bqplot.marks.Mark):
    src = bqplot.marks.Unicode().tag(sync=True)
    x = bqplot.marks.Float().tag(sync=True)
    y = bqplot.marks.Float().tag(sync=True)
    view_count = traitlets.CInt(0).tag(sync=True)
    width = bqplot.marks.Float().tag(sync=True)
    height = bqplot.marks.Float().tag(sync=True)
    preserve_aspect_ratio = bqplot.marks.Unicode('').tag(sync=True)
    _model_module = bqplot.marks.Unicode('vaex.ext.bqplot').tag(sync=True)
    _view_module = bqplot.marks.Unicode('vaex.ext.bqplot').tag(sync=True)
    _view_name = bqplot.marks.Unicode('Image').tag(sync=True)
    _model_name = bqplot.marks.Unicode('ImageModel').tag(sync=True)
    scales_metadata = bqplot.marks.Dict({
        'x': {'orientation': 'horizontal', 'dimension': 'x'},
        'y': {'orientation': 'vertical', 'dimension': 'y'},
    }).tag(sync=True)

    def __init__(self, **kwargs):
        self._drag_end_handlers = bqplot.marks.CallbackDispatcher()
        super(Image, self).__init__(**kwargs)
class CurveData(SceneData):
    name = "curve_data"
    data = Array()
    n_vertices = traitlets.CInt()

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        va = VertexArray(name="vertices")
        return va

    def add_data(self, curve):
        # curve is an ndarray of points
        assert curve.shape[0] > 1  # a curve needs at least 2 points
        assert curve.shape[1] == 3  # each point must have 3 components (x, y, z)
        # add the singleton 4th (homogeneous) dimension
        data = np.ones((curve.shape[0], 4))
        data[:, 0:3] = curve
        self.n_vertices = curve.shape[0]
        self.data = data
        self.vertex_array.attributes.append(
            VertexAttribute(name="model_vertex", data=data.astype("f4"))
        )
        self.vertex_array.indices = np.arange(0, self.n_vertices).astype("uint32")
        self.size = self.n_vertices
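# Sketch: feeding a polyline into CurveData. This assumes the SceneData
# plumbing that supplies vertex_array and size; the helix is illustrative.
theta = np.linspace(0.0, 4.0 * np.pi, 100)
helix = np.column_stack([np.cos(theta), np.sin(theta), theta / (4.0 * np.pi)])
curve_data = CurveData()
curve_data.add_data(helix)  # pads to homogeneous (x, y, z, 1) and uploads vertices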
class AnimationWidget(widgets.DOMWidget):
    """A widget that periodically increments a value.

    :param value: A float between 0 and 1.
    :param run: The state of the timer; if True, the timer is enabled.

    Produces the following signal: at each sample, the value is
    interpolated as val = t / period.

        1- ^   ____
           |  /
           | /
           |/
        0- |----->
           | period
    """
    _view_name = traitlets.Unicode('AnimationView').tag(sync=True)
    _model_name = traitlets.Unicode('AnimationModel').tag(sync=True)
    _view_module = traitlets.Unicode('animation-widget').tag(sync=True)
    _model_module = traitlets.Unicode('animation-widget').tag(sync=True)

    # Signal value
    value = traitlets.CFloat(0.0).tag(sync=True)
    # Whether the timer is active
    run = traitlets.CBool(False).tag(sync=True)
    # Signal period (in ms)
    period = traitlets.CFloat(5000).tag(sync=True)
    # Number of samples in a period
    nbsamples = traitlets.CInt(100).tag(sync=True)
    # Loop
    loop = traitlets.CBool(False).tag(sync=True)
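# Hedged usage sketch: drive another widget's float trait from the ramp.
# Assumes the animation-widget frontend is installed and that `widgets` is
# ipywidgets (as the base class above suggests); the slider is illustrative.
anim = AnimationWidget(period=2000.0, nbsamples=50, loop=True)
target = widgets.FloatSlider(min=0.0, max=1.0)
widgets.jslink((anim, 'value'), (target, 'value'))  # ramp drives the slider
anim.run = True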
class Size(Shelf):
    # TODO: min for value
    # TODO: supported role
    # TODO: supported mark types
    shelf_name = 'size'
    scale = T.Instance(Scale, default_value=None, allow_none=True)
    legend = T.Bool(True)
    value = T.CInt(30)
class PyGBMModel(state.HasState):
    features = traitlets.List(traitlets.Unicode())
    num_round = traitlets.CInt()
    param = traitlets.Dict()
    learning_rate = traitlets.Float(0.1)
    max_iter = traitlets.Int(10)
    max_bins = traitlets.Int(255)
    max_leaf_nodes = traitlets.Int(31)
    random_state = traitlets.Int(0)
    verbose = traitlets.Int(1)
    prediction_name = traitlets.Unicode(default_value='pygbm_prediction')

    def fit(self, dataframe, label):
        self.pygbm_model = pygbm.GradientBoostingMachine(
            learning_rate=self.learning_rate,
            max_iter=self.max_iter,
            max_bins=self.max_bins,
            max_leaf_nodes=self.max_leaf_nodes,
            random_state=self.random_state,
            scoring=None,
            verbose=self.verbose,
            validation_split=None)
        if not hasattr(label, 'values'):
            label = dataframe[label]
        y = label.values.astype(np.float32)
        with using_vaex(self.features):
            dsw = DataFrameWrapper(dataframe)
            self.pygbm_model.fit(dsw, y)

    def predict(self, dataframe):
        data = np.vstack([dataframe[k].values for k in self.features]).T
        return self.pygbm_model.predict(data)

    def __call__(self, *args):
        data = np.vstack([arg.astype(np.float32) for arg in args]).T.copy()
        return self.pygbm_model.predict(data)

    def transform(self, dataframe):
        copy = dataframe.copy()
        lazy_function = copy.add_function('pygbm_prediction_function', self)
        expression = lazy_function(*self.features)
        copy.add_virtual_column(self.prediction_name, expression, unique=False)
        return copy

    def state_get(self):
        return dict(tree_state=base64.encodebytes(pickle.dumps(self.pygbm_model)).decode('ascii'),
                    substate=super(PyGBMModel, self).state_get())

    def state_set(self, state, trusted=True):
        super(PyGBMModel, self).state_set(state['substate'])
        if trusted is False:
            raise ValueError("Will not unpickle data when source is not trusted")
        self.pygbm_model = pickle.loads(base64.decodebytes(state['tree_state'].encode('ascii')))
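# Hedged usage sketch for PyGBMModel, mirroring the XGBoost/CatBoost
# examples further down (load_iris and its 'class_' column come from
# vaex.ml.datasets):
df = vaex.ml.datasets.load_iris()
model = PyGBMModel(
    features=['sepal_width', 'petal_length', 'sepal_length', 'petal_width'],
    max_iter=20)
model.fit(df, 'class_')
df = model.transform(df)  # adds the 'pygbm_prediction' virtual column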
class Figure(widgets.DOMWidget):
    """Widget class representing a volume (rendering) using three.js"""
    _view_name = Unicode('FigureView').tag(sync=True)
    _view_module = Unicode('ipyvolume').tag(sync=True)
    _model_name = Unicode('FigureModel').tag(sync=True)
    _model_module = Unicode('ipyvolume').tag(sync=True)

    volume_data = Array(default_value=None, allow_none=True).tag(sync=True, **create_array_cube_png_serialization('volume_data'))
    data_min = traitlets.CFloat().tag(sync=True)
    data_max = traitlets.CFloat().tag(sync=True)
    tf = traitlets.Instance(TransferFunction, allow_none=True).tag(sync=True, **ipywidgets.widget_serialization)
    angle1 = traitlets.Float(0.1).tag(sync=True)
    angle2 = traitlets.Float(0.2).tag(sync=True)

    scatters = traitlets.List(traitlets.Instance(Scatter), [], allow_none=False).tag(sync=True, **ipywidgets.widget_serialization)

    animation = traitlets.Float(1000.0).tag(sync=True)
    animation_exponent = traitlets.Float(.5).tag(sync=True)

    ambient_coefficient = traitlets.Float(0.5).tag(sync=True)
    diffuse_coefficient = traitlets.Float(0.8).tag(sync=True)
    specular_coefficient = traitlets.Float(0.5).tag(sync=True)
    specular_exponent = traitlets.Float(5).tag(sync=True)
    stereo = traitlets.Bool(False).tag(sync=True)

    screen_capture_enabled = traitlets.Bool(False).tag(sync=True)
    screen_capture_mime_type = traitlets.Unicode(default_value='image/png').tag(sync=True)
    screen_capture_data = traitlets.Unicode(default_value=None, allow_none=True).tag(sync=True)
    fullscreen = traitlets.Bool(False).tag(sync=True)

    camera_control = traitlets.Unicode(default_value='trackball').tag(sync=True)

    width = traitlets.CInt(500).tag(sync=True)
    height = traitlets.CInt(400).tag(sync=True)
    downscale = traitlets.CInt(1).tag(sync=True)
    show = traitlets.Unicode("Volume").tag(sync=True)  # for debugging

    xlim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)
    ylim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)
    zlim = traitlets.List(traitlets.CFloat, default_value=[0, 1], minlen=2, maxlen=2).tag(sync=True)

    xlabel = traitlets.Unicode("x").tag(sync=True)
    ylabel = traitlets.Unicode("y").tag(sync=True)
    zlabel = traitlets.Unicode("z").tag(sync=True)

    style = traitlets.Dict(default_value=ipyvolume.style.default).tag(sync=True)
class FacetCoordinate(Shelf):
    # TODO: supported types for aggregate
    # TODO: min and max for padding
    # TODO: min for height
    aggregate = T.Enum(['count'], default_value=None, allow_none=True)
    padding = T.CFloat(0.1)
    axis = T.Instance(Axis, default_value=None, allow_none=True)
    height = T.CInt(150)
class VizBaseState(vaex.ml.state.HasState):
    shape = traitlets.CInt(64)

    def __init__(self, ds, **kwargs):
        super(VizBaseState, self).__init__(**kwargs)
        self.ds = ds
        self.signal_slice = vaex.events.Signal()
        self.signal_regrid = vaex.events.Signal()

    def state_get(self):
        state = {}
        for name in self.trait_names():
            serializer = self.trait_metadata(name, 'serialize', ident)
            value = serializer(getattr(self, name))
            state[name] = value
        return state

    def state_set(self, state):
        for name in self.trait_names():
            if name in state:
                deserializer = self.trait_metadata(name, 'deserialize', ident)
                value = deserializer(state[name])
                setattr(self, name, value)

    def _calculate_limits(self, attr='x', expression='x_expression'):
        expression = getattr(self, expression)
        categorical = self.ds.is_category(expression)
        if categorical:
            N = self.ds.category_count(expression)
            min, max = -0.5, N - 0.5
            centers = np.arange(N)
            setattr(self, attr + '_shape', N)
        else:
            min, max = self.ds.minmax(expression)
            centers = self.ds.bin_centers(expression, [min, max], shape=getattr(self, attr + '_shape') or self.shape)
        setattr(self, attr + '_min', min)
        setattr(self, attr + '_max', max)
        setattr(self, attr + '_centers', centers)

    def _calculate_centers(self, attr='x', expression='x_expression'):
        expression = getattr(self, expression)
        categorical = self.ds.is_category(expression)
        min, max = getattr(self, attr + '_min'), getattr(self, attr + '_max')
        if min is None or max is None:
            # special condition that can occur during testing, since debounced does not work
            return
        if categorical:
            N = self.ds.category_count(expression)
            centers = np.arange(N)
            setattr(self, attr + '_shape', N)
        else:
            # print(expression, [min, max], getattr(self, attr + '_shape') or self.shape)
            centers = self.ds.bin_centers(expression, [min, max], shape=getattr(self, attr + '_shape') or self.shape)
        setattr(self, attr + '_centers', centers)
class VizBase2dState(VizBaseState):
    x_expression = traitlets.Unicode()
    y_expression = traitlets.Unicode()
    x_slice = traitlets.CInt(None, allow_none=True)
    y_slice = traitlets.CInt(None, allow_none=True)
    type = traitlets.CaselessStrEnum(['count', 'min', 'max', 'mean'], default_value='count')
    aux = traitlets.Unicode(None, allow_none=True)
    groupby = traitlets.Unicode(None, allow_none=True)
    x_shape = traitlets.CInt(None, allow_none=True)
    y_shape = traitlets.CInt(None, allow_none=True)
    x_min = traitlets.CFloat()
    x_max = traitlets.CFloat()
    y_min = traitlets.CFloat()
    y_max = traitlets.CFloat()

    def __init__(self, ds, **kwargs):
        super(VizBase2dState, self).__init__(ds, **kwargs)
        self.observe(lambda x: self.calculate_limits(), ['x_expression', 'y_expression', 'type', 'aux'])
        self.observe(lambda x: self.signal_slice.emit(self), ['x_slice', 'y_slice'])
        # no need for recompute
        # self.observe(lambda x: self.calculate_grid(), ['groupby', 'shape', 'groupby_normalize'])
        self.observe(self.limits_changed, ['x_min', 'x_max', 'y_min', 'y_max'])
        self.calculate_limits()

    def bin_parameters(self):
        yield self.x_expression, self.x_shape or self.shape, (self.x_min, self.x_max), self.x_slice
        yield self.y_expression, self.y_shape or self.shape, (self.y_min, self.y_max), self.y_slice

    def calculate_limits(self):
        self._calculate_limits('x', 'x_expression')
        self._calculate_limits('y', 'y_expression')
        self.signal_regrid.emit(self)

    def limits_changed(self, change):
        self._calculate_centers()
        self.signal_regrid.emit(self)
class CameraPipe(traitlets.HasTraits):
    input_device = traitlets.Unicode("/dev/video0")
    output_device = traitlets.Unicode("/dev/video1")
    width = traitlets.CInt(640)
    height = traitlets.CInt(480)
    fps = traitlets.CInt(30)
    effects = traitlets.List()
    cam_in = traitlets.Instance(cv2.VideoCapture)
    cam_out = traitlets.Instance(pyfakewebcam.FakeWebcam, allow_none=True)

    def __init__(self, *args, **kwargs):
        super(CameraPipe, self).__init__(*args, **kwargs)
        # We don't manage these arrays with traitlets.
        # Pyglet does a lot of checking and byte-joining with arrays based on
        # pitch, so we hold on to things here and then read in an upside-down
        # way.
        self.display_array = np.zeros((self.height, self.width, 3), dtype="u1", order="C")
        self.output_arr1 = np.flipud(self.display_array)
        self.output_arr2 = np.zeros((self.height, self.width, 3), dtype="u1", order="C")

    @traitlets.default("cam_in")
    def _default_cam_in(self):
        in_cam = cv2.VideoCapture(self.input_device)
        in_cam.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
        in_cam.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)
        in_cam.set(cv2.CAP_PROP_FPS, self.fps)
        return in_cam

    @traitlets.default("cam_out")
    def _default_cam_out(self):
        return pyfakewebcam.FakeWebcam(self.output_device, self.width, self.height)

    def next_frame(self, dt=0.0):
        _, self.output_arr1[:] = self.cam_in.read()
        # pixelize_frame(frame, output_arr)
        for effect in self.effects:
            effect(self.output_arr1, self.output_arr2)
            self.output_arr1[:] = self.output_arr2[:]
        if self.cam_out is not None:
            self.cam_out.schedule_frame(self.output_arr1)
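# Minimal wiring sketch. Assumes a v4l2loopback device at /dev/video1 and
# that CameraEffect instances are callable as effect(in_arr, out_arr), as
# next_frame above expects; PixelizeEffect is defined earlier in this file.
pipe = CameraPipe(effects=[PixelizeEffect()])
pyglet.clock.schedule_interval(pipe.next_frame, 1.0 / pipe.fps)
pyglet.app.run()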
class MeshData(SceneData):
    name = "mesh"
    data_source = traitlets.Instance(YTDataContainer)
    texture_objects = traitlets.Dict(trait=traitlets.Instance(Texture3D))
    blocks = traitlets.Dict(default_value={})
    scale = traitlets.Bool(False)
    size = traitlets.CInt(-1)

    def get_mesh_data(self, data_source, field):
        """
        This reads the mesh data into a form that can be fed in to OpenGL.
        """
        # get mesh information
        try:
            ftype, fname = field
            mesh_id = int(ftype[-1])
        except ValueError:
            mesh_id = 0

        mesh = data_source.ds.index.meshes[mesh_id - 1]
        offset = mesh._index_offset
        vertices = mesh.connectivity_coords
        indices = mesh.connectivity_indices - offset

        data = data_source[field]

        return triangulate_mesh(vertices, data, indices)

    def add_data(self, field):
        v, d, i = self.get_mesh_data(self.data_source, field)
        v.shape = (v.size // 3, 3)
        v = np.concatenate([v, np.ones((v.shape[0], 1))], axis=-1).astype("f4")
        d.shape = (d.size, 1)
        i.shape = (i.size, 1)
        i = i.astype("uint32")
        # d[:] = np.mgrid[0.0:1.0:1j*d.size].astype("f4")[:,None]
        self.vertex_array.attributes.append(
            VertexAttribute(name="model_vertex", data=v)
        )
        self.vertex_array.attributes.append(
            VertexAttribute(name="vertex_data", data=d.astype("f4"))
        )
        self.vertex_array.indices = i
        self.size = i.size

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        return VertexArray(name="mesh_info", each=0)
class CycleTransformer(Transformer):
    '''A strategy for transforming cyclical features (e.g. angles, time).

    Think of each feature as an angle of a unit circle in polar coordinates,
    and then obtain the x and y coordinate projections, or the cos and sin
    components respectively.

    Suitable for a variety of machine learning tasks.
    It preserves the cyclical continuity of the feature.
    Inspired by: http://blog.davidkaleko.com/feature-engineering-cyclical-features.html

    >>> df = vaex.from_arrays(days=[0, 1, 2, 3, 4, 5, 6])
    >>> cyctrans = vaex.ml.CycleTransformer(n=7, features=['days'])
    >>> cyctrans.fit_transform(df)
      #    days     days_x     days_y
      0       0   1          0
      1       1   0.62349    0.781831
      2       2  -0.222521   0.974928
      3       3  -0.900969   0.433884
      4       4  -0.900969  -0.433884
      5       5  -0.222521  -0.974928
      6       6   0.62349   -0.781831
    '''
    n = traitlets.CInt(allow_none=False, help='The number of elements in one cycle.')
    prefix_x = traitlets.Unicode(default_value="", help='Prefix for the x-component of the transformed features.').tag(ui='Text')
    prefix_y = traitlets.Unicode(default_value="", help='Prefix for the y-component of the transformed features.').tag(ui='Text')
    suffix_x = traitlets.Unicode(default_value="_x", help='Suffix for the x-component of the transformed features.').tag(ui='Text')
    suffix_y = traitlets.Unicode(default_value="_y", help='Suffix for the y-component of the transformed features.').tag(ui='Text')

    def fit(self, df):
        '''Fit a CycleTransformer to the DataFrame.

        This is a dummy method, as fitting is not needed for the transformation to be applied.

        :param df: A vaex DataFrame.
        '''
        pass

    def transform(self, df):
        '''Transform a DataFrame with a CycleTransformer.

        :param df: A vaex DataFrame.
        '''
        copy = df.copy()
        for feature in self.features:
            name_x = self.prefix_x + feature + self.suffix_x
            copy[name_x] = np.cos(2 * np.pi * copy[feature] / self.n)
            name_y = self.prefix_y + feature + self.suffix_y
            copy[name_y] = np.sin(2 * np.pi * copy[feature] / self.n)

        return copy
class ColorOffsetEffect(CameraEffect):
    red_offset = traitlets.CInt(0)
    green_offset = traitlets.CInt(0)
    blue_offset = traitlets.CInt(0)

    @traitlets.default("func")
    def _default_func(self):
        @numba.jit(nopython=True)
        def func(input_arr, output_arr, red_offset, green_offset, blue_offset):
            for i in range(input_arr.shape[0]):
                for j in range(input_arr.shape[1]):
                    # red
                    i1 = (i + red_offset) % input_arr.shape[0]
                    output_arr[i, j, 0] = input_arr[i1, j, 0]
                    # green
                    i1 = (i + green_offset) % input_arr.shape[0]
                    output_arr[i, j, 1] = input_arr[i1, j, 1]
                    # blue
                    i1 = (i + blue_offset) % input_arr.shape[0]
                    output_arr[i, j, 2] = input_arr[i1, j, 2]
        return func

    def call_func(self, input_arr, output_arr):
        return self.func(input_arr, output_arr, self.red_offset, self.green_offset, self.blue_offset)

    def handle_key(self, symbol, modifiers):
        sign = 1
        if modifiers & pyglet.window.key.MOD_SHIFT:
            sign = -1
        if symbol == pyglet.window.key.R:
            self.red_offset = max(0, sign + self.red_offset)
        elif symbol == pyglet.window.key.G:
            self.green_offset = max(0, sign + self.green_offset)
        elif symbol == pyglet.window.key.B:
            self.blue_offset = max(0, sign + self.blue_offset)
        else:
            return False
        return True
class TextCharacters(SceneData):
    characters = traitlets.Dict(value_trait=traitlets.Instance(Character))
    name = "text_overlay"
    font = FontTrait("DejaVu Sans")
    font_size = traitlets.CInt(32)

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        return VertexArray(name="char_info", each=6)

    def build_textures(self):
        # This doesn't check if the textures have already been built
        self.font.set_size(self.font_size, 200)
        chars = [ord(_) for _ in string.printable]
        tex_ids = GL.glGenTextures(len(chars))
        vert = []
        for i, (tex_id, char_code) in enumerate(zip(tex_ids, chars)):
            self.font.clear()
            self.font.set_text(chr(char_code), flags=LOAD_FORCE_AUTOHINT)
            self.font.draw_glyphs_to_bitmap(antialiased=True)
            glyph = self.font.load_char(char_code)
            x0, y0, x1, y1 = glyph.bbox
            bitmap = self.font.get_image().astype(">f4") / 255.0
            dx = 1.0 / bitmap.shape[0]
            dy = 1.0 / bitmap.shape[1]
            triangles = np.array(
                [
                    [x0, y1, 0.0 + dx / 2.0, 0.0 + dy / 2.0],
                    [x0, y0, 0.0 + dx / 2.0, 1.0 - dy / 2.0],
                    [x1, y0, 1.0 - dx / 2.0, 1.0 - dy / 2.0],
                    [x0, y1, 0.0 + dx / 2.0, 0.0 + dy / 2.0],
                    [x1, y0, 1.0 - dx / 2.0, 1.0 - dy / 2.0],
                    [x1, y1, 1.0 - dx / 2.0, 0.0 + dy / 2.0],
                ],
                dtype="<f4",
            )
            vert.append(triangles)
            texture = Texture2D(
                texture_name=tex_id, data=bitmap, boundary_x="clamp", boundary_y="clamp"
            )
            # I can't find information as to why horiAdvance is a factor of 8
            # larger than the other factors.  I assume it is referenced
            # somewhere, but I cannot find it.
            self.characters[chr(char_code)] = Character(
                texture, i, glyph.horiAdvance / 8.0, glyph.vertAdvance
            )
        vert = np.concatenate(vert)
        self.vertex_array.attributes.append(
            VertexAttribute(name="quad_vertex", data=vert.astype("<f4"))
        )
class LineData(SceneData):
    name = "line_data"
    n_values = traitlets.CInt()

    @traitlets.default("vertex_array")
    def _default_vertex_array(self):
        return VertexArray(name="vertices", each=6)

    def add_data(self, lines):
        assert lines.shape[1] == 4
        x_coord = np.mgrid[0.0 : 1.0 : lines.shape[0] * 1j].astype("f4")
        x_coord = x_coord.reshape((-1, 1))
        self.n_values = lines.shape[0]
        self.vertex_array.attributes.append(
            VertexAttribute(name="rgba_values", data=lines)
        )
        self.vertex_array.attributes.append(
            VertexAttribute(name="x_coord", data=x_coord)
        )
class Texture(traitlets.HasTraits):
    texture_name = traitlets.CInt(-1)
    data = traittypes.Array(None, allow_none=True)
    channels = GLValue("r32f")
    min_filter = GLValue("linear")
    mag_filter = GLValue("linear")

    @traitlets.default("texture_name")
    def _default_texture_name(self):
        return GL.glGenTextures(1)

    @contextmanager
    def bind(self, target=0):
        _ = GL.glActiveTexture(TEX_TARGETS[target])
        _ = GL.glBindTexture(self.dim_enum, self.texture_name)
        yield
        _ = GL.glActiveTexture(TEX_TARGETS[target])
        GL.glBindTexture(self.dim_enum, 0)
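# Sketch: binding a texture around a draw call. Assumes a concrete subclass
# such as the Texture2D used elsewhere in this file, which supplies dim_enum
# and uploads `data` when it changes; the random data is illustrative.
tex = Texture2D(data=np.random.random((64, 64)).astype("f4"))
with tex.bind(target=0):
    pass  # issue draw calls that sample texture unit 0 here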
class XGBoostModel(state.HasState):
    '''The XGBoost algorithm.

    XGBoost is an optimized distributed gradient boosting library designed to
    be highly efficient, flexible and portable. It implements machine learning
    algorithms under the Gradient Boosting framework. XGBoost provides parallel
    tree boosting (also known as GBDT, GBM) that solves many data science
    problems in a fast and accurate way.
    (https://github.com/dmlc/xgboost)

    Example:

    >>> import vaex
    >>> import vaex.ml.xgboost
    >>> df = vaex.ml.datasets.load_iris()
    >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
    >>> df_train, df_test = vaex.ml.train_test_split(df)
    >>> params = {
            'max_depth': 5,
            'learning_rate': 0.1,
            'objective': 'multi:softmax',
            'num_class': 3,
            'subsample': 0.80,
            'colsample_bytree': 0.80,
            'silent': 1}
    >>> booster = vaex.ml.xgboost.XGBoostModel(features=features, num_boost_round=100, params=params)
    >>> booster.fit(df_train, 'class_')
    >>> df_train = booster.transform(df_train)
    >>> df_train.head(3)
      #    sepal_length    sepal_width    petal_length    petal_width    class_    xgboost_prediction
      0             5.4            3               4.5            1.5         1                     1
      1             4.8            3.4             1.6            0.2         0                     0
      2             6.9            3.1             4.9            1.5         1                     1
    >>> df_test = booster.transform(df_test)
    >>> df_test.head(3)
      #    sepal_length    sepal_width    petal_length    petal_width    class_    xgboost_prediction
      0             5.9            3               4.2            1.5         1                     1
      1             6.1            3               4.6            1.4         1                     1
      2             6.6            2.9             4.6            1.3         1                     1
    '''

    features = traitlets.List(traitlets.Unicode(), help='List of features to use when fitting the XGBoostModel.')
    num_boost_round = traitlets.CInt(help='Number of boosting iterations.')
    params = traitlets.Dict(help='A dictionary of parameters to be passed on to the XGBoost model.')
    prediction_name = traitlets.Unicode(default_value='xgboost_prediction', help='The name of the virtual column housing the predictions.')

    def __call__(self, *args):
        data2d = np.vstack([arg.astype(np.float64) for arg in args]).T.copy()
        dmatrix = xgboost.DMatrix(data2d)
        return self.booster.predict(dmatrix)

    def transform(self, df):
        '''Transform a DataFrame such that it contains the predictions of the XGBoostModel in form of a virtual column.

        :param df: A vaex DataFrame. It should have the same columns as the DataFrame used to train the model.

        :return copy: A shallow copy of the DataFrame that includes the XGBoostModel prediction as a virtual column.
        :rtype: DataFrame
        '''
        copy = df.copy()
        lazy_function = copy.add_function('xgboost_prediction_function', self)
        expression = lazy_function(*self.features)
        copy.add_virtual_column(self.prediction_name, expression, unique=False)
        return copy

    def fit(self, df, target, evals=(), early_stopping_rounds=None, evals_result=None, verbose_eval=False, **kwargs):
        '''Fit the XGBoost model given a DataFrame.
        This method accepts all key word arguments for the xgboost.train method.

        :param df: A vaex DataFrame containing the training features.
        :param target: The column name of the target variable.
        :param evals: A list of pairs (DataFrame, string).
            List of items to be evaluated during training, this allows user to watch performance on the validation set.
        :param int early_stopping_rounds: Activates early stopping.
            Validation error needs to decrease at least every *early_stopping_rounds* round(s) to continue training.
            Requires at least one item in *evals*. If there's more than one, will use the last.
            Returns the model from the last iteration (not the best one).
        :param dict evals_result: A dictionary storing the evaluation results of all the items in *evals*.
        :param bool verbose_eval: Requires at least one item in *evals*.
            If *verbose_eval* is True then the evaluation metric on the validation set is printed at each boosting stage.
        '''
        data = df[self.features].values
        target_data = df.evaluate(target)
        dtrain = xgboost.DMatrix(data, target_data)
        if evals is not None:
            evals = [list(elem) for elem in evals]
            for item in evals:
                data = item[0][self.features].values
                target_data = item[0].evaluate(target)
                item[0] = xgboost.DMatrix(data, target_data)
        else:
            evals = ()
        # This does the actual training / fitting of the xgboost model
        self.booster = xgboost.train(
            params=self.params,
            dtrain=dtrain,
            num_boost_round=self.num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            evals_result=evals_result,
            verbose_eval=verbose_eval,
            **kwargs)

    def predict(self, df, **kwargs):
        '''Provided a vaex DataFrame, get an in-memory numpy array with the predictions from the XGBoost model.
        This method accepts the key word arguments of the predict method from XGBoost.

        :returns: An in-memory numpy array containing the XGBoostModel predictions.
        :rtype: numpy.array
        '''
        data = df[self.features].values
        dmatrix = xgboost.DMatrix(data)
        return self.booster.predict(dmatrix, **kwargs)

    def state_get(self):
        filename = tempfile.mktemp()
        self.booster.save_model(filename)
        with open(filename, 'rb') as f:
            data = f.read()
        return dict(tree_state=base64.encodebytes(data).decode('ascii'),
                    substate=super(XGBoostModel, self).state_get())

    def state_set(self, state):
        super(XGBoostModel, self).state_set(state['substate'])
        data = base64.decodebytes(state['tree_state'].encode('ascii'))
        filename = tempfile.mktemp()
        with open(filename, 'wb') as f:
            f.write(data)
        self.booster = xgboost.Booster(model_file=filename)
class CatBoostModel(state.HasState):
    '''The CatBoost algorithm.

    This class provides an interface to the CatBoost algorithm.
    CatBoost is a fast, scalable, high performance Gradient Boosting on
    Decision Trees library, used for ranking, classification, regression and
    other machine learning tasks. For more information please visit
    https://github.com/catboost/catboost

    Example:

    >>> import vaex
    >>> import vaex.ml.catboost
    >>> df = vaex.ml.datasets.load_iris()
    >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
    >>> df_train, df_test = vaex.ml.train_test_split(df)
    >>> params = {
            'leaf_estimation_method': 'Gradient',
            'learning_rate': 0.1,
            'max_depth': 3,
            'bootstrap_type': 'Bernoulli',
            'objective': 'MultiClass',
            'eval_metric': 'MultiClass',
            'subsample': 0.8,
            'random_state': 42,
            'verbose': 0}
    >>> booster = vaex.ml.catboost.CatBoostModel(features=features, num_boost_round=100, params=params)
    >>> booster.fit(df_train, 'class_')
    >>> df_train = booster.transform(df_train)
    >>> df_train.head(3)
      #    sepal_length    sepal_width    petal_length    petal_width    class_    catboost_prediction
      0             5.4            3               4.5            1.5         1    [0.00615039 0.98024259 0.01360702]
      1             4.8            3.4             1.6            0.2         0    [0.99034267 0.00526382 0.0043935 ]
      2             6.9            3.1             4.9            1.5         1    [0.00688241 0.95190908 0.04120851]
    >>> df_test = booster.transform(df_test)
    >>> df_test.head(3)
      #    sepal_length    sepal_width    petal_length    petal_width    class_    catboost_prediction
      0             5.9            3               4.2            1.5         1    [0.00464228 0.98883351 0.00652421]
      1             6.1            3               4.6            1.4         1    [0.00350424 0.9882139  0.00828186]
      2             6.6            2.9             4.6            1.3         1    [0.00325705 0.98891631 0.00782664]
    '''

    features = traitlets.List(traitlets.Unicode(), help='List of features to use when fitting the CatBoostModel.')
    num_boost_round = traitlets.CInt(default_value=None, allow_none=True, help='Number of boosting iterations.')
    params = traitlets.Dict(help='A dictionary of parameters to be passed on to the CatBoostModel model.')
    pool_params = traitlets.Dict(default_value={}, help='A dictionary of parameters to be passed to the Pool data object construction.')
    prediction_name = traitlets.Unicode(default_value='catboost_prediction', help='The name of the virtual column housing the predictions.')
    prediction_type = traitlets.Enum(values=['Probability', 'Class', 'RawFormulaVal'], default_value='Probability',
                                     help='The form of the predictions. Can be "RawFormulaVal", "Probability" or "Class".')

    def __call__(self, *args):
        data2d = np.vstack([arg.astype(np.float64) for arg in args]).T.copy()
        dmatrix = catboost.Pool(data2d, **self.pool_params)
        return self.booster.predict(dmatrix, prediction_type=self.prediction_type)

    def transform(self, df):
        '''Transform a DataFrame such that it contains the predictions of the CatBoostModel in form of a virtual column.

        :param df: A vaex DataFrame. It should have the same columns as the DataFrame used to train the model.

        :return copy: A shallow copy of the DataFrame that includes the CatBoostModel prediction as a virtual column.
        :rtype: DataFrame
        '''
        copy = df.copy()
        lazy_function = copy.add_function('catboost_prediction_function', self)
        expression = lazy_function(*self.features)
        copy.add_virtual_column(self.prediction_name, expression, unique=False)
        return copy

    def fit(self, df, target, evals=None, early_stopping_rounds=None, verbose_eval=None, plot=False, **kwargs):
        '''Fit the CatBoostModel model given a DataFrame.
        This method accepts all key word arguments for the catboost.train method.

        :param df: A vaex DataFrame containing the training features.
        :param target: The column name of the target variable.
        :param evals: A list of DataFrames to be evaluated during training.
            This allows user to watch performance on the validation sets.
        :param int early_stopping_rounds: Activates early stopping.
        :param bool verbose_eval: Requires at least one item in *evals*.
            If *verbose_eval* is True then the evaluation metric on the validation set is printed at each boosting stage.
        :param bool plot: if True, display an interactive widget in the Jupyter notebook of how the train and validation
            sets score on each boosting iteration.
        '''
        data = df[self.features].values
        target_data = df.evaluate(target)
        dtrain = catboost.Pool(data=data, label=target_data, **self.pool_params)
        if evals is not None:
            for i, item in enumerate(evals):
                data = item[self.features].values
                target_data = item.evaluate(target)
                evals[i] = catboost.Pool(data=data, label=target_data, **self.pool_params)
        # This does the actual training/fitting of the catboost model
        self.booster = catboost.train(
            params=self.params,
            dtrain=dtrain,
            num_boost_round=self.num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose_eval,
            plot=plot,
            **kwargs)

    def predict(self, df, **kwargs):
        '''Provided a vaex DataFrame, get an in-memory numpy array with the predictions from the CatBoostModel model.
        This method accepts the key word arguments of the predict method from catboost.

        :param df: A vaex DataFrame.

        :returns: An in-memory numpy array containing the CatBoostModel predictions.
        :rtype: numpy.array
        '''
        data = df[self.features].values
        dmatrix = catboost.Pool(data, **self.pool_params)
        return self.booster.predict(dmatrix, prediction_type=self.prediction_type, **kwargs)

    def state_get(self):
        filename = tempfile.mktemp()
        self.booster.save_model(filename)
        with open(filename, 'rb') as f:
            data = f.read()
        return dict(tree_state=base64.encodebytes(data).decode('ascii'),
                    substate=super(CatBoostModel, self).state_get())

    def state_set(self, state, trusted=True):
        super(CatBoostModel, self).state_set(state['substate'])
        data = base64.decodebytes(state['tree_state'].encode('ascii'))
        filename = tempfile.mktemp()
        with open(filename, 'wb') as f:
            f.write(data)
        self.booster = catboost.CatBoost().load_model(fname=filename)
class Plot2dSliced(PlotBase):
    z = traitlets.Unicode(allow_none=False).tag(sync=True)
    z_slice = traitlets.CInt(default_value=0).tag(sync=True)  # TODO: do linking at python side
    z_shape = traitlets.CInt(default_value=10).tag(sync=True)
    z_relative = traitlets.CBool(False).tag(sync=True)
    z_min = traitlets.CFloat(default_value=None, allow_none=True).tag(sync=True)
    z_max = traitlets.CFloat(default_value=None, allow_none=True).tag(sync=True)

    def __init__(self, **kwargs):
        self.z_min_extreme, self.z_max_extreme = kwargs["dataset"].minmax(kwargs["z"])
        super(Plot2dSliced, self).__init__(**kwargs)
        self.create_tools()

    def get_limits(self, limits):
        limits = self.dataset.limits(self.get_binby(), limits)
        limits = list([list(k) for k in limits])
        if self.z_min is None:
            self.z_min = limits[2][0]
        if self.z_max is None:
            self.z_max = limits[2][1]
        limits[2][0] = self.z_min
        limits[2][1] = self.z_max
        return limits

    def select_rectangle(self, x1, y1, x2, y2, mode="replace"):
        dz = self.z_max - self.z_min
        z1 = self.z_min + dz * self.z_slice / self.z_shape
        z2 = self.z_min + dz * (self.z_slice + 1) / self.z_shape
        spaces = [self.x, self.y, self.z]
        limits = [[x1, x2], [y1, y2], [z1, z2]]
        self.dataset.select_box(spaces, limits=limits, mode=mode)

    def select_lasso(self, x, y, mode="replace"):
        raise NotImplementedError("todo")

    def get_grid(self):
        zslice = self.grid[..., self.z_slice]
        if self.z_relative:
            with np.errstate(divide='ignore', invalid='ignore'):
                zslice = zslice / self.grid.sum(axis=-1)
        return zslice

    def get_vgrids(self):
        def zsliced(grid):
            return grid[..., self.z_slice] if grid is not None else None
        return [zsliced(grid) for grid in super(Plot2dSliced, self).get_vgrids()]

    def create_tools(self):
        # super(Plot2dSliced, self).create_tools()
        self.z_slice_slider = widgets.IntSlider(value=self.z_slice, min=0, max=self.z_shape - 1)
        # self.add_control_widget(self.z_slice_slider)
        self.z_slice_slider.observe(self._z_slice_changed, "value")
        self.observe(self._z_slice_changed, "z_slice")

        dz = self.z_max_extreme - self.z_min_extreme
        self.z_range_slider = widgets.FloatRangeSlider(
            min=min(self.z_min, self.z_min_extreme),
            value=[self.z_min, self.z_max],
            max=max(self.z_max, self.z_max_extreme),
            step=dz / 1000)
        self.z_range_slider.observe(self._z_range_changed_, names=["value"])
        # self.observe(self.z_range_slider, "z_min")

        self.z_control = widgets.VBox([self.z_slice_slider, self.z_range_slider])
        self.add_control_widget(self.z_control)

    def _z_range_changed_(self, changes, **kwargs):
        # print("changes1", changes, repr(changes), kwargs)
        self.limits[2][0], self.limits[2][1] = \
            self.z_min, self.z_max = self.z_range_slider.value = changes["new"]
        self.update_grid()

    def _z_slice_changed(self, changes):
        self.z_slice = self.z_slice_slider.value = changes["new"]
        self._update_image()

    def get_shape(self):
        return vaex.dataset._expand_shape(self.shape, 2) + (self.z_shape,)

    def get_vshape(self):
        return vaex.dataset._expand_shape(self.vshape, 2) + (self.z_shape,)

    def get_binby(self):
        return [self.x, self.y, self.z]
class RasterioBase(LoadFileMixin, BaseFileSource):
    """Create a DataSource using rasterio.

    Attributes
    ----------
    source : str, :class:`io.BytesIO`
        Path to the data source
    dataset : :class:`rasterio._io.RasterReader`
        A reference to the datasource opened by rasterio
    coordinates : :class:`podpac.Coordinates`
        {coordinates}
    band : int
        The 'band' or index for the variable being accessed in files
        such as GeoTIFFs. Use None for all bands.
    crs : str, optional
        The coordinate reference system. Normally this will come directly
        from the file, but this allows users to specify the crs in case this
        information is missing from the file.
    read_as_filename : bool, optional
        Default is False. If True, the file will be read using
        rasterio.open(self.source) instead of being automatically parsed to
        handle ftp, s3, in-memory files, etc.
    """

    # dataset = tl.Instance(rasterio.DatasetReader).tag(readonly=True)
    band = tl.CInt(allow_none=True).tag(attr=True)
    crs = tl.Unicode(allow_none=True, default_value=None).tag(attr=True)
    driver = tl.Unicode(allow_none=True, default_value=None)
    read_from_source = tl.Bool(False).tag(attr=True)

    @cached_property
    def dataset(self):
        if re.match(".*:.*:.*", self.source):
            # i.e. user supplied a non-file-looking string like
            # 'HDF4_EOS:EOS_GRID:"MOD13Q1.A2013033.h08v05.006.2015256072248.hdf":MODIS_Grid_16DAY_250m_500m_VI:"250m 16 days NDVI"'
            # This also includes many subdatasets that are part of GDAL's
            # raster drivers; https://gdal.org/drivers/raster/index.html
            self.set_trait("read_from_source", True)
            return rasterio.open(self.source)
        else:
            return super(RasterioBase, self).dataset

    @tl.default("band")
    def _band_default(self):
        if self.outputs is not None and self.output is not None:
            return self.outputs.index(self.output)
        elif self.outputs is None:
            return 1
        else:
            return None  # All bands

    # -------------------------------------------------------------------------
    # public api methods
    # -------------------------------------------------------------------------

    @cached_property
    def nan_vals(self):
        return list(self.dataset.nodatavals)

    def open_dataset(self, fp, **kwargs):
        if self.read_from_source:
            return rasterio.open(self.source)
        with rasterio.MemoryFile() as mf:
            mf.write(fp.read())
            return mf.open(driver=self.driver)

    def close_dataset(self):
        """Closes the file for the datasource"""
        self.dataset.close()

    @common_doc(COMMON_DATA_DOC)
    def get_coordinates(self):
        """{get_coordinates}

        The default implementation tries to find the lat/lon coordinates
        based on dataset.affine. It cannot determine the alt or time
        dimensions, so child classes may have to overload this method.
        """
        # check to see if the coordinates are rotated, using affine
        affine = self.dataset.transform

        if self.crs is not None:
            crs = self.crs
        elif isinstance(self.dataset.crs, rasterio.crs.CRS) and "init" in self.dataset.crs:
            crs = self.dataset.crs["init"].upper()
        elif isinstance(self.dataset.crs, dict) and "init" in self.dataset.crs:
            crs = self.dataset.crs["init"].upper()
        else:
            try:
                crs = pyproj.CRS(self.dataset.crs).to_wkt()
            except pyproj.exceptions.CRSError:
                raise RuntimeError("Unexpected rasterio crs '%s'" % self.dataset.crs)

        return Coordinates.from_geotransform(affine.to_gdal(), self.dataset.shape, crs)

    @common_doc(COMMON_DATA_DOC)
    def get_data(self, coordinates, coordinates_index):
        """{get_data}"""
        data = self.create_output_array(coordinates)
        slc = coordinates_index

        # read data within the coordinates_index window
        window = ((slc[0].start, slc[0].stop), (slc[1].start, slc[1].stop))

        if self.outputs is not None:  # read all the bands
            raster_data = self.dataset.read(
                out_shape=(len(self.outputs),) + tuple(coordinates.shape), window=window
            )
            raster_data = np.moveaxis(raster_data, 0, 2)
        else:  # read the requested band
            raster_data = self.dataset.read(
                self.band, out_shape=tuple(coordinates.shape), window=window
            )

        # set raster data to output array
        data.data.ravel()[:] = raster_data.ravel()
        return data

    # -------------------------------------------------------------------------
    # additional methods and properties
    # -------------------------------------------------------------------------

    @property
    def tags(self):
        return self.dataset.tags()

    @property
    def subdatasets(self):
        return self.dataset.subdatasets

    @property
    def band_count(self):
        """The number of bands"""
        return self.dataset.count

    @cached_property
    def band_descriptions(self):
        """A description of each band contained in dataset.tags

        Returns
        -------
        OrderedDict
            Dictionary of band_number: band_description pairs. The band_description values are a dictionary,
            each containing a number of keys -- depending on the metadata.
        """
        return OrderedDict((i, self.dataset.tags(i + 1)) for i in range(self.band_count))

    @cached_property
    def band_keys(self):
        """An alternative view of band_descriptions based on the keys present in the metadata

        Returns
        -------
        dict
            Dictionary of metadata keys, where the values are the value of the key for each band.
            For example, band_keys['TIME'] = ['2015', '2016', '2017'] for a dataset with three bands.
        """
        keys = {k for i in range(self.band_count) for k in self.band_descriptions[i]}  # set of all keys
        return {
            k: [self.band_descriptions[i].get(k) for i in range(self.band_count)]
            for k in keys
        }

    def get_band_numbers(self, key, value):
        """Return the bands that have a key equal to a specified value.

        Parameters
        ----------
        key : str / list
            Key present in the metadata of the band. Can be a single key, or a list of keys.
        value : str / list
            Value of the key that should be returned. Can be a single value, or a list of values.

        Returns
        -------
        np.ndarray
            An array of band numbers that match the criteria
        """
        if not hasattr(key, "__iter__") or isinstance(key, string_types):
            key = [key]
        if not hasattr(value, "__iter__") or isinstance(value, string_types):
            value = [value]

        match = np.ones(self.band_count, bool)
        for k, v in zip(key, value):
            match = match & (np.array(self.band_keys[k]) == v)
        matches = np.where(match)[0] + 1

        return matches