def axis_titles(self, x=None, y=None):
    """Apply axis titles to the figure.

    This is a convenience method for manually modifying the "Axes" mark.
    When the figure already has axes, the ones keyed ``'x'`` and ``'y'``
    get their ``title`` replaced and every other key is left untouched.
    When it has none, a titled x axis and a titled y axis are appended.

    Both titles are always written: a title left at its default replaces
    an existing one with None.

    Parameters
    ----------
    x: string, default None
        X-axis title
    y: string, default None
        Y-axis title

    Example
    -------
    >>>vis.axis_titles(y="Data 1", x="Data 2")

    """
    titles = {'x': x, 'y': y}
    existing = self.axes.get_keys()

    # No axes yet: create both in one go.
    if not existing:
        self.axes.extend([Axis(type='x', title=x), Axis(type='y', title=y)])
        return

    # Otherwise only retitle the axes we know about.
    for name in existing:
        if name in titles:
            self.axes[name].title = titles[name]
def __init__(self, *args, **kwargs):
    """Create a Vega Bar Chart

    All arguments are forwarded to the parent constructor; afterwards the
    chart is populated with an ordinal x scale and a y scale (both reading
    from the 'table' data set), one axis per scale, and a single 'rect'
    mark filled in steelblue.
    """
    super(Bar, self).__init__(*args, **kwargs)

    # Scales
    x_domain = DataRef(data='table', field="data.idx")
    self.scales['x'] = Scale(name='x', type='ordinal', range='width',
                             domain=x_domain)
    y_domain = DataRef(data='table', field="data.val")
    self.scales['y'] = Scale(name='y', range='height', nice=True,
                             domain=y_domain)

    # One axis per scale
    x_axis = Axis(type='x', scale='x')
    y_axis = Axis(type='y', scale='y')
    self.axes.extend([x_axis, y_axis])

    # Marks: one rectangle per row, spanning from the value down to y=0
    bar_geometry = PropertySet(
        x=ValueRef(scale='x', field="data.idx"),
        y=ValueRef(scale='y', field="data.val"),
        width=ValueRef(scale='x', band=True, offset=-1),
        y2=ValueRef(scale='y', value=0),
    )
    bar_fill = PropertySet(fill=ValueRef(value='steelblue'))
    source = MarkRef(data='table')
    properties = MarkProperties(enter=bar_geometry, update=bar_fill)
    self.marks.append(Mark(type='rect', from_=source, properties=properties))
def _get_axis_from_header(_header, _id):
    """
    Local utility to create an Axis object from the data we can extract
    from the FITS header.

    As FITS files authors are pretty whimsical regarding the names of the
    header cards, the step is looked up under several candidate names
    (CDELTn, CDn_n, CDEL_n).

    :param _header: header object, handed as-is to ``_get_meta``.
    :param _id: integer substituted into the card names (``CRVAL%d`` etc.);
                presumably the axis number -- confirm with the callers.
    :return: ``Axis(name, start, step, unit)`` where ``start`` is the value
             at the first pixel.
    """
    step_cards = [
        'CDELT%d' % _id,
        'CD%d_%d' % (_id, _id),
        'CDEL_%d' % _id,
    ]
    step = _get_meta(_header, step_cards)

    # CRVAL is the value at the reference pixel CRPIX, and the first pixel
    # has a CRPIX of 1 (not 0): shift the reference value back to it.
    reference_value = _get_meta(_header, ['CRVAL%d' % _id])
    reference_pixel = _get_meta(_header, ['CRPIX%d' % _id])
    first_value = reference_value - (reference_pixel - 1.) * step

    label = _get_meta(_header, ['CTYPE%d' % _id], default='Axis%d' % _id)
    unit = Unit(_get_meta(_header, ['CUNIT%d' % _id]))
    return Axis(label, first_value, step, unit)
def _interp_nearest(obj, values, axis, repna):
    """ "nearest" neighbour interpolation

    Sample `obj` along one axis at the axis values closest to `values`.

    obj    : array-like object exposing `axes`, `dims`, `take` (and whose
             `take` result exposes `put` and `axes`)
    values : sequence of axis values to sample at; it becomes the new axis
    axis   : key passed to `obj.axes[...]` to select the axis
    repna  : if True, values that `_locate_nearest` cannot place are filled
             with NaN; if False an IndexError is raised for them

    Returns the sampled object, with its axis replaced by `values`.
    """
    ax = obj.axes[axis]
    pos = obj.dims.index(ax.name)

    assert ax.dtype is not np.dtype(
        'O'), "interpolation only for non-object types"

    indices = np.zeros_like(values, dtype=int)
    mask = np.zeros_like(values, dtype=bool)

    for i, x in enumerate(values):
        res = _locate_nearest(ax, x)
        if res is None:
            if not repna:
                raise IndexError("value not found: {}".format(x))
            # keep the placeholder index 0 and remember to blank it below
            mask[i] = True
            continue
        indices[i], _ = res

    # sample nearest neighbors
    result = obj.take(indices, axis=pos, indexing="position")

    # overwrite the positions that could not be located with NaN
    result.put(np.nan, np.where(mask)[0], axis=pos, indexing="position",
               convert=True, inplace=True)

    result.axes[pos] = Axis(values, ax.name)  # update axis

    return result
def from_pandas(cls, data, dims=None):
    """ Initialize a DimArray from pandas

    data: pandas object (Series, DataFrame, Panel, Panel4D)
    dims, optional: dimension (axis) names, otherwise look at ax.name for ax in data.axes

    Axes without a name (and without an entry in `dims`) are called
    'x0', 'x1', ... after their position. A MultiIndex becomes a GroupedAxis
    built from the index levels; unnamed levels are called '<axis name>_<j>'.
    The pandas object itself is never modified.

    Raises ImportError if pandas is not installed.

    >>> import pandas as pd
    >>> s = pd.Series([3,5,6], index=['a','b','c'])
    >>> s.index.name = 'dim0'
    >>> DimArray.from_pandas(s)
    dimarray: 3 non-null elements (0 null)
    dimensions: 'dim0'
    0 / dim0 (3): a to c
    array([3, 5, 6])

    Also work with Multi-Index

    >>> panel = pd.Panel(np.arange(2*3*4).reshape(2,3,4))
    >>> b = panel.to_frame() # pandas' method to convert Panel to DataFrame via MultiIndex
    >>> DimArray.from_pandas(b)    # doctest: +SKIP
    dimarray: 24 non-null elements (0 null)
    dimensions: 'major,minor', 'x1'
    0 / major,minor (12): (0, 0) to (2, 3)
    1 / x1 (2): 0 to 1
    ...
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError("pandas module is required to use this method")

    axes = []
    for i, ax in enumerate(data.axes):

        # axis name
        name = ax.name
        if dims is not None:
            name = dims[i]
        if name is None:
            name = 'x%i' % (i)

        # Multi-Index: make a Grouped Axis object
        if isinstance(ax, pd.MultiIndex):

            # level names: build a fresh list rather than assigning into
            # `ax.names`, which pandas exposes as an immutable FrozenList
            # (and which belongs to the caller's index anyway)
            names = [
                nm if nm is not None else '%s_%i' % (name, j)
                for j, nm in enumerate(ax.names)
            ]

            miaxes = Axes.from_arrays(ax.levels, dims=names)
            axis = GroupedAxis(*miaxes)

        # Index: Make a simple Axis
        else:
            axis = Axis(ax.values, name)

        axes.append(axis)

    return cls(data.values, axes=axes)
def stack(arrays, axis, keys=None, align=False):
    """ stack arrays along a new dimension (raise error if already existing)

    parameters:
    ----------
    arrays: sequence or dict of arrays
    axis: str, new dimension along which to stack the array
    keys, optional: stack axis values, useful if array is a sequence, or a non-ordered dictionary
    align, optional: if True, align axes prior to stacking (Default to False)

    returns:
    --------
    DimArray: joint array

    raises:
    -------
    TypeError: if one of the arrays is not a DimArray
    ValueError: if the common axes cannot be determined; when the axes are
        not aligned the message suggests passing `align=True`

    See Also:
    ---------
    concatenate: join arrays along an existing dimension

    Examples:
    ---------
    >>> a = DimArray([1,2,3])
    >>> b = DimArray([11,22,33])
    >>> stack([a, b], axis='stackdim', keys=['a','b'])
    dimarray: 6 non-null elements (0 null)
    dimensions: 'stackdim', 'x0'
    0 / stackdim (2): a to b
    1 / x0 (3): 0 to 2
    array([[ 1,  2,  3],
           [11, 22, 33]])
    """
    # make a sequence of arrays
    arrays, keys = _check_stack_args(arrays, keys)

    for a in arrays:
        if not is_DimArray(a):
            raise TypeError('can only stack DimArray instances')

    # make sure the stacking dimension is OK (new)
    dims = get_dims(*arrays)
    axis = _check_stack_axis(axis, dims)

    # re-index axes if needed
    if align:
        arrays = align_axes(*arrays)

    # make it a numpy array
    data = np.array([a.values for a in arrays])

    # new axis
    newaxis = Axis(keys, axis)

    # find common axes
    try:
        axes = _get_axes(*arrays)
    except ValueError as err:
        # `as` works on Python 2.6+ and 3 (the comma form is Python 2 only)
        message = err
        if 'axes are not aligned' in repr(err):
            message = 'axes are not aligned\n ==> Try passing `align=True`'
        raise ValueError(message)

    # NOTE(review): the visible definition stops here -- `data`, `newaxis`
    # and `axes` are built but never combined or returned, although the
    # docstring promises a DimArray. Confirm against the complete source.
def __init__(self, x_scale=None, y_scale=None, mark=None, width=None,
             height=None):
    """Set up the scales, mark, size, padding and axes of the chart.

    Every argument is optional; a falsy value selects the default:

    x_scale: ordinal scale named 'x' over the 'data.x' field of 'table'
    y_scale: linear scale named 'y' over the 'data.y' field of 'table'
    mark: a 'rect' mark fed from 'table', filled in steelblue
    width: 400
    height: 200
    """
    self.x_scale = x_scale or Scale(
        name='x', range='width', type='ordinal',
        domain=DataRef(data='table', field='data.x'))

    self.y_scale = y_scale or Scale(
        name='y', range='height', type='linear', nice=True,
        domain=DataRef(data='table', field='data.y'))

    if not mark:
        # default mark: one rectangle per row, from the value down to y=0
        geometry = PropertySet(
            x=ValueRef(scale='x', field='data.x'),
            y=ValueRef(scale='y', field='data.y'),
            width=ValueRef(scale='x', band=True, offset=-1),
            y2=ValueRef(scale='y', value=0))
        fill = PropertySet(fill=ValueRef(value='steelblue'))
        mark = Mark(
            type='rect',
            from_=MarkRef(data='table'),
            properties=MarkProperties(enter=geometry, update=fill))
    self.mark = mark

    self.width = width or 400
    self.height = height or 200

    self.padding = {'top': 10, 'left': 30, 'bottom': 20, 'right': 10}

    self.x_axis = Axis(type='x', scale='x')
    self.y_axis = Axis(type='y', scale='y')
def concatenate_axes(axes):
    """ concatenate Axis objects

    axes: list of Axis objects sharing the same name

    Returns a new Axis holding the values of all input axes, in order, and
    carrying the common name.

    Raises ValueError if the axes do not all have the same name (nothing is
    written to stdout).

    >>> a = Axis([1,2,3],'x0')
    >>> b = Axis([5,6,7],'x0')
    >>> ax = concatenate_axes((a, b))
    >>> ax.name
    'x0'
    >>> ax.values
    array([1, 2, 3, 5, 6, 7])
    """
    # exactly one distinct name is allowed
    if len({ax.name for ax in axes}) != 1:
        raise ValueError("axis names differ!")

    values = np.concatenate([ax.values for ax in axes])
    return Axis(values, axes[0].name)
def _get_adjusted_axis(_axis, _index, _key): start = _axis.start start_index = _key[_index].start if not (start_index is None or start_index == 0): if start_index < 0: start_index = old_shape[_index] + start_index start = start + start_index * _axis.step step = _axis.step step_index = _key[_index].step if not (step_index is None or step_index == 1): if step_index < 0: # Oh, this is more complex than it seems, as it affects # start and stop values, too. # We'll postpone it for now ; add a test case and hack away! raise NotImplementedError( "Negative steps are not supported at the moment. " "Make a request or add support for them yourself!") step = step * step_index return Axis(_axis.name, start, step, _axis.unit)
def _interp_linear(obj, newindices, axis, repna):
    """ linearly interpolate a dimarray along an axis

    obj        : array-like object exposing `axes`, `dims`, `take`,
                 `_constructor` and `_metadata`
    newindices : 1-D sequence of axis values to interpolate at; it becomes
                 the new axis
    axis       : key passed to `obj.axes[...]` to select the axis
    repna      : if True, values that `_locate_bounds` cannot place yield
                 NaN; if False an IndexError is raised for them

    Returns a new object built with `obj._constructor`, or the bare weighted
    sum when `take` returns something without a `values` attribute.
    """
    ax = obj.axes[axis]
    pos = obj.dims.index(ax.name)

    assert ax.dtype is not np.dtype(
        'O'), "interpolation only for non-object types"

    i0 = np.zeros_like(newindices, dtype=int)
    i1 = np.zeros_like(newindices, dtype=int)
    # weights default to NaN so that unlocated values propagate as NaN
    w1 = np.empty_like(newindices, dtype=float)
    w1.fill(np.nan)

    for i, x in enumerate(newindices):
        res = _locate_bounds(ax, x)
        if res is None:
            if not repna:
                raise IndexError("value not found: {}".format(x))
            continue
        i0[i], i1[i], w1[i] = res

    # sample the two bracketing neighbors
    v0 = obj.take(i0, axis=pos, indexing="position")
    v1 = obj.take(i1, axis=pos, indexing="position")

    # result as weighted sum
    if not hasattr(v0, 'values'):  # scalar
        return v0 * (1 - w1) + v1 * w1

    # Line the 1-D weights up with the interpolated axis: left as they are,
    # numpy would broadcast them against the *last* axis, which is only
    # right when `pos` happens to be the last dimension.
    wshape = [1] * np.ndim(v0.values)
    wshape[pos] = -1
    w = w1.reshape(wshape)

    newvalues = v0.values * (1 - w) + v1.values * w
    axes = obj.axes.copy()
    axes[pos] = Axis(newindices, ax.name)  # new axis
    return obj._constructor(newvalues, axes, **obj._metadata)
def _take_broadcast(a, indices): """ broadcast array-indices & integers, numpy's classical Examples: --------- >>> a = da.zeros(shape=(3,4,5,6)) >>> a[:,[0, 1],:,2].shape (2, 3, 5) >>> a[:,[0, 1],2,:].shape (3, 2, 6) """ # new values newval = a.values[indices] # if the new values is a scalar, then just return it if np.isscalar(newval): return newval # new axes: broacast indices (should do the same as above, since integers are just broadcast) indices2 = broadcast_indices(indices) # assert np.all(newval == a.values[indices2]) # make a multi-axis with tuples is_array2 = np.array([np.iterable(ix) for ix in indices2]) nb_array2 = is_array2.sum() # If none or one array is present, easy if nb_array2 <= 1: newaxes = [ a.axes[i][ix] for i, ix in enumerate(indices) if not np.isscalar(ix) ] # indices or indices2, does not matter # else, finer check needed else: # same stats but on original indices is_array = np.array([np.iterable(ix) for ix in indices]) array_ix_pos = np.where(is_array)[0] # Determine where the axis will be inserted # - need to consider the integers as well (broadcast as arrays) # - if two indexed dimensions are not contiguous, new axis placed at first position... 
# a = zeros((3,4,5,6)) # a[:,[1,2],:,0].shape ==> (2, 3, 5) # a[:,[1,2],0,:].shape ==> (3, 2, 6) array_ix_pos2 = np.where(is_array2)[0] if np.any(np.diff(array_ix_pos2) > 1 ): # that mean, if two indexed dimensions are not contiguous insert = 0 else: insert = array_ix_pos2[0] # Now determine axis value # ...if originally only one array was provided, use these values correspondingly if len(array_ix_pos) == 1: i = array_ix_pos[0] values = a.axes[i].values[indices[i]] name = a.axes[i].name # ...else use a list of tuples else: values = zip( *[a.axes[i].values[indices2[i]] for i in array_ix_pos]) name = ",".join([a.axes[i].name for i in array_ix_pos]) broadcastaxis = Axis(values, name) newaxes = Axes() for i, ax in enumerate(a.axes): # axis is already part of the broadcast axis: skip if is_array2[i]: continue else: newaxis = ax[indices2[i]] ## do not append axis if scalar #if np.isscalar(newaxis): # continue newaxes.append(newaxis) # insert the right new axis at the appropriate position newaxes.insert(insert, broadcastaxis) return a._constructor(newval, newaxes, **a._metadata)
def take(obj, indices, axis=0, indexing="values", tol=TOLERANCE,
         keepdims=False, broadcast_arrays=True, mode='raise'):
    """ Retrieve values from a DimArray

    input:

        - self or obj: DimArray (ignore this parameter if accessed as bound method)
        - indices : int or list or slice (single-dimensional indices)
                    or a tuple of those (multi-dimensional)
                    or `dict` (`axis name` : `indices`)
        - axis : int or str
        - indexing : "values" or "position"
            "position": use numpy-like position index
            "values": indexing on axis values
        - tol : tolerance when looking for numerical values, e.g. to use
            nearest neighbor search, defaults to the module-level `TOLERANCE`
        - keepdims : keep singleton dimensions
        - broadcast_arrays: True, by default, consistently with numpy

            if False, indexing with list or array of indices will behave like
            Matlab TM does, which means that it will index on each individual
            dimensions. (internally, any list or array of indices will be
            converted to a boolean index of values before slicing)

            If True, numpy rules are followed. Consider the following case:

            a = DimArray(np.zeros((4,4,4)))
            a[[0,0],[0,0],[0,0]]

            if broadcast_arrays is False, the result will be a 3-D array of shape 2 x 2 x 2
            if broadcast_arrays is True, the result will be a 1-D array of size 2

        - mode: "raise", "clip", "wrap"
            analogous to numpy.ndarray.take's mode parameter, only valid
            (for now) if indexing is 'position'

    output:

        - DimArray object or python built-in type, consistently with numpy slicing

    Examples:
    ---------

    >>> v = DimArray([[1,2,3],[4,5,6]], axes=[["a","b"], [10.,20.,30.]], dims=['d0','d1'], dtype=float)
    >>> v
    dimarray: 6 non-null elements (0 null)
    dimensions: 'd0', 'd1'
    0 / d0 (2): a to b
    1 / d1 (3): 10.0 to 30.0
    array([[ 1.,  2.,  3.],
           [ 4.,  5.,  6.]])

    Indexing via axis values (default)

    >>> a = v[:,10]   # python slicing method
    >>> a
    dimarray: 2 non-null elements (0 null)
    dimensions: 'd0'
    0 / d0 (2): a to b
    array([ 1.,  4.])
    >>> b = v.take(10, axis=1)  # take, by axis position
    >>> c = v.take(10, axis='d1')  # take, by axis name
    >>> d = v.take({'d1':10})  # take, by dict {axis name : axis values}
    >>> (a==b).all() and (a==c).all() and (a==d).all()
    True

    Indexing via integer index (indexing="position" or `ix` property)

    >>> np.all(v.ix[:,0] == v[:,10])
    True
    >>> np.all(v.take(0, axis="d1", indexing="position") == v.take(10, axis="d1"))
    True

    Multi-dimensional indexing

    >>> v["a", 10]  # also work with string axis
    1.0
    >>> v.take(('a',10))  # multi-dimensional, tuple
    1.0
    >>> v.take({'d0':'a', 'd1':10})  # dict-like arguments
    1.0

    Take a list of indices

    >>> a = v[:,[10,20]] # also work with a list of index
    >>> a
    dimarray: 4 non-null elements (0 null)
    dimensions: 'd0', 'd1'
    0 / d0 (2): a to b
    1 / d1 (2): 10.0 to 20.0
    array([[ 1.,  2.],
           [ 4.,  5.]])
    >>> b = v.take([10,20], axis='d1')
    >>> np.all(a == b)
    True

    Take a slice:

    >>> c = v[:,10:20] # axis values: slice includes last element
    >>> c
    dimarray: 4 non-null elements (0 null)
    dimensions: 'd0', 'd1'
    0 / d0 (2): a to b
    1 / d1 (2): 10.0 to 20.0
    array([[ 1.,  2.],
           [ 4.,  5.]])
    >>> d = v.take(slice(10,20), axis='d1') # `take` accepts `slice` objects
    >>> np.all(c == d)
    True
    >>> v.ix[:,0:1] # integer position: does *not* include last element
    dimarray: 2 non-null elements (0 null)
    dimensions: 'd0', 'd1'
    0 / d0 (2): a to b
    1 / d1 (1): 10.0 to 10.0
    array([[ 1.],
           [ 4.]])

    Keep dimensions

    >>> a = v[["a"]]
    >>> b = v.take("a",keepdims=True)
    >>> np.all(a == b)
    True

    tolerance parameter to achieve "nearest neighbour" search

    >>> v.take(12, axis="d1", tol=5)
    dimarray: 2 non-null elements (0 null)
    dimensions: 'd0'
    0 / d0 (2): a to b
    array([ 1.,  4.])

    # Matlab like multi-indexing

    >>> v = DimArray(np.arange(2*3*4).reshape(2,3,4))
    >>> v.box[[0,1],:,[0,0,0]].shape
    (2, 3, 3)
    >>> v.box[[0,1],:,[0,0]].shape # here broadcast_arrays = False
    (2, 3, 2)
    >>> v[[0,1],:,[0,0]].shape # that is traditional numpy, with broadcasting on same shape
    (2, 3)
    >>> v.values[[0,1],:,[0,0]].shape # a proof of it
    (2, 3)

    >>> a = DimArray(np.arange(2*3).reshape(2,3))

    >>> a[a > 3] # FULL ARRAY: return a numpy array in n-d case (at least for now)
    dimarray: 2 non-null elements (0 null)
    dimensions: 'x0,x1'
    0 / x0,x1 (2): (1, 1) to (1, 2)
    array([4, 5])

    >>> a[a.x0 > 0] # SINGLE AXIS: only first axis
    dimarray: 3 non-null elements (0 null)
    dimensions: 'x0', 'x1'
    0 / x0 (1): 1 to 1
    1 / x1 (3): 0 to 2
    array([[3, 4, 5]])

    >>> a[:, a.x1 > 0] # only second axis
    dimarray: 4 non-null elements (0 null)
    dimensions: 'x0', 'x1'
    0 / x0 (2): 0 to 1
    1 / x1 (2): 1 to 2
    array([[1, 2],
           [4, 5]])

    >>> a.box[a.x0 > 0, a.x1 > 0] # AXIS-BASED (need `box` to prevent broadcasting)
    dimarray: 2 non-null elements (0 null)
    dimensions: 'x0', 'x1'
    0 / x0 (1): 1 to 1
    1 / x1 (2): 1 to 2
    array([[4, 5]])

    Omit `indices` parameter when putting a DimArray

    >>> a = DimArray([0,1,2,3,4], ['a','b','c','d','e'])
    >>> b = DimArray([5,6], ['c','d'])
    >>> a.put(b)
    dimarray: 5 non-null elements (0 null)
    dimensions: 'x0'
    0 / x0 (5): a to e
    array([0, 1, 5, 6, 4])

    Ellipsis (only one supported)

    >>> a = DimArray(np.arange(2*3*4*5).reshape(2,3,4,5))
    >>> a[0,...,0].shape
    (3, 4)
    >>> a[...,0,0].shape
    (2, 3)
    """
    assert indexing in ("position",
                        "values"), "invalid mode: " + repr(indexing)

    # SPECIAL CASE: full scale boolean array
    if obj.ndim > 1 and is_boolean_index(indices, obj.shape):
        indices = np.where(np.asarray(indices))
        newvalues = obj.values[indices]

        # at most one element selected: hand back the raw numpy result
        if np.size(newvalues) <= 1:
            return newvalues

        # or return a DimArray with axes as tuple (materialized: on
        # Python 3 `zip` returns a one-shot iterator, not a list)
        newaxisvalues = list(zip(
            *[obj.axes[i].values[ii] for i, ii in enumerate(indices)]))
        newaxisname = ",".join(obj.dims)
        newaxis = Axis(newaxisvalues, newaxisname)
        newobj = obj._constructor(newvalues, [newaxis], **obj._metadata)
        return newobj

    indices = _fill_ellipsis(indices, obj.ndim)

    try:
        indices_numpy = obj.axes.loc(indices,
                                     axis=axis,
                                     position_index=(indexing == "position"),
                                     keepdims=keepdims,
                                     tol=tol)
    except IndexError as msg:
        # `as` works on Python 2.6+ and 3 (the comma form is Python 2 only)
        raise IndexError(msg)

    # NOTE(review): the visible definition stops here -- `indices_numpy` is
    # computed but never used, and `broadcast_arrays` / `mode` are never
    # read. Confirm against the complete source.
def diff(obj, axis=-1, scheme="backward", keepaxis=False, n=1):
    """ Analogous to numpy's diff

    Calculate the n-th order discrete difference along given axis.

    The first order difference is given by ``out[n] = a[n+1] - a[n]`` along
    the given axis, higher order differences are calculated by using `diff`
    recursively.

    Parameters
    ----------
    {axis}
    scheme: str, determines the values of the resulting axis
        "forward" : diff[i] = x[i+1] - x[i]
        "backward": diff[i] = x[i] - x[i-1]
        "centered": diff[i] = x[i+1/2] - x[i-1/2]
        default is "backward"
    keepaxis: bool, if True, keep the initial axis by padding with NaNs
        Only compatible with "forward" or "backward" differences
    n : int, optional
        The number of times values are differenced.

    Returns
    -------
    diff : DimArray
        The `n` order differences. The shape of the output is the same as `a`
        except along `axis` where the dimension is smaller by `n`.
        When `axis` is None the values are flattened first and a plain
        numpy array is returned.

    Raises
    ------
    ValueError
        if `scheme` is not one of the three names above, or if
        `keepaxis=True` is combined with the "centered" scheme.

    Examples:
    ---------

    Create some example data

    >>> v = da.DimArray([1,2,3,4], ('time', np.arange(1950,1954)), dtype=float)
    >>> s = v.cumsum()
    >>> s
    dimarray: 4 non-null elements (0 null)
    dimensions: 'time'
    0 / time (4): 1950 to 1953
    array([  1.,   3.,   6.,  10.])

    `diff` reduces axis size by one, by default

    >>> s.diff()
    dimarray: 3 non-null elements (0 null)
    dimensions: 'time'
    0 / time (3): 1951 to 1953
    array([ 2.,  3.,  4.])

    The `keepaxis=` parameter fills array with `nan` where necessary to keep
    the axis unchanged. Default is backward differencing:
    `diff[i] = v[i] - v[i-1]`.

    >>> s.diff(keepaxis=True)
    dimarray: 3 non-null elements (1 null)
    dimensions: 'time'
    0 / time (4): 1950 to 1953
    array([ nan,   2.,   3.,   4.])

    But other schemes are available to control how the new axis is defined:
    `backward` (default), `forward` and even `centered`

    >>> s.diff(keepaxis=True, scheme="forward") # diff[i] = v[i+1] - v[i]
    dimarray: 3 non-null elements (1 null)
    dimensions: 'time'
    0 / time (4): 1950 to 1953
    array([  2.,   3.,   4.,  nan])

    The `keepaxis=True` option is invalid with the `centered` scheme, since
    every axis value is modified by definition:

    >>> s.diff(axis='time', scheme='centered')
    dimarray: 3 non-null elements (0 null)
    dimensions: 'time'
    0 / time (3): 1950.5 to 1952.5
    array([ 2.,  3.,  4.])
    """
    # If `axis` is None (operations on the flattened array), just return
    # the numpy array. np.diff itself does not accept axis=None, so the
    # flattening is done explicitly.
    if axis is None:
        return np.diff(np.ravel(obj.values), n=n)

    # Deal with `axis` parameter, whether `int`, `str` or `tuple`
    # possibly grouping dimensions if axis is tuple
    obj, idx, name = _deal_with_axis(obj, axis)

    # Recursive call if n > 1
    if n > 1:
        obj = obj.diff(n=n - 1, axis=idx, scheme=scheme, keepaxis=keepaxis)
        n = 1

    # n = 1
    assert n == 1, "n must be integer greater or equal to one"

    # Compute differences
    result = np.diff(obj.values, axis=idx)

    # Old axis along diff
    oldaxis = obj.axes[idx]

    # forward differencing
    if scheme == "forward":

        # keep axis: pad last element with NaNs
        if keepaxis:
            result = _append_nans(result, axis=idx)
            newaxis = oldaxis.copy()

        # otherwise just shorten the axis
        else:
            newaxis = oldaxis[:-1]

    elif scheme == "backward":

        # keep axis: pad first element with NaNs
        if keepaxis:
            result = _append_nans(result, axis=idx, first=True)
            newaxis = oldaxis.copy()

        # otherwise just shorten the axis
        else:
            newaxis = oldaxis[1:]

    elif scheme == "centered":

        # every axis value moves to a mid-point, so the axis cannot be kept
        if keepaxis:
            raise ValueError(
                "keepaxis=True is not compatible with centered differences")

        axisvalues = 0.5 * (oldaxis.values[:-1] + oldaxis.values[1:])
        newaxis = Axis(axisvalues, name)

    else:
        raise ValueError(
            "scheme must be one of 'forward', 'backward', 'centered', got {}".
            format(scheme))

    newaxes = obj.axes.copy()
    newaxes[idx] = newaxis
    newobj = obj._constructor(result, newaxes, **obj._metadata)

    return newobj