def process(self, name, X):
    X_new = []
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    for idx, x in enumerate(X):
        # apply the slices only if `idx` is in `data_idx`
        if idx in data_idx:
            ndim = x.ndim
            axis = self.axis % ndim
            # just one index given
            if isinstance(self.slices, (slice, int)):
                indices = tuple(slice(None) if i != axis else self.slices
                                for i in range(ndim))
                x = x[indices]
            # multiple indices are given
            else:
                indices = []
                for sl in self.slices:
                    indices.append(tuple(slice(None) if i != axis else sl
                                         for i in range(ndim)))
                x = np.concatenate([x[i] for i in indices], axis=self.axis)
            # make sure the array is still contiguous after slicing
            x = np.ascontiguousarray(x)
        X_new.append(x)
    return name, X_new
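# A minimal standalone sketch (not part of the recipe API) of the
# axis-slicing trick used above: build an index tuple that is
# `slice(None)` everywhere except the target axis. All names here are
# illustrative only.
import numpy as np

def slice_axis_demo(x, sl, axis):
    axis = axis % x.ndim
    indices = tuple(slice(None) if i != axis else sl
                    for i in range(x.ndim))
    return x[indices]

x = np.arange(24).reshape(2, 3, 4)
assert slice_axis_demo(x, slice(0, 2), axis=-1).shape == (2, 3, 2)
assert slice_axis_demo(x, 1, axis=1).shape == (2, 4)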
def process(self, name, X):
    # ====== not enough data points for sequencing ====== #
    if self.end == 'cut' and \
            any(x.shape[0] < self.frame_length for x in X):
        return None
    if self.end == 'ignore' and \
            any(x.shape[0] > self.frame_length for x in X):
        return None
    end = self.end
    if end == 'ignore':
        end = 'pad'
    # ====== preprocessing data_idx ====== #
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    # ====== segment X ====== #
    X_new = []
    for idx, x in enumerate(X):
        ## for data
        if idx in data_idx:
            if end == 'mix':
                # 'mix': cut sequences long enough for a full frame,
                # pad the ones that are too short
                x = segment_axis(a=x, frame_length=self.frame_length,
                                 step_length=self.step_length, axis=0,
                                 end=('cut' if x.shape[0] >= self.frame_length
                                      else 'pad'),
                                 pad_value=self.pad_value,
                                 pad_mode=self.pad_mode)
            else:
                x = segment_axis(a=x, frame_length=self.frame_length,
                                 step_length=self.step_length, axis=0,
                                 end=end,
                                 pad_value=self.pad_value,
                                 pad_mode=self.pad_mode)
        ## for all
        X_new.append(x)
    return name, X_new
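# A rough standalone stand-in for `segment_axis` with end='cut',
# assuming numpy >= 1.20 (`sliding_window_view`); the real helper also
# supports the padding modes not shown here.
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def segment_axis_cut(a, frame_length, step_length):
    # all full windows of `frame_length`, then subsample by the step
    windows = sliding_window_view(a, frame_length, axis=0)
    windows = windows[::step_length]
    # move the window axis next to the sample axis: (n, frame_length, ...)
    return np.moveaxis(windows, -1, 1)

a = np.arange(10)
print(segment_axis_cut(a, frame_length=4, step_length=2).shape)  # (4, 4)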
def process(self, name, X):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    X = [np.expand_dims(x, axis=self.axis) if i in data_idx else x
         for i, x in enumerate(X)]
    return name, X
def shape_transform(self, shapes):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                              return_tuple=True)
    # pooling trims `size` features from each end, then reduces the
    # feature axis by a factor of `size`
    shapes = [(shp[:-1] + (shp[-1] // self.size - 2,), ids)
              if i in data_idx else (shp, ids)
              for i, (shp, ids) in enumerate(shapes)]
    return shapes
def process(self, name, X):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    if len(X) > 1 and len(data_idx) > 1:
        X_old = [x for i, x in enumerate(X) if i not in data_idx]
        X_new = [x for i, x in enumerate(X) if i in data_idx]
        # horizontally stack all data arrays into one, keep the rest as-is
        X = [np.hstack(X_new)] + X_old
    return name, X
def process(self, name, X):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    label_idx = axis_normalize(axis=self.label_idx, ndim=len(X),
                               return_tuple=True)
    index = self._get_index(name)
    # ====== indexing ====== #
    X_new = []
    for i, x in enumerate(X):
        if i in data_idx:
            x = x[index]
            # if NOT a label, apply mean-variance normalization
            if self.mvn and i not in label_idx:
                x = _mvn(x, varnorm=self.varnorm)
        X_new.append(x)
    return name, X_new
def shape_transform(self, shapes):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                              return_tuple=True)
    # ====== update the shape and indices ====== #
    new_shapes = []
    for idx, (shp, ids) in enumerate(shapes):
        if idx in data_idx:
            n_samples = shp[0]
            shp = (n_samples, self.length) + shp[1:]
        new_shapes.append((shp, ids))
    return new_shapes
def shape_transform(self, shapes):
    if self.delta > 0:
        data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                                  return_tuple=True)
        # number of stacked blocks: the original (optional) plus one
        # block per delta order
        n = (self.delta + 1) if self.keep_original else self.delta
        new_shapes = []
        for i, (shp, ids) in enumerate(shapes):
            if i in data_idx:
                axis = self.axis % len(shp)  # normalize negative axis
                shp = shp[:axis] + (shp[axis] * n,) + shp[axis + 1:]
            new_shapes.append((shp, ids))
        shapes = new_shapes
    return shapes
def process(self, name, X):
    if self.delta > 0:
        data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                                  return_tuple=True)
        # concatenate the original features (optional) with every
        # delta order along `axis`
        X = [x if i not in data_idx else
             np.concatenate(([x] if self.keep_original else []) +
                            delta(x, order=self.delta, axis=self.axis),
                            axis=self.axis)
             for i, x in enumerate(X)]
    return name, X
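# A simplified standalone stand-in for the `delta` call above, using
# the standard regression formula over a +/- `width` window
# (librosa/HTK style); the library version may differ in padding and
# window details, and returns a list of all orders rather than one.
import numpy as np

def delta_demo(x, width=2, axis=0):
    x = np.moveaxis(x, axis, 0)
    denom = 2 * sum(k * k for k in range(1, width + 1))
    pad = [(width, width)] + [(0, 0)] * (x.ndim - 1)
    xp = np.pad(x, pad, mode='edge')
    d = sum(k * (xp[width + k:len(x) + width + k] -
                 xp[width - k:len(x) + width - k])
            for k in range(1, width + 1)) / denom
    return np.moveaxis(d, 0, axis)

x = np.random.randn(100, 13).astype('float32')
print(delta_demo(x).shape)  # first-order deltas, same shape: (100, 13)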
def process(self, name, X):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    # temporarily override the whitening flag of the fitted PCA
    pca_whiten = self._pca.whiten
    self._pca.whiten = self.whiten
    X = [self._pca.transform(x, n_components=self.nb_components)
         if i in data_idx else x
         for i, x in enumerate(X)]
    # restore the original whitening flag
    self._pca.whiten = pca_whiten
    return name, X
def __init__(self, data, axis=-1):
    data = as_tuple(data)
    if len(data) < 2:
        raise ValueError("2 or more Data must be given to `DataConcat`")
    if axis == 0:
        raise ValueError("Cannot concatenate along axis=0")
    if len(set(d.ndim for d in data)) > 1:
        raise ValueError("All Data must have the same number of "
                         "dimensions (i.e. `ndim`)")
    if len(set(d.shape[0] for d in data)) > 1:
        raise ValueError("All Data must have the same length "
                         "(i.e. first dimension)")
    super(DataConcat, self).__init__(data, read_only=True)
    self._is_data_list = False
    self._axis = axis_normalize(int(axis), ndim=data[0].ndim)
def process(self, name, X):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    # ====== scale features to [0, 1] ====== #
    X_new = []
    for i, x in enumerate(X):
        if i in data_idx:
            x = x.astype('float32')
            min_ = x.min()
            max_ = x.max()
            x = (x - min_) / (max_ - min_)
        X_new.append(x)
    return name, X_new
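# Quick standalone check of the min-max rescaling used above: after
# (x - min) / (max - min) the array spans exactly [0, 1].
import numpy as np

x = np.random.randn(4, 5).astype('float32')
y = (x - x.min()) / (x.max() - x.min())
assert np.isclose(y.min(), 0.0) and np.isclose(y.max(), 1.0)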
def shape_transform(self, shapes):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                              return_tuple=True)
    # ====== update the shape and indices ====== #
    new_shapes = []
    for idx, (shp, ids) in enumerate(shapes):
        if idx in data_idx:
            # transform the indices
            n = 0
            ids_new = []
            for name, n_samples in ids:
                ## MODE = cut (drop sequences shorter than one frame)
                if self.end == 'cut':
                    if n_samples < self.frame_length:
                        n_samples = 0
                    else:
                        n_samples = 1 + np.floor(
                            (n_samples - self.frame_length) / self.step_length)
                ## MODE = ignore (drop long sequences, pad the rest to 1 frame)
                elif self.end == 'ignore':
                    if n_samples > self.frame_length:
                        n_samples = 0
                    else:
                        n_samples = 1
                ## MODE = mix (cut if long enough, otherwise pad to 1 frame)
                elif self.end == 'mix':
                    if n_samples < self.frame_length:
                        n_samples = 1
                    else:
                        n_samples = 1 + np.floor(
                            (n_samples - self.frame_length) / self.step_length)
                ## MODE = pad or wrap
                else:
                    if n_samples < self.frame_length:
                        n_samples = 1
                    else:
                        n_samples = 1 + np.ceil(
                            (n_samples - self.frame_length) / self.step_length)
                # make sure everything is an integer
                n_samples = int(n_samples)
                if n_samples > 0:
                    ids_new.append((name, n_samples))
                n += n_samples
            ids = ids_new
            # transform the shape for data
            feat_shape = (shp[-1],) if len(shp) >= 2 else ()
            mid_shape = tuple(shp[1:-1])
            shp = (n, self.frame_length) + mid_shape + feat_shape
        new_shapes.append((shp, ids))
    return new_shapes
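# Worked check of the frame-count arithmetic above: for n_samples=100,
# frame_length=25, step_length=10,
#   'cut'        -> 1 + floor((100 - 25) / 10) = 8 frames
#   'pad'/'wrap' -> 1 + ceil((100 - 25) / 10)  = 9 frames
# and a sequence shorter than frame_length yields 0 ('cut') or 1
# ('mix'/'pad') frames.
import numpy as np
assert 1 + int(np.floor((100 - 25) / 10)) == 8
assert 1 + int(np.ceil((100 - 25) / 10)) == 9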
def process(self, name, X):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    X_pooled = []
    for i, x in enumerate(X):
        if i in data_idx:
            shape = x.shape
            # trim `size` features from each end so the feature axis is
            # divisible by the pool size (matches `shape_transform`)
            x = x[:, self.size:-self.size]
            x = x.reshape(shape[0], -1, self.size)
            x = self.pool_func(x, axis=-1)
            x = x.reshape(shape[0], -1)
        X_pooled.append(x)
    return name, X_pooled
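# A standalone sketch of the reshape-based pooling above with a pool
# size of 2: trim `size` features from each end so the feature axis is
# divisible by `size`, pool adjacent groups, and flatten back.
import numpy as np

size = 2
x = np.random.randn(8, 20)            # (n_samples, n_features)
trimmed = x[:, size:-size]            # (8, 16)
pooled = np.mean(trimmed.reshape(8, -1, size), axis=-1)  # (8, 8)
assert pooled.shape == (8, x.shape[1] // size - 2)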
def shape_transform(self, shapes):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                              return_tuple=True)
    new_shapes = []
    for idx, (shp, ids) in enumerate(shapes):
        if idx in data_idx:
            shp = list(shp)
            # normalize the axis against the expanded ndim (ndim + 1)
            axis = self.axis if self.axis >= 0 else \
                len(shp) + 1 + self.axis
            shp.insert(axis, 1)
            shp = tuple(shp)
        new_shapes.append((shp, ids))
    return new_shapes
def process(self, name, X):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    # ====== stacking ====== #
    X_new = []
    for idx, x in enumerate(X):
        # stack the data
        if idx in data_idx:
            if x.ndim == 1:
                x = np.expand_dims(x, axis=-1)
            feat_shape = x.shape[1:]
            x = stack_frames(x, frame_length=self.length, step_length=1,
                             keep_length=True, make_contigous=True)
            x = np.reshape(x, newshape=(-1, self.length) + feat_shape)
        X_new.append(x)
    return name, X_new
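# A rough standalone stand-in for `stack_frames(..., step_length=1,
# keep_length=True)`: pad at both ends so every sample keeps a full
# context window of `length` frames. The edge-padding choice here is an
# assumption; the library helper may pad differently.
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def stack_frames_demo(x, length):
    left = (length - 1) // 2
    right = length - 1 - left
    xp = np.pad(x, ((left, right), (0, 0)), mode='edge')
    # (n, length, feat): one context window per original frame
    return np.moveaxis(sliding_window_view(xp, length, axis=0), -1, 1)

x = np.random.randn(50, 13)
print(stack_frames_demo(x, length=5).shape)  # (50, 5, 13)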
def _apply(self, X, mask=None):
    def _step_fn(outs, ins):
        return [f(ins) for f in self._apply_ops]
    # ====== apply the ops once to know the initializer shapes ====== #
    ndim = X.shape.ndims
    axis = axis_normalize(self.time_axis, ndim=ndim)
    with tf.device("/cpu:0"):
        sample = tf.zeros_like(X)
        # take a single step along the time axis as a template
        sample = sample[tuple(slice(None, None) if i != axis else 0
                              for i in range(ndim))]
        initializer = [tf.zeros_like(f(sample)) for f in self._apply_ops]
    # ====== scan ====== #
    outputs = K.scan_tensors(_step_fn,
                             sequences=X, mask=mask,
                             initializer=initializer,
                             axis=axis,
                             backward=self.backward,
                             reverse=self.reverse,
                             reshape_outputs=True)
    return outputs[0] if len(self._apply_ops) == 1 else outputs
def shape_transform(self, shapes):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                              return_tuple=True)
    new_shapes = []
    # ====== check if the first dimension is sliced ====== #
    for idx, (shp, ids) in enumerate(shapes):
        if idx in data_idx:
            if self.axis == 0:
                # slicing the sample axis also updates the indices
                ids = [(name, self._from_indices(length))
                       for name, length in ids]
                n = sum(i[1] for i in ids)
                shp = (n,) + shp[1:]
            else:
                axis = self.axis % len(shp)  # normalize negative axis
                # number of elements kept along the sliced axis
                n = self._from_indices(shp[axis])
                shp = tuple(j if i != axis else n
                            for i, j in enumerate(shp))
        new_shapes.append((shp, ids))
    return new_shapes
def shape_transform(self, shapes):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                              return_tuple=True)
    shapes_new = []
    for i, (shp, ids) in enumerate(shapes):
        if i in data_idx:
            ids_new = []
            n_total = 0
            # ====== update the indices ====== #
            for name, _ in ids:
                # this takes a lot of time, but the new shapes are
                # only calculated once
                index = self._get_index(name)
                n = np.sum(index)
                n_total += n
                ids_new.append((name, n))
            # ====== update the shape ====== #
            ids = ids_new
            shp = (n_total,) + shp[1:]
        shapes_new.append((shp, ids))
    return shapes_new
def shape_transform(self, shapes):
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                              return_tuple=True)
    # just 1 shape, nothing to merge
    if len(shapes) <= 1 or len(data_idx) <= 1:
        return shapes
    # split the shapes into merged and pass-through groups
    old_shapes = []
    new_shapes = []
    for idx, (shp, ids) in enumerate(shapes):
        if idx in data_idx:
            new_shapes.append((shp, ids))
        else:
            old_shapes.append((shp, ids))
    # ====== horizontal stacking: sum the feature dimensions ====== #
    shape, ids = new_shapes[0]
    merged = (shape[:-1] + (sum(shp[-1] for shp, _ in new_shapes),), ids)
    return [merged] + old_shapes
def process(self, name, X):
    X_normalized = []
    data_idx = axis_normalize(axis=self.data_idx, ndim=len(X),
                              return_tuple=True)
    for i, x in enumerate(X):
        if i in data_idx:
            x = x.astype('float32')
            # ====== global normalization ====== #
            if self.mean is not None and self.std is not None:
                x = (x - self.mean) / (self.std + 1e-20)
            # ====== perform local normalization ====== #
            if 'normal' in self.local_normalize or \
                    'true' in self.local_normalize:
                x = ((x - x.mean(self.axis, keepdims=True)) /
                     (x.std(self.axis, keepdims=True) + 1e-20))
            elif 'sigmoid' in self.local_normalize:
                # rescale to [0, 1]
                min_, max_ = np.min(x), np.max(x)
                x = (x - min_) / (max_ - min_)
            elif 'tanh' in self.local_normalize:
                # rescale to [-1, 1]
                min_, max_ = np.min(x), np.max(x)
                x = 2 * (x - min_) / (max_ - min_) - 1
            elif 'mean' in self.local_normalize:
                x -= x.mean(0)
        X_normalized.append(x)
    return name, X_normalized
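# Standalone check of the 'normal' branch above: after mean-variance
# normalization along an axis, the per-feature mean is ~0 and std ~1.
import numpy as np

x = np.random.randn(100, 13).astype('float32') * 3.0 + 5.0
y = (x - x.mean(0, keepdims=True)) / (x.std(0, keepdims=True) + 1e-20)
assert np.allclose(y.mean(0), 0.0, atol=1e-5)
assert np.allclose(y.std(0), 1.0, atol=1e-5)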
def _get_data_label_idx(data_idx, label_idx, ndim):
    data_idx = axis_normalize(axis=data_idx, ndim=ndim, return_tuple=True)
    label_idx = axis_normalize(axis=label_idx, ndim=ndim, return_tuple=True)
    # exclude every index already claimed by `label_idx`
    data_idx = [i for i in data_idx if i not in label_idx]
    return data_idx, label_idx
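# `axis_normalize` is a library helper; the sketch below is a plausible
# reference implementation inferred purely from how it is called in this
# file (negative indices wrapped by `ndim`, optional tuple output) --
# the real helper may differ.
def axis_normalize_demo(axis, ndim, return_tuple=False):
    if axis is None:
        axes = tuple(range(ndim))
    else:
        axes = tuple(a % ndim for a in
                     (axis if isinstance(axis, (tuple, list)) else (axis,)))
    if return_tuple:
        return axes
    return axes[0] if len(axes) == 1 else axes

assert axis_normalize_demo(-1, ndim=3) == 2
assert axis_normalize_demo((0, -1), ndim=4, return_tuple=True) == (0, 3)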