def _apply(self, X, h0=None, mask=None, **kwargs):
    input_shape = K.get_shape(X)
    # ====== check mask ====== #
    if mask is not None and (K.ndim(mask) != K.ndim(X) - 1 or
                             K.get_shape(mask)[-1] != input_shape[1]):
        raise Exception('Mask must have "%d" dimensions and its time dimension '
                        '(i.e. the second dimension) must be equal to "%d"'
                        ', but the given mask has shape "%s".' %
                        (K.ndim(X) - 1, input_shape[1], K.get_shape(mask)))
    # ====== initialize states ====== #
    h0 = _check_rnn_hidden_states(h0, self, input_shape, 'h0')
    # turn off repeat_states if batch_size already included
    if K.get_shape(h0)[0] != 1:
        self.repeat_states = False
    # ====== precompute input ====== #
    X = K.dot(X, self.W_in) if self.input_mode != 'skip' else X
    if self.input_mode == 'norm':
        # normalize all axes except the time dimension
        bn = BatchNorm(axes=(0, 1), activation=K.linear,
                       gamma_init=self.gamma, beta_init=self.beta,
                       mean_init=self.mean, inv_std_init=self.inv_std)
        X = bn(X)
    out = self._rnn(X, h0=h0, mask=mask,
                    **self.get_recurrent_info(kwargs))
    for i in out:
        K.add_shape(i, shape=tuple(input_shape[:-1]) + (self.num_units,))
    # only care about the first state
    return out[0] if len(out) == 1 else out
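# --- illustrative sketch, not library code ---
# A plain-NumPy illustration (made-up sizes) of the mask contract checked
# above: the mask has one dimension fewer than the input (no feature axis)
# and its time axis must match the input's second dimension.
import numpy as np
_X_demo = np.random.randn(8, 20, 32)      # (batch, time, features)
_mask_demo = np.ones((8, 20))             # (batch, time) -> ndim(X) - 1 dims
assert _mask_demo.ndim == _X_demo.ndim - 1
assert _mask_demo.shape[1] == _X_demo.shape[1]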
def _apply(self, x):
    axes = iter(range(K.ndim(self.alpha)))
    pattern = ['x' if input_axis in self.shared_axes else next(axes)
               for input_axis in range(K.ndim(x))]
    alpha = K.dimshuffle(self.alpha, pattern)
    return K.relu(x, alpha)
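# --- illustrative sketch, not library code ---
# How the broadcast pattern above is built, shown with plain Python:
# every input axis listed in shared_axes becomes 'x' (a broadcast axis),
# all other axes consume the axes of alpha in order. The 4-D shape and
# shared_axes value below are made up.
def _build_pattern_demo(ndim_x, ndim_alpha, shared_axes):
    axes = iter(range(ndim_alpha))
    return ['x' if ax in shared_axes else next(axes) for ax in range(ndim_x)]

# e.g. alpha shared over the batch and spatial axes of a (N, H, W, C) input,
# so alpha only needs one value per channel (ndim_alpha == 1)
assert _build_pattern_demo(4, 1, shared_axes=(0, 1, 2)) == ['x', 'x', 'x', 0]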
def _apply(self, x):
    input_shape = K.get_shape(x)
    _validate_input_shape(input_shape)
    other_shape = tuple([input_shape[i]
                         for i in range(K.ndim(x) - self.outdim + 1, K.ndim(x))])
    return K.reshape(x, (-1,) + other_shape)
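# --- illustrative sketch, not library code ---
# NumPy version (made-up shapes) of the flattening rule above: keep the
# trailing `outdim - 1` axes and collapse every leading axis into one.
import numpy as np
_x_demo = np.zeros((8, 5, 4, 3))
_outdim_demo = 3
_other_shape = _x_demo.shape[_x_demo.ndim - _outdim_demo + 1:]   # (4, 3)
assert _x_demo.reshape((-1,) + _other_shape).shape == (40, 4, 3)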
def _apply(self, x, **kwargs):
    y = self.ops.apply(x, **kwargs)
    return_list = True
    if not isinstance(y, (tuple, list)):
        return_list = False
        y = [y]
    # apply slice and calculate the shape
    output = []
    for i in y:
        shape = K.get_shape(i)
        i = i[self.slice]
        # good to calculate new output shape
        if isinstance(shape, (tuple, list)):
            new_shape = []
            for dim, idx in zip(shape, self.slice):
                if isinstance(idx, numbers.Number):
                    dim = -1
                elif dim is not None and isinstance(idx, slice):
                    dim = idx.indices(dim)
                    dim = dim[1] - dim[0]
                # -1 means the dimension is removed by an integer index
                if dim is None or dim > 0:
                    new_shape.append(dim)
            # slice is not specified for all dimensions
            if len(new_shape) < K.ndim(i):
                new_shape += shape[len(self.slice):]
            # add the new shape
            K.add_shape(i, new_shape)
        output.append(i)
    # return output
    if return_list:
        return output
    return output[0]
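# --- illustrative sketch, not library code ---
# The shape arithmetic above relies on slice.indices: it resolves a slice
# against a known dimension, and stop - start gives the length of the new
# axis (step is implicitly assumed to be 1, as in the code above); an
# integer index removes the axis entirely (marked by -1). Numbers are made up.
_start, _stop, _step = slice(2, 8).indices(10)
assert _stop - _start == 6          # a [2:8] slice of a length-10 axis -> 6
_start, _stop, _step = slice(None, -1).indices(10)
assert _stop - _start == 9          # a [:-1] slice of a length-10 axis -> 9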
def _check_cudnn_hidden_init(s0, shape, nnops, name):
    nb_layers, batch_size, hidden_size = shape
    # ====== init s0 ====== #
    if s0 is None and hasattr(nnops, name):
        s0 = getattr(nnops, name)
    elif s0 is not None:
        if callable(s0) or K.is_trainable_variable(s0) or isinstance(s0, np.ndarray):
            s0_shape = ((nb_layers, 1, hidden_size)
                        if callable(s0) or isinstance(s0, np.ndarray)
                        else K.get_shape(s0))
            s0 = nnops.configuration.create_params(s0, shape=s0_shape, name=name,
                                                   nnops=nnops, roles=INITIAL_STATE)
        # ====== check s0 shape ====== #
        init_shape = K.get_shape(s0)
        if K.ndim(s0) == 2:
            if init_shape[-1] != hidden_size:
                raise ValueError('the last dimension of the initial state is %d, '
                                 'but hidden_size=%d' %
                                 (init_shape[-1], hidden_size))
        elif init_shape[::2] != (nb_layers, hidden_size):
            raise ValueError('Require init states of size: %s, but '
                             'given state of size: %s' % (shape, init_shape))
        # ====== return the right shape ====== #
        setattr(nnops, name, s0)
    return s0
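# --- illustrative sketch, not library code ---
# The 3-D shape check above in plain Python (made-up numbers): a provided
# state of shape (nb_layers, batch_size, hidden_size) passes because
# shape[::2] drops the batch axis, so any batch size is accepted.
_init_shape = (2, 16, 128)          # (nb_layers, batch_size, hidden_size)
assert _init_shape[::2] == (2, 128)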
def _apply(self, X, h0=None, c0=None, mask=None):
    batch_size = K.get_shape(X, native=True)[0]
    is_bidirectional = self.direction_mode == 'bidirectional'
    input_mode = 'skip' if self.input_mode in ('skip', 'norm') else 'linear'
    # ====== precompute input ====== #
    # linear or norm input mode
    if self.input_mode == 'norm':
        X = K.dot(X, self.W_in)
        # normalize all axes except the time dimension
        bn = BatchNorm(axes=(0, 1), activation=K.linear,
                       gamma_init=self.gamma, beta_init=self.beta,
                       mean_init=self.mean, inv_std_init=self.inv_std)
        X = bn(X)
        # cuDNN RNN doesn't support multiple inputs
        shapeX = K.get_shape(X, native=True)
        ndims = K.ndim(X)
        if 'rnn' in self.rnn_mode:
            N = 1
        elif self.rnn_mode == 'gru':
            N = 3
        else:
            N = 4
        newshape = [shapeX[i] for i in range(ndims - 1)] + [self.num_units, N]
        X = K.mean(K.reshape(X, newshape), axis=-1)
    # ====== hidden state ====== #
    num_layers = self.num_layers * 2 if is_bidirectional else self.num_layers
    require_shape = (num_layers, batch_size, self.num_units)
    h0 = _check_cudnn_hidden_init(h0, require_shape, self, 'h0')
    c0 = _check_cudnn_hidden_init(c0, require_shape, self, 'c0')
    # ====== parameters ====== #
    if self.params_split:
        parameters = K.concatenate([K.flatten(i, outdim=1)
                                    for i in self.parameters
                                    if not has_roles(i, INITIAL_STATE)])
    else:
        parameters = self.params
    # ====== return CuDNN RNN ====== #
    results = K.rnn_dnn(X, hidden_size=self.num_units,
                        rnn_mode=self.rnn_mode,
                        num_layers=self.num_layers,
                        parameters=parameters,
                        h0=h0, c0=c0,
                        input_mode=input_mode,
                        direction_mode=self.direction_mode,
                        dropout=self.dropout, name=self.name)
    if not self.return_states:
        results = results[0]  # only get the output
    return results
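# --- illustrative sketch, not library code ---
# NumPy version (made-up sizes) of the reshape-and-average step above for
# 'norm' input mode: the projected input of width num_units * N (N gates)
# is folded back to num_units by averaging over the gate axis, because the
# cuDNN kernel expects a single input projection.
import numpy as np
_num_units, _N = 32, 4                              # 4 gates for an LSTM
_X_demo = np.random.randn(8, 20, _num_units * _N)   # (batch, time, units*N)
_X_demo = _X_demo.reshape(8, 20, _num_units, _N).mean(axis=-1)
assert _X_demo.shape == (8, 20, 32)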
def _apply(self, x):
    if K.ndim(x) != self.conv.ndim + 2:
        raise ValueError('Input has %d dimensions, but this Op requires a %d-D '
                         'tensor.' % (K.ndim(x), self.conv.ndim + 2))
    # ====== prepare the deconvolution ====== #
    stride = self.conv.strides
    border_mode = self.conv.pad
    W = self.conv.W
    dilation = self.conv.dilation
    # if Dilated Convolution, must transpose the Weights
    if self.conv.ndim == 2:
        deconv_func = K.deconv2d
    elif self.conv.ndim == 3:
        deconv_func = K.deconv3d
    else:
        raise Exception('No support for %d-D input in TransposedConv' %
                        self.conv.ndim)
    # theano requires batch_dims to be Constant or None, but tensorflow
    # requires batch_dims to be a native TensorVariable
    conved = deconv_func(x, kernel=W,
                         output_shape=K.get_shape(
                             self.conv._last_input,
                             native=True if K.backend() == 'tensorflow' else False),
                         strides=stride,
                         border_mode=border_mode,
                         filter_dilation=dilation)
    if hasattr(self, 'b'):
        if self.conv.untie_biases:
            conved += K.expand_dims(self.b, 0)
        else:
            conved += K.dimshuffle(self.b, ('x',) * (self.conv.ndim + 1) + (0,))
    activated = self.conv.activation(conved)
    K.add_shape(activated, self.conv.input_shape)
    return activated
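# --- illustrative sketch, not library code ---
# NumPy version (made-up sizes) of the tied-bias broadcast above: for a
# 2-D convolution the pattern ('x', 'x', 'x', 0) turns a per-channel bias
# of shape (C,) into shape (1, 1, 1, C) so it broadcasts over the batch
# and spatial axes of the deconvolved output.
import numpy as np
_b_demo = np.zeros(16)                              # one bias per channel
_b_broadcast = _b_demo.reshape((1,) * 3 + (16,))    # ndim + 1 leading axes
assert _b_broadcast.shape == (1, 1, 1, 16)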
def _apply(self, X, h0=None, c0=None, mask=None, **kwargs):
    # check input_shape
    input_shape = K.get_shape(X)
    # ====== check mask ====== #
    if mask is not None and (K.ndim(mask) != 2 or
                             K.get_shape(mask)[-1] != input_shape[1]):
        raise Exception('Mask must be a 2-D matrix and the time dimension '
                        '(i.e. the second dimension) must be equal to "%d"'
                        ', but the given mask has shape "%s".' %
                        (input_shape[1], K.get_shape(mask)))
    # add broadcastable dimension for mask
    if mask is not None:
        mask = K.expand_dims(mask, dim=-1)
    # ====== initialize states ====== #
    # hidden states
    h0 = _check_rnn_hidden_states(h0, self, input_shape, 'h0')
    c0 = _check_rnn_hidden_states(c0, self, input_shape, 'c0')
    # turn off repeat_states if batch_size already included
    if K.get_shape(h0)[0] != 1 and K.get_shape(c0)[0] != 1:
        self.repeat_states = False
    # ====== precompute input ====== #
    # linear or norm input mode
    if self.input_mode != 'skip':
        X = K.dot(X, self.W_in)
        if self.input_mode == 'norm':
            # normalize all axes except the time dimension
            bn = BatchNorm(axes=(0, 1), activation=K.linear,
                           gamma_init=self.gamma, beta_init=self.beta,
                           mean_init=self.mean, inv_std_init=self.inv_std)
            X = bn(X)
    # skip input mode
    elif input_shape[-1] == self.num_units:
        X = K.repeat(X, 4, axes=-1)
    # ====== compute recurrent output ====== #
    out = self._rnn(X, h0=h0, c0=c0, mask=mask,
                    **self.get_recurrent_info(kwargs))
    if not self.return_cell_memory:
        out = out[:-1]
    for i in out:
        K.add_shape(i, shape=input_shape[:-1] + (self.num_units,))
    # only care about the first state
    return out[0] if len(out) == 1 else out
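# --- illustrative sketch, not library code ---
# Shape-level NumPy sketch (made-up sizes) of the skip-input branch above:
# when the raw input already has num_units features it is repeated 4 times
# along the feature axis so each of the 4 LSTM gates sees a copy (whether
# the copies are interleaved or concatenated depends on the backend's
# repeat; only the resulting shape matters here).
import numpy as np
_num_units = 32
_X_demo = np.random.randn(8, 20, _num_units)
assert np.repeat(_X_demo, 4, axis=-1).shape == (8, 20, _num_units * 4)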
def _initialize_param(name, spec, shape):
    """ return a ndarray or trainable_variable """
    #####################################
    # 0. initializing function.
    if callable(spec):
        spec = spec(shape)
    #####################################
    # 1. Shared variable, just check the shape.
    if K.is_trainable_variable(spec):
        spec_shape = K.get_shape(spec)
        if not isinstance(spec_shape, tuple):
            spec_shape = K.eval(spec_shape)
        if shape is None:
            shape = spec_shape
        elif tuple(shape) != tuple(spec_shape):
            raise Exception('Given variable has different shape from requirement'
                            ', %s != %s' % (str(spec_shape), str(shape)))
    #####################################
    # 2. expression, we can only check the number of dimensions.
    elif K.is_variable(spec):
        # We cannot check the shape here, Theano expressions (even shared
        # variables) do not have a fixed compile-time shape. We can check the
        # dimensionality though.
        # Note that we cannot assign a name here. We could assign to the
        # `name` attribute of the variable, but the user may have already
        # named the variable and we don't want to override this.
        if shape is not None and K.ndim(spec) != len(shape):
            raise Exception("parameter with name=%s has %d dimensions, should be "
                            "%d" % (name, spec.ndim, len(shape)))
    #####################################
    # 3. numpy ndarray, create shared variable wrapper for it.
    elif isinstance(spec, np.ndarray):
        if shape is not None and spec.shape != shape:
            raise RuntimeError("parameter with name=%s has shape %s, should be "
                               "%s" % (name, spec.shape, shape))
    #####################################
    # 4. Exception.
    else:
        raise RuntimeError("cannot initialize parameters: 'spec' is not "
                           "a numpy array, a Theano expression, or a "
                           "callable")
    return spec, shape
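# --- illustrative usage sketch, not library code ---
# How a callable spec flows through _initialize_param: the callable is
# resolved against the requested shape and the resulting ndarray is only
# shape-checked. The initializer below is hypothetical, and running this
# assumes the backend `K` used inside _initialize_param is importable.
import numpy as np

def _uniform_init_demo(shape):
    return np.random.uniform(-0.05, 0.05, size=shape)

_W_demo, _W_shape = _initialize_param('W', _uniform_init_demo, (128, 256))
assert _W_demo.shape == _W_shape == (128, 256)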
def _apply(self, x):
    input_shape = K.get_shape(x)
    is_training = K.is_training()
    ndim = K.ndim(x)
    # if not training, use the stored statistics;
    # otherwise normalize the input by its own batch mean and std
    if not is_training:
        mean = self.mean
        inv_std = self.inv_std
    else:
        mean = K.mean(x, self.axes)
        inv_std = K.inv(K.sqrt(K.var(x, self.axes) + self.epsilon))
        # set a default update for them:
        running_mean = (1 - self.alpha) * self.mean + self.alpha * mean
        running_inv_std = (1 - self.alpha) * self.inv_std + self.alpha * inv_std
    # prepare dimshuffle pattern inserting broadcastable axes as needed
    param_axes = iter(range(ndim - len(self.axes)))
    pattern = ['x' if input_axis in self.axes else next(param_axes)
               for input_axis in range(ndim)]
    # apply dimshuffle pattern to all parameters
    beta = 0 if not hasattr(self, 'beta') else K.dimshuffle(self.beta, pattern)
    gamma = 1 if not hasattr(self, 'gamma') else K.dimshuffle(self.gamma, pattern)
    # normalize
    normalized = ((x - K.dimshuffle(mean, pattern)) *
                  (gamma * K.dimshuffle(inv_std, pattern)) + beta)
    # set shape for output
    K.add_shape(normalized, input_shape)
    # activated output
    output = self.activation(normalized)
    # add updates for final output
    if is_training:
        add_updates(output, self.mean, running_mean)
        add_updates(output, self.inv_std, running_inv_std)
    return output
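# --- illustrative sketch, not library code ---
# NumPy version (made-up data) of the running-statistics update above:
# during training the batch mean and inverse-std are blended into the
# stored values with factor alpha, the same convex combination used for
# running_mean / running_inv_std in _apply.
import numpy as np
_alpha, _epsilon = 0.1, 1e-8
_x_demo = np.random.randn(64, 16)                     # (batch, features)
_batch_mean = _x_demo.mean(axis=0)
_batch_inv_std = 1.0 / np.sqrt(_x_demo.var(axis=0) + _epsilon)
_running_mean = (1 - _alpha) * np.zeros(16) + _alpha * _batch_mean
_running_inv_std = (1 - _alpha) * np.ones(16) + _alpha * _batch_inv_std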
def _slice_x(X, idx):
    """ Slice a tensor along its last dimension """
    ndim = K.ndim(X)
    indices = [slice(None, None, None) for i in range(ndim - 1)]
    return X[tuple(indices + [idx])]
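# --- illustrative sketch, not library code ---
# The same last-axis slicing written directly with NumPy (shapes made up):
# every leading axis gets a full slice and only the last axis is indexed.
import numpy as np
_X_demo = np.arange(24).reshape(2, 3, 4)
_idx = tuple(slice(None) for _ in range(_X_demo.ndim - 1)) + (slice(0, 2),)
assert _X_demo[_idx].shape == (2, 3, 2)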
def _initialize(self, x):
    config = NNConfig(ndim=K.ndim(x))
    return config