def pickling_variable(v, target=None):
    """ This function only applies to trainable parameters.

    Warning
    -------
    This pickling method won't save the "auxiliary_variables" and
    "updates" tags of variables.
    """
    # load variable
    if isinstance(v, str):
        try:
            if os.path.exists(v):
                # read the pickled bytes from the given path
                v = open(v, 'rb').read()
        except Exception:
            pass
        name, value, dtype, roles = cPickle.loads(v)
        v = variable(value, dtype=dtype, name=name, target=target)
        for i in roles:
            add_role(v, i)
        return v
    elif is_trainable_variable(v):
        name = v.name if ':' not in v.name else v.name.split(':')[0]
        value = get_value(v)
        dtype = v.dtype.as_numpy_dtype if hasattr(v.dtype, 'as_numpy_dtype') \
            else v.dtype
        # ====== shape and roles ====== #
        roles = getattr(v.tag, 'roles', [])
        return cPickle.dumps([name, value, dtype, roles],
                             protocol=cPickle.HIGHEST_PROTOCOL)
    else:
        raise Exception('Variable must be in string form or trainable variable'
                        ' (i.e. SharedVariable in theano)')
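# ====== Illustrative usage sketch ====== #
# A minimal, hypothetical round-trip through `pickling_variable`, assuming
# `variable(...)` in this module creates a trainable backend variable and
# `get_value(...)` returns its numpy value (both are used by the function
# above). `_example_pickling_roundtrip` is not part of the library API.
def _example_pickling_roundtrip():
    import numpy as np
    w = variable(np.random.rand(4, 4), name='w_demo')
    s = pickling_variable(w)           # pickled string: [name, value, dtype, roles]
    w_restored = pickling_variable(s)  # rebuilt variable with the same value/roles
    assert np.allclose(get_value(w), get_value(w_restored))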
def get_mean_logsigma(self, x):
    b_mean = 0. if not hasattr(self, 'b_mean') else self.b_mean
    b_logsigma = 0. if not hasattr(self, 'b_logsigma') else self.b_logsigma
    mean = self.activation(K.dot(x, self.W_mean) + b_mean)
    logsigma = self.activation(K.dot(x, self.W_logsigma) + b_logsigma)
    mean.name = 'variational_mean'
    logsigma.name = 'variational_logsigma'
    add_role(mean, VARIATIONAL_MEAN)
    add_role(logsigma, VARIATIONAL_LOGSIGMA)
    return mean, logsigma
def rnn(input_dim, hidden_dim, W_init=glorot_uniform, b_init=constant(0.),
        bidirectional=False, one_vector=False, return_variable=True,
        name=None):
    """ Fast initialization of all standard RNN weights.

    Parameters
    ----------
    one_vector: bool
        if True, all the weights are flattened and concatenated into 1 big vector
    return_variable: bool
        if False, only return the numpy arrays
    bidirectional: bool
        if True, return parameters for both forward and backward RNN

    Return
    ------
    [W_i, b_wi, R_h, b_wh]
    """
    if name is None:
        name = uuid()

    def init():
        W_i = W_init((input_dim, hidden_dim))
        b_wi = b_init((hidden_dim,))
        R_h = W_init((hidden_dim, hidden_dim))
        b_wh = b_init((hidden_dim,))
        return [W_i, b_wi, R_h, b_wh]
    params = (init() + init()) if bidirectional else init()
    roles = [WEIGHT, BIAS]
    if one_vector:
        params = [np.concatenate([p.flatten() for p in params])]
        roles = [PARAMETER]
    # names
    if one_vector:
        names = [name + '_rnn']
    else:
        names = ["_W_i", "_b_wi", "_R_h", "_b_wh"]
        if bidirectional:
            names = [i + '_fw' for i in names] + [i + '_bw' for i in names]
        names = [name + i for i in names]
    # create variable or not
    if return_variable:
        params = [variable(p, name=n) for p, n in zip(params, names)]
        for i, p in enumerate(params):
            add_role(p, roles[i % 2])
    return params if len(params) > 1 else params[0]
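# ====== Illustrative usage sketch ====== #
# A hypothetical example of the shapes produced by `rnn` above, assuming the
# default initializers return plain numpy arrays when return_variable=False
# (as the docstring states). `_example_rnn_params` is not part of the library.
def _example_rnn_params():
    W_i, b_wi, R_h, b_wh = rnn(input_dim=32, hidden_dim=64,
                               return_variable=False)
    assert W_i.shape == (32, 64) and R_h.shape == (64, 64)
    assert b_wi.shape == (64,) and b_wh.shape == (64,)
    # one_vector=True packs everything into a single flat parameter vector
    flat = rnn(input_dim=32, hidden_dim=64, one_vector=True,
               return_variable=False)
    assert flat.shape == (32 * 64 + 64 + 64 * 64 + 64,)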
def set_inputs(self, *inputs):
    self._input_info = []
    self._inputs = []
    for i in inputs:
        if not K.is_placeholder(i):
            raise ValueError('Only accept inputs which are placeholders.')
        name, dtype, shape = i.name, i.dtype, K.get_shape(i)
        self._input_info.append([name, dtype, shape])
        self._inputs.append(i)
    # ====== Try to check if the inputs match the Ops ====== #
    try:
        # call this to initialize the parameters and get the estimated
        # output shape (we assume training and deploying mode give the
        # same shape).
        for i in self._inputs:
            add_role(i, TRAINING)
        self._y_train = self._seq_ops(*self._inputs)
        for i in self._inputs:
            add_role(i, DEPLOYING)
        self._y_pred = self._seq_ops(*self._inputs)
        # create default output
        if len(self._output_info) == 0:
            shape = K.get_shape(self._y_train)
            self._outputs = [K.placeholder(shape=shape,
                                           dtype=self._y_train.dtype,
                                           name='output1')]
            self._output_info = [('output1', self._y_train.dtype, shape)]
        # reset all compiled functions, they are no longer valid
        self._functions = {}
    except Exception as e:
        warnings.warn('Inputs do not match the Ops requirements, '
                      'error: ' + str(e))
        self._input_info = []
        self._inputs = []
def test_computational_graph2(self):
    np.random.seed(1208)
    X = K.variable(np.zeros((8, 12)), name='X')
    Y = K.variable(np.random.rand(12, 8), name='Y')
    Z = K.placeholder(shape=(8, 8), name='Z')
    a = K.dot(X, Y)
    add_role(a, AUXILIARY)
    add_updates(a, X, X + 12)
    a = a + Z
    g1 = K.ComputationGraph(a)
    self.assertEqual(len(g1.trainable_variables), 2)
    self.assertEqual(len(g1.placeholders), 1)
    self.assertEqual(len(g1.updates), 1)
    self.assertEqual(len(g1.auxiliary_variables), 1)
    f = K.function(Z, [a] + g1.auxiliary_variables)
    output = f(np.random.rand(8, 8))
    self.assertEqual(repr(np.sum(output[0]))[:5], "32.20")
    self.assertEqual(np.sum(output[1]), 0)
    self.assertEqual(np.unique(K.eval(X)).tolist(), [12.])
def lstm(input_dim, hidden_dim, W_init=glorot_uniform, b_init=constant(0.),
         bidirectional=False, one_vector=False, return_variable=True,
         name=None):
    """ Fast initialization of all standard LSTM weights (without peephole
    connections).

    Parameters
    ----------
    one_vector: bool
        if True, all the weights are flattened and concatenated into 1 big vector
    return_variable: bool
        if False, only return the numpy arrays
    bidirectional: bool
        if True, return parameters for both forward and backward LSTM

    Return
    ------
    [W_i, b_wi, W_f, b_wf, W_c, b_wc, W_o, b_wo,
     R_i, b_ri, R_f, b_rf, R_c, b_rc, R_o, b_ro]
    """
    if name is None:
        name = uuid()

    def init():
        # input to hidden
        W_i = W_init((input_dim, hidden_dim))
        b_wi = b_init((hidden_dim,))
        W_f = W_init((input_dim, hidden_dim))
        b_wf = b_init((hidden_dim,))
        W_c = W_init((input_dim, hidden_dim))
        b_wc = b_init((hidden_dim,))
        W_o = W_init((input_dim, hidden_dim))
        b_wo = b_init((hidden_dim,))
        # hidden to hidden
        R_i = W_init((hidden_dim, hidden_dim))
        b_ri = b_init((hidden_dim,))
        R_f = W_init((hidden_dim, hidden_dim))
        b_rf = b_init((hidden_dim,))
        R_c = W_init((hidden_dim, hidden_dim))
        b_rc = b_init((hidden_dim,))
        R_o = W_init((hidden_dim, hidden_dim))
        b_ro = b_init((hidden_dim,))
        return [W_i, b_wi, W_f, b_wf, W_c, b_wc, W_o, b_wo,
                R_i, b_ri, R_f, b_rf, R_c, b_rc, R_o, b_ro]
    params = (init() + init()) if bidirectional else init()
    roles = [WEIGHT, BIAS]
    if one_vector:
        params = [np.concatenate([p.flatten() for p in params])]
        roles = [PARAMETER]
    # names
    if one_vector:
        names = [name + '_lstm']
    else:
        names = ["_W_i", "_b_wi", "_W_f", "_b_wf",
                 "_W_c", "_b_wc", "_W_o", "_b_wo",
                 "_R_i", "_b_ri", "_R_f", "_b_rf",
                 "_R_c", "_b_rc", "_R_o", "_b_ro"]
        if bidirectional:
            names = [i + '_fw' for i in names] + [i + '_bw' for i in names]
        names = [name + i for i in names]
    # create variable or not
    if return_variable:
        params = [variable(p, name=n) for p, n in zip(params, names)]
        for i, p in enumerate(params):
            add_role(p, roles[i % 2])
    return params if len(params) > 1 else params[0]
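# ====== Illustrative usage sketch ====== #
# A hypothetical check of the parameter counts produced by `lstm` above:
# 16 arrays (input/hidden weights and biases for the 4 gates), or 32 when
# bidirectional=True. Assumes return_variable=False yields plain numpy
# arrays, as the docstring states; not part of the library API.
def _example_lstm_params():
    params = lstm(input_dim=32, hidden_dim=64, return_variable=False)
    assert len(params) == 16
    params_bi = lstm(input_dim=32, hidden_dim=64, bidirectional=True,
                     return_variable=False)
    assert len(params_bi) == 32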
def create_params(self, spec, shape, name, nnops, roles=[], nb_params=1):
    """
    Parameters
    ----------
    spec: variable, numpy.ndarray, function
        specification for initializing the weights
    shape: tuple, list
        expected shape for the given variable
    name: str
        name for the variable
    nnops: NNOps
        parent operator of these parameters
    roles: odin.basic.VariableRole
        categories of this variable
    nb_params: int
        number of parameters that are horizontally stacked into the given
        `shape` (e.g. nb_params=2 creates 2 parameters with the given `shape`
        and horizontally stacks them into 1 parameter)
        * NOT supported when `spec` is a variable.
    """
    if not isinstance(roles, (tuple, list)):
        roles = [roles]
    if not isinstance(nnops, NNOps):
        raise Exception('nnops must be instance of odin.nnet.base.NNOps')

    shape = tuple(shape)  # convert to tuple if needed
    if any(d <= 0 for d in shape):
        raise ValueError((
            "Cannot create param with a non-positive shape dimension. "
            "Tried to create param with shape=%r, name=%r") % (shape, name))

    # ====== create parameters ====== #
    spec = as_tuple(spec, nb_params)
    spec = [_initialize_param(name, s, shape) for s in spec]
    # check the shapes returned by the initializers
    shape = list(set([i[-1] for i in spec]))
    if len(shape) > 1:
        raise Exception('shapes are inconsistent among all given "spec", the '
                        'created shapes are: %s' % str(shape))
    shape = shape[0]
    # check the specs returned by the initializers
    spec = [i[0] for i in spec]
    if isinstance(spec[0], np.ndarray):
        with K.variable_scope(nnops.name):
            spec = np.concatenate(spec, axis=-1)
            shape = spec.shape
            spec = K.variable(spec, name=name)
    elif K.is_trainable_variable(spec[0]):
        if nb_params > 1:
            with K.variable_scope(nnops.name):
                spec = np.concatenate([K.get_value(i) for i in spec], axis=-1)
                shape = spec.shape
                spec = K.variable(spec, name=name)
        else:
            spec = spec[0]
    elif K.is_variable(spec[0]):
        shape = (shape[0] * nb_params,) if len(shape) == 1 \
            else shape[:-1] + (shape[-1] * nb_params,)
        spec = K.concatenate(spec, axis=-1)
    # ====== assign annotations ====== #
    # only add roles for trainable variables
    for i in roles:
        if isinstance(i, VariableRole) and K.is_trainable_variable(spec):
            add_role(spec, i)
    # return the actual variable or expression;
    # override other parameters with the same name
    self._variables[name] = spec
    # set parameter attribute for NNOps
    setattr(nnops, name, spec)
    return spec
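# ====== Illustrative sketch ====== #
# What `nb_params` does conceptually: `create_params` initializes `nb_params`
# arrays of the requested `shape` and concatenates them along the last axis
# before wrapping the result in one backend variable. Plain numpy is used
# here to keep the sketch self-contained; it is not part of the library API.
def _example_nb_params_stacking():
    import numpy as np
    shape, nb_params = (128, 256), 2
    pieces = [np.random.randn(*shape) for _ in range(nb_params)]
    stacked = np.concatenate(pieces, axis=-1)
    # one backing array of shape (128, 512) holds both logical parameters
    assert stacked.shape == shape[:-1] + (shape[-1] * nb_params,)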
def randrectify(x, lower=0.3, upper=0.8, shared_axes='auto'):
    """ This function is adapted from Lasagne
    Original work Copyright (c) 2014-2015 Lasagne contributors
    All rights reserved.
    LICENSE: https://github.com/Lasagne/Lasagne/blob/master/LICENSE

    Applies a randomized leaky rectify activation to x.

    The randomized leaky rectifier was first proposed and used in the Kaggle
    NDSB Competition, and later evaluated in [1]_. Compared to the standard
    leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
    for negative input during training, and a fixed slope during evaluation.

    Equation for the randomized rectifier linear unit during training:
    :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

    During evaluation, the factor is fixed to the arithmetic mean of `lower`
    and `upper`.

    Parameters
    ----------
    lower : Theano shared variable, expression, or constant
        The lower bound for the randomly chosen slopes.
    upper : Theano shared variable, expression, or constant
        The upper bound for the randomly chosen slopes.
    shared_axes : 'auto', 'all', int or tuple of int
        The axes along which the random slopes of the rectifier units are
        going to be shared. If ``'auto'`` (the default), share over all axes
        except for the second - this will share the random slope over the
        minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers. If ``'all'``, share over
        all axes, thus using a single random slope.

    References
    ----------
    .. [1] Bing Xu, Naiyan Wang et al. (2015):
       Empirical Evaluation of Rectified Activations in Convolutional Network,
       http://arxiv.org/abs/1505.00853
    """
    input_shape = get_shape(x)
    # ====== check lower and upper ====== #
    if is_trainable_variable(lower):
        add_role(lower, ACTIVATION_PARAMETER)
        lower.name = 'lower'
    if is_trainable_variable(upper):
        add_role(upper, ACTIVATION_PARAMETER)
        upper.name = 'upper'
    if not is_variable(lower > upper) and lower > upper:
        raise ValueError("Upper bound for Randomized Rectifier needs "
                         "to be higher than lower bound.")
    # ====== check shared_axes ====== #
    if shared_axes == 'auto':
        shared_axes = (0,) + tuple(range(2, len(input_shape)))
    elif shared_axes == 'all':
        shared_axes = tuple(range(len(input_shape)))
    elif isinstance(shared_axes, int):
        shared_axes = (shared_axes,)
    else:
        shared_axes = shared_axes
    # ====== main logic ====== #
    if not is_training() or upper == lower:
        # evaluation mode: fixed slope at the mean of the bounds
        x = relu(x, (upper + lower) / 2.0)
    else:
        # training mode: sample a slope per non-shared axis
        shape = list(input_shape)
        if builtins.any(s is None for s in shape):
            shape = list(x.shape)
        for ax in shared_axes:
            shape[ax] = 1
        rnd = random_uniform(tuple(shape), low=lower, high=upper, dtype=FLOATX)
        rnd = addbroadcast(rnd, *shared_axes)
        x = relu(x, rnd)
    add_shape(x, input_shape)
    return x
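# ====== Illustrative sketch ====== #
# The randomized leaky rectifier in plain numpy, mirroring the behaviour
# implemented above: during training the negative slope is sampled from
# U(lower, upper); at evaluation time it is fixed to the mean of the bounds.
# This helper is hypothetical and only meant to clarify the equation.
def _example_randrectify_numpy(x, lower=0.3, upper=0.8, training=True):
    import numpy as np
    if training:
        slope = np.random.uniform(lower, upper, size=x.shape)
    else:
        slope = (lower + upper) / 2.0
    return np.maximum(slope * x, x)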