def set_params(self):
    dim = self.input_dim
    hdim = self.hidden_dim

    self.input = T.matrix()

    self.W_i = self.init((dim, hdim))
    self.U_i = self.inner_init((hdim, hdim))
    self.b_i = shared_zeros((hdim))

    self.W_f = self.init((dim, hdim))
    self.U_f = self.inner_init((hdim, hdim))
    self.b_f = self.forget_bias_init((hdim))

    self.W_c = self.init((dim, hdim))
    self.U_c = self.inner_init((hdim, hdim))
    self.b_c = shared_zeros((hdim))

    self.W_o = self.init((dim, hdim))
    self.U_o = self.inner_init((hdim, hdim))
    self.b_o = shared_zeros((hdim))

    self.W_x = self.init((hdim, dim))
    self.b_x = shared_zeros((dim))

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
        self.W_x, self.b_x,
    ]
def build(self):
    self.readout.build()
    self.init_h = shared_zeros((self.state_dim,))

    # The GRU consumes the layer input concatenated with the readout
    # output, hence the larger input dimension.
    input_dim = self.input_shape[2] + self.readout.output_shape[1]

    # Adapted from keras.layers.recurrent.GRU
    self.W_z = self.init((input_dim, self.state_dim))
    self.U_z = self.inner_init((self.state_dim, self.state_dim))
    self.b_z = shared_zeros((self.state_dim))

    self.W_r = self.init((input_dim, self.state_dim))
    self.U_r = self.inner_init((self.state_dim, self.state_dim))
    self.b_r = shared_zeros((self.state_dim))

    self.W_h = self.init((input_dim, self.state_dim))
    self.U_h = self.inner_init((self.state_dim, self.state_dim))
    self.b_h = shared_zeros((self.state_dim))

    self.params = [
        self.init_h,
        self.W_z, self.U_z, self.b_z,
        self.W_r, self.U_r, self.b_r,
        self.W_h, self.U_h, self.b_h,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def __init__(self, input_dim, hidden_dim, init='glorot_uniform',
             activation='linear', weights=None, corruption_level=0.3):
    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = input_dim

    self.input = T.matrix()
    self.W = self.init((self.input_dim, self.hidden_dim))
    self.b = shared_zeros((self.hidden_dim))
    self.b_prime = shared_zeros((self.input_dim))

    numpy_rng = np.random.RandomState(123)
    self.theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    self.params = [self.W, self.b, self.b_prime]
    self.corruption_level = corruption_level

    if weights is not None:
        self.set_weights(weights)
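# A minimal sketch (not part of the original class) of the corruption step
# that `theano_rng` and `corruption_level` above are typically used for in a
# denoising autoencoder; the method name `get_corrupted_input` is an
# assumption, not taken from the source.
def get_corrupted_input(self, x):
    # Zero out each input unit independently with probability
    # `corruption_level`, keeping the rest unchanged.
    mask = self.theano_rng.binomial(size=x.shape, n=1,
                                    p=1 - self.corruption_level,
                                    dtype=theano.config.floatX)
    return x * mask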
def _build(self):
    nw = len(self.initial_weights) if self.initial_weights is not None else 0
    if self.initial_state is not None:
        self.h = sharedX(self.initial_state[0])
        self.c = sharedX(self.initial_state[1])
        del self.initial_state
    elif self.batch_size is not None:
        self.h = shared_zeros((self.batch_size, self.hidden_dim))
        self.c = shared_zeros((self.batch_size, self.hidden_dim))
    elif self.initial_weights is not None:
        if nw == len(self.params) + 2:
            self.h = sharedX(self.initial_weights[-1])
            self.c = sharedX(self.initial_weights[-2])
            nw -= 2
        else:
            raise Exception("Hidden state not provided in weights")
    else:
        raise Exception("One of the following arguments must be provided "
                        "for stateful RNNs: hidden_state, batch_size, weights")
    self.state = [self.h, self.c]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights[:nw])
        del self.initial_weights
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_sum = self.init((input_dim, self.output_dim))
    self.U_sum = self.inner_init((self.output_dim, self.output_dim))
    self.b_sum = shared_zeros((self.output_dim))

    self.W_i = self.init((input_dim, self.output_dim))
    self.U_i = self.inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim))

    self.W_f = self.init((input_dim, self.output_dim))
    self.U_f = self.inner_init((self.output_dim, self.output_dim))
    self.b_f = self.forget_bias_init((self.output_dim))

    self.W_c = self.init((input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = self.init((input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_sum, self.U_sum, self.b_sum,
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def __init__(self, input_dim, output_dim=128, init='uniform',
             inner_init='glorot_normal', activation='softplus',
             inner_activation='hard_sigmoid', gate_activation='tanh',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(SGU, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.gate_activation = activations.get(gate_activation)
    self.input = TT.tensor3()

    self.W = self.init((self.input_dim, self.output_dim))
    self.U = self.inner_init((self.output_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))

    self.W_gate = self.init((self.input_dim, self.output_dim))
    self.b_gate = shared_zeros((self.output_dim))
    self.U_gate = self.inner_init((self.output_dim, self.output_dim))

    self.params = [
        self.W, self.U, self.b,
        self.W_gate, self.b_gate, self.U_gate,
    ]

    if weights is not None:
        self.set_weights(weights)
def __init__(self, input_dim, states_dim, causes_dim, init='glorot_uniform',
             inner_init='orthogonal', activation='sigmoid',
             gate_activation='sigmoid', weights=None, return_mode='states',
             truncate_gradient=-1, return_sequences=False):
    super(FDPCN, self).__init__()
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.input_dim = input_dim
    self.states_dim = states_dim
    self.causes_dim = causes_dim
    self.truncate_gradient = truncate_gradient
    self.activation = activations.get(activation)
    self.gate_activation = activations.get(gate_activation)
    self.return_sequences = return_sequences
    self.return_mode = return_mode
    self.input = T.tensor3()

    self.I2S = self.init((self.input_dim, self.states_dim))
    self.S2S = self.inner_init((self.states_dim, self.states_dim))
    self.Sb = shared_zeros((self.states_dim))

    self.S2C = self.init((self.states_dim, self.causes_dim))
    self.C2C = self.inner_init((self.causes_dim, self.causes_dim))
    self.Cb = shared_zeros((self.causes_dim))
    self.CbS = shared_zeros((self.states_dim))
    self.C2S = self.init((self.causes_dim, self.states_dim))

    self.params = [self.I2S, self.S2S, self.Sb, self.C2S,
                   self.C2C, self.Cb, self.S2C, self.CbS]

    if weights is not None:
        self.set_weights(weights)
def build(self):
    self.readout.build()
    self.init_h = shared_zeros((self.state_dim,))

    # The GRU consumes the layer input concatenated with the readout
    # output, hence the larger input dimension.
    input_dim = self.input_shape[2] + self.readout.output_shape[1]

    # Adapted from keras.layers.recurrent.GRU
    self.W_z = self.init((input_dim, self.state_dim))
    self.U_z = self.inner_init((self.state_dim, self.state_dim))
    self.b_z = shared_zeros((self.state_dim))

    self.W_r = self.init((input_dim, self.state_dim))
    self.U_r = self.inner_init((self.state_dim, self.state_dim))
    self.b_r = shared_zeros((self.state_dim))

    self.W_h = self.init((input_dim, self.state_dim))
    self.U_h = self.inner_init((self.state_dim, self.state_dim))
    self.b_h = shared_zeros((self.state_dim))

    self.trainable_weights = [
        self.init_h,
        self.W_z, self.U_z, self.b_z,
        self.W_r, self.U_r, self.b_r,
        self.W_h, self.U_h, self.b_h,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_g = self.init((input_dim, self.output_dim))
    self.U_g = self.inner_init((self.output_dim, 6, self.output_dim))
    self.b_g = shared_zeros((self.output_dim))

    self.W_c = self.init((input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = self.init((input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.EPS = 1e-10
    scalar_init = 1
    scale = 0.01
    # k_parameters packs the 11 kernel hyperparameters, initialized near 1:
    # sigma_se, sigma_per, sigma_b_lin, sigma_v_lin, sigma_rq,
    # l_se, l_per, l_lin, l_rq, alpha_rq, p_per.
    self.k_parameters = sharedX(
        np.random.uniform(low=scalar_init - scale,
                          high=scalar_init + scale, size=(11,)))

    self.params = [
        self.k_parameters,
        self.W_g, self.U_g, self.b_g,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def __init__(self, input_dim, output_dim=128, train_init_cell=True,
             train_init_h=True, init='glorot_uniform', inner_init='orthogonal',
             forget_bias_init='one', input_activation='tanh',
             gate_activation='hard_sigmoid', output_activation='tanh',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(LSTMLayer, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.forget_bias_init = initializations.get(forget_bias_init)
    self.input_activation = activations.get(input_activation)
    self.gate_activation = activations.get(gate_activation)
    self.output_activation = activations.get(output_activation)
    self.input = T.tensor3()
    self.time_range = None

    # Draw the per-gate matrices, then stack them into single shared
    # variables so one dot product covers all four gates (z, i, f, o).
    W_z = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_z = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
    W_i = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_i = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
    W_f = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_f = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
    W_o = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_o = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)

    self.h_m1 = shared_zeros(shape=(1, self.output_dim), name='h0')
    self.c_m1 = shared_zeros(shape=(1, self.output_dim), name='c0')

    W = np.vstack((W_z[np.newaxis, :, :],
                   W_i[np.newaxis, :, :],
                   W_f[np.newaxis, :, :],
                   W_o[np.newaxis, :, :]))  # shape = (4, input_dim, output_dim)
    R = np.vstack((R_z[np.newaxis, :, :],
                   R_i[np.newaxis, :, :],
                   R_f[np.newaxis, :, :],
                   R_o[np.newaxis, :, :]))  # shape = (4, output_dim, output_dim)
    self.W = theano.shared(W, name='Input to hidden weights (zifo)', borrow=True)
    self.R = theano.shared(R, name='Recurrent weights (zifo)', borrow=True)
    self.b = theano.shared(np.zeros(shape=(4, self.output_dim),
                                    dtype=theano.config.floatX),
                           name='bias', borrow=True)

    self.params = [self.W, self.R]
    if train_init_cell:
        self.params.append(self.c_m1)
    if train_init_h:
        self.params.append(self.h_m1)

    if weights is not None:
        self.set_weights(weights)
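# A minimal sketch (assumed, not from the source) of the per-timestep update
# that the stacked (4, ...) weights above enable: a single dot product yields
# the pre-activations of all four gates (z, i, f, o), which are then sliced.
# The names `_step`, `x_t`, `h_tm1`, `c_tm1` are illustrative.
def _step(self, x_t, h_tm1, c_tm1):
    # x_t: (batch, input_dim) -> preact: (batch, 4, output_dim)
    preact = T.dot(x_t, self.W) + T.dot(h_tm1, self.R) + self.b
    z = self.input_activation(preact[:, 0, :])  # block input
    i = self.gate_activation(preact[:, 1, :])   # input gate
    f = self.gate_activation(preact[:, 2, :])   # forget gate
    o = self.gate_activation(preact[:, 3, :])   # output gate
    c_t = i * z + f * c_tm1
    h_t = o * self.output_activation(c_t)
    return h_t, c_t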
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    # parameter count; must be initialized before the += below
    self.n_param = 0

    # forget gate params
    self.W_xf = self.init((input_dim, self.output_dim))
    # self.U_hf = self.inner_init((input_dim, self.output_dim))
    self.b_f = shared_zeros((self.output_dim))

    # input/feature params
    self.W_xz = self.init((input_dim, self.output_dim))
    # self.U_xz = self.inner_init((input_dim, self.output_dim))
    self.b_z = shared_zeros((self.output_dim))

    # output params
    self.W_xo = self.init((input_dim, self.output_dim))
    # self.U_xo = self.inner_init((input_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.n_param += 3 * (input_dim + 1) * self.output_dim

    self.params = [
        self.W_xf, self.b_f,
        self.W_xz, self.b_z,
        self.W_xo, self.b_o,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def get_param_updates(params, grads, lr, method=None, **kwargs):
    rho = 0.95
    epsilon = 1e-6
    accumulators = [shared_zeros(p.get_value().shape) for p in params]
    updates = []

    if 'constraint' in kwargs:
        constraint = kwargs['constraint']
    else:
        constraint = None

    if method == 'adadelta':
        print "Using ADADELTA"
        delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
        for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
            new_a = rho * a + (1 - rho) * g ** 2  # update accumulator
            # use the new accumulator and the *old* delta_accumulator
            update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon)
            new_p = p - lr * update
            # update delta_accumulator
            new_d_a = rho * d_a + (1 - rho) * update ** 2
            updates.append((p, new_p))
            updates.append((a, new_a))
            updates.append((d_a, new_d_a))

    elif method == 'adagrad':
        print "Using ADAGRAD"
        for p, g, a in zip(params, grads, accumulators):
            new_a = a + g ** 2  # update accumulator
            new_p = p - lr * g / T.sqrt(new_a + epsilon)
            updates.append((p, new_p))
            updates.append((a, new_a))

    elif method == 'momentum':
        print "Using MOMENTUM"
        momentum = kwargs['momentum']
        for param, gparam in zip(params, grads):
            param_update = theano.shared(param.get_value() * 0.,
                                         broadcastable=param.broadcastable)
            gparam_constrained = maxnorm_constraint(gparam)
            param_update_update = (momentum * param_update +
                                   (1. - momentum) * gparam_constrained)
            updates.append((param, param - param_update * lr))
            updates.append((param_update, param_update_update))

    else:  # Default
        print "Using DEFAULT"
        for param, gparam in zip(params, grads):
            param_update = maxnorm_constraint(gparam)
            updates.append((param, param - param_update * lr))

    # apply constraints on self.weights update
    # assumes that updates[0] corresponds to self.weights param
    if constraint is not None:
        updates[0] = (updates[0][0], constraint(updates[0][1]))

    return updates
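# A minimal usage sketch (assumed, not from the source) showing how the
# update pairs returned by get_param_updates feed a compiled Theano training
# step; the toy cost and shared parameter here are illustrative placeholders.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
w = theano.shared(np.zeros((5, 3), dtype=theano.config.floatX), name='w')
cost = T.sum(T.sqr(T.dot(x, w)))          # toy quadratic cost
grads = T.grad(cost, [w])
updates = get_param_updates([w], grads, lr=0.01, method='adagrad')
train_fn = theano.function([x], cost, updates=updates)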
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_x2e = self.init((self.n_experts, input_dim, self.output_dim))
    self.W_x2g = self.init((input_dim, self.output_dim))
    self.b_x2e = shared_zeros((self.n_experts, self.output_dim))
    self.b_x2g = shared_zeros((self.output_dim))
    self.W_h2e = shared_zeros((self.n_experts, self.output_dim, self.output_dim))

    scale = 0.05
    self.U_g = sharedX(
        np.random.uniform(low=-scale, high=scale,
                          size=(self.output_dim, self.n_experts, self.output_dim)))

    self.params = [
        self.W_x2e, self.W_x2g, self.b_x2g,
        self.b_x2e, self.W_h2e, self.U_g,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    input_dim = self.input_shape[2]

    def init_U(way=self.U_init):
        if way == "identity":
            return theano.shared(
                np.identity(self.output_dim).astype("float32") * 0.6)
        if way == "orthogonal":
            return self.inner_init((self.output_dim, self.output_dim))
        if way == "uniform":
            return self.init((self.output_dim, self.output_dim))

    self.W1 = self.init((input_dim, self.output_dim))
    self.U1 = init_U()
    self.W2 = self.init((self.output_dim, self.output_dim))
    self.U2 = init_U()
    self.V2 = self.init((self.output_dim, self.output_dim))
    self.b1 = shared_zeros((self.output_dim))
    self.b2 = shared_zeros((self.output_dim))

    self.params = [self.W1, self.U1,
                   self.W2, self.U2, self.V2,
                   self.b1, self.b2]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    scale = 0.05
    self.W_maxout = sharedX(
        np.random.uniform(low=-scale, high=scale,
                          size=(self.n_opt, 2, self.n_pieces)))
    self.b_maxout = shared_zeros((self.output_dim, self.n_opt, self.n_pieces))

    self.W_g = self.init((input_dim, self.output_dim))
    self.U_g = sharedX(
        np.random.uniform(low=-scale, high=scale,
                          size=(self.output_dim, self.n_opt, self.output_dim)))
    self.b_g = shared_zeros((self.output_dim))

    self.W_c = self.init((input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = self.init((input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_maxout, self.b_maxout,
        self.W_g, self.U_g, self.b_g,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    scale = 0.05
    self.W_maxout = sharedX(
        np.random.uniform(low=-scale, high=scale, size=(2, self.n_pieces)))
    self.b_maxout = shared_zeros((self.output_dim, self.n_pieces))

    self.W_c = self.init((input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = self.init((input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_maxout, self.b_maxout,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def __init__(self, input_dim, output_dim=128, init='uniform',
             inner_init='orthogonal', activation='tanh',
             inner_activation='hard_sigmoid', weights=None,
             truncate_gradient=-1, return_sequences=False):
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.input = T.tensor3()

    self.W_i = self.init((self.input_dim, self.output_dim))
    self.U_i = self.inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim))

    self.W_f = self.init((self.input_dim, self.output_dim))
    self.U_f = self.inner_init((self.output_dim, self.output_dim))
    self.b_f = shared_zeros((self.output_dim))

    self.W_c = self.init((self.input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = self.init((self.input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
    ]

    # C1, H1: starting C, H values
    self.C1 = T.matrix()
    self.H1 = T.matrix()

    if weights is not None:
        self.set_weights(weights)
def __init__(self, periods, input_dim, output_dim=128, init='uniform',
             inner_init='glorot_normal', activation='softplus',
             inner_activation='hard_sigmoid', gate_activation='tanh',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(ClockworkSGU, self).__init__()
    self.periods = periods
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.gate_activation = activations.get(gate_activation)

    # the hidden units are split evenly across the clock periods
    self.n = self.output_dim // len(self.periods)
    assert self.output_dim % len(self.periods) == 0

    self.input = TT.tensor3()

    self.W = self.init((self.input_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))
    self.W_gate = self.init((self.input_dim, self.output_dim))
    self.b_gate = shared_zeros((self.output_dim))

    # per-period recurrent blocks: block i has shape ((i + 1) * n, n)
    self.clock_h = {}
    for i, period in enumerate(self.periods):
        self.clock_h[period] = self.inner_init(((i + 1) * self.n, self.n))
    self.clock_gates = {}
    for i, period in enumerate(self.periods):
        self.clock_gates[period] = self.inner_init(((i + 1) * self.n, self.n))

    self.params = [
        self.W, self.b,
        self.W_gate, self.b_gate,
    ]
    self.params.extend(self.clock_h.values())
    self.params.extend(self.clock_gates.values())

    if weights is not None:
        self.set_weights(weights)
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_i = self.init((input_dim, self.output_dim))
    self.U_i = self.inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim))

    self.W_f = self.init((input_dim, self.output_dim))
    self.U_f = self.inner_init((self.output_dim, self.output_dim))
    self.b_f = self.forget_bias_init((self.output_dim))

    self.W_c = self.init((input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = self.init((input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
    ]

    nw = len(self.initial_weights) if self.initial_weights is not None else 0
    if self.initial_state is not None:
        self.h = sharedX(self.initial_state[0])
        self.c = sharedX(self.initial_state[1])
        del self.initial_state
    elif self.batch_size is not None:
        self.h = shared_zeros((self.batch_size, self.output_dim))
        self.c = shared_zeros((self.batch_size, self.output_dim))
    elif self.initial_weights is not None:
        if nw == len(self.params) + 2:
            self.h = sharedX(self.initial_weights[-1])
            self.c = sharedX(self.initial_weights[-2])
            nw -= 2
        else:
            raise Exception("Hidden state not provided in weights")
    else:
        raise Exception("One of the following arguments must be provided "
                        "for stateful RNNs: hidden_state, batch_size, weights")
    self.state = [self.h, self.c]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights[:nw])
        del self.initial_weights
def __init__(self, input_dim, output_dim, causes_dim, hid2output,
             init='glorot_uniform', W_regularizer=None, W_constraint=None,
             b_regularizer=None, b_constraint=None,
             activation=lambda X: T.minimum(20, T.maximum(0, X)),
             activity_regularizer=None, truncate_gradient=-1, weights=None,
             name=None, return_mode='both', return_sequences=True):
    super(GAE, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.causes_dim = causes_dim
    self.activation = activations.get(activation)
    self.init = initializations.get(init)
    self.truncate_gradient = truncate_gradient
    self.input = T.tensor3()
    self.return_mode = return_mode
    self.return_sequences = return_sequences

    self.V = self.init((input_dim, output_dim))
    self.U = self.init((input_dim, output_dim))
    self.W = self.init((output_dim, causes_dim))
    self.bo = shared_zeros((self.output_dim))
    self.bc = shared_zeros((self.causes_dim))
    self.params = [self.V, self.U, self.W]

    self.regularizers = []
    self.W_regularizer = regularizers.get(W_regularizer)
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)

    self.b_regularizer = regularizers.get(b_regularizer)
    if self.b_regularizer:
        # the original referenced a nonexistent `self.b`; this layer defines
        # `bo` and `bc`, and the output bias `bo` is assumed here
        self.b_regularizer.set_param(self.bo)
        self.regularizers.append(self.b_regularizer)

    self.activity_regularizer = regularizers.get(activity_regularizer)
    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    self.W_constraint = constraints.get(W_constraint)
    self.b_constraint = constraints.get(b_constraint)
    self.constraints = [self.W_constraint, self.b_constraint]

    if weights is not None:
        self.set_weights(weights)
    if name is not None:
        self.set_name(name)
def get_updates(self, params, grads, method=None, **kwargs):
    self.rho = 0.95
    self.epsilon = 1e-6
    accumulators = [shared_zeros(p.get_value().shape) for p in params]
    updates = []

    if method == 'adadelta':
        print "Using ADADELTA"
        delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
        for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
            new_a = self.rho * a + (1 - self.rho) * g ** 2  # update accumulator
            updates.append((a, new_a))
            # use the new accumulator and the *old* delta_accumulator
            update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a + self.epsilon)
            new_p = p - self.lr * update
            updates.append((p, new_p))
            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * update ** 2
            updates.append((d_a, new_d_a))

    elif method == 'adam':
        # unimplemented
        print "Using ADAM"

    elif method == 'adagrad':
        print "Using ADAGRAD"
        for p, g, a in zip(params, grads, accumulators):
            new_a = a + g ** 2  # update accumulator
            updates.append((a, new_a))
            new_p = p - self.lr * g / T.sqrt(new_a + self.epsilon)
            updates.append((p, new_p))

    else:  # Default: momentum
        print "Using MOMENTUM"
        l_rate = kwargs['l_rate']
        # note: the original iterated over `gradient`, which is undefined;
        # `grads` is clearly what was meant
        for param, gparam in zip(params, grads):
            param_update = theano.shared(param.get_value() * 0.,
                                         broadcastable=param.broadcastable)
            updates.append((param, param - param_update * l_rate))
            updates.append((param_update,
                            self.momentum * param_update +
                            (1. - self.momentum) * gparam))

    return updates
def build(self):
    self.input = T.tensor4()

    if self.inner_rnn == 'gru':
        self.enc = GRU(
            input_length=self.n_steps,
            input_dim=self._input_shape[0] * 2 * self.N_enc ** 2 + self.output_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
        self.dec = GRU(
            input_length=self.n_steps,
            input_dim=self.code_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
    elif self.inner_rnn == 'lstm':
        self.enc = LSTM(
            input_length=self.n_steps,
            input_dim=self._input_shape[0] * 2 * self.N_enc ** 2 + self.output_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
        self.dec = LSTM(
            input_length=self.n_steps,
            input_dim=self.code_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
    else:
        raise ValueError('This type of inner_rnn is not supported')

    self.enc.build()
    self.dec.build()

    self.init_canvas = shared_zeros(self._input_shape)  # canvas and hidden state
    self.init_h_enc = shared_zeros((self.output_dim))   # initial values
    self.init_h_dec = shared_zeros((self.output_dim))   # should be trained
    self.L_enc = self.enc.init((self.output_dim, 5))  # "read" attention parameters (eq. 21)
    self.L_dec = self.enc.init((self.output_dim, 5))  # "write" attention parameters (eq. 28)
    self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
    self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
    self.W_patch = self.enc.init((self.output_dim,
                                  self.N_dec ** 2 * self._input_shape[0]))
    self.b_patch = shared_zeros((self.N_dec ** 2 * self._input_shape[0]))
    self.W_mean = self.enc.init((self.output_dim, self.code_dim))
    self.W_sigma = self.enc.init((self.output_dim, self.code_dim))
    self.b_mean = shared_zeros((self.code_dim))
    self.b_sigma = shared_zeros((self.code_dim))

    self.trainable_weights = self.enc.trainable_weights + self.dec.trainable_weights + [
        self.L_enc, self.L_dec, self.b_enc, self.b_dec,
        self.W_patch, self.b_patch,
        self.W_mean, self.W_sigma, self.b_mean, self.b_sigma,
        self.init_canvas, self.init_h_enc, self.init_h_dec]

    if self.inner_rnn == 'lstm':
        self.init_cell_enc = shared_zeros((self.output_dim))  # initial values
        self.init_cell_dec = shared_zeros((self.output_dim))  # should be trained
        self.trainable_weights = self.trainable_weights + [
            self.init_cell_dec, self.init_cell_enc]
def __init__(self, input_dim, output_dim=128, init='glorot_uniform',
             inner_init='orthogonal', forget_bias_init='one',
             activation='tanh', inner_activation='hard_sigmoid',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(LangLSTMLayerV0, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.forget_bias_init = initializations.get(forget_bias_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.input = T.tensor3()

    self.W_i = self.init((self.input_dim, self.output_dim))
    self.U_i = self.inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros(self.output_dim)

    self.W_f = self.init((self.input_dim, self.output_dim))
    self.U_f = self.inner_init((self.output_dim, self.output_dim))
    self.b_f = self.forget_bias_init(self.output_dim)

    self.W_c = self.init((self.input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros(self.output_dim)

    self.W_o = self.init((self.input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros(self.output_dim)

    self.h00 = shared_zeros(shape=(1, self.output_dim))

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
        self.h00,
    ]

    if weights is not None:
        self.set_weights(weights)
def build(self):
    input_dim = self.input_shape[2]

    def init_U(way=self.U_init, n=6):
        U_ = np.zeros((self.output_dim, self.output_dim * n)).astype("float32")
        for k in xrange(n):
            if way == "identity":
                U_[:, k * self.output_dim: (k + 1) * self.output_dim] = \
                    np.identity(self.output_dim).astype("float32") * 0.95
            if way == "orthogonal":
                U = self.inner_init((self.output_dim, self.output_dim)).get_value()
                U_[:, k * self.output_dim: (k + 1) * self.output_dim] = U
            if way == "uniform":
                U = self.init((self.output_dim, self.output_dim), self.v_init).get_value()
                U_[:, k * self.output_dim: (k + 1) * self.output_dim] = U
        return U_

    # U is one big matrix covering all the hidden layers;
    # for each hidden layer, U = [U_f, U_i, U_o, U_c]
    U = np.zeros((self.output_dim * (self.dp - 1), self.output_dim * 4)).astype('float32')
    for i in xrange(self.dp - 1):
        U[i * self.output_dim:(i + 1) * self.output_dim, :] = init_U(n=4)
    self.U = theano.shared(U)
    self.b = shared_zeros((self.dp - 1, self.output_dim * 4))

    # Important: set b; the second assignment overwrites the first
    # output_dim block, so the forget block starts at -5 and the next
    # two gate blocks at 5.
    b = np.zeros((self.dp - 1, self.output_dim * 4), dtype="float32")
    b[:, 0:3 * self.output_dim] = 5 * np.ones((self.dp - 1, 3 * self.output_dim), dtype="float32")
    b[:, 0:1 * self.output_dim] = -5 * np.ones((self.dp - 1, self.output_dim), dtype="float32")
    self.b.set_value(b)

    # U1 is one big matrix for the lowest states:
    # U for hid-hid, W for in-hid, V for skew-top-down:
    # [ W_f_u, W_i, W_o, W_c;
    #   U_f_u, U_i, U_o, U_c;
    #   V_f_u, V_i, V_o, V_c ]
    self.W1 = self.init((input_dim, self.output_dim * 4), self.v_init)
    self.U1 = self.init((self.output_dim, self.output_dim * 4), self.v_init)
    self.U1.set_value(init_U(n=4))
    self.b1 = shared_zeros((self.output_dim * 4))
    # initialize b1 so that the bias for U_f_w and V_f_w is -k and the
    # bias for U_f_u is 1
    b = np.zeros((self.output_dim * 4), dtype="float32")
    b[0:self.output_dim] = np.ones((self.output_dim), dtype="float32")
    self.b1.set_value(b)

    self.params = [self.U, self.b, self.W1, self.U1, self.b1]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def __init__(self, input_dim, hidden_dim, init='glorot_uniform',
             weights=None, name=None,
             W_regularizer=None, bx_regularizer=None, bh_regularizer=None,
             # activity_regularizer=None,
             W_constraint=None, bx_constraint=None, bh_constraint=None):
    super(RBM, self).__init__()
    self.init = initializations.get(init)
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim

    self.input = T.matrix()
    self.W = self.init((self.input_dim, self.hidden_dim))
    self.bx = shared_zeros((self.input_dim))
    self.bh = shared_zeros((self.hidden_dim))
    self.params = [self.W, self.bx, self.bh]

    self.regularizers = []
    self.W_regularizer = regularizers.get(W_regularizer)
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)

    self.bx_regularizer = regularizers.get(bx_regularizer)
    if self.bx_regularizer:
        self.bx_regularizer.set_param(self.bx)
        self.regularizers.append(self.bx_regularizer)

    self.bh_regularizer = regularizers.get(bh_regularizer)
    if self.bh_regularizer:
        self.bh_regularizer.set_param(self.bh)
        self.regularizers.append(self.bh_regularizer)

    # self.activity_regularizer = regularizers.get(activity_regularizer)
    # if self.activity_regularizer:
    #     self.activity_regularizer.set_layer(self)
    #     self.regularizers.append(self.activity_regularizer)

    self.W_constraint = constraints.get(W_constraint)
    self.bx_constraint = constraints.get(bx_constraint)
    self.bh_constraint = constraints.get(bh_constraint)
    self.constraints = [self.W_constraint, self.bx_constraint, self.bh_constraint]

    if weights is not None:
        self.set_weights(weights)
    if name is not None:
        self.set_name(name)

    self.srng = RandomStreams(seed=np.random.randint(10e6))
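# A minimal sketch (assumed, not from the source) of one Gibbs half-step the
# RBM fields above support: sample the hidden units given a visible batch.
# The method name `sample_h_given_x` is illustrative.
def sample_h_given_x(self, x):
    # hidden activation probabilities, then a Bernoulli sample via self.srng
    h_mean = T.nnet.sigmoid(T.dot(x, self.W) + self.bh)
    h_sample = self.srng.binomial(size=h_mean.shape, n=1, p=h_mean,
                                  dtype=theano.config.floatX)
    return h_mean, h_sample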
def __init__(self, filter_shape, init_mode='glorot_uniform', w_shared=True,
             n_inputs=1, regularizers=None, constraints=None):
    self.name = self.__class__.__name__
    self.init = initializations.get(init_mode)
    self.w_shared = w_shared
    self.filter_shape = filter_shape

    # one (W, b) pair if weights are shared across inputs, else one per input
    self.params_dict = OrderedDict([
        ('W', [self.init(filter_shape)] if w_shared
              else [self.init(filter_shape) for i in xrange(n_inputs)]),
        ('b', [shared_zeros((filter_shape[1],))] if w_shared
              else [shared_zeros((filter_shape[1],)) for i in xrange(n_inputs)]),
    ])
    self.params = [param for sublist in self.params_dict.values()
                   for param in sublist]

    self.set_constraints(constraints)
    self.set_regularizers(regularizers)
def __init__(self, nb_filter, stack_size, filter_length,
             init='glorot_uniform', activation='linear', weights=None,
             image_shape=None, border_mode='valid', subsample_length=1):
    super(Convolution1D, self).__init__()

    # implemented as a 2D convolution over a single row
    nb_row = 1
    nb_col = filter_length
    subsample = (1, subsample_length)

    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.subsample = subsample
    self.border_mode = border_mode
    self.image_shape = image_shape
    self.nb_filter = nb_filter
    self.stack_size = stack_size

    self.input = T.tensor4()
    self.W_shape = (nb_filter, stack_size, nb_row, nb_col)
    self.W = self.init(self.W_shape)
    self.b = shared_zeros((nb_filter,))
    self.params = [self.W, self.b]

    if weights is not None:
        self.set_weights(weights)
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.matrix()

    self.W = self.init((input_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim,))
    self.params = [self.W, self.b]

    self.regularizers = []
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)
    if self.b_regularizer:
        self.b_regularizer.set_param(self.b)
        self.regularizers.append(self.b_regularizer)
    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    input_shape = self.input_shape
    input_dim = input_shape[2]  # embedding dimension

    self.e0 = self.init((input_dim,))         # sentence-start embedding
    self.c0 = self.init((self.context_dim,))  # sentence-start context
    self.en = self.init((input_dim,))         # sentence-end embedding
    self.cn = self.init((self.context_dim,))  # sentence-end context

    self.Wl = self.init((self.context_dim, self.context_dim))
    self.Wr = self.init((self.context_dim, self.context_dim))
    self.Wsl = self.init((input_dim, self.context_dim))
    self.Wsr = self.init((input_dim, self.context_dim))

    self.W2 = self.init((input_dim + 2 * self.context_dim, self.output_dim))
    self.b2 = shared_zeros((self.output_dim,))

    self.params = [self.e0, self.c0, self.en, self.cn,
                   self.Wl, self.Wr, self.Wsl, self.Wsr,
                   self.W2, self.b2]
def __init__(self, input_dim, hidden_dim, init='glorot_uniform',
             activation='linear', weights=None):
    # note: these locals (W, hbias) are created but never used below;
    # self.W is initialized separately
    nvis = input_dim
    nhid = hidden_dim
    W_shape = nhid, nvis
    lim = np.sqrt(6. / (2 * nvis + 1))
    W_init = np.random.uniform(-lim, lim, W_shape)
    W = theano.shared(W_init)
    hbias = theano.shared(np.zeros((nhid, 1)), broadcastable=[False, True])

    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = input_dim

    self.input = T.matrix()
    # maybe need to replace the initialization function
    self.W = self.init((self.input_dim, self.hidden_dim))
    self.b = shared_zeros((self.hidden_dim))

    self.params = [self.W, self.b]

    if weights is not None:
        self.set_weights(weights)
def __init__(self, *args, **kwargs):
    super(ProdTensor, self).__init__(*args, **kwargs)
    self.W = self.init((self.input_dim, self.output_dim))
    self.C = self.init((self.causes_dim, self.output_dim))
    self.b0 = shared_zeros((self.output_dim))

    # replace the parent's first two parameters and append the new bias
    self.params[0] = self.W
    self.params[1] = self.C
    self.params = self.params + [self.b0, ]
def __init__(self, weights):
    super(FixedEmbedding, self).__init__()
    self.input_dim, self.output_dim = weights.shape
    self.input = T.imatrix()
    self.W = shared_zeros((self.input_dim, self.output_dim))
    self.W.set_value(weights)
    self.params = []  # the embedding is fixed, so nothing is trainable
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_x2e = self.init((self.n_experts, input_dim, self.output_dim))
    self.W_e2e = self.init((self.output_dim, self.output_dim))
    self.b_x2e = shared_zeros((self.n_experts, self.output_dim))
    self.W_x2g = self.init((input_dim, self.output_dim))
    self.b_x2g = shared_zeros((self.output_dim))
    self.U_g = self.init((self.output_dim, self.n_experts, self.output_dim))

    self.params = [self.W_x2e, self.W_e2e, self.b_x2e,
                   self.W_x2g, self.b_x2g, self.U_g]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def __init__(self, n_channels, batch_size=30):
    self.n_channels = n_channels
    self.batch_size = batch_size

    self.conv1_W = initializations.uniform((96, n_channels, 7, 7))
    self.conv1_b = shared_zeros((96,))
    self.conv2_W = initializations.uniform((256, 96, 5, 5))
    self.conv2_b = shared_zeros((256,))
    self.conv3_W = initializations.uniform((512, 256, 3, 3))
    self.conv3_b = shared_zeros((512,))
    self.conv4_W = initializations.uniform((512, 512, 3, 3))
    self.conv4_b = shared_zeros((512,))
    self.conv5_W = initializations.uniform((512, 512, 3, 3))
    self.conv5_b = shared_zeros((512,))
def __init__(self, input_dim, output_dim, init='glorot_uniform',
             activation='linear', weights=None, name=None,
             W_regularizer=None, b_regularizer=None,
             activity_regularizer=None, W_constraint=None,
             b_constraint=None, corruption_level=0.0):
    super(DAE, self).__init__()
    self.srng = RandomStreams(seed=np.random.randint(10e6))
    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.corruption_level = corruption_level

    self.input = T.matrix()
    self.W = self.init((self.input_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))
    self.bT = shared_zeros((self.input_dim))
    self.params = [self.W, self.b, self.bT]

    self.regularizers = []
    self.W_regularizer = regularizers.get(W_regularizer)
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)

    self.b_regularizer = regularizers.get(b_regularizer)
    if self.b_regularizer:
        self.b_regularizer.set_param(self.b)
        self.regularizers.append(self.b_regularizer)

    self.activity_regularizer = regularizers.get(activity_regularizer)
    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    self.W_constraint = constraints.get(W_constraint)
    self.b_constraint = constraints.get(b_constraint)
    self.constraints = [self.W_constraint, self.b_constraint]

    if weights is not None:
        self.set_weights(weights)
    if name is not None:
        self.set_name(name)
def __init__(self, input_dim, output_dim=128, mem=None, mem_dim=128,
             init='glorot_uniform', inner_init='orthogonal',
             activation='sigmoid', inner_activation='hard_sigmoid',
             weights=None, truncate_gradient=-1, return_sequences=False,
             return_mode='states'):
    super(GRUM, self).__init__(input_dim, output_dim, init=init,
                               inner_init=inner_init, activation=activation,
                               inner_activation=inner_activation,
                               truncate_gradient=truncate_gradient,
                               return_sequences=return_sequences)
    if mem is None:
        self.mem = shared_zeros((1, mem_dim))
    else:
        self.mem = mem
    self.mem_dim = mem_dim
    self.return_mode = return_mode

    self.Hm_z = self.init((self.mem_dim, self.output_dim))
    self.Hm_r = self.init((self.mem_dim, self.output_dim))
    self.Hm_h = self.init((self.mem_dim, self.output_dim))

    self.Wm_z = self.init((self.input_dim, self.mem_dim))
    self.Um_z = self.inner_init((self.mem_dim, self.mem_dim))
    self.Vm_z = self.inner_init((self.output_dim, self.mem_dim))
    self.bm_z = shared_zeros((self.mem_dim))

    self.Wm_r = self.init((self.input_dim, self.mem_dim))
    self.Um_r = self.inner_init((self.mem_dim, self.mem_dim))
    self.Vm_r = self.inner_init((self.output_dim, self.mem_dim))
    self.bm_r = shared_zeros((self.mem_dim))

    self.Wm_h = self.init((self.input_dim, self.mem_dim))
    self.Um_h = self.inner_init((self.mem_dim, self.mem_dim))
    self.Vm_h = self.inner_init((self.mem_dim, self.mem_dim))
    self.bm_h = shared_zeros((self.mem_dim))

    self.params = self.params + [
        self.Hm_z, self.Hm_r, self.Hm_h,
        self.Wm_z, self.Um_z, self.bm_z,
        self.Wm_r, self.Um_r, self.bm_r,
        self.Wm_h, self.Um_h, self.bm_h,
    ]
def __init__(self, n_vocab, dim_word, dim_ctx, dim):
    self.n_vocab = n_vocab
    self.dim_word = dim_word
    self.dim_ctx = dim_ctx
    self.dim = dim

    ### Word Embedding ###
    self.Wemb = initializations.uniform((n_vocab, self.dim_word))

    ### LSTM initialization NN ###
    self.Init_state_W = initializations.uniform((self.dim_ctx, self.dim))
    self.Init_state_b = shared_zeros((self.dim))
    self.Init_memory_W = initializations.uniform((self.dim_ctx, self.dim))
    self.Init_memory_b = shared_zeros((self.dim))

    ### Main LSTM ###
    self.lstm_W = initializations.uniform((self.dim_word, self.dim * 4))
    self.lstm_U = sharedX(np.concatenate([ortho_weight(dim),
                                          ortho_weight(dim),
                                          ortho_weight(dim),
                                          ortho_weight(dim)], axis=1))
    self.lstm_b = shared_zeros((self.dim * 4))

    self.Wc = initializations.uniform((self.dim_ctx, self.dim * 4))      # image -> LSTM hidden
    self.Wc_att = initializations.uniform((self.dim_ctx, self.dim_ctx))  # image features passed once through a NN layer
    self.Wd_att = initializations.uniform((self.dim, self.dim_ctx))      # LSTM hidden -> influence on image attention
    self.b_att = shared_zeros((self.dim_ctx))
    self.U_att = initializations.uniform((self.dim_ctx, 1))              # reduces the image features to a 1-D attention score
    self.c_att = shared_zeros((1))

    ### Decoding NeuralNets ###
    self.decode_lstm_W = initializations.uniform((self.dim, self.dim_word))
    self.decode_lstm_b = shared_zeros((self.dim_word))
    self.decode_word_W = initializations.uniform((self.dim_word, n_vocab))
    self.decode_word_b = shared_zeros((n_vocab))

    self.params = [self.Wemb,
                   self.Init_state_W, self.Init_state_b,
                   self.Init_memory_W, self.Init_memory_b,
                   self.lstm_W, self.lstm_U, self.lstm_b,
                   self.Wc, self.Wc_att, self.Wd_att, self.b_att,
                   self.U_att, self.c_att,
                   self.decode_lstm_W, self.decode_lstm_b,
                   self.decode_word_W, self.decode_word_b]
    self.param_names = ['Wemb',
                        'Init_state_W', 'Init_state_b',
                        'Init_memory_W', 'Init_memory_b',
                        'lstm_W', 'lstm_U', 'lstm_b',
                        'Wc', 'Wc_att', 'Wd_att', 'b_att',
                        'U_att', 'c_att',
                        'decode_lstm_W', 'decode_lstm_b',
                        'decode_word_W', 'decode_word_b']
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    lr = self.lr * (1.0 / (1.0 + self.decay * self.iterations))
    self.updates = [(self.iterations, self.iterations + 1.)]

    for p, g, c in zip(params, grads, constraints):
        m = shared_zeros(p.get_value().shape)  # momentum
        v = self.momentum * m - lr * g  # velocity
        self.updates.append((m, v))

        if self.nesterov:
            new_p = p + self.momentum * v - lr * g
        else:
            new_p = p + v

        c_new_p = _proxOp(c(new_p), self.lr * self.lambdav, self.soft_threshold)
        self.updates.append((p, c_new_p))
    return self.updates
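# A minimal sketch (assumed, not from the source) of the proximal step used
# above. `_proxOp` is read off the call site `_proxOp(value, lambda, fn)` and
# is assumed to apply a thresholding function with shrinkage amount `lambd`;
# the bodies below are standard L1 proximal operators, not the author's code.
def soft_threshold(x, lambd):
    # prox of the L1 norm: shrink each entry towards zero by lambd
    return T.sgn(x) * T.maximum(abs(x) - lambd, 0.)

def _proxOp(x, lambd, threshold_fn):
    return threshold_fn(x, lambd)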
def __init__(self, input_dim, output_dim, init='uniform',
             activation='linear', weights=None):
    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.input_dim = input_dim
    self.output_dim = output_dim

    self.input = T.tensor3()
    self.W = self.init((self.input_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))
    self.params = [self.W, self.b]

    if weights is not None:
        self.set_weights(weights)
def __init__(self, n_words=1000, n_embedding=100, lr=0.01, margin=0.1,
             momentum=0.9, word_to_id=None):
    self.n_embedding = n_embedding
    self.n_lstm_embed = n_embedding
    self.word_embed = n_embedding
    self.lr = lr
    self.momentum = momentum
    self.margin = margin
    self.n_words = n_words
    self.n_D = 3 * self.n_words + 3
    self.word_to_id = word_to_id
    self.id_to_word = dict((v, k) for k, v in word_to_id.iteritems())

    # Question
    x = T.vector('x')
    phi_x = T.vector('phi_x')

    # True statements
    phi_f1_1 = T.vector('phi_f1_1')
    phi_f2_1 = T.vector('phi_f2_1')

    # False statements
    phi_f1_2 = T.vector('phi_f1_2')
    phi_f2_2 = T.vector('phi_f2_2')

    # Supporting memories
    m0 = T.vector('m0')
    m1 = T.vector('m1')
    phi_m0 = T.vector('phi_m0')
    phi_m1 = T.vector('phi_m1')

    # True word
    r = T.vector('r')

    # Word sequence
    words = T.ivector('words')

    # Scoring function
    self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

    # Word embedding
    self.L = glorot_uniform((self.n_words, self.word_embed))
    self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

    # LSTM
    self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_i = shared_zeros((self.n_lstm_embed))
    self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_f = shared_zeros((self.n_lstm_embed))
    self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_c = shared_zeros((self.n_lstm_embed))
    self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_o = shared_zeros((self.n_lstm_embed))

    mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1,
                              phi_f2_2, phi_m0)

    lstm_output = self.lstm_cost(words)
    self.predict_function_r = theano.function(inputs=[words],
                                              outputs=lstm_output,
                                              allow_input_downcast=True)
    lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

    cost = mem_cost + lstm_cost

    params = [
        self.U_O,
        self.W_i, self.U_i, self.b_i,
        self.W_f, self.U_f, self.b_f,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
        self.L, self.Lprime,
    ]
    grads = T.grad(cost, params)

    # Parameter updates
    updates = self.get_updates(params, grads, method='adagrad')

    l_rate = T.scalar('l_rate')

    # Theano functions
    self.train_function = theano.function(
        inputs=[phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2,
                phi_m0, r, words,
                theano.Param(l_rate, default=self.lr)],
        outputs=cost,
        updates=updates,
        on_unused_input='warn',
        allow_input_downcast=True)
        # mode='FAST_COMPILE')
        # mode='DebugMode')
        # mode=theano.compile.MonitorMode(pre_func=inspect_inputs,
        #                                 post_func=inspect_outputs))

    # Candidate statement for prediction
    phi_f = T.vector('phi_f')
    score_o = self.calc_score_o(phi_x, phi_f)
    self.predict_function_o = theano.function(inputs=[phi_x, phi_f],
                                              outputs=score_o)
def __init__(self, input_dim, output_dim=128, init='glorot_uniform',
             inner_init='orthogonal', activation='tanh',
             inner_activation='hard_sigmoid', weights=None,
             truncate_gradient=-1, output_mode='sum'):
    super(BiDirectionLSTM, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.output_mode = output_mode  # output_mode is either sum or concatenate

    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.input = T.tensor3()

    # forward weights
    self.W_i = self.init((self.input_dim, self.output_dim))
    self.U_i = self.inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim))

    self.W_f = self.init((self.input_dim, self.output_dim))
    self.U_f = self.inner_init((self.output_dim, self.output_dim))
    self.b_f = shared_zeros((self.output_dim))

    self.W_c = self.init((self.input_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))

    self.W_o = self.init((self.input_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))

    # backward weights
    self.Wb_i = self.init((self.input_dim, self.output_dim))
    self.Ub_i = self.inner_init((self.output_dim, self.output_dim))
    self.bb_i = shared_zeros((self.output_dim))

    self.Wb_f = self.init((self.input_dim, self.output_dim))
    self.Ub_f = self.inner_init((self.output_dim, self.output_dim))
    self.bb_f = shared_zeros((self.output_dim))

    self.Wb_c = self.init((self.input_dim, self.output_dim))
    self.Ub_c = self.inner_init((self.output_dim, self.output_dim))
    self.bb_c = shared_zeros((self.output_dim))

    self.Wb_o = self.init((self.input_dim, self.output_dim))
    self.Ub_o = self.inner_init((self.output_dim, self.output_dim))
    self.bb_o = shared_zeros((self.output_dim))

    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
        self.Wb_i, self.Ub_i, self.bb_i,
        self.Wb_c, self.Ub_c, self.bb_c,
        self.Wb_f, self.Ub_f, self.bb_f,
        self.Wb_o, self.Ub_o, self.bb_o,
    ]

    if weights is not None:
        self.set_weights(weights)
def __init__(self, input_dim, output_dim=128, train_init_cell=True,
             train_init_h=True, init='glorot_uniform', inner_init='orthogonal',
             forget_bias_init='one', input_activation='tanh',
             gate_activation='hard_sigmoid', output_activation='tanh',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(LSTMLayerV0, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences

    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.forget_bias_init = initializations.get(forget_bias_init)
    self.input_activation = activations.get(input_activation)
    self.gate_activation = activations.get(gate_activation)
    self.output_activation = activations.get(output_activation)
    self.input = T.tensor3()

    W_z = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_z = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
    W_i = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_i = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
    W_f = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_f = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
    W_o = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
    R_o = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)

    self.h_m1 = shared_zeros(shape=(1, self.output_dim), name='h0')
    self.c_m1 = shared_zeros(shape=(1, self.output_dim), name='c0')

    W = np.vstack((W_z[np.newaxis, :, :],
                   W_i[np.newaxis, :, :],
                   W_f[np.newaxis, :, :],
                   W_o[np.newaxis, :, :]))  # shape = (4, input_dim, output_dim)
    R = np.vstack((R_z[np.newaxis, :, :],
                   R_i[np.newaxis, :, :],
                   R_f[np.newaxis, :, :],
                   R_o[np.newaxis, :, :]))  # shape = (4, output_dim, output_dim)
    self.W = theano.shared(W, name='Input to hidden weights (zifo)', borrow=True)
    self.R = theano.shared(R, name='Recurrent weights (zifo)', borrow=True)
    self.b = theano.shared(np.zeros(shape=(4, self.output_dim),
                                    dtype=theano.config.floatX),
                           name='bias', borrow=True)

    self.params = [self.W, self.R]
    if train_init_cell:
        self.params.append(self.c_m1)
    if train_init_h:
        self.params.append(self.h_m1)

    if weights is not None:
        self.set_weights(weights)
def build(self):
    input_leng, input_dim = self.input_shape[1:]
    self.input = T.tensor3()

    if self.inner_rnn == 'gru':
        self.rnn = GRU(input_dim=input_dim + self.m_length,
                       input_length=input_leng,
                       output_dim=self.output_dim, init=self.init,
                       inner_init=self.inner_init)
    elif self.inner_rnn == 'lstm':
        self.rnn = LSTM(input_dim=input_dim + self.m_length,
                        input_length=input_leng,
                        output_dim=self.output_dim, init=self.init,
                        inner_init=self.inner_init)
    else:
        raise ValueError('this inner_rnn is not implemented yet.')

    self.rnn.build()

    # initial memory, state, read and write vectors
    self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
    self.init_h = shared_zeros((self.output_dim))
    self.init_wr = self.rnn.init((self.n_slots,))
    self.init_ww = self.rnn.init((self.n_slots,))

    # write
    self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
    self.b_e = shared_zeros((self.m_length))
    self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
    self.b_a = shared_zeros((self.m_length))

    # get_w parameters for reading operation
    self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
    self.b_k_read = self.rnn.init((self.m_length,))
    self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma; see eq. 5, 7, 9 in Graves et al. 2014
    self.b_c_read = shared_zeros((3))
    self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
    self.b_s_read = shared_zeros((self.shift_range))

    # get_w parameters for writing operation
    self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
    self.b_k_write = self.rnn.init((self.m_length,))
    self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma; see eq. 5, 7, 9
    self.b_c_write = shared_zeros((3))
    self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
    self.b_s_write = shared_zeros((self.shift_range))

    self.C = _circulant(self.n_slots, self.shift_range)

    self.params = self.rnn.params + [
        self.W_e, self.b_e,
        self.W_a, self.b_a,
        self.W_k_read, self.b_k_read,
        self.W_c_read, self.b_c_read,
        self.W_s_read, self.b_s_read,
        self.W_k_write, self.b_k_write,
        self.W_s_write, self.b_s_write,
        self.W_c_write, self.b_c_write,
        self.M, self.init_h, self.init_wr, self.init_ww,
    ]

    if self.inner_rnn == 'lstm':
        self.init_c = shared_zeros((self.output_dim))
        self.params = self.params + [self.init_c, ]
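# A minimal sketch (assumed, not from the source) of the `_circulant` helper
# used above: a stack of shifted identity matrices, one per allowed shift,
# so a shift distribution can be applied to an attention weighting by tensor
# contraction. The signature matches the call site; the body is illustrative.
def _circulant(leng, n_shifts):
    eye = np.eye(leng)
    shifts = range(n_shifts // 2, -n_shifts // 2, -1)
    C = np.asarray([np.roll(eye, s, axis=1) for s in shifts])
    return theano.shared(C.astype(theano.config.floatX))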