def __init__(self, input_dim, output_dim=128, init= 'uniform', inner_init='glorot_normal', activation='softplus', inner_activation='hard_sigmoid', gate_activation= 'tanh', weights=None, truncate_gradient=-1, return_sequences=False): super(SGU, self).__init__() self.input_dim = input_dim self.output_dim = output_dim self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.gate_activation = activations.get(gate_activation) self.input = TT.tensor3() self.W = self.init((self.input_dim, self.output_dim)) self.U = self.inner_init((self.output_dim, self.output_dim)) self.b = shared_zeros((self.output_dim)) self.W_gate = self.init((self.input_dim, self.output_dim)) self.b_gate = shared_zeros((self.output_dim)) self.U_gate = self.inner_init((self.output_dim, self.output_dim)) self.params = [ self.W, self.U, self.b, self.W_gate, self.b_gate, self.U_gate ] if weights is not None: self.set_weights(weights)
def __init__(self, output_dim,
             init='uniform', inner_init='orthogonal', forget_bias_init='one',
             activation='tanh', inner_activation='hard_sigmoid',
             U_init='identity', v_init=0.1, b_init=0,
             weights=None, truncate_gradient=-1, return_sequences=False,
             input_dim=None, input_dim_c=None, input_length=None, **kwargs):
    self.output_dim = output_dim
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.forget_bias_init = initializations.get(forget_bias_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences
    self.initial_weights = weights
    self.U_init = U_init
    self.v_init = v_init
    self.b_init = b_init
    self.input_dim = input_dim
    self.input_dim_c = input_dim_c  # second per-timestep input dimension, used in input_shape below
    self.input_length = input_length
    if self.input_dim:
        kwargs['input_shape'] = (self.input_length, self.input_dim, self.input_dim_c)
    super(LSTM_td, self).__init__(**kwargs)
def __init__(self, nb_filter, filter_length, direction='Down', init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', activation='tanh', inner_activation='hard_sigmoid', border_mode="same", sub_sample=(1, 1), W_regularizer=None, U_regularizer=None, b_regularizer=None, dropout_W=0., dropout_U=0., **kwargs): self.nb_filter = nb_filter self.filter_length = filter_length self.border_mode = border_mode self.subsample = sub_sample self.direction = direction self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.forget_bias_init = initializations.get(forget_bias_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.W_regularizer = regularizers.get(W_regularizer) self.U_regularizer = regularizers.get(U_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.dropout_W, self.dropout_U = dropout_W, dropout_U kwargs["nb_filter"] = nb_filter kwargs["filter_length"] = filter_length if self.dropout_W or self.dropout_U: self.uses_learning_phase = True super(DiagLSTM, self).__init__(**kwargs)
def __init__(self, input_dim, output_dim=128, train_init_cell=True, train_init_h=True, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', input_activation='tanh', gate_activation='hard_sigmoid', output_activation='tanh', weights=None, truncate_gradient=-1, return_sequences=False): super(LSTMLayer, self).__init__() self.input_dim = input_dim self.output_dim = output_dim self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.forget_bias_init = initializations.get(forget_bias_init) self.input_activation = activations.get(input_activation) self.gate_activation = activations.get(gate_activation) self.output_activation = activations.get(output_activation) self.input = T.tensor3() self.time_range = None W_z = self.init((self.input_dim, self.output_dim)).get_value(borrow=True) R_z = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_z = shared_zeros(self.output_dim) W_i = self.init((self.input_dim, self.output_dim)).get_value(borrow=True) R_i = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_i = shared_zeros(self.output_dim) W_f = self.init((self.input_dim, self.output_dim)).get_value(borrow=True) R_f = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_f = self.forget_bias_init(self.output_dim) W_o = self.init((self.input_dim, self.output_dim)).get_value(borrow=True) R_o = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_o = shared_zeros(self.output_dim) self.h_m1 = shared_zeros(shape=(1, self.output_dim), name='h0') self.c_m1 = shared_zeros(shape=(1, self.output_dim), name='c0') W = np.vstack((W_z[np.newaxis, :, :], W_i[np.newaxis, :, :], W_f[np.newaxis, :, :], W_o[np.newaxis, :, :])) # shape = (4, input_dim, output_dim) R = np.vstack((R_z[np.newaxis, :, :], R_i[np.newaxis, :, :], R_f[np.newaxis, :, :], R_o[np.newaxis, :, :])) # shape = (4, output_dim, output_dim) self.W = theano.shared(W, name='Input to hidden weights (zifo)', borrow=True) self.R = theano.shared(R, name='Recurrent weights (zifo)', borrow=True) self.b = theano.shared(np.zeros(shape=(4, self.output_dim), dtype=theano.config.floatX), name='bias', borrow=True) self.params = [self.W, self.R] if train_init_cell: self.params.append(self.c_m1) if train_init_h: self.params.append(self.h_m1) if weights is not None: self.set_weights(weights)
def __init__(self, input_dim, states_dim, causes_dim, init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', gate_activation='sigmoid', weights=None, return_mode='states', truncate_gradient=-1, return_sequences=False): super(FDPCN, self).__init__() self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.input_dim = input_dim self.states_dim = states_dim self.causes_dim = causes_dim self.truncate_gradient = truncate_gradient self.activation = activations.get(activation) self.gate_activation = activations.get(gate_activation) self.return_sequences = return_sequences self.return_mode = return_mode self.input = T.tensor3() self.I2S = self.init((self.input_dim, self.states_dim)) self.S2S = self.inner_init((self.states_dim, self.states_dim)) self.Sb = shared_zeros((self.states_dim)) self.S2C = self.init((self.states_dim, self.causes_dim)) self.C2C = self.inner_init((self.causes_dim, self.causes_dim)) self.Cb = shared_zeros((self.causes_dim)) self.CbS = shared_zeros((self.states_dim)) self.C2S = self.init((self.causes_dim, self.states_dim)) self.params = [self.I2S, self.S2S, self.Sb, self.C2S, self.C2C, self.Cb, self.S2C, self.CbS] if weights is not None: self.set_weights(weights)
def __init__(self, weights=None, axis=-1, momentum=0.9,
             beta_init='zero', gamma_init='one', **kwargs):
    """Init a Scale layer.

    Parameters
    ----------
    weights: Initialization weights.
        List of 2 Numpy arrays, with shapes:
        `[(input_shape,), (input_shape,)]`
    axis: integer, axis along which to normalize in mode 0. For instance,
        if your input tensor has shape (samples, channels, rows, cols),
        set axis to 1 to normalize per feature map (channels axis).
    momentum: momentum in the computation of the exponential average of the
        mean and standard deviation of the data, for feature-wise normalization.
    beta_init: name of initialization function for shift parameter
        (see [initializations](../initializations.md)), or alternatively,
        Theano/TensorFlow function to use for weights initialization.
        This parameter is only relevant if you don't pass a `weights` argument.
    gamma_init: name of initialization function for scale parameter
        (see [initializations](../initializations.md)), or alternatively,
        Theano/TensorFlow function to use for weights initialization.
        This parameter is only relevant if you don't pass a `weights` argument.
    """
    self.momentum = momentum
    self.axis = axis
    self.beta_init = initializations.get(beta_init)
    self.gamma_init = initializations.get(gamma_init)
    self.initial_weights = weights
    super(KerasScale, self).__init__(**kwargs)
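# Usage sketch for the Scale layer above. This is an illustration under assumptions,
# not taken from the source: it presumes a Keras 1.x-style Sequential API (matching
# the `initializations` module used throughout this file) and a hypothetical local
# import path for the class.
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers.normalization import BatchNormalization
from custom_layers import KerasScale  # hypothetical module name

model = Sequential()
model.add(Convolution2D(64, 3, 3, border_mode='same', input_shape=(3, 224, 224)))
model.add(BatchNormalization(axis=1))        # batch-normalize per feature map
model.add(KerasScale(axis=1, momentum=0.9))  # extra learnable per-channel scale/shift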
def __init__(self, output_dim, nb_rows, nb_cols, n_dim = 2, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', activation='tanh', inner_activation='hard_sigmoid', weights=None, truncate_gradient=-1, return_sequences=False, input_dim=None, input_length=None, go_backwards=False, **kwargs): self.n_dim = n_dim + 1 self.nb_cols = nb_cols self.nb_rows = nb_rows self.output_dim = 1 #output_dim self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.forget_bias_init = initializations.get(forget_bias_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.initial_weights = weights self.go_backwards = go_backwards # Calculate the number of dimensions self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(GridLSTM, self).__init__(**kwargs)
def __init__(self, weights=None, axis=-1, momentum = 0.9, beta_init='zero', gamma_init='one', **kwargs): self.momentum = momentum self.axis = axis self.beta_init = initializations.get(beta_init) self.gamma_init = initializations.get(gamma_init) self.initial_weights = weights super(Scale, self).__init__(**kwargs)
def __init__(self, output_dim, init='glorot_uniform', inner_init='orthogonal',
             forget_bias_init='one', activation='tanh',
             inner_activation='hard_sigmoid', weights=None, truncate_gradient=-1,
             input_dim=None, input_length=None, hidden_state=None, batch_size=None,
             return_sequences=False, decoder=None, decoders=None,
             remember_state=False, go_backwards=False, **kwargs):
    self.output_dim = output_dim
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.forget_bias_init = initializations.get(forget_bias_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.truncate_gradient = truncate_gradient
    self.initial_weights = weights
    self.initial_state = hidden_state
    self.batch_size = batch_size
    self.input_dim = input_dim
    self.input_length = input_length
    self.remember_state = remember_state
    self.return_sequences = return_sequences
    self.go_backwards = go_backwards
    if self.input_dim:
        kwargs['input_shape'] = (self.input_length, self.input_dim)
    super(LSTMEncoder, self).__init__(**kwargs)
    # use a fresh list so the default is not shared between instances
    decoders = list(decoders) if decoders is not None else []
    if decoder is not None:
        decoders += decoder
    self.decoders = decoders
    self.broadcast_state(decoders)  # send hidden state to decoders
def __init__(self, output_classes, n_trees=5, n_depth=3, d_init=None, l_init=None,
             randomize_training=0, name='diff_forest', **kwargs):
    self.output_classes = output_classes
    self.n_trees = n_trees
    self.n_depth = n_depth
    self.randomize_training = randomize_training
    self.name = name

    def norm(scale):
        return lambda shape, name=None: initializations.uniform(shape, scale=scale, name=name)

    # Not clear if these are generally good initializations
    # or if they are just good for MNIST
    if d_init is None:
        self.d_init = norm(1)
    else:
        self.d_init = initializations.get(d_init)
    if l_init is None:
        self.l_init = norm(2)
    else:
        self.l_init = initializations.get(l_init)
    super(DiffForest, self).__init__(**kwargs)
def __init__( self, output_dim, n_experts, init="glorot_uniform", inner_init="orthogonal", activation="tanh", inner_activation="hard_sigmoid", weights=None, truncate_gradient=-1, return_sequences=False, input_dim=None, input_length=None, go_backwards=False, **kwargs ): self.output_dim = output_dim self.n_experts = n_experts self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.initial_weights = weights self.go_backwards = go_backwards self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs["input_shape"] = (self.input_length, self.input_dim) super(ExpertIIgated, self).__init__(**kwargs)
def __init__(self, output_dim, init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', inner_activation='hard_sigmoid', weights=None, truncate_gradient=-1, return_sequences=False, input_dim=None, input_length=None, go_backwards=False, dropout=0.0, **kwargs): self.output_dim = output_dim self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.initial_weights = weights self.go_backwards = go_backwards # for dropout self.p = dropout #dropout rate self.srng = RandomStreams(seed=np.random.randint(10e6)) self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(TGRU, self).__init__(**kwargs)
def __init__(self, nb_filter, nb_row, nb_col,
             init='glorot_uniform', inner_init='orthogonal',
             forget_bias_init='one', activation='tanh',
             inner_activation='hard_sigmoid',
             dim_ordering='tf', border_mode='valid', sub_sample=(1, 1),
             W_regularizer=None, U_regularizer=None, b_regularizer=None,
             dropout_W=0., dropout_U=0., **kwargs):
    self.nb_filter = nb_filter
    self.nb_row = nb_row
    self.nb_col = nb_col
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.forget_bias_init = initializations.get(forget_bias_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.border_mode = border_mode
    self.subsample = sub_sample

    assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {"tf", "th"}'
    self.dim_ordering = dim_ordering

    kwargs['nb_filter'] = nb_filter
    kwargs['nb_row'] = nb_row
    kwargs['nb_col'] = nb_col
    kwargs['dim_ordering'] = dim_ordering

    self.W_regularizer = W_regularizer
    self.U_regularizer = U_regularizer
    self.b_regularizer = b_regularizer
    self.dropout_W, self.dropout_U = dropout_W, dropout_U
    super(LSTMConv2D, self).__init__(**kwargs)
def __init__(self, output_dim, init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', **kwargs): self.output_dim = output_dim self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.activation = activations.get(activation) super(RecTest, self).__init__(**kwargs)
def __init__(self, beta_init='zero', gamma_init='uniform', epsilon=1e-6, mode=0, momentum=0.9, weights=None, **kwargs): self.beta_init = initializations.get(beta_init) self.gamma_init = initializations.get(gamma_init) self.epsilon = epsilon self.mode = mode self.momentum = momentum self.initial_weights = weights super(BatchNormalization, self).__init__(**kwargs)
def __init__(self, periods, input_dim, output_dim=128, init= 'uniform', inner_init='glorot_normal', activation='softplus', inner_activation='hard_sigmoid', gate_activation= 'tanh', weights=None, truncate_gradient=-1, return_sequences=False): super(ClockworkSGU, self).__init__() self.periods = periods self.input_dim = input_dim self.output_dim = output_dim self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.gate_activation = activations.get(gate_activation) self.n = self.output_dim // len(self.periods) assert self.output_dim % len(self.periods) == 0 self.input = TT.tensor3() self.W = self.init((self.input_dim, self.output_dim)) self.b = shared_zeros((self.output_dim)) self.W_gate = self.init((self.input_dim, self.output_dim)) self.b_gate = shared_zeros((self.output_dim)) self.clock_h = {} for i, period in enumerate(self.periods): self.clock_h[period] = self.inner_init(( (i + 1) * self.n, self.n )) self.clock_gates = {} for i, period in enumerate(self.periods): self.clock_gates[period] = self.inner_init(( (i + 1) * self.n, self.n )) self.params = [ self.W, self.b, self.W_gate, self.b_gate, ] self.params.extend(self.clock_h.values()) self.params.extend(self.clock_gates.values()) if weights is not None: self.set_weights(weights)
def __init__(self, epsilon=1e-6, axis=-1, momentum=0.9, weights=None, beta_init='zero', gamma_init='one', **kwargs): self.beta_init = initializations.get(beta_init) self.gamma_init = initializations.get(gamma_init) self.epsilon = epsilon self.axis = axis self.momentum = momentum self.initial_weights = weights self.uses_learning_phase = True super(BatchNormalization, self).__init__(**kwargs)
def __init__(self, epsilon=1e-5, momentum=0.9, weights=None, beta_init='zero', gamma_init='normal', **kwargs): self.beta_init = initializations.get(beta_init) self.gamma_init = initializations.get(gamma_init) self.epsilon = epsilon self.momentum = momentum self.initial_weights = weights # self.uses_learning_phase = True self.ema = tf.train.ExponentialMovingAverage(decay=self.momentum) super(Bnorm2D, self).__init__(**kwargs)
def __init__(self, output_dim, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', activation='tanh', inner_activation='hard_sigmoid', **kwargs): self.output_dim = output_dim self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.forget_bias_init = initializations.get(forget_bias_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) super(PeepHoleLayer, self).__init__(**kwargs)
def __init__(self, output_dim, context_dim, init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', inner_activation='hard_sigmoid', **kwargs): self.output_dim = output_dim self.context_dim = context_dim self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) super(BiContextLayer, self).__init__(**kwargs)
def __init__(self, output_dim, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', activation='tanh', inner_activation='hard_sigmoid', batch_size = 64, feed_state = False, **kwargs): self.output_dim = output_dim self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.forget_bias_init = initializations.get(forget_bias_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.batch_size = batch_size self.feed_state = feed_state super(LstmAttentionLayer, self).__init__(**kwargs)
def __init__(self, s2l, truncate_gradient=1, return_mode='all', init='glorot_uniform', inner_init='identity'): super(TSC, self).__init__() self.return_sequences = True self.truncate_gradient = truncate_gradient self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) s2l.return_mode = return_mode self.s2l = s2l self.A = self.inner_init((s2l.output_dim, s2l.output_dim)) self.params = s2l.params # + [self.A, ] self.input = T.tensor3()
def __init__(self, output_dim, depth=1, readout=False, dropout=.5, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', activation='tanh', inner_activation='hard_sigmoid', **kwargs): self.output_dim = output_dim self.depth = depth self.readout = readout self.dropout = dropout self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.forget_bias_init = initializations.get(forget_bias_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self._kwargs = kwargs super(DeepLSTM, self).__init__(**kwargs)
def __init__(self, input_dim, output_dim, init='uniform', truncate_gradient=-1, gamma=.1, n_steps=10, W_regularizer=None, activity_regularizer=None, **kwargs): self.init = initializations.get(init) self.input_dim = input_dim self.output_dim = output_dim self.gamma = gamma self.n_steps = n_steps self.truncate_gradient = truncate_gradient self.activation = lambda x: .5*(1 + T.exp(-x)) self.input = T.matrix() self.W = self.init((self.output_dim, self.input_dim)) self.params = [self.W] self.regularizers = [] if W_regularizer: W_regularizer.set_param(self.W) self.regularizers.append(W_regularizer) if activity_regularizer: activity_regularizer.set_layer(self) self.regularizers.append(activity_regularizer) kwargs['input_shape'] = (None, self.input_dim) super(VarianceCoding, self).__init__(**kwargs)
def __init__(self, input_dim, output_dim, init='glorot_uniform', activation='linear',
             weights=None, W_regularizer=None, b_regularizer=None,
             activity_regularizer=None, W_constraint=None, b_constraint=None):
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    '''
    self.W_regularizer = regularizers.get(W_regularizer)
    self.b_regularizer = regularizers.get(b_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.W_constraint = constraints.get(W_constraint)
    self.b_constraint = constraints.get(b_constraint)
    self.constraints = [self.W_constraint, self.b_constraint]
    self.initial_weights = weights
    '''
    # super(TimeDistributedDense, self).__init__(**kwargs)
    # def build(self):
    self.W = self.init((self.input_dim, self.output_dim))
    self.b = K.zeros((self.output_dim,))
    self.params = [self.W, self.b]
def __init__(self, input_dim, output_dim, init='glorot_uniform', activation='linear', truncate_gradient=-1, gamma=.1, n_steps=10, return_reconstruction=False, W_regularizer=None, activity_regularizer=None, **kwargs): self.init = initializations.get(init) self.input_dim = input_dim self.output_dim = output_dim self.gamma = gamma self.n_steps = n_steps self.truncate_gradient = truncate_gradient self.activation = activations.get(activation) self.return_reconstruction = return_reconstruction self.input = T.matrix() self.W = self.init((self.output_dim, self.input_dim)) self.params = [self.W, ] self.regularizers = [] if W_regularizer: W_regularizer.set_param(self.W) self.regularizers.append(W_regularizer) if activity_regularizer: activity_regularizer.set_layer(self) self.regularizers.append(activity_regularizer) kwargs['input_shape'] = (self.input_dim,) super(SparseCoding, self).__init__(**kwargs)
def __init__(self, s2l, truncate_gradient=1, return_mode='all', init='glorot_uniform', inner_init='identity', **kwargs): self.return_sequences = True self.truncate_gradient = truncate_gradient self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) s2l.return_mode = return_mode self.s2l = s2l self.A = self.inner_init((s2l.output_dim, s2l.output_dim)) self.params = s2l.params # + [self.A, ] self.input = T.tensor3() kwargs['input_shape'] = (None, None, self.s2l.input_dim) super(VarianceCoding, self).__init__(**kwargs)
def __init__(self, input_dim, hidden_dim, init='glorot_uniform', activation='linear', weights=None, corruption_level=0.3): self.init = initializations.get(init) self.activation = activations.get(activation) self.input_dim = input_dim self.hidden_dim = hidden_dim self.output_dim = input_dim self.input = T.matrix() self.W = self.init((self.input_dim, self.hidden_dim)) self.b = shared_zeros((self.hidden_dim)) self.b_prime = shared_zeros((self.input_dim)) numpy_rng = np.random.RandomState(123) self.theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.params = [self.W, self.b, self.b_prime] self.corruption_level = corruption_level if weights is not None: self.set_weights(weights)
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, input_dim=None, input_length1=None, input_length2=None, **kwargs): self.output_dim = output_dim self.init = initializations.get(init) self.activation = activations.get(activation) self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.constraints = [self.W_constraint, self.b_constraint] self.initial_weights = weights self.input_dim = input_dim self.input_length1 = input_length1 self.input_length2 = input_length2 if self.input_dim: kwargs['input_shape'] = (self.input_length1, self.input_length2, self.input_dim) self.input = K.placeholder(ndim=4) super(HigherOrderTimeDistributedDense, self).__init__(**kwargs)
def __init__(self, prototype, transition_net, truncate_gradient=-1, return_mode='reconstruction', init='glorot_uniform', **kwargs): self.return_sequences = True self.init = initializations.get(init) self.prototype = prototype self.W = prototype.W # Sparse coding parameter I - Wx self.regularizers = prototype.regularizers self.activation = prototype.activation self.tnet = transition_net try: self.is_conv = False self.input_dim = prototype.input_dim self.output_dim = prototype.output_dim self.A = self.init(( self.output_dim, self.output_dim)) # Predictive transition x_t - Ax_t-1 self.input = T.tensor3() except: self.is_conv = True self.nb_filter = prototype.nb_filter self.stack_size = prototype.stack_size self.nb_row = prototype.nb_row self.nb_col = prototype.nb_col self.A = self.init(self.W.get_value().shape) self.input = T.TensorType(floatX, (False,)*5)() self.params = prototype.params # + [self.A, ] self.truncate_gradient = truncate_gradient self.return_mode = return_mode kwargs['input_shape'] = (None,) + self.prototype.input_shape super(TemporalSparseCoding, self).__init__(**kwargs)
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, input_output_mat=None, group_gene_dict=None, bias=True, input_dim=None, **kwargs): self.init = initializations.get(init) self.activation = activations.get(activation) self.output_dim = output_dim self.input_dim = input_dim self.input_output_mat = input_output_mat self.group_gene_dict = group_gene_dict #print self.input_output_mat if self.input_output_mat is not None: self.output_dim = self.input_output_mat.shape[1] #print 'input_dim: ',self.input_dim #print 'output_dim: ',self.output_dim self.bias = bias self.initial_weights = weights self.input_spec = [InputSpec(ndim=2)] self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) if self.input_dim: kwargs['input_shape'] = (self.input_dim, ) super(MyLayer, self).__init__(**kwargs)
def __init__(self, downsampling_factor=10, init='glorot_uniform', activation='linear', weights=None, W_regularizer=None, activity_regularizer=None, W_constraint=None, input_dim=None, **kwargs): self.downsampling_factor = downsampling_factor self.init = initializations.get(init) self.activation = activations.get(activation) self.W_regularizer = regularizers.get(W_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.constraints = [self.W_constraint] self.initial_weights = weights self.input_dim = input_dim if self.input_dim: kwargs['input_shape'] = (self.input_dim,) self.input_spec = [InputSpec(ndim=4)] super(EltWiseProduct, self).__init__(**kwargs)
def __init__(self, output_dim, token_dim, knowledge_dim, knowledge_length,
             attention_init='uniform', attention_activation='tanh', **kwargs):
    """
    output_dim (int): Dimensionality of output (same as LSTM)
    token_dim (int): Input dimensionality of token embeddings
    knowledge_dim (int): Input dimensionality of background info
    knowledge_length (int): Number of units of background information provided per token
    attention_init (str): Initialization heuristic for attention scorer
    attention_activation (str): Activation used at hidden layer in the attention MLP
    """
    self.token_dim = token_dim
    self.knowledge_dim = knowledge_dim
    self.knowledge_length = knowledge_length
    self.attention_init = initializations.get(attention_init)
    self.attention_activation = activations.get(attention_activation)
    # LSTM's constructor expects output_dim, so pass it along.
    kwargs['output_dim'] = output_dim
    super(KnowledgeBackedLSTM, self).__init__(**kwargs)
    # This class' grandparent (Recurrent) would have set ndim (number of
    # input dimensions) to 3. Let's change that to 4.
    self.input_spec = [InputSpec(ndim=4)]
    if self.consume_less == 'cpu':
        # Keras' implementation of LSTM precomputes the inputs to all gates
        # to save CPU. However, in this implementation, part of the input is
        # a weighted average of the background knowledge, with the weights
        # being a function of the output of the previous time step. So the
        # precomputation cannot be done, making consume_less='cpu' meaningless.
        warnings.warn("Current implementation does not support consume_less='cpu'. "
                      "Ignoring the setting.")
        self.consume_less = 'mem'
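# Construction sketch for the KnowledgeBackedLSTM defined above, using only the
# arguments its docstring documents. The dimensions and the import path are
# assumptions for illustration; the layer expects a 4D input (hence the
# InputSpec(ndim=4) set in __init__), i.e. background knowledge per token.
from knowledge_backed_lstm import KnowledgeBackedLSTM  # hypothetical module name

kb_lstm = KnowledgeBackedLSTM(output_dim=128,           # same role as LSTM's output_dim
                              token_dim=100,            # token embedding size
                              knowledge_dim=50,         # background-info embedding size
                              knowledge_length=10,      # knowledge units per token
                              attention_init='uniform',
                              attention_activation='tanh',
                              return_sequences=True)    # forwarded to the parent LSTM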
def __init__(self, nb_filter, nb_row, nb_col, init='glorot_uniform', activation='linear', weights=None, border_mode='valid', subsample=(1, 1), W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, **kwargs): if border_mode not in {'valid', 'full', 'same'}: raise Exception( 'Invalid border mode for TimeDistributedConvolution2D:', border_mode) self.nb_filter = nb_filter self.nb_row = nb_row self.nb_col = nb_col self.init = initializations.get(init) self.activation = activations.get(activation) self.border_mode = border_mode self.subsample = tuple(subsample) self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.constraints = [self.W_constraint, self.b_constraint] self.initial_weights = weights super(TimeDistributedConvolution2D, self).__init__(**kwargs)
def __init__(self, output_dim, window_size=2, return_sequences=False, go_backwards=False, stateful=False, unroll=False, subsample_length=1, init='uniform', activation='tanh', W_regularizer=None, b_regularizer=None, W_constraint=None, b_constraint=None, dropout=0, weights=None, bias=True, input_dim=None, input_length=None, **kwargs): self.return_sequences = return_sequences self.go_backwards = go_backwards self.stateful = stateful self.unroll = unroll self.output_dim = output_dim self.window_size = window_size self.subsample = (subsample_length, 1) self.bias = bias self.init = initializations.get(init) self.activation = activations.get(activation) self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.dropout = dropout if self.dropout is not None and 0. < self.dropout < 1.: self.uses_learning_phase = True self.initial_weights = weights self.supports_masking = True self.input_spec = [InputSpec(ndim=3)] self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(QRNN, self).__init__(**kwargs)
def __init__(self, W_regularizer=None, b_regularizer=None,
             W_constraint=None, b_constraint=None,
             bias=True, **kwargs):
    """
    Keras Layer that implements a Content Attention mechanism.
    Supports Masking.
    """
    self.supports_masking = True
    self.init = initializations.get('glorot_uniform')
    self.W_regularizer = regularizers.get(W_regularizer)
    self.b_regularizer = regularizers.get(b_regularizer)
    self.W_constraint = constraints.get(W_constraint)
    self.b_constraint = constraints.get(b_constraint)
    self.bias = bias
    super(Attention, self).__init__(**kwargs)
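# Wiring sketch for the Attention layer defined above (an assumed usage, not from
# the source): content attention over the timesteps of a masked recurrent encoder.
# `attention_layers` is a hypothetical module name; vocabulary size, sequence length
# and unit counts are illustrative only.
from keras.layers import Input, Embedding, LSTM
from attention_layers import Attention  # hypothetical module name

words = Input(shape=(50,), dtype='int32')
x = Embedding(20000, 300, mask_zero=True)(words)  # masking is supported by this layer
x = LSTM(128, return_sequences=True)(x)           # (batch, timesteps, 128)
sentence = Attention(bias=True)(x)                # attends over the timesteps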
def __init__(self, input_dim, output_dim, init='uniform', input_length=None, W_regularizer=None, activity_regularizer=None, W_constraint=None, mask_zero=False, weights=None, dropout=0., **kwargs): self.input_dim = input_dim self.output_dim = output_dim self.init = initializations.get(init) self.input_length = input_length self.mask_zero = mask_zero self.dropout = dropout self.W_constraint = constraints.get(W_constraint) self.constraints = [self.W_constraint] self.W_regularizer = regularizers.get(W_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) if 0. < self.dropout < 1.: self.uses_learning_phase = True self.initial_weights = weights kwargs['input_shape'] = (self.input_dim,) kwargs['input_dtype'] = 'int32' super(FixedEmbedding, self).__init__(**kwargs)
def __init__(self, init='glorot_uniform', n_rel=5, mean=1, input_dim=None, output_dim=None, class_dim=None, activation='linear', weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, bias=True, **kwargs): self.init = initializations.get(init) self.activation = activations.get(activation) self.n_rel = n_rel self.mean = mean #self.prefEffect = prefEffect ### MD+DEV+YANG ---- additional Variables self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.bias = bias self.initial_weights = weights self.input_spec = [InputSpec(ndim=2)] self.input_dim = input_dim self.output_dim = output_dim self.class_dim = class_dim #MD if self.input_dim: kwargs['input_shape'] = (self.input_dim, ) super(GraphDense, self).__init__(**kwargs)
def __init__(self, input_dim, proj_dim=128, init='uniform', activation='sigmoid', weights=None): super(WordContextProduct, self).__init__() self.input_dim = input_dim self.proj_dim = proj_dim self.init = initializations.get(init) self.activation = activations.get(activation) self.input = T.imatrix() # two different embeddings for pivot word and its context # because p(w|c) != p(c|w) self.W_w = self.init((input_dim, proj_dim)) self.W_c = self.init((input_dim, proj_dim)) self.params = [self.W_w, self.W_c] if weights is not None: self.set_weights(weights)
def __init__(self, W_regularizer=None, u_regularizer=None, b_regularizer=None, W_constraint=None, u_constraint=None, b_constraint=None, bias=True, **kwargs): self.supports_masking = True self.init = initializations.get('glorot_uniform') self.W_regularizer = regularizers.get(W_regularizer) self.u_regularizer = regularizers.get(u_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.W_constraint = constraints.get(W_constraint) self.u_constraint = constraints.get(u_constraint) self.b_constraint = constraints.get(b_constraint) self.bias = bias super(AttentionWithContext, self).__init__(**kwargs)
def __init__(self, output_dim, window_size=3, subsample_length=1, init='uniform', activation='linear', W_regularizer=None, b_regularizer=None, W_constraint=None, b_constraint=None, weights=None, bias=True, input_dim=None, input_length=None, **kwargs): self.output_dim = output_dim self.window_size = window_size self.subsample = (subsample_length, 1) self.bias = bias self.init = initializations.get(init) self.activation = activations.get(activation) self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.initial_weights = weights self.supports_masking = False self.input_spec = [InputSpec(ndim=3)] self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(GCNN, self).__init__(**kwargs)
def __init__(self, output_dim, support=1, featureless=False, init='glorot_uniform', activation='linear', weights=None, W_regularizer=None, num_bases=-1, b_regularizer=None, bias=False, dropout=0., **kwargs): self.init = initializers.get(init) self.activation = activations.get(activation) self.output_dim = output_dim # number of features per node self.support = support # filter support / number of weights self.featureless = featureless # use/ignore input features self.dropout = dropout assert support >= 1 self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.bias = bias self.initial_weights = weights self.num_bases = num_bases # these will be defined during build() self.input_dim = None self.W = None self.W_comp = None self.b = None self.num_nodes = None super(GraphConvolution, self).__init__(**kwargs)
def __init__(self, output_dim, batch_size, init='glorot_uniform', activation='tanh', weights=None, input_dim=None, regularizer_scale=1, prior_mean=0, prior_logsigma=1, **kwargs): self.prior_mean = prior_mean self.prior_logsigma = prior_logsigma self.regularizer_scale = regularizer_scale self.batch_size = batch_size self.init = initializations.get(init) self.activation = activations.get(activation) self.output_dim = output_dim self.initial_weights = weights self.input_dim = input_dim if self.input_dim: kwargs['input_shape'] = (self.input_dim, ) self.input = K.placeholder(ndim=2) super(VariationalDense, self).__init__(**kwargs)
def __init__(self, first_dim, last_dim, init='glorot_uniform', activation=None, weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, bias=True, input_dim=None, **kwargs): self.init = initializations.get(init) self.activation = activations.get(activation) self.input_dim = input_dim self.first_dim = first_dim self.last_dim = last_dim self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.bias = bias self.initial_weights = weights self.input_spec = [InputSpec(ndim=2)] if self.input_dim: kwargs['input_shape'] = (self.input_dim, ) super(Dense3D, self).__init__(**kwargs)
def __init__(self, init='glorot_uniform', transform_bias=-2, n_rel=5, mean=1, activation='linear', weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, bias=True, input_dim=None, **kwargs): self.init = initializations.get(init) self.transform_bias = transform_bias self.activation = activations.get(activation) self.n_rel = n_rel self.mean = mean self.W_regularizer = regularizers.get(W_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.activity_regularizer = regularizers.get(activity_regularizer) self.W_constraint = constraints.get(W_constraint) self.b_constraint = constraints.get(b_constraint) self.bias = bias self.initial_weights = weights self.input_spec = [InputSpec(ndim=2)] self.input_dim = input_dim if self.input_dim: kwargs['input_shape'] = (self.input_dim, ) super(GraphHighway, self).__init__(**kwargs)
def __init__(self, nb_classes, frequency_table=None, mode=0,
             init='glorot_uniform', weights=None,
             W_regularizer=None, b_regularizer=None, activity_regularizer=None,
             W_constraint=None, b_constraint=None,
             bias=True, verbose=False, **kwargs):
    '''
    # Arguments:
    nb_classes: Number of classes.
    frequency_table: list. Frequency of each class. More frequent classes
        will have shorter huffman codes.
    mode: integer. One of [0, 1]
    verbose: boolean. Set to true to see the progress of building the huffman tree.
    '''
    self.nb_classes = nb_classes
    if frequency_table is None:
        frequency_table = [1] * nb_classes
    self.frequency_table = frequency_table
    self.mode = mode
    self.init = initializations.get(init)
    self.W_regularizer = regularizers.get(W_regularizer)
    self.b_regularizer = regularizers.get(b_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.W_constraint = constraints.get(W_constraint)
    self.b_constraint = constraints.get(b_constraint)
    self.bias = bias
    self.initial_weights = weights
    self.verbose = verbose
    super(Huffmax, self).__init__(**kwargs)
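# Construction sketch for Huffmax (an assumed usage based only on the docstring
# above): more frequent classes get shorter Huffman codes, so the observed class
# counts are passed as the frequency table. The import path and label data are
# hypothetical placeholders.
import numpy as np
from huffmax import Huffmax  # hypothetical module name

nb_classes = 10000
train_labels = np.random.randint(0, nb_classes, size=100000)  # stand-in label data
class_counts = np.bincount(train_labels, minlength=nb_classes).tolist()
huffmax = Huffmax(nb_classes=nb_classes,
                  frequency_table=class_counts,
                  mode=0,
                  verbose=True)  # print progress while the huffman tree is built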
def __init__(self, value, init='glorot_uniform', regularizer=None,
             constraint=None, trainable=True, name=None):
    if type(value) == int:
        value = (value,)
    if type(value) in [tuple, list]:
        if type(init) == str:
            init = initializations.get(init)
        self.value = init(value, name=name)
    elif 'numpy' in str(type(value)):
        self.value = K.variable(value, name=name)
    else:
        self.value = value
    if type(regularizer) == str:
        regularizer = regularizers.get(regularizer)
    if type(constraint) == str:
        constraint = constraints.get(constraint)
    self.regularizer = regularizer
    self.constraint = constraint
    self.trainable = trainable
from keras.preprocessing.sequence import pad_sequences from keras.regularizers import l2 from sklearn.metrics import roc_curve, roc_auc_score import sys sys.path.append('/home/huangzhengjie/quora_pair/') from birnn import MaxPoolingOverTime, TimeReverse, DotProductLayer, \ MaskBilinear, MaskMeanPoolingOverTime, MaskSumPoolingOverTime, \ ElementWiseConcat, StopGradientLayer max_sequence_length = 30 dropout_rate = 0.5 vocab_size = 100000 weight_initializer = K_init.get('normal', scale=0.1) def siamese_conv(pretrain=False): input = Input(shape=(max_sequence_length, ), dtype='int32') input_mask = Input(shape=(max_sequence_length, ), dtype='bool') print input.get_shape() embedding_dim = 300 with tf.device('/cpu:0'): if pretrain: embedding_input = Embedding( vocab_size, embedding_dim, weights=[embedding_matrix], trainable=True, mask_zero=False,
def __init__(self, mem_size, vec_dim, unk_spk='NO', **kwargs): self.mem_size = mem_size self.vec_dim = vec_dim self.unk_spk = unk_spk self.init = initializations.get('zero') super(SpkLifeLongMemory, self).__init__(**kwargs)
def __init__(self, input_dim, output_dim=128, init='glorot_uniform', inner_init='orthogonal', activation='tanh', inner_activation='hard_sigmoid', weights=None, truncate_gradient=-1, output_mode='sum'): super(BiDirectionLSTM, self).__init__() self.input_dim = input_dim self.output_dim = output_dim self.truncate_gradient = truncate_gradient self.output_mode = output_mode # output_mode is either sum or concatenate self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.activation = activations.get(activation) self.inner_activation = activations.get(inner_activation) self.input = T.tensor3() # forward weights self.W_i = self.init((self.input_dim, self.output_dim)) self.U_i = self.inner_init((self.output_dim, self.output_dim)) self.b_i = shared_zeros((self.output_dim)) self.W_f = self.init((self.input_dim, self.output_dim)) self.U_f = self.inner_init((self.output_dim, self.output_dim)) self.b_f = shared_zeros((self.output_dim)) self.W_c = self.init((self.input_dim, self.output_dim)) self.U_c = self.inner_init((self.output_dim, self.output_dim)) self.b_c = shared_zeros((self.output_dim)) self.W_o = self.init((self.input_dim, self.output_dim)) self.U_o = self.inner_init((self.output_dim, self.output_dim)) self.b_o = shared_zeros((self.output_dim)) # backward weights self.Wb_i = self.init((self.input_dim, self.output_dim)) self.Ub_i = self.inner_init((self.output_dim, self.output_dim)) self.bb_i = shared_zeros((self.output_dim)) self.Wb_f = self.init((self.input_dim, self.output_dim)) self.Ub_f = self.inner_init((self.output_dim, self.output_dim)) self.bb_f = shared_zeros((self.output_dim)) self.Wb_c = self.init((self.input_dim, self.output_dim)) self.Ub_c = self.inner_init((self.output_dim, self.output_dim)) self.bb_c = shared_zeros((self.output_dim)) self.Wb_o = self.init((self.input_dim, self.output_dim)) self.Ub_o = self.inner_init((self.output_dim, self.output_dim)) self.bb_o = shared_zeros((self.output_dim)) self.params = [ self.W_i, self.U_i, self.b_i, self.W_c, self.U_c, self.b_c, self.W_f, self.U_f, self.b_f, self.W_o, self.U_o, self.b_o, self.Wb_i, self.Ub_i, self.bb_i, self.Wb_c, self.Ub_c, self.bb_c, self.Wb_f, self.Ub_f, self.bb_f, self.Wb_o, self.Ub_o, self.bb_o, ] if weights is not None: self.set_weights(weights)
def __init__(self, **kwargs): self.init = initializations.get('normal') # self.input_spec = [InputSpec(ndim=3)] super(AttLayer, self).__init__(**kwargs)
def __init__(self, init='zero', alpha=None, weights=None, **kwargs): self.init = initializations.get(init) self.initial_weights = weights self.initial_alpha = alpha self.axis = 1 super(PReLU, self).__init__(**kwargs)
def __init__(self, input_dim, output_dim=128, train_init_cell=True, train_init_h=True, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', input_activation='tanh', gate_activation='hard_sigmoid', output_activation='tanh', weights=None, truncate_gradient=-1, return_sequences=False): super(LSTMLayerV0, self).__init__() self.input_dim = input_dim self.output_dim = output_dim self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.init = initializations.get(init) self.inner_init = initializations.get(inner_init) self.forget_bias_init = initializations.get(forget_bias_init) self.input_activation = activations.get(input_activation) self.gate_activation = activations.get(gate_activation) self.output_activation = activations.get(output_activation) self.input = T.tensor3() W_z = self.init( (self.input_dim, self.output_dim)).get_value(borrow=True) R_z = self.inner_init( (self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_z = shared_zeros(self.output_dim) W_i = self.init( (self.input_dim, self.output_dim)).get_value(borrow=True) R_i = self.inner_init( (self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_i = shared_zeros(self.output_dim) W_f = self.init( (self.input_dim, self.output_dim)).get_value(borrow=True) R_f = self.inner_init( (self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_f = self.forget_bias_init(self.output_dim) W_o = self.init( (self.input_dim, self.output_dim)).get_value(borrow=True) R_o = self.inner_init( (self.output_dim, self.output_dim)).get_value(borrow=True) # self.b_o = shared_zeros(self.output_dim) self.h_m1 = shared_zeros(shape=(1, self.output_dim), name='h0') self.c_m1 = shared_zeros(shape=(1, self.output_dim), name='c0') W = np.vstack( (W_z[np.newaxis, :, :], W_i[np.newaxis, :, :], W_f[np.newaxis, :, :], W_o[np.newaxis, :, :])) # shape = (4, input_dim, output_dim) R = np.vstack( (R_z[np.newaxis, :, :], R_i[np.newaxis, :, :], R_f[np.newaxis, :, :], R_o[np.newaxis, :, :])) # shape = (4, output_dim, output_dim) self.W = theano.shared(W, name='Input to hidden weights (zifo)', borrow=True) self.R = theano.shared(R, name='Recurrent weights (zifo)', borrow=True) self.b = theano.shared(np.zeros(shape=(4, self.output_dim), dtype=theano.config.floatX), name='bias', borrow=True) self.params = [self.W, self.R] if train_init_cell: self.params.append(self.c_m1) if train_init_h: self.params.append(self.h_m1) if weights is not None: self.set_weights(weights)
def __init__(self, **kwargs): self.init = initializations.get('normal') super(AttLayer, self).__init__(**kwargs)
def build_model(): reset_session() dropout_rate = 0.2 weight_initializer = K_init.get('normal', scale=0.1) def siamese_conv(pretrain=False): input = Input(shape=(max_sequence_length, ), dtype='int32') input_mask = Input(shape=(max_sequence_length, ), dtype='bool') embedding_dim = 300 with tf.device('/cpu:0'): if pretrain: embedding_input = Embedding( embedding_matrix.shape[0], embedding_dim, weights=[embedding_matrix], trainable=True, mask_zero=False, )(input) else: embedding_input = Embedding( embedding_matrix.shape[0], embedding_dim, trainable=True, init=weight_initializer, mask_zero=False, )(input) cnn_config = [(32, 2), (32, 3), (64, 4), (64, 5), (128, 7)] cnn_output = [] for fs, fl in cnn_config: o1 = Conv1D(fs, fl, activation='relu', border_mode='same')(embedding_input) o1 = MaxPooling1D(pool_length=30, border_mode='valid')(o1) o1 = Flatten()(o1) cnn_output.append(o1) output = Merge(mode='concat', concat_axis=-1)(cnn_output) output = Dense(128, activation='tanh')(output) model = Model(input=[input, input_mask], output=output) return model sen_model = siamese_conv(pretrain=False) sen_1 = Input(shape=(max_sequence_length, ), dtype='int32') sen_1_mask = Input(shape=(max_sequence_length, ), dtype='bool') sen_2 = Input(shape=(max_sequence_length, ), dtype='int32') sen_2_mask = Input(shape=(max_sequence_length, ), dtype='bool') embedding_sen_1 = sen_model([sen_1, sen_1_mask]) embedding_sen_2 = sen_model([sen_2, sen_2_mask]) dense_dim = 300 abs_merge = lambda x: tf.abs(x[0] - x[1]) mul_merge = lambda x: tf.mul(x[0], x[1]) abs_feature = Merge(mode=abs_merge, output_shape=lambda x: x[0])( [embedding_sen_1, embedding_sen_2]) mul_feature = Merge(mode=mul_merge, output_shape=lambda x: x[0])( [embedding_sen_1, embedding_sen_2]) leaks_input = Input(shape=(3, ), dtype='float32') leaks_dense = Dense(50, activation='relu')(leaks_input) feature = Merge(mode='concat', concat_axis=-1)([abs_feature, mul_feature, leaks_dense]) feature = Dropout(dropout_rate)(feature) feature = Dense(64, activation='relu')(feature) feature = Dropout(dropout_rate)(feature) feature = Dense(1, activation='sigmoid')(feature) final_model = Model( input=[sen_1, sen_1_mask, sen_2, sen_2_mask, leaks_input], output=feature) optimizer = Adam(lr=1e-3) final_model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) return final_model
def __init__(self, **kwargs): self.attention = None self.init = initializations.get('normal') self.supports_masking = True super(SelfAttLayer, self).__init__(**kwargs)