def __init__(self, in_dim, hidden_dim, pooling, activation, gates=("sigmoid", "sigmoid", "sigmoid"), prefix="", initializer=default_initializer, dropout=0, verbose=True): if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) super(BiLSTMEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, gates, prefix, initializer, dropout, verbose) self.out_dim = hidden_dim * 2 # Composition Function Weight -- Gates # W [in, forget, output, recurrent] self.W_forward, self.W_forward.name = self.W, prefix + "W_forward" self.W_backward = shared_rand_matrix( (self.hidden_dim * 4, self.in_dim), prefix + 'W_backward', initializer) # U [in, forget, output, recurrent] self.U_forward, self.U_forward.name = self.U, prefix + "U_forward" self.U_backward = shared_rand_matrix( (self.hidden_dim * 4, self.hidden_dim), prefix + 'U_backward', initializer) # b [in, forget, output, recurrent] self.b_forward, self.b_forward.name = self.b, prefix + "b_forward" self.b_backward = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b_backward') self.params = [ self.W_forward, self.U_forward, self.b_forward, self.W_backward, self.U_backward, self.b_backward ] self.norm_params = [ self.W_forward, self.U_forward, self.W_backward, self.U_backward ] self.l1_norm = T.sum( [T.sum(T.abs_(param)) for param in self.norm_params]) self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params]) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Input Gate: %s' % self.in_gate.method) logger.debug('Forget Gate: %s' % self.forget_gate.method) logger.debug('Output Gate: %s' % self.out_gate.method) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation, prefix="", initializer=default_initializer, dropout=0, verbose=True): super(BiRecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling, activation, prefix, initializer, dropout, verbose) if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) self.out_dim = hidden_dim * 2 # Forward Direction - Backward Direction # Feed-Forward Matrix (hidden, in) self.W_forward = self.W self.W_forward.name = prefix + "W_forward" self.W_backward = shared_rand_matrix( (self.hidden_dim, self.in_dim), prefix + 'W_backward', initializer) # Bias Term (hidden,) self.b_forward = self.b self.b_forward.name = prefix + "b_forward" self.b_backward = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_backward') # Recurrent Matrix (hidden, hidden) self.U_forward = self.U self.U_forward.name = prefix + "U_forward" self.U_backward = shared_rand_matrix( (self.hidden_dim, self.hidden_dim), prefix + 'U_backward', initializer) self.params = [ self.W_forward, self.W_backward, self.U_forward, self.U_backward, self.b_forward, self.b_backward ] self.norm_params = [ self.W_forward, self.W_backward, self.U_forward, self.U_backward ] # L1, L2 Norm self.l1_norm = T.sum( [T.sum(T.abs_(param)) for param in self.norm_params]) self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params]) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, in_dim, hidden_dim, pooling, activation, gates=("sigmoid", "sigmoid", "sigmoid"), prefix="", initializer=default_initializer, dropout=0, verbose=True): if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) self.in_dim = in_dim self.hidden_dim = hidden_dim self.out_dim = hidden_dim self.pooling = pooling self.act = Activation(activation) self.in_gate, self.forget_gate, self.out_gate = Activation( gates[0]), Activation(gates[1]), Activation(gates[2]) self.dropout = dropout # W [in, forget, output, recurrent] (4 * hidden, in) self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim), prefix + 'W', initializer) # U [in, forget, output, recurrent] (4 * hidden, hidden) self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim), prefix + 'U', initializer) # b [in, forget, output, recurrent] (4 * hidden,) self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b') self.params = [self.W, self.U, self.b] self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U)) self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Input Gate: %s' % self.in_gate.method) logger.debug('Forget Gate: %s' % self.forget_gate.method) logger.debug('Output Gate: %s' % self.out_gate.method) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, num_in, initializer=default_initializer):
    self.W = shared_rand_matrix(shape=(num_in, 1), name="logistic_W", initializer=initializer)
    self.b = shared_zero_matrix(np.asarray([0]), name='logistic_b')
    self.params = [self.W, self.b]
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
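# Hedged sketch, not part of the original source: the forward pass the logistic
# parameters above imply, for a symbolic input matrix x of shape (batch, num_in).
# The function name is illustrative only; it assumes the module-level
# `theano.tensor as T` import already used in this file.
def example_logistic_prob(classifier, x):
    # p(y = 1 | x) = sigmoid(x . W + b); W is (num_in, 1), b is (1,)
    return T.nnet.sigmoid(T.dot(x, classifier.W) + classifier.b)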
def __init__(self, in_dim, hidden_dim, pooling, activation, prefix="", initializer=default_initializer, dropout=0, verbose=True): if verbose: logger.debug('Building {}...'.format(self.__class__.__name__)) self.in_dim = in_dim self.out_dim = hidden_dim self.hidden_dim = hidden_dim self.pooling = pooling self.dropout = dropout self.act = Activation(activation) # Composition Function Weight # Feed-Forward Matrix (hidden, in) self.W = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_forward', initializer) # Bias Term (hidden) self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward') # Recurrent Matrix (hidden, hidden) self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim), prefix + 'U_forward', initializer) self.params = [self.W, self.U, self.b] self.norm_params = [self.W, self.U] # L1, L2 Norm self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U)) self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2) if verbose: logger.debug('Architecture of {} built finished'.format( self.__class__.__name__)) logger.debug('Input dimension: %d' % self.in_dim) logger.debug('Hidden dimension: %d' % self.hidden_dim) logger.debug('Pooling methods: %s' % self.pooling) logger.debug('Activation Func: %s' % self.act.method) logger.debug('Dropout Rate: %f' % self.dropout)
def __init__(self, num_in, num_out, initializer=default_initializer):
    self.num_in = num_in
    self.num_out = num_out
    self.W = shared_rand_matrix(shape=(num_in, num_out), name="softmax_W", initializer=initializer)
    self.b = shared_zero_matrix((num_out, ), 'softmax_b')
    self.params = [self.W, self.b]
    self.l1_norm = T.sum(T.abs_(self.W))
    self.l2_norm = T.sum(self.W ** 2)
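# Hedged sketch, not part of the original source: the class-probability computation the
# softmax parameters above imply, for a symbolic batch x of shape (batch, num_in).
def example_softmax_prob(classifier, x):
    # returns a (batch, num_out) matrix whose rows sum to 1
    return T.nnet.softmax(T.dot(x, classifier.W) + classifier.b)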
def __init__(self, entity_dim, relation_num, k=50, activation='tanh',
             initializer=default_initializer, prefix='', verbose=True):
    super(SingleLayerModel, self).__init__()
    self.k = k
    self.entity_dim = entity_dim
    self.relation_num = relation_num
    # (relation_num, k, entity_dim)
    self.W_1 = shared_rand_matrix((relation_num, self.k, self.entity_dim), prefix + 'W_1', initializer)
    # (relation_num, k, entity_dim)
    self.W_2 = shared_rand_matrix((relation_num, self.k, self.entity_dim), prefix + 'W_2', initializer)
    # (relation_num, k)
    self.u = shared_rand_matrix((relation_num, self.k), prefix + 'u', initializer)
    self.act = Activation(activation)
    self.params = [self.W_1, self.W_2, self.u]
    self.l1_norm = T.sum(T.abs_(self.W_1)) + T.sum(T.abs_(self.W_2)) + T.sum(T.abs_(self.u))
    self.l2_norm = T.sum(self.W_1 ** 2) + T.sum(self.W_2 ** 2) + T.sum(self.u ** 2)
    if verbose:
        logger.debug('Architecture of Single Layer Model built finished, summarized as below:')
        logger.debug('Entity Dimension: %d' % self.entity_dim)
        logger.debug('K Dimension: %d' % self.k)
        logger.debug('Relation Number: %d' % self.relation_num)
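# Hedged sketch, not part of the original source: the per-relation scoring rule the W_1,
# W_2 and u parameters suggest, i.e. the single-layer score u[r] . act(W_1[r] e1 + W_2[r] e2),
# using T.tanh in place of self.act for brevity. The repository's actual scoring method
# may differ.
def example_single_layer_score(model, e1, e2, r):
    hidden = T.tanh(T.dot(model.W_1[r], e1) + T.dot(model.W_2[r], e2))  # (k,)
    return T.dot(model.u[r], hidden)                                    # scalar score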
def __init__(self, w=None, size=10000, dim=50, initializer=default_initializer, prefix=""):
    if w is None:
        # Randomly initialize the embedding matrix
        self.size = size
        self.dim = dim
        self.W = shared_rand_matrix(shape=(self.size, self.dim), initializer=initializer,
                                    name=prefix + 'Embedding')
    else:
        # Load the provided (pre-trained) embedding matrix
        self.size = w.shape[0]
        self.dim = w.shape[1]
        self.W = shared_matrix(np.array(w, dtype=theano.config.floatX), name=prefix + 'Embedding')
    self.params = [self.W]
    # The first row of W is excluded from the regularizers
    self.l1_norm = T.sum(T.abs_(self.W[1:]))  # sum([T.sum(T.abs_(param)) for param in self.params])
    self.l2_norm = T.sum(self.W[1:] ** 2)     # sum([T.sum(param ** 2) for param in self.params])
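# Hedged sketch, not part of the original source: embedding lookup for a symbolic index
# matrix `indices` of shape (batch, seq_len); advanced indexing on the shared matrix
# yields a (batch, seq_len, dim) tensor. Row 0 being left out of the L1/L2 norms above
# suggests it is reserved (e.g. for padding), though the original does not say so.
def example_embed(embedding, indices):
    return embedding.W[indices]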
def __init__(self, source_dim, target_dim, initializer=default_initializer, config=None, verbose=True):
    self.source_dim = source_dim
    self.target_dim = target_dim
    self.alpha = config.alpha
    self.uniform_range = config.uniform_range
    self.normalize = config.normalize
    self.weight_rec = config.weight_rec
    self.weight_sem = config.weight_sem
    self.weight_l2 = config.weight_l2
    self.dropout = config.dropout
    self.verbose = verbose
    self.learning_rate = config.optimizer.param["lr"]
    self.source_encoder = NegativePhraseRAE(self.source_dim, initializer=initializer,
                                            normalize=self.normalize, dropout=self.dropout,
                                            verbose=self.verbose)
    self.target_encoder = NegativePhraseRAE(self.target_dim, initializer=initializer,
                                            normalize=self.normalize, dropout=self.dropout,
                                            verbose=self.verbose)
    self.source_pos = self.source_encoder.output
    self.source_neg = self.source_encoder.neg_output
    self.target_pos = self.target_encoder.output
    self.target_neg = self.target_encoder.neg_output
    # Define Bilingual Parameters
    self.Wsl = shared_rand_matrix(size=(self.target_dim, self.source_dim), name="Wsl", initializer=initializer)
    self.Wtl = shared_rand_matrix(size=(self.source_dim, self.target_dim), name="Wtl", initializer=initializer)
    self.bsl = shared_rand_matrix(size=(self.target_dim, ), name="bsl", initializer=initializer)
    self.btl = shared_rand_matrix(size=(self.source_dim, ), name="btl", initializer=initializer)
    self.param = [self.Wsl, self.Wtl, self.bsl, self.btl]
    self.loss_l2 = sum(T.sum(param ** 2) for param in [self.Wsl, self.Wtl]) * self.weight_sem

    def sem_distance(p1, w1, b1, p2):
        # Squared Euclidean distance between the transformed p1 and p2
        transform_p1 = T.tanh(T.dot(w1, p1) + b1)
        return T.sum((p2 - transform_p1) ** 2) / 2

    def sem_sim_distance(p1, w1, b1, p2):
        # Cosine similarity between the transformed p1 and p2
        transform_p1 = T.tanh(T.dot(w1, p1) + b1)
        return cosine_similarity(transform_p1, p2)

    self.source_pos_sem = sem_distance(self.source_pos, self.Wsl, self.bsl, self.target_pos)
    self.target_pos_sem = sem_distance(self.target_pos, self.Wtl, self.btl, self.source_pos)
    self.source_neg_sem = sem_distance(self.source_pos, self.Wsl, self.bsl, self.target_neg)
    self.target_neg_sem = sem_distance(self.target_pos, self.Wtl, self.btl, self.source_neg)
    self.source_tar_sim = sem_sim_distance(self.source_pos, self.Wsl, self.bsl, self.target_pos)
    self.target_src_sim = sem_sim_distance(self.target_pos, self.Wtl, self.btl, self.source_pos)
    # Max-margin (hinge) semantic loss with margin 1.0
    self.max_margin_source = T.maximum(0.0, self.source_pos_sem - self.source_neg_sem + 1.0)
    self.max_margin_target = T.maximum(0.0, self.target_pos_sem - self.target_neg_sem + 1.0)
    self.loss_sem = self.max_margin_source + self.max_margin_target
    self.loss_rec = self.source_encoder.loss_rec + self.target_encoder.loss_rec
    self.loss_l2 = self.loss_l2 + (self.source_encoder.loss_l2 + self.target_encoder.loss_l2) * self.weight_rec
    self.loss = self.alpha * self.loss_rec + (1 - self.alpha) * self.loss_sem + self.loss_l2
    self.params = self.source_encoder.params + self.target_encoder.params + self.param
    self.inputs = [self.source_encoder.pos_vectors, self.source_encoder.neg_vectors,
                   self.target_encoder.pos_vectors, self.target_encoder.neg_vectors]
    self.input_grad = T.grad(self.loss, self.inputs)
    grads = T.grad(self.loss, self.params)
    self.updates = OrderedDict()
    self.single_updates = OrderedDict()
    self.grad = {}
    for param, grad in zip(self.params, grads):
        g = theano.shared(np.asarray(np.zeros_like(param.get_value()), dtype=theano.config.floatX))
        self.grad[param] = g
        # Accumulate gradients into shared variables (batch update applied elsewhere)
        self.updates[g] = g + grad
        # Plain SGD step applied per call
        self.single_updates[param] = param - grad * self.learning_rate
    self._compute_result_grad = theano.function(
        inputs=[self.source_encoder.pos_vectors, self.source_encoder.seq,
                self.source_encoder.neg_vectors, self.source_encoder.neg_seq,
                self.target_encoder.pos_vectors, self.target_encoder.seq,
                self.target_encoder.neg_vectors, self.target_encoder.neg_seq],
        outputs=[self.alpha * self.loss_rec, (1 - self.alpha) * self.loss_sem, self.loss_l2] + self.input_grad,
        updates=self.updates,
        allow_input_downcast=True)
    self._compute_result_grad_single = theano.function(
        inputs=[self.source_encoder.pos_vectors, self.source_encoder.seq,
                self.source_encoder.neg_vectors, self.source_encoder.neg_seq,
                self.target_encoder.pos_vectors, self.target_encoder.seq,
                self.target_encoder.neg_vectors, self.target_encoder.neg_seq],
        outputs=[self.alpha * self.loss_rec, (1 - self.alpha) * self.loss_sem, self.loss_l2] + self.input_grad,
        updates=self.single_updates,
        allow_input_downcast=True)
    self.get_source_output = theano.function(
        inputs=[self.source_encoder.pos_vectors, self.source_encoder.seq],
        outputs=self.source_encoder.output,
        allow_input_downcast=True)
    self.get_target_output = theano.function(
        inputs=[self.target_encoder.pos_vectors, self.target_encoder.seq],
        outputs=self.target_encoder.output,
        allow_input_downcast=True)
    self.get_sem_distance = theano.function(
        inputs=[self.source_encoder.pos_vectors, self.source_encoder.seq,
                self.target_encoder.pos_vectors, self.target_encoder.seq],
        outputs=[self.source_tar_sim, self.target_src_sim],
        allow_input_downcast=True)
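# Hedged usage sketch, not part of the original source: how the compiled functions above
# could be driven for one phrase pair. Shapes are assumptions: the pos/neg vector
# arguments are (n_words, dim) float matrices and the seq arguments are the integer
# composition orders consumed by the NegativePhraseRAE scan.
def example_bilingual_step(model, src_vec, src_seq, src_neg_vec, src_neg_seq,
                           tgt_vec, tgt_seq, tgt_neg_vec, tgt_neg_seq):
    outputs = model._compute_result_grad_single(src_vec, src_seq, src_neg_vec, src_neg_seq,
                                                tgt_vec, tgt_seq, tgt_neg_vec, tgt_neg_seq)
    loss_rec, loss_sem, loss_l2 = outputs[:3]   # weighted loss terms
    input_grads = outputs[3:]                   # gradients w.r.t. the four input matrices
    return loss_rec + loss_sem + loss_l2, input_grads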
def __init__(self, vectors, dim, normalize, dropout, activation,
             initializer=default_initializer, verbose=True):
    """
    :param vectors: a theano tensor variable holding the input word vectors
    :param dim: dimension of the word and phrase vectors
    :param normalize: whether to normalize composed vectors
    :param dropout: dropout rate
    :param activation: name of the composition activation function
    :param initializer: weight initializer
    :param verbose: log architecture details when True
    """
    self.vectors = vectors
    self.dim = dim
    self.normalize = normalize
    self.dropout = dropout
    self.verbose = verbose
    self.act = Activation(activation)
    # Composition Function Weight
    self.W = shared_rand_matrix((self.dim, 2 * self.dim), 'W', initializer=initializer)
    self.b = shared_rand_matrix((self.dim, ), 'b', 0)
    # Reconstruction Function Weight
    self.Wr = shared_rand_matrix((2 * self.dim, self.dim), 'Wr', initializer=initializer)
    self.br = shared_rand_matrix((self.dim * 2, ), 'br', 0)
    self.params = [self.W, self.b, self.Wr, self.br]
    self.seq = T.lmatrix()
    self.left_vector = T.vector()
    self.right_vector = T.vector()
    self.zero = theano.shared(np.array([0.0], dtype=theano.config.floatX))
    self.scan_result, _ = theano.scan(self.encode, sequences=[self.seq],
                                      outputs_info=[self.vectors, None],
                                      name="pos_rae_build")
    self.loss_rec = T.sum(self.scan_result[1])
    self.loss_l1 = sum([T.sum(T.abs_(param)) for param in self.params])
    self.loss_l2 = sum([T.sum(param ** 2) for param in self.params])
    # all history vectors in scan
    self.history_output = self.scan_result[0]
    self.all_output = self.history_output[-1]  # final output
    # self.output = self.all_output[-1]
    # Just for two compose
    self.two_compose_result, self.two_compose_rec = self.compose(self.left_vector, self.right_vector)
    self.compose_two = theano.function(inputs=[self.left_vector, self.right_vector],
                                       outputs=[self.two_compose_result, self.two_compose_rec])
    # Compose Vectors: N vectors -> N-1 Vectors
    compose_vector = T.fmatrix()
    compose_len = T.iscalar()
    path = T.imatrix()
    hs, _ = theano.scan(fn=self.compose_step, sequences=T.arange(compose_len - 1),
                        non_sequences=compose_vector, name="compose_phrase")
    comp_vec = hs[0]
    comp_rec = hs[1]
    min_index = T.argmin(comp_rec)
    # compose_result, _ = theano.scan(fn=self.greed_step,
    #                                 sequences=[T.arange(compose_len - 1), T.arange(compose_len - 1)],
    #                                 non_sequences=[compose_vector, path])
    self._compose_vectors = theano.function([compose_vector, compose_len],
                                            [comp_vec[min_index], min_index])
    # self._compose_result = theano.function([compose_vector, compose_len, path], [path])
    if verbose:
        logger.debug('Architecture of RAE built finished, summarized as below:')
        logger.debug('Hidden dimension: %d' % self.dim)
        logger.debug('Normalize: %s' % self.normalize)
        logger.debug('Dropout Rate: %s' % self.dropout)
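# Hedged sketch, not part of the original source: the standard RAE composition and
# reconstruction that the W/b/Wr/br shapes above imply, with T.tanh standing in for
# self.act. The repository's self.compose may additionally normalize the parent and
# weight the reconstruction error differently.
def example_rae_compose(rae, left, right):
    child = T.concatenate([left, right])             # (2 * dim,) children stacked
    parent = T.tanh(T.dot(rae.W, child) + rae.b)      # (dim,) composed phrase vector
    recon = T.tanh(T.dot(rae.Wr, parent) + rae.br)    # (2 * dim,) reconstructed children
    rec_error = T.sum((recon - child) ** 2) / 2       # reconstruction loss
    return parent, rec_error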