import numpy as np
from numpy import ndarray
from typing import List, Tuple

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import Parameter, Variable, initializers, link
from chainer.functions.connection import linear
from chainer.initializers import Normal, Uniform

# PAD, EOS, AttentionModule and DynamicAttentionModule are assumed to be
# provided elsewhere in this project.


class SVDLinear(link.Link):
    """Linear layer with a low-rank weight: W is factorized as
    V (out_size, k) times U (k, in_size)."""

    def __init__(self, in_size, out_size=None, nobias=False,
                 initialV=None, initialU=None, initial_bias=None, k=16):
        super(SVDLinear, self).__init__()
        if out_size is None:
            in_size, out_size = None, in_size
        self.out_size = out_size
        self.k = k

        with self.init_scope():
            U_initializer = initializers._get_initializer(initialU)
            V_initializer = initializers._get_initializer(initialV)
            self.U = Parameter(U_initializer)
            self.V = Parameter(V_initializer)
            # U is registered as persistent, so the optimizer does not update it.
            self.register_persistent('U')
            if in_size is not None:
                self._initialize_params(in_size)

            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = Parameter(bias_initializer, out_size)

    def _initialize_params(self, in_size):
        self.U.initialize((self.k, in_size))
        self.V.initialize((self.out_size, self.k))

    def __call__(self, x):
        """Applies the low-rank linear layer.

        Note: this layer has not been confirmed to work, even on simple data.

        Args:
            x (~chainer.Variable): Batch of input vectors.

        Returns:
            ~chainer.Variable: Output of the linear layer.
        """
        if self.U.data is None or self.V.data is None:
            in_size = x.shape[1]
            self._initialize_params(in_size)
        # x: (batch_size, in_size)
        # U: (k, in_size)    -> x U^T has shape (batch_size, k)
        # V: (out_size, k)   -> (x U^T) V^T + b = x (V U)^T + b
        # so the effective weight is W = V U with shape (out_size, in_size).
        W1 = linear.linear(x, self.U)
        return linear.linear(W1, self.V, self.b)
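# Usage sketch (illustrative, not part of the original module): SVDLinear
# replaces a dense weight of shape (out_size, in_size) with the low-rank
# product V (out_size, k) @ U (k, in_size), cutting the parameter count from
# in_size * out_size to k * (in_size + out_size). `_demo_svd_linear` and the
# sizes below are hypothetical and only show the intended call pattern.
def _demo_svd_linear():
    x = np.random.randn(8, 784).astype(np.float32)   # batch of 8 flattened inputs
    layer = SVDLinear(784, 256, k=16)                 # 784*256 params -> 16*(784+256)
    y = layer(x)                                      # (8, 256)
    assert y.shape == (8, 256)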
class RVec(link.Link):
    def __init__(self, counts, vecDims):
        super(RVec, self).__init__()
        with self.init_scope():
            initializer = Normal(0.1)
            self.vecDims = vecDims
            self.edge2vec = Parameter(initializer)
            self.edge2vec.initialize((counts, vecDims))

    def forward(self, indexs):
        vecs = F.embed_id(indexs, self.edge2vec).reshape(-1, self.vecDims)
        return vecs
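# Usage sketch (illustrative, not part of the original module): RVec is a
# plain embedding table; `forward` looks up one vector per index. The counts,
# dimensions and indices below are example values only.
def _demo_rvec():
    edges = RVec(counts=100, vecDims=32)             # 100 edge types, 32-dim vectors
    ids = np.array([3, 7, 7, 41], dtype=np.int32)    # hypothetical edge indices
    vecs = edges.forward(ids)                        # Variable of shape (4, 32)
    assert vecs.shape == (4, 32)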
class npUnconcat(chainer.Chain):
    def __init__(self, hidden_units):
        super(npUnconcat, self).__init__()
        with self.init_scope():
            initializer = Normal()
            self.encoderL = L.Linear(None, hidden_units[0])
            self.encoderR = L.Linear(None, hidden_units[0])
            self.z = Parameter(initializer)
            self.z.initialize(hidden_units[1])
            self.decoderL = L.Linear(None, hidden_units[2])
            self.decoderR = L.Linear(None, hidden_units[2])
class NodeAverageLink(link.Link):
    def __init__(self, v_in_size, out_size=None, nobias=False,
                 initialW=None, initial_bias=None, residual=False):
        super(NodeAverageLink, self).__init__()
        if out_size is None:
            v_in_size, out_size = None, v_in_size
        self.out_size = out_size
        self.residual = residual

        with self.init_scope():
            W_initializer = initializers._get_initializer(initialW)
            self.Wc = Parameter(W_initializer)  # transform for the center node
            self.Wn = Parameter(W_initializer)  # transform for averaged neighbors
            if v_in_size is not None:
                self._initialize_params_v(v_in_size)

            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = Parameter(bias_initializer, out_size)

    def _initialize_params_v(self, v_in_size):
        self.Wc.initialize((v_in_size, self.out_size))
        self.Wn.initialize((v_in_size, self.out_size))

    def __call__(self, vertex, edge, adj, num_array):
        if self.Wc.array is None:
            v_in_size = vertex.shape[1]
            self._initialize_params_v(v_in_size)
        # Average the transformed neighbor features over each node's neighbors.
        neighbor = F.matmul(vertex, self.Wn)
        neighbor = F.sparse_matmul(adj, neighbor) / num_array
        center = F.matmul(vertex, self.Wc)
        output = center + neighbor
        if self.residual:
            # Residual connection; requires v_in_size == out_size.
            output = vertex + output
        if self.b is not None:
            output += self.b
        return output, edge, adj, num_array
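# Usage sketch (illustrative, not part of the original module): NodeAverageLink
# expects dense node features, a sparse adjacency matrix usable with
# F.sparse_matmul, and `num_array` holding each node's neighbor count for the
# average. The conversion via chainer.utils.to_coo assumes a Chainer version
# with sparse-matrix support (v5 or later); the graph below is made up.
def _demo_node_average():
    vertex = np.random.randn(4, 8).astype(np.float32)        # 4 nodes, 8 features each
    adj_dense = np.array([[0, 1, 1, 0],
                          [1, 0, 0, 1],
                          [1, 0, 0, 0],
                          [0, 1, 0, 0]], dtype=np.float32)
    adj = chainer.utils.to_coo(adj_dense)                     # sparse COO adjacency
    num_array = adj_dense.sum(axis=1, keepdims=True)          # neighbors per node, (4, 1)
    layer = NodeAverageLink(8, 16)
    out, _, _, _ = layer(vertex, None, adj, num_array)        # edge is passed through unused
    assert out.shape == (4, 16)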
class NVec(link.Link):
    def __init__(self, counts, vecDims):
        super(NVec, self).__init__()
        with self.init_scope():
            initializer = Uniform(6 / np.sqrt(vecDims))
            self.counts = counts
            self.vecDims = vecDims
            self.node2vec = Parameter(initializer)
            self._initialize_params()

    def _initialize_params(self):
        self.node2vec.initialize((self.counts, self.vecDims))

    def forward(self, indexs):
        self.nodeVecs = F.normalize(self.node2vec)
        vecs = F.embed_id(indexs, self.node2vec).reshape(-1, self.vecDims)
        return vecs
class NodeEdgeAverageLink(link.Link):
    def __init__(self, v_in_size, e_in_size, out_size=None, nobias=False,
                 initialW=None, initial_bias=None):
        super(NodeEdgeAverageLink, self).__init__()
        if out_size is None:
            v_in_size, out_size = None, v_in_size
        self.out_size = out_size

        with self.init_scope():
            W_initializer = initializers._get_initializer(initialW)
            self.Wc = Parameter(W_initializer)  # transform for the center node
            self.Wn = Parameter(W_initializer)  # transform for averaged neighbors
            self.We = Parameter(W_initializer)  # transform for averaged edge features
            if v_in_size is not None:
                self._initialize_params_v(v_in_size)
            if e_in_size is not None:
                self._initialize_params_e(e_in_size)

            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = Parameter(bias_initializer, out_size)

    def _initialize_params_v(self, v_in_size):
        self.Wc.initialize((v_in_size, self.out_size))
        self.Wn.initialize((v_in_size, self.out_size))

    def _initialize_params_e(self, e_in_size):
        self.We.initialize((e_in_size, self.out_size))

    def __call__(self, vertex, edge, adj, num_array):
        if self.Wc.array is None:
            v_in_size = vertex.shape[1]
            self._initialize_params_v(v_in_size)
        if self.We.array is None:
            e_in_size = edge.shape[1]
            self._initialize_params_e(e_in_size)
        neighbor = F.matmul(vertex, self.Wn)
        neighbor = F.sparse_matmul(adj, neighbor) / num_array
        center = F.matmul(vertex, self.Wc)
        # `edge` is a sparse (N*N, e_in_size) matrix of pairwise edge features;
        # transform it, fold it back to (N, N, out_size), then average over neighbors.
        edge_feature = F.sparse_matmul(edge, self.We)
        length = int(np.sqrt(edge_feature.shape[0]))
        edge_feature = F.reshape(edge_feature,
                                 [length, length, edge_feature.shape[1]])
        edge_feature = F.sum(edge_feature, axis=0) / num_array
        output = center + neighbor + edge_feature
        if self.b is not None:
            output += self.b
        return output, edge, adj, num_array
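# Layout sketch (illustrative, not part of the original module): the `edge`
# input of NodeEdgeAverageLink is an (N, N, e_in_size) tensor flattened to
# (N * N, e_in_size), so row i*N + j holds the features of edge (i, j); the
# reshape/sum in __call__ then aggregates, for each destination node j, the
# transformed features of edges (i, j). Pure-numpy check of that bookkeeping:
def _demo_edge_layout():
    N, e_in = 4, 5
    edge_tensor = np.random.randn(N, N, e_in).astype(np.float32)  # per-pair features
    edge_flat = edge_tensor.reshape(N * N, e_in)                   # what the link consumes
    recovered = edge_flat.reshape(N, N, e_in)                      # what F.reshape rebuilds
    assert np.array_equal(recovered.sum(axis=0), edge_tensor.sum(axis=0))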
class Decoder(chainer.Chain):
    def __init__(self,
                 vocabulary_size: int,
                 word_embeddings_size: int,
                 hidden_layer_size: int,
                 attention_hidden_layer_size: int,
                 encoder_output_size: int,
                 maxout_layer_size: int,
                 maxout_pool_size: int = 2,
                 ignore_label: int = -1,
                 dynamic_attention: bool = False):
        super(Decoder, self).__init__()
        with self.init_scope():
            self.embed_id = L.EmbedID(vocabulary_size,
                                      word_embeddings_size,
                                      ignore_label=ignore_label)
            self.rnn = L.StatelessLSTM(
                word_embeddings_size + encoder_output_size,
                hidden_layer_size
            )
            self.maxout = L.Maxout(word_embeddings_size +
                                   encoder_output_size +
                                   hidden_layer_size,
                                   maxout_layer_size,
                                   maxout_pool_size)
            self.linear = L.Linear(maxout_layer_size, vocabulary_size)
            if dynamic_attention:
                self.attention = DynamicAttentionModule(
                    encoder_output_size,
                    attention_hidden_layer_size,
                    hidden_layer_size,
                    word_embeddings_size
                )
            else:
                self.attention = AttentionModule(
                    encoder_output_size,
                    attention_hidden_layer_size,
                    hidden_layer_size,
                    word_embeddings_size
                )
            self.bos_state = Parameter(
                initializer=self.xp.random.randn(
                    1, hidden_layer_size
                ).astype('f')
            )
        self.vocabulary_size = vocabulary_size
        self.word_embeddings_size = word_embeddings_size
        self.hidden_layer_size = hidden_layer_size
        self.encoder_output_size = encoder_output_size

    def __call__(self, encoded: Variable, target: ndarray) -> Variable:
        minibatch_size, max_sentence_size, encoder_output_size = encoded.shape
        assert encoder_output_size == self.encoder_output_size
        assert target.shape[0] == minibatch_size

        self.setup(encoded)
        cell, state, previous_words = self.get_initial_states(minibatch_size)

        total_loss = Variable(self.xp.array(0, 'f'))
        total_predictions = 0
        # Teacher forcing: feed the gold previous word at each step and
        # accumulate the softmax cross-entropy over non-padding positions.
        for target_id in self.xp.hsplit(target, target.shape[1]):
            target_id = target_id.reshape((minibatch_size,))
            cell, state, context, concatenated = \
                self.advance_one_step(cell, state, previous_words)
            logit, state = self.compute_logit(concatenated, state, context)

            current_sentence_count = self.xp.sum(target_id != PAD)
            loss = F.softmax_cross_entropy(logit, target_id, ignore_label=PAD)
            total_loss += loss * current_sentence_count
            total_predictions += current_sentence_count

            previous_words = target_id

        return total_loss / total_predictions

    def setup(self, encoded: Variable):
        if self.bos_state.array is None:
            self.bos_state.initialize((1, self.hidden_layer_size))
        self.attention.precompute(encoded)

    def get_initial_states(
            self, minibatch_size: int
    ) -> Tuple[Variable, Variable, ndarray]:
        cell = Variable(
            self.xp.zeros((minibatch_size, self.hidden_layer_size), 'f')
        )
        state = F.broadcast_to(
            self.bos_state, (minibatch_size, self.hidden_layer_size)
        )
        previous_words = self.xp.full((minibatch_size,), EOS, 'i')
        return cell, state, previous_words

    def advance_one_step(
            self, cell: Variable, state: Variable, previous_words: ndarray
    ) -> Tuple[Variable, Variable, Variable, Variable]:
        minibatch_size = cell.shape[0]
        previous_embedding = self.embed_id(previous_words)
        context = self.attention(state, previous_embedding)
        assert context.shape == (minibatch_size, self.encoder_output_size)
        concatenated = F.concat((previous_embedding, context))
        cell, state = self.rnn(cell, state, concatenated)
        return cell, state, context, concatenated

    def compute_logit(
            self, concatenated: Variable, state: Variable, context: Variable
    ) -> Tuple[Variable, Variable]:
        all_concatenated = F.concat((concatenated, state))
        logit = self.linear(self.maxout(all_concatenated))
        return logit, state

    def translate(
            self, encoded: Variable, max_length: int = 100
    ) -> List[ndarray]:
        sentence_count = encoded.shape[0]
        self.setup(encoded)
        cell, state, previous_words = self.get_initial_states(sentence_count)
        result = []
        # Greedy decoding: feed the argmax of each step back in as the next input.
        for _ in range(max_length):
            cell, state, context, concatenated = \
                self.advance_one_step(cell, state, previous_words)
            logit, state = self.compute_logit(concatenated, state, context)
            output_id = F.reshape(F.argmax(logit, axis=1), (sentence_count,))
            result.append(output_id)
            previous_words = output_id

        # Remove words after <EOS>
        outputs = F.separate(F.transpose(F.vstack(result)), axis=0)
        assert len(outputs) == sentence_count
        output_sentences = []
        for output in outputs:
            assert output.shape == (max_length,)
            indexes = np.argwhere(output.data == EOS)
            if len(indexes) > 0:
                output = output[:indexes[0, 0] + 1]
            output_sentences.append(output.data)
        return output_sentences
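# Sketch of the <EOS> truncation used in Decoder.translate (illustrative,
# pure numpy): each decoded row is cut just after its first end-of-sentence
# token. `eos = 0` is an assumption for this example only; the real EOS id
# comes from the project's vocabulary.
def _demo_trim_after_eos():
    eos = 0
    decoded = np.array([[5, 9, 0, 7, 7],
                        [3, 3, 3, 0, 0]], dtype=np.int32)
    trimmed = []
    for row in decoded:
        indexes = np.argwhere(row == eos)
        if len(indexes) > 0:
            row = row[:indexes[0, 0] + 1]
        trimmed.append(row)
    # trimmed == [array([5, 9, 0]), array([3, 3, 3, 0])]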