class SVDLinear(link.Link):
    """
        U x V
    """
    def __init__(self, in_size, out_size=None, nobias=False,
                 initialV=None, initialU=None, initial_bias=None,
                 k=16):
        super(SVDLinear, self).__init__()

        if out_size is None:
            in_size, out_size = None, in_size
        self.out_size = out_size
        self.k = k
        with self.init_scope():
            U_initializer = initializers._get_initializer(initialU)
            V_initializer = initializers._get_initializer(initialV)

            # U: (k, in_size) and V: (out_size, k); device placement is left
            # to the caller (e.g. model.to_gpu()).
            self.U = Parameter(U_initializer)
            self.V = Parameter(V_initializer)

            # U is persistent: it is saved and loaded with the link but is
            # not updated by the optimizer.
            self.register_persistent('U')

            if in_size is not None:
                self._initialize_params(in_size)

            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = Parameter(bias_initializer, out_size)

    def _initialize_params(self, in_size):
        self.U.initialize((self.k, in_size))
        self.V.initialize((self.out_size, self.k))

    def __call__(self, x):
        """Applies the linear layer. However, I checked this code for simple data, It does not work...
        Args:
            x (~chainer.Variable): Batch of input vectors.
        Returns:
            ~chainer.Variable: Output of the linear layer.
        """
        if self.U.data is None or self.V.data is None:
            in_size = x.shape[1]
            self._initialize_params(in_size)

        # x: (batch_size, in_size)
        # U: (k, in_size), V: (out_size, k)
        # y = V (U x) + b approximates W x + b with W ~ V U
        h = linear.linear(x, self.U)
        return linear.linear(h, self.V, self.b)
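
# The factorization above stores U: (k, in_size) and V: (out_size, k), i.e.
# k * (in_size + out_size) weights instead of the in_size * out_size weights
# of a dense linear layer. A minimal shape-check sketch, assuming the
# SVDLinear class defined above is in scope (all names and sizes below are
# illustrative):
import numpy as np

svd_layer = SVDLinear(256, 128, k=16)   # 16 * (256 + 128) = 6144 weights vs 32768
x = np.random.randn(8, 256).astype(np.float32)
y = svd_layer(x)
print(y.shape)            # (8, 128)
print(svd_layer.U.shape)  # (16, 256)
print(svd_layer.V.shape)  # (128, 16)
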
class RVec(link.Link):
    def __init__(self, counts, vecDims):
        super(RVec, self).__init__()
        with self.init_scope():
            initializer = Normal(0.1)
            self.vecDims = vecDims
            self.edge2vec = Parameter(initializer)
            self.edge2vec.initialize((counts, vecDims))

    def forward(self, indexs):
        vecs = F.embed_id(indexs, self.edge2vec).reshape(-1, self.vecDims)
        return vecs
class npUnconcat(chainer.Chain):
    def __init__(self, hidden_units):
        super(npUnconcat, self).__init__()
        with self.init_scope():
            initializer = Normal()

            self.encoderL = L.Linear(None, hidden_units[0])
            self.encoderR = L.Linear(None, hidden_units[0])
            self.z = Parameter(initializer)
            self.z.initialize(hidden_units[1])
            self.decoderL = L.Linear(None, hidden_units[2])
            self.decoderR = L.Linear(None, hidden_units[2])
class NodeAverageLink(link.Link):
    def __init__(self,
                 v_in_size,
                 out_size=None,
                 nobias=False,
                 initialW=None,
                 initial_bias=None,
                 residual=False):
        super(NodeAverageLink, self).__init__()

        if out_size is None:
            v_in_size, out_size = None, v_in_size
        self.out_size = out_size
        self.residual = residual
        with self.init_scope():
            W_initializer = initializers._get_initializer(initialW)
            self.Wc = Parameter(W_initializer)
            self.Wn = Parameter(W_initializer)
            if v_in_size is not None:
                self._initialize_params_v(v_in_size)
            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = Parameter(bias_initializer, out_size)

    def _initialize_params_v(self, v_in_size):
        self.Wc.initialize((v_in_size, self.out_size))
        self.Wn.initialize((v_in_size, self.out_size))

    def __call__(self, vertex, edge, adj, num_array):
        if self.Wc.array is None:
            v_in_size = vertex.shape[1]
            self._initialize_params_v(v_in_size)

        # Project vertex features and average them over each vertex's
        # neighborhood; adj is a sparse adjacency matrix and num_array holds
        # per-vertex neighbor counts.
        neighbor = F.matmul(vertex, self.Wn)
        neighbor = F.sparse_matmul(adj, neighbor) / num_array
        center = F.matmul(vertex, self.Wc)
        output = center + neighbor
        if self.residual:
            output = vertex + output
        if self.b is not None:
            output += self.b
        return output, edge, adj, num_array
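
# A shape sketch for NodeAverageLink, assuming the class above is in scope.
# adj is a sparse (N, N) adjacency matrix (here built with
# chainer.utils.to_coo) and num_array holds per-vertex neighbor counts so the
# division averages the aggregated neighbor features; all names and sizes
# below are illustrative.
import numpy as np
import chainer

N, feat = 5, 32
vertex = np.random.randn(N, feat).astype(np.float32)
adj_dense = (np.random.rand(N, N) < 0.4).astype(np.float32)
adj = chainer.utils.to_coo(adj_dense)
num_array = np.maximum(adj_dense.sum(axis=1, keepdims=True), 1).astype(np.float32)

gconv = NodeAverageLink(feat, 64)
out, _, _, _ = gconv(vertex, None, adj, num_array)
print(out.shape)   # (5, 64)
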
class NVec(link.Link):
    def __init__(self, counts, vecDims):
        super(NVec, self).__init__()
        with self.init_scope():
            initializer = Uniform(6 / np.sqrt(vecDims))
            self.counts = counts
            self.vecDims = vecDims
            self.node2vec = Parameter(initializer)
            self._initialize_params()

    def _initialize_params(self):
        self.node2vec.initialize((self.counts, self.vecDims))

    def forward(self, indexs):
        # Keep an L2-normalized copy of the embedding table as an attribute;
        # the lookup below uses the raw (unnormalized) table.
        self.nodeVecs = F.normalize(self.node2vec)
        vecs = F.embed_id(indexs, self.node2vec).reshape(-1, self.vecDims)
        return vecs
class NodeEdgeAverageLink(link.Link):
    def __init__(self,
                 v_in_size,
                 e_in_size,
                 out_size=None,
                 nobias=False,
                 initialW=None,
                 initial_bias=None):
        super(NodeEdgeAverageLink, self).__init__()

        if out_size is None:
            v_in_size, out_size = None, v_in_size
        self.out_size = out_size
        with self.init_scope():
            W_initializer = initializers._get_initializer(initialW)
            self.Wc = Parameter(W_initializer)
            self.Wn = Parameter(W_initializer)
            self.We = Parameter(W_initializer)
            if v_in_size is not None:
                self._initialize_params_v(v_in_size)
            if e_in_size is not None:
                self._initialize_params_e(e_in_size)
            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = Parameter(bias_initializer, out_size)

    def _initialize_params_v(self, v_in_size):
        self.Wc.initialize((v_in_size, self.out_size))
        self.Wn.initialize((v_in_size, self.out_size))

    def _initialize_params_e(self, e_in_size):
        self.We.initialize((e_in_size, self.out_size))

    def __call__(self, vertex, edge, adj, num_array):
        if self.Wc.array is None:
            v_in_size = vertex.shape[1]
            self._initialize_params_v(v_in_size)
        if self.We.array is None:
            e_in_size = edge.shape[1]
            self._initialize_params_e(e_in_size)
        neighbor = F.matmul(vertex, self.Wn)
        neighbor = F.sparse_matmul(adj, neighbor) / num_array
        center = F.matmul(vertex, self.Wc)
        # Project edge features from the sparse (N*N, e_in_size) edge matrix,
        # reshape to (N, N, out_size) and average the incident edges per
        # vertex.
        edge_feature = F.sparse_matmul(edge, self.We)
        length = int(np.sqrt(edge_feature.shape[0]))
        edge_feature = F.reshape(edge_feature,
                                 [length, length, edge_feature.shape[1]])
        edge_feature = F.sum(edge_feature, axis=0) / num_array
        output = center + neighbor + edge_feature
        if self.b is not None:
            output += self.b
        return output, edge, adj, num_array
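
# The edge input here appears to be a sparse (N*N, e_in_size) matrix whose row
# i*N + j holds the feature of the edge between vertices i and j; after the
# projection it is reshaped to (N, N, out_size) and averaged per vertex. An
# illustrative call, assuming the class above is in scope (names and sizes are
# made up):
import numpy as np
import chainer

N, v_feat, e_feat = 4, 16, 8
vertex = np.random.randn(N, v_feat).astype(np.float32)
edge = chainer.utils.to_coo(np.random.randn(N * N, e_feat).astype(np.float32))
adj_dense = np.ones((N, N), dtype=np.float32)
adj = chainer.utils.to_coo(adj_dense)
num_array = adj_dense.sum(axis=1, keepdims=True)

ne_link = NodeEdgeAverageLink(v_feat, e_feat, 32)
out, _, _, _ = ne_link(vertex, edge, adj, num_array)
print(out.shape)   # (4, 32)
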
class Decoder(chainer.Chain):
    def __init__(self,
                 vocabulary_size: int,
                 word_embeddings_size: int,
                 hidden_layer_size: int,
                 attention_hidden_layer_size: int,
                 encoder_output_size: int,
                 maxout_layer_size: int,
                 maxout_pool_size: int = 2,
                 ignore_label: int = -1,
                 dynamic_attention: bool = False):
        super(Decoder, self).__init__()
        with self.init_scope():
            self.embed_id = L.EmbedID(vocabulary_size,
                                      word_embeddings_size,
                                      ignore_label=ignore_label)
            self.rnn = L.StatelessLSTM(
                word_embeddings_size + encoder_output_size,
                hidden_layer_size
            )
            self.maxout = L.Maxout(word_embeddings_size +
                                   encoder_output_size +
                                   hidden_layer_size,
                                   maxout_layer_size,
                                   maxout_pool_size)
            self.linear = L.Linear(maxout_layer_size, vocabulary_size)
            if dynamic_attention:
                self.attention = DynamicAttentionModule(
                    encoder_output_size,
                    attention_hidden_layer_size,
                    hidden_layer_size,
                    word_embeddings_size
                )
            else:
                self.attention = AttentionModule(
                    encoder_output_size,
                    attention_hidden_layer_size,
                    hidden_layer_size,
                    word_embeddings_size
                )
            self.bos_state = Parameter(
                initializer=self.xp.random.randn(
                    1,
                    hidden_layer_size
                ).astype('f')
            )
        self.vocabulary_size = vocabulary_size
        self.word_embeddings_size = word_embeddings_size
        self.hidden_layer_size = hidden_layer_size
        self.encoder_output_size = encoder_output_size

    def __call__(
            self,
            encoded: Variable,
            target: ndarray
    ) -> Variable:
        minibatch_size, max_sentence_size, encoder_output_size = encoded.shape
        assert encoder_output_size == self.encoder_output_size
        assert target.shape[0] == minibatch_size

        self.setup(encoded)
        cell, state, previous_words = self.get_initial_states(minibatch_size)

        total_loss = Variable(self.xp.array(0, 'f'))
        total_predictions = 0

        for target_id in self.xp.hsplit(target, target.shape[1]):
            target_id = target_id.reshape((minibatch_size,))
            cell, state, context, concatenated = \
                self.advance_one_step(cell, state, previous_words)
            logit, state = self.compute_logit(concatenated, state, context)

            current_sentence_count = self.xp.sum(target_id != PAD)

            loss = F.softmax_cross_entropy(logit, target_id, ignore_label=PAD)
            total_loss += loss * current_sentence_count
            total_predictions += current_sentence_count

            previous_words = target_id

        return total_loss / total_predictions

    def setup(self, encoded: Variable):
        if self.bos_state.array is None:
            self.bos_state.initialize((1, self.hidden_layer_size))
        self.attention.precompute(encoded)

    def get_initial_states(
            self,
            minibatch_size: int
    ) -> Tuple[Variable, Variable, ndarray]:
        cell = Variable(
            self.xp.zeros((minibatch_size, self.hidden_layer_size), 'f')
        )
        state = F.broadcast_to(
            self.bos_state, (minibatch_size, self.hidden_layer_size)
        )
        previous_words = self.xp.full((minibatch_size,), EOS, 'i')
        return cell, state, previous_words

    def advance_one_step(
            self,
            cell: Variable,
            state: Variable,
            previous_words: ndarray
    ) -> Tuple[Variable, Variable, Variable, Variable]:
        minibatch_size = cell.shape[0]
        previous_embedding = self.embed_id(previous_words)
        context = self.attention(state, previous_embedding)
        assert context.shape == (minibatch_size, self.encoder_output_size)
        concatenated = F.concat((previous_embedding, context))
        cell, state = self.rnn(cell, state, concatenated)
        return cell, state, context, concatenated

    def compute_logit(
            self,
            concatenated: Variable,
            state: Variable,
            context: Variable
    ) -> Tuple[Variable, Variable]:
        all_concatenated = F.concat((concatenated, state))
        logit = self.linear(self.maxout(all_concatenated))
        return logit, state

    def translate(
            self,
            encoded: Variable,
            max_length: int = 100
    ) -> List[ndarray]:
        sentence_count = encoded.shape[0]

        self.setup(encoded)
        cell, state, previous_words = self.get_initial_states(sentence_count)

        result = []
        for _ in range(max_length):
            cell, state, context, concatenated = \
                self.advance_one_step(cell, state, previous_words)
            logit, state = self.compute_logit(concatenated, state, context)

            output_id = F.reshape(F.argmax(logit, axis=1), (sentence_count,))
            result.append(output_id)

            previous_words = output_id

        # Remove words after <EOS>
        outputs = F.separate(F.transpose(F.vstack(result)), axis=0)
        assert len(outputs) == sentence_count
        output_sentences = []
        for output in outputs:
            assert output.shape == (max_length,)
            indexes = np.argwhere(output.data == EOS)
            if len(indexes) > 0:
                output = output[:indexes[0, 0] + 1]
            output_sentences.append(output.data)

        return output_sentences
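
# An illustrative single decoding step, mirroring advance_one_step() and
# compute_logit() above but with the attention context replaced by zeros,
# since AttentionModule is not part of this listing. All sizes are made up.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

vocab, emb, hidden, enc_out, maxout_size = 100, 32, 64, 48, 40
embed_id = L.EmbedID(vocab, emb)
rnn = L.StatelessLSTM(emb + enc_out, hidden)
maxout = L.Maxout(emb + enc_out + hidden, maxout_size, 2)
linear_out = L.Linear(maxout_size, vocab)

batch = 3
cell = chainer.Variable(np.zeros((batch, hidden), dtype=np.float32))
state = chainer.Variable(np.zeros((batch, hidden), dtype=np.float32))
previous_words = np.zeros((batch,), dtype=np.int32)

prev_emb = embed_id(previous_words)                      # (batch, emb)
context = np.zeros((batch, enc_out), dtype=np.float32)   # stand-in for attention
concatenated = F.concat((prev_emb, context))             # (batch, emb + enc_out)
cell, state = rnn(cell, state, concatenated)             # one LSTM step
logit = linear_out(maxout(F.concat((concatenated, state))))
print(logit.shape)   # (3, 100)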