Example 1
import numpy as np

# NOTE: Embedding and NegativeSamplingLoss are assumed to be defined elsewhere in this project.
class SkipGram:
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        # weights (input and output embedding matrices)
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')

        # layers
        self.embed_layer = Embedding(w_in)
        self.ns_loss_layers = [
            NegativeSamplingLoss(w_out, corpus) for _ in range(2 * window_size)
        ]

        # collect all weights and gradients
        layers = [self.embed_layer] + self.ns_loss_layers
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # distributed representation of words (word vectors)
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h = self.embed_layer.forward(target)
        loss = sum([
            l.forward(h, contexts[:, i])
            for i, l in enumerate(self.ns_loss_layers)
        ])
        return loss

    def backward(self, dl=1):
        dh = sum(l.backward(dl) for l in self.ns_loss_layers)
        self.embed_layer.backward(dh)
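A minimal usage sketch for the class above, assuming the Embedding and NegativeSamplingLoss layers it references are available from the same codebase; the corpus, shapes, and hyperparameters below are hypothetical.

import numpy as np

# Hypothetical toy data: a corpus of word IDs over a 7-word vocabulary.
corpus = np.array([0, 1, 2, 3, 4, 1, 5, 6])
model = SkipGram(vocab_size=7, hidden_size=5, window_size=1, corpus=corpus)

# Each target word predicts its 2 * window_size surrounding context words.
contexts = np.array([[0, 2], [1, 3], [2, 4]])  # shape (N, 2 * window_size)
target = np.array([1, 2, 3])                   # shape (N,)

loss = model.forward(contexts, target)
model.backward()
# After an optimizer step, model.word_vecs (i.e. w_in) holds the learned word vectors.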
Example 2
class Decoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size,
                                           output_size,
                                           activation='tanh')
        self.softmax = TimeDistributed(output_size,
                                       vocab_size,
                                       activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [
            self.lstm, self.lstm_output, self.softmax, self.embedding
        ]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))

    def forward(self, ec_H, ec_C, mask):
        (sens_size, batch_size) = T.shape(mask)

        def step(m, prev_Y, prev_H, prev_C):
            """Forward a time step of the decoder."""
            # LSTM forward time step
            (H, C) = self.lstm.step(prev_Y, m, prev_H, prev_C)
            # LSTM output
            O = self.lstm_output.forward(H)
            # Apply softmax to LSTM output
            P = self.softmax.forward(O)
            # Make prediction
            one_hot_Y = T.argmax(P, axis=1)
            # Feed the output to the next time step
            Y = self.embedding.forward(one_hot_Y)
            # FIXME: deal with differing sequence lengths?
            return (P, Y, H, C)

        results, updates = theano.scan(fn=step,
                                       sequences=[mask],
                                       outputs_info=[
                                           None,
                                           dict(initial=T.zeros(
                                               (batch_size,
                                                self.embedding_size)),
                                                taps=[-1]),
                                           dict(initial=ec_H, taps=[-1]),
                                           dict(initial=ec_C, taps=[-1])
                                       ])

        # return np.swapaxes(results[0], 0, 1)       # returns the softmax probabilities
        return results[0]
Example 3
    def forward(self, xs: np.ndarray) -> np.ndarray:
        N, T = xs.shape
        V, D = self.W.shape

        out = np.empty((N, T, D), dtype=float)
        self.layers = []

        for t in range(T):
            layer = Embedding(self.W)
            out[:, t, :] = layer.forward(xs[:, t])
            self.layers.append(layer)

        return out
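The loop above shares a single weight matrix self.W across all T time steps, so the matching backward pass has to sum the per-step gradients back into one array. A minimal sketch, assuming each per-step Embedding layer exposes its gradient as layer.grads[0] and the wrapper stores its accumulated gradient in self.grads[0]:

    def backward(self, dout: np.ndarray) -> None:
        N, T, D = dout.shape

        grad = 0
        for t in range(T):
            layer = self.layers[t]
            layer.backward(dout[:, t, :])
            grad += layer.grads[0]  # accumulate the shared weight's gradient

        self.grads[0][...] = grad  # write back in place so an optimizer sees the update
        return None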
Example 4
    def forward(self, idxs):
        w, = self.params
        N, T = idxs.shape
        V, D = w.shape  # vocabulary size, dimensionality of the word vectors

        self.layers = []
        ys = np.empty((N, T, D), dtype='f')

        for t in range(T):
            layer = Embedding(w)
            ys[:, t, :] = layer.forward(idxs[:, t])
            self.layers.append(layer)

        return ys
Example 5
class Decoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size, output_size, activation='tanh')
        self.softmax = TimeDistributed(output_size, vocab_size, activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [self.lstm, self.lstm_output, self.softmax, self.embedding]
        self.params = list(itertools.chain(*[layer.params for layer in self.layers if hasattr(layer, 'params')]))

    def forward(self, ec_H, ec_C, mask):
        (sens_size, batch_size) = T.shape(mask)

        def step(m, prev_Y, prev_H, prev_C):
            """Forward a time step of the decoder."""
            # LSTM forward time step
            (H, C) = self.lstm.step(prev_Y, m, prev_H, prev_C)
            # LSTM output
            O = self.lstm_output.forward(H)
            # Apply softmax to LSTM output
            P = self.softmax.forward(O)
            # Make prediction
            one_hot_Y = T.argmax(P, axis=1)
            # Feed the output to the next time step
            Y = self.embedding.forward(one_hot_Y)
            # FIXME: deal with differing sequence lengths?
            return (P, Y, H, C)

        results, updates = theano.scan(
            fn=step,
            sequences=[mask],
            outputs_info=[
                None,
                dict(initial=T.zeros((batch_size, self.embedding_size)), taps=[-1]),
                dict(initial=ec_H, taps=[-1]),
                dict(initial=ec_C, taps=[-1])
            ]
        )

        # return np.swapaxes(results[0], 0, 1)       # returns the softmax probabilities
        return results[0]
Example 6
class Encoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        self.params = list(itertools.chain(*[layer.params for layer in self.layers if hasattr(layer, 'params')]))

    def forward(self, batch, mask):
        # ``batch`` is a matrix whose rows are sentences of word IDs, e.g. x = [1, 4, 5, 2, 0]
        # ``emb`` is a list of embedding matrices, emb[i].shape = (sentence_size, embedding_size)
        emb = self.embedding.forward(batch)
        (H, C) = self.lstm.forward(emb, mask)
        return (H[-1], C[-1])
Example 7
class EmbeddingDot:
    def __init__(self, w):
        self.embed = Embedding(w)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h, idx):
        w_idx = self.embed.forward(idx)
        s = np.sum(h * w_idx, axis=1)
        self.cache = (h, w_idx)
        return s

    def backward(self, ds):
        ds = ds.reshape(ds.shape[0], 1)  # reshape to a column vector so it broadcasts over the hidden dimension
        h, w_idx = self.cache
        dw_idx = ds * h
        self.embed.backward(dw_idx)
        dh = ds * w_idx
        return dh
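EmbeddingDot scores each hidden vector against the output-side embedding of one sampled word: s[n] is the dot product of h[n] and w[idx[n]]. A quick NumPy sanity check, assuming the Embedding class used above simply returns the rows w[idx]; the sizes are hypothetical.

import numpy as np

w = np.random.randn(10, 4).astype('f')  # hypothetical output-side weights (V=10, D=4)
h = np.random.randn(3, 4).astype('f')   # hidden vectors for a batch of 3
idx = np.array([1, 7, 2])               # one sampled word ID per batch row

layer = EmbeddingDot(w)
s = layer.forward(h, idx)
assert np.allclose(s, np.sum(h * w[idx], axis=1))  # row-wise dot products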
Example 8
class Encoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))

    def forward(self, batch, mask):
        # ``batch`` is a matrix whose rows are sentences of word IDs, e.g. x = [1, 4, 5, 2, 0]
        # ``emb`` is a list of embedding matrices, emb[i].shape = (sentence_size, embedding_size)
        emb = self.embedding.forward(batch)
        (H, C) = self.lstm.forward(emb, mask)
        return (H[-1], C[-1])
Example 9
class EmbeddingDot:
    def __init__(self, W: np.ndarray) -> None:
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h: np.ndarray, idx: List[int]):
        target_W = self.embed.forward(idx)
        out = np.sum(target_W * h, axis=1)

        self.cache = (h, target_W)
        return out

    def backward(self, dout: np.ndarray) -> np.ndarray:
        h, target_W = self.cache

        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh
Example 10
class LSTM_DQN(torch.nn.Module):
    model_name = 'lstm_dqn'

    def __init__(self,
                 model_config,
                 word_vocab,
                 generate_length=5,
                 enable_cuda=False):
        super(LSTM_DQN, self).__init__()
        self.model_config = model_config
        self.enable_cuda = enable_cuda
        self.word_vocab_size = len(word_vocab)
        self.id2word = word_vocab
        self.generate_length = generate_length
        self.read_config()
        self._def_layers()
        self.init_weights()
        # self.print_parameters()

    def print_parameters(self):
        amount = 0
        for p in self.parameters():
            amount += np.prod(p.size())
        print("total number of parameters: %s" % (amount))
        parameters = filter(lambda p: p.requires_grad, self.parameters())
        amount = 0
        for p in parameters:
            amount += np.prod(p.size())
        print("number of trainable parameters: %s" % (amount))

    def read_config(self):
        # model config
        self.embedding_size = self.model_config['embedding_size']
        self.encoder_rnn_hidden_size = self.model_config[
            'encoder_rnn_hidden_size']
        self.action_scorer_hidden_dim = self.model_config[
            'action_scorer_hidden_dim']
        self.dropout_between_rnn_layers = self.model_config[
            'dropout_between_rnn_layers']

    def _def_layers(self):

        # word embeddings
        self.word_embedding = Embedding(embedding_size=self.embedding_size,
                                        vocab_size=self.word_vocab_size,
                                        enable_cuda=self.enable_cuda)

        # lstm encoder
        self.encoder = FastUniLSTM(
            ninp=self.embedding_size,
            nhids=self.encoder_rnn_hidden_size,
            dropout_between_rnn_layers=self.dropout_between_rnn_layers)

        self.action_scorer_shared = torch.nn.Linear(
            self.encoder_rnn_hidden_size[-1], self.action_scorer_hidden_dim)
        action_scorers = []
        for _ in range(self.generate_length):
            action_scorers.append(
                torch.nn.Linear(self.action_scorer_hidden_dim,
                                self.word_vocab_size,
                                bias=False))
        self.action_scorers = torch.nn.ModuleList(action_scorers)
        self.fake_recurrent_mask = None

    def init_weights(self):
        torch.nn.init.xavier_uniform_(self.action_scorer_shared.weight.data)
        for i in range(len(self.action_scorers)):
            torch.nn.init.xavier_uniform_(self.action_scorers[i].weight.data)
        self.action_scorer_shared.bias.data.fill_(0)

    def representation_generator(self, _input_words):
        embeddings, mask = self.word_embedding.forward(
            _input_words)  # batch x time x emb
        encoding_sequence, _, _ = self.encoder.forward(
            embeddings, mask)  # batch x time x h
        mean_encoding = masked_mean(encoding_sequence, mask)  # batch x h
        return mean_encoding

    def action_scorer(self, state_representation):
        hidden = self.action_scorer_shared.forward(
            state_representation)  # batch x hid
        hidden = F.relu(hidden)  # batch x hid
        action_ranks = []
        for i in range(len(self.action_scorers)):
            action_ranks.append(
                self.action_scorers[i].forward(hidden))  # batch x n_vocab
        return action_ranks
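A rough usage sketch showing how the two halves of the class fit together, assuming the custom Embedding, FastUniLSTM, and masked_mean utilities referenced above are importable; the config values, vocabulary, and input IDs below are hypothetical, and the exact input format expected by the custom Embedding may differ.

import torch

config = {'embedding_size': 32,
          'encoder_rnn_hidden_size': [64],
          'action_scorer_hidden_dim': 48,
          'dropout_between_rnn_layers': 0.0}
vocab = ['<pad>', 'go', 'take', 'open', 'north', 'key', 'door']

model = LSTM_DQN(config, vocab, generate_length=5)

input_words = torch.randint(1, len(vocab), (2, 10))  # batch of 2 observations, 10 word IDs each
state = model.representation_generator(input_words)  # batch x hidden
ranks = model.action_scorer(state)                   # generate_length tensors, each batch x vocab
# Greedy decoding of a command for the first observation in the batch.
command = [model.id2word[r[0].argmax().item()] for r in ranks]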