Example 1
import torch.nn as nn

# AdaptiveSoftmax is defined elsewhere in the project.


class RNNModel(nn.Module):
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 cutoffs,
                 dropout=0.5,
                 tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type == 'GRU':
            self.rnn = getattr(nn, rnn_type)(ninp,
                                             nhid,
                                             nlayers,
                                             dropout=dropout)
        else:
            try:
                nonlinearity = {
                    'RNN_TANH': 'tanh',
                    'RNN_RELU': 'relu'
                }[rnn_type]
            except KeyError:
                raise ValueError(
                    "An invalid option for `--model` was supplied; "
                    "options are ['GRU', 'RNN_TANH', 'RNN_RELU']")
            self.rnn = nn.RNN(ninp,
                              nhid,
                              nlayers,
                              nonlinearity=nonlinearity,
                              dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        if tie_weights:
            if nhid != ninp:
                raise ValueError(
                    'When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
        self.softmax = AdaptiveSoftmax(nhid, cutoffs)

        self.full = False
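
The constructor above wires an AdaptiveSoftmax head in place of a plain full-vocabulary decoder. A minimal instantiation sketch; every hyperparameter value below is an illustrative assumption, not taken from the original project:

# Illustrative only: sizes and cutoff boundaries are assumptions.
model = RNNModel(rnn_type='GRU',
                 ntoken=10000,          # vocabulary size
                 ninp=256,              # embedding size
                 nhid=256,              # hidden size (must equal ninp when tying)
                 nlayers=2,
                 cutoffs=[2000, 8000],  # adaptive-softmax cluster boundaries
                 dropout=0.5,
                 tie_weights=True)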
Example 2
import itertools

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# AdaptiveLogits, AdaptiveSoftmax and Toy are defined elsewhere in the project.


def train():
    cutoffs = [10, 20, 300 + 1]
    embeds = nn.Embedding(300, 100)
    adaptive_logits = AdaptiveLogits(embeds, cutoffs)
    adaptive_softmax = AdaptiveSoftmax(adaptive_logits)
    model = Toy(embeds)
    x = torch.randn((3, 100))
    targets = torch.tensor([0, 1, 10])
    optimizer = optim.Adam(
        itertools.chain(
            model.parameters(),
            # adaptive_logits.parameters()))
            embeds.parameters()))
    for i in range(1000):
        optimizer.zero_grad()
        hidden = model(x)
        # Dense baseline: score hidden against every embedding row (300-way logits).
        logits = torch.mm(
            hidden, torch.transpose(embeds(torch.arange(300)), 0, 1))
        loss = F.cross_entropy(logits, targets)
        # logits = adaptive_logits(hidden, targets)
        # loss = adaptive_logits.loss(logits, targets)
        loss.backward()
        optimizer.step()
    # print(torch.argmax(adaptive_softmax(hidden), 1))
    print(torch.argmax(F.softmax(logits, 1), 1))
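
The commented lines mark the adaptive alternative to the dense 300-way softmax. A sketch of that path, assembled from the calls those comments already show; it remains a sketch, not the project's verified training loop:

def train_adaptive():
    # Same toy setup as train(), but using the adaptive loss path that the
    # commented-out lines above indicate.
    cutoffs = [10, 20, 300 + 1]
    embeds = nn.Embedding(300, 100)
    adaptive_logits = AdaptiveLogits(embeds, cutoffs)
    adaptive_softmax = AdaptiveSoftmax(adaptive_logits)
    model = Toy(embeds)
    x = torch.randn((3, 100))
    targets = torch.tensor([0, 1, 10])
    optimizer = optim.Adam(
        itertools.chain(model.parameters(), adaptive_logits.parameters()))
    for i in range(1000):
        optimizer.zero_grad()
        hidden = model(x)
        logits = adaptive_logits(hidden, targets)     # cluster-wise logits
        loss = adaptive_logits.loss(logits, targets)  # adaptive softmax loss
        loss.backward()
        optimizer.step()
    print(torch.argmax(adaptive_softmax(hidden), 1))  # full distribution at eval time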
Example 3
    def __init__(self,
                 d_model,
                 num_heads,
                 max_position,
                 d_ffn,
                 num_layers,
                 mem_len,
                 vocab_size,
                 dropout_rate=0.1,
                 cutoffs=None,
                 proj_factor=4,
                 proj_dims=None,
                 straight_through=False,
                 **kwargs):
        super().__init__(**kwargs)
        assert mem_len >= 0 and max_position > 0

        self.d_model = d_model
        self.mem_len = mem_len
        self.cutoffs = cutoffs
        self.num_layers = num_layers
        self.max_position = max_position

        self.embed = tf.keras.layers.Embedding(vocab_size, d_model)

        if cutoffs:
            # Adaptive softmax output head for large vocabularies.
            self.final_layer = AdaptiveSoftmax(cutoffs, proj_factor, proj_dims)
        else:
            # Plain dense projection over the full vocabulary.
            self.final_layer = tf.keras.layers.Dense(vocab_size)

        self.stoch_blks = [
            StochasticBlock(d_model, num_heads, max_position, d_ffn,
                            dropout_rate, straight_through)
            for _ in range(num_layers)
        ]

        self.dropout = tf.keras.layers.Dropout(dropout_rate,
                                               name='inp_dropout')
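
Only the constructor is shown, so the enclosing class name is unknown; an instantiation sketch under assumed, hypothetical names and values:

# Hypothetical: the class name and every value here are placeholders.
model = TransformerLM(d_model=512,
                      num_heads=8,
                      max_position=512,
                      d_ffn=2048,
                      num_layers=6,
                      mem_len=128,  # Transformer-XL-style memory length
                      vocab_size=32000,
                      cutoffs=[2000, 10000, 32000])  # enables the AdaptiveSoftmax head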
Example 4
import math

import torch
import torch.nn as nn

# AdaptiveLogits and AdaptiveSoftmax come from the surrounding project;
# a sketch of the approx_eq helper follows the test.


def test_softmax():
  batch_size = 300
  hidden_size = 200
  vocab_size = 100
  misclassification_error = 0
  best_misclassification_error = 0
  for i in range(10):
    embed_weights = nn.Parameter(torch.Tensor(vocab_size, hidden_size))
    embed_weights.data.normal_(0, 1.0/math.sqrt(hidden_size))
    vocab = nn.Embedding(vocab_size, hidden_size, _weight=embed_weights)
    cutoffs = [20, 30, vocab_size]
    adaptive_logits = AdaptiveLogits(vocab, cutoffs)
    adaptive_softmax = AdaptiveSoftmax(adaptive_logits)
    targets = torch.randint(low=0, high=vocab_size, size=[batch_size], dtype=torch.long)
    hidden = torch.randn(batch_size, hidden_size)
    probs = adaptive_softmax(hidden)
    preds = torch.argmax(probs, dim=1)
    probs_vocab = adaptive_softmax(vocab(targets))
    preds_vocab = torch.argmax(probs_vocab, dim=1)
    # The L0 "norm" counts the number of mismatched predictions in the batch.
    misclassification_error += (preds - targets).float().norm(p=0)
    best_misclassification_error += (preds_vocab - targets).float().norm(p=0)
  assert approx_eq(torch.sum(probs, dim=1), torch.ones(probs.shape[0]))
  assert approx_eq(torch.sum(probs_vocab, dim=1), torch.ones(probs.shape[0]))
  assert best_misclassification_error < misclassification_error
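
The test leans on an approx_eq helper that is not defined in the snippet; a minimal sketch of what it presumably checks (the tolerance value is an assumption):

def approx_eq(a, b, tol=1e-4):
  # Element-wise approximate equality; torch.allclose does the heavy lifting.
  return torch.allclose(a, b, atol=tol)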
Example 5
import torch
import torch.nn as nn

# AdaptiveSoftmax is defined elsewhere in the project.


class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder.

    Based on the official PyTorch word-language-model example.
    """
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 cutoffs,
                 dropout=0.5,
                 tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type == 'GRU':
            self.rnn = getattr(nn, rnn_type)(ninp,
                                             nhid,
                                             nlayers,
                                             dropout=dropout)
        else:
            try:
                nonlinearity = {
                    'RNN_TANH': 'tanh',
                    'RNN_RELU': 'relu'
                }[rnn_type]
            except KeyError:
                raise ValueError(
                    "An invalid option for `--model` was supplied; "
                    "options are ['GRU', 'RNN_TANH', 'RNN_RELU']")
            self.rnn = nn.RNN(ninp,
                              nhid,
                              nlayers,
                              nonlinearity=nonlinearity,
                              dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        if tie_weights:
            if nhid != ninp:
                raise ValueError(
                    'When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
        self.softmax = AdaptiveSoftmax(nhid, cutoffs)

        self.full = False

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        output = output.view(output.size(0) * output.size(1), output.size(2))
        if self.full:
            decode = self.softmax.log_prob(output)
        else:
            decode = self.softmax(output)
        return decode, hidden

    def init_hidden(self, bsz):
        # Zero-initialised hidden state matching the model's dtype and device.
        weight = next(self.parameters())
        return weight.new_zeros(self.nlayers, bsz, self.nhid)
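
A short smoke-test sketch for the full model; shapes and hyperparameters are illustrative, and the exact return value of the adaptive (non-full) path depends on the AdaptiveSoftmax implementation:

# Illustrative smoke test; all sizes are placeholders.
model = RNNModel('RNN_TANH', ntoken=10000, ninp=200, nhid=200,
                 nlayers=2, cutoffs=[2000, 8000])
hidden = model.init_hidden(bsz=20)
tokens = torch.randint(0, 10000, (35, 20))  # (seq_len, batch) of word ids
model.full = True                           # ask for full log-probabilities
log_probs, hidden = model(tokens, hidden)   # log_probs: (35 * 20, ntoken)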