Code example #1
    def forward(self, input, hidden, return_h=False, return_prob=False):
        batch_size = input.size(1)

        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if
                               (self.training and self.use_dropout) else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti if self.use_dropout else 0)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
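        # Run the stacked RNN layers, applying locked (variational) dropout
        # between layers.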
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(
                    raw_output, self.dropouth if self.use_dropout else 0)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output,
                               self.dropout if self.use_dropout else 0)
        outputs.append(output)

        latent = self.latent(output)
        latent = self.lockdrop(latent,
                               self.dropoutl if self.use_dropout else 0)
        logit = self.decoder(latent.view(-1, self.ninp))

        prior_logit = self.prior(output).contiguous().view(-1, self.n_experts)
        prior = nn.functional.softmax(prior_logit, -1)

        prob = nn.functional.softmax(logit.view(-1, self.ntoken),
                                     -1).view(-1, self.n_experts, self.ntoken)
        prob = (prob * prior.unsqueeze(2).expand_as(prob)).sum(1)

        if return_prob:
            model_output = prob
        else:
            log_prob = torch.log(prob.add_(1e-8))
            model_output = log_prob

        model_output = model_output.view(-1, batch_size, self.ntoken)

        if return_h:
            return model_output, hidden, raw_outputs, outputs
        return model_output, hidden
Code example #2
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': rnn.reset()
    rnn.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = rnn.init_hidden(batch_size)
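    # Iterate over the evaluation data in bptt-sized chunks, weighting each
    # chunk's loss by its length and normalizing by the total number of
    # timesteps at the end.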
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = rnn(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Code example #3
File: model.py Project: prashantksharma/RNN
    def forward_test(self, inp, hprev):

        hp = hprev.clone().view(1, -1)
        # print(hp.shape)
        rec_net = rnn()
        i = 0
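        # Unroll the recurrent cell over the time dimension, one timestep at a time.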
        for t in range(inp.shape[0]):
            #print(i)
            hp = rec_net.forward(hp, inp[t].view(1, -1), self.Bh, self.Whh,
                                 self.Wxh)
            i = i + 1
        output = hp.mm(self.Why)

        return output
Code example #4
    def __init__(self, nLayers, H, B, D, isTrain):

        self.layer = []
        self.nLayers = nLayers  # no_of_layers
        self.hidden_dim = H  # hidden_layer_dim
        self.batch_size = B  # Batch_Size
        self.input_dim = D  # word_vector_size, len_unique
        self.out = 2  # no_of_output_classes
        self.isTrain = isTrain
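        # Initialize the input-to-hidden, hidden-to-hidden and hidden-to-output
        # weights (plus the biases) with small scaled-Gaussian values.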
        self.Wxh = torch.randn([self.input_dim, self.hidden_dim
                                ]).double() * 0.1
        self.Whh = torch.randn([self.hidden_dim, self.hidden_dim
                                ]).double() * 0.1
        self.Bh = torch.randn(self.hidden_dim).double() * 0.1
        self.By = torch.randn(self.out).double() * 0.1
        self.Why = torch.randn([self.hidden_dim, self.out]).double() * 0.1

        for t in range(nLayers):
            self.addlayer(rnn())
Code example #5
	X_batch = []
	y_batch = []
	for i in random_ix[:batch_size]:
		X_batch.append(np.asarray(X[i:i+seq_length].reshape(-1,1)))
		y_batch.append(X[i+1:i+seq_length+1].reshape(-1,1))
	X_batch, y_batch = np.asarray(X_batch), np.asarray(y_batch)
	X_batch, y_batch = np.transpose(X_batch,(1,0,2)), np.transpose(y_batch,(1,0,2))
	return X_batch, y_batch
train_size = 4000
X_train = np.array([((i/10.)*np.sin(i/10.)+6*np.sin(5*(i/10.)))/48 for i in range(train_size)])
X_test = np.array([(((i+0.5)/10.)*np.sin((i+0.5)/10.) + 6*np.sin(5*((i+0.5)/10.)))/48 for i in range(train_size)])
n_epochs = 10
seq_length = 32
n_units = 16
learning_rate = 0.001
batch_size = 1000
n_batches = int(train_size/batch_size)

network = rnn(n_units=n_units, X_length=1, y_length=1)
# train
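# Decay the learning rate each batch and report train/test loss before every update.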
for epoch in range(n_epochs):
	for batch in range(n_batches):
		learning_rate*=0.94
		X_batch, y_batch = fetch_batch(batch_size, seq_length, X_train)
		X_test_batch, y_test_batch = fetch_batch(batch_size, seq_length, X_test)
		print("train loss: {}".format(network.loss(X_batch,y_batch)))
		print("test loss : {}".format(network.loss(X_test_batch, y_test_batch)))
		network.fit(X_batch, y_batch, learning_rate)

Code example #6
File: model.py Project: Avmb/lrn
    def forward(self, *hidden, input=None, return_h=False, return_prob=False, 
                return_student_distill_loss=False, average_ensemble=False, 
                enable_rnd_distill=False, enable_rnd_tune=False,
                flatten_returned_lists=False):
        batch_size = input.size(1)

        if self.rnn_type == "lstm" or self.rnn_type == "sru":
            # hidden state must be rearranged a (h, c) tuple
            rearranged_hidden = []
            for i in range(0, len(hidden), 2):
                rearranged_hidden.append((hidden[i], hidden[i+1]))
            hidden = rearranged_hidden
        
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if (self.training and self.use_dropout) else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti if self.use_dropout else 0)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        distill_loss_acc = [torch.tensor(0.0).to(input.device)] if return_student_distill_loss else None
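        # Run the stacked RNN layers; per-layer RND hooks (state_post_proc) can
        # accumulate a distillation loss into distill_loss_acc.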
        for l, rnn in enumerate(self.rnns):
            state_post_proc = None
            assert(not (enable_rnd_distill and enable_rnd_tune)), "enable_rnd_distill and enable_rnd_tune can't be enabled at the same time"
            if enable_rnd_distill:
                state_post_proc = self.rnd_models[l].get_rnd_distill_loss_proc(distill_loss_acc)
            if enable_rnd_tune:
                state_post_proc = self.rnd_models[l].get_rnd_scale_proc(distill_loss_acc)

            current_input = raw_output
            if self.ndistilstudents  == 0:
                raw_output, new_h = rnn(current_input, hidden[l], 
                                        distill_loss_acc=distill_loss_acc, state_post_proc=state_post_proc)
            else:
                raw_output, new_h = rnn(current_input, hidden[l], distill_loss_acc=distill_loss_acc, average_ensemble=average_ensemble, 
                                        state_post_proc=state_post_proc)
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth if self.use_dropout else 0)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout if self.use_dropout else 0)
        outputs.append(output)

        latent = self.latent(output)
        latent = self.lockdrop(latent, self.dropoutl if self.use_dropout else 0)
        logit = self.decoder(latent.view(-1, self.ninp) * self.decoder_gain)
        #print(self.decoder_gain.max().item(), self.decoder_gain.min().item(), self.decoder_gain.mean().item())

        prior_logit = self.prior(output).contiguous().view(-1, self.n_experts)
        prior = nn.functional.softmax(prior_logit, -1)

        prob = nn.functional.softmax(logit.view(-1, self.ntoken), -1).view(-1, self.n_experts, self.ntoken)
        prob = (prob * prior.unsqueeze(2).expand_as(prob)).sum(1)

        if return_prob:
            model_output = prob
        else:
            log_prob = torch.log(prob.add_(1e-8))
            model_output = log_prob

        model_output = model_output.view(-1, batch_size, self.ntoken)

        rv = (model_output, hidden)
        if return_h:
            rv = rv + (raw_outputs, outputs)
        if return_student_distill_loss:
            rv = rv + (distill_loss_acc[0].reshape([1, 1]), )
        if flatten_returned_lists:
            new_rv = []
            for e in rv:
                if isinstance(e, list):
                    for ee in e:
                        new_rv.append(ee)
                else:
                    new_rv.append(e)
            rv = new_rv
        return rv
Code example #7
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': rnn.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = rnn.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
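        # Randomly vary the BPTT length from batch to batch; the learning rate
        # is rescaled below in proportion to the sampled sequence length.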
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        rnn.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = rnn(data,
                                                     hidden,
                                                     return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                          for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                          for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(rnn.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
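
Note: repackage_hidden is called in examples #2 and #7 but is not defined in these excerpts. A minimal sketch of what such a helper typically does, assuming the hidden state is a tensor or a nested tuple of tensors (an illustration, not the project's actual implementation):

import torch

def repackage_hidden(h):
    # Detach hidden states from their history so backpropagation stops at the
    # batch boundary instead of reaching back to the start of the dataset.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)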
Code example #8
if torch.cuda.is_available():
    rnn.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(rnn.parameters(), lr=learning_rate)

# Re-train the network but don't update zero-weights (by setting the corresponding gradients to zero)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        t0 = time()
        images = to_var(images.view(-1, sequence_length, input_size))
        labels = to_var(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()

        # zero-out all the gradients corresponding to the pruned connections
        # for l,p in enumerate(rnn.parameters()):
        #     pruned_inds = pruned_inds_by_layer[l]
        #     if type(pruned_inds) is not str:
        #         p.grad.data[pruned_inds] = 0.

        optimizer.step()

        losses.append(loss.data[0])

        if (i + 1) % 100 == 0:
            accuracy = compute_accuracy(rnn, sequence_length, input_size,
Code example #9
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import numpy.linalg as alg
from nnutils import *
from rnn import *

if __name__ == "__main__":
    A = np.array([[1, -1, 1, -1],[2, 1, -2, 1],[-1, -1, -2, 1],[1, -2, 1, 1]])
    b = np.matrix([0, -1, -3, -1]).T
    
    nn = rnn(4, 1, 0)
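    # Hand-set a symmetric weight matrix, threshold vector and initial state,
    # then iterate the network while tracking the residual Ax - b.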
    W = np.zeros([4,4])
    W[0,3] = -6
    W[3,0] = -6
    W[1,2] = 6
    W[2,1] = 6
    W[1,3] = -2
    W[3,1] = -2

    t = np.array([7, -1, -4, 2])
    x = np.matrix([0, 1, 0, 1]).T
    
    nn.setWeight(W)
    nn.setThreshold(t)
    nn.setValue(x)

    for i in range(0, 20):
        nn.printValue()
        x_now = nn.getValue()
        axb = np.dot(A, x_now) - b
Code example #10
    d["t"] = t
    d["W"] = W

    return d


if __name__ == "__main__":

    n = 4
    n2 = n * n

    d = expandEnergy(energyNHP, n2)

    a = 100

    nhp = rnn(n2, a, 0)
    nhp.setThreshold(d["t"])
    nhp.setWeight(d["W"])

    num_loop = 500

    solutions = [0] * num_loop
    subs = [0] * num_loop
    count = 0

    update_until_end = [0] * num_loop
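    # Restart the network from random binary states and repeatedly update it
    # from each starting point.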

    for i in range(0, num_loop):
        nhp.setValue(randomBinaryVec(n2))
        for j in range(0, num_loop):
            nhp.update()
Code example #11
import numpy as np
from rnn import *

np.random.seed(0)
X_length = 3
seq_length = 16
y_length = 3
num_units = 10
batch_size = 20
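# Random input/target sequences of shape (seq_length, batch_size, features)
# for a numerical gradient check of the network.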
X_seq = np.random.normal(size=[seq_length, batch_size, X_length])
y_seq = np.random.normal(size=[seq_length, batch_size, y_length])
net = rnn(num_units, X_length, y_length)
net.grad_check(X_seq, y_seq)