Example #1
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
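Example #1 relies on a one_hot helper to turn each target index into a 1 x ntokens row that can be concatenated into the pointer cache. A minimal sketch of such a helper, assuming the same old-style Variable API used above (names and signature are assumptions, not the repo's actual code):

import numpy as np
import torch
from torch.autograd import Variable

def one_hot(idx, size, cuda=False):
    # Hypothetical helper: build a 1 x size one-hot row for token index idx.
    a = np.zeros((1, size), dtype=np.float32)
    a[0][idx] = 1.0
    v = Variable(torch.from_numpy(a))
    return v.cuda() if cuda else v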
Example #2
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
    # Activation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
Example #3
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
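Nearly every evaluate/train function in these examples calls repackage_hidden to cut the autograd graph between BPTT windows. A minimal sketch of what that helper usually looks like (assumed here, not copied from the repos above):

import torch

def repackage_hidden(h):
    # Detach hidden states from their history so gradients do not
    # flow back past the current BPTT window.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)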
Example #4
def train(epoch, X_train, mask_train, Y_train, batch_size, seq_len, ntokens, char_vocab_size, args):
    if epoch % args.betapoint == 0:
        args.beta /= 2
        print('Decreased beta to {}'.format(args.beta))

    model.train()
    start_time = time.time()
    total_loss = 0
    if args.num in [2, 3]:
        total_seq_loss = 0
        total_pred_loss = 0
    for batch, i in enumerate(range(0, X_train.size(0) - 1, batch_size)):
        X, mask, Y = utils.get_batch(X_train, mask_train, Y_train, batch_size, i)
        X = X.to(device)
        mask = mask.to(device)
        Y = Y.to(device)
        optimizer.zero_grad()
        if args.num == 1:
            output, hidden = model(X, mask)
            loss = criterion(output.view(-1, ntokens), Y.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
                
        if args.num in [2, 3]:
            output, hidden, seq_output = model(X, mask) # seq_output = b, l, c-1, char_vocab_size
            loss_pred = criterion(output.view(-1, ntokens), Y.view(-1))

            seq_pred = seq_output.view(-1, char_vocab_size) 
            loss_seq = seq_criterion(seq_pred, X[:,:,1:].contiguous().view(-1))
            loss = loss_pred + args.beta*loss_seq
            loss.backward()
            optimizer.step()
            total_pred_loss += loss_pred.item()
            total_seq_loss += loss_seq.item()
            total_loss += loss.item()

    elapsed = time.time() - start_time
    if args.num == 1:
        s = ('| epoch {:3d} | ms/epoch {:5.2f} | '
            'loss {:5.3f}'.format(epoch, elapsed * 1000, total_loss))
        output_s(s, message_filename)

    if args.num in [2,3]:
        s = ('| epoch {:3d} | ms/epoch {:5.2f} | '
            'pred_loss {:5.3f} | {:5.3f} x seq_loss {:5.3f} | loss {:5.3f} '.format(epoch, elapsed * 1000, 
            total_pred_loss, args.beta, total_seq_loss, total_loss))
        output_s(s, message_filename)

    return total_loss / X_train.size(0)
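Example #4 calls utils.get_batch with parallel input, mask, and target tensors. A plausible slicing helper consistent with that call (an assumption about utils.get_batch, not its real definition):

def get_batch(X_train, mask_train, Y_train, batch_size, i):
    # Hypothetical helper: take one batch-sized slice along the first dimension.
    end = min(i + batch_size, X_train.size(0))
    return X_train[i:end], mask_train[i:end], Y_train[i:end]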
Example #5
def evaluate_copy(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model_copy.eval()

    total_loss = 0
    hidden = model_copy.init_hidden(batch_size)

    for i in range(0, data_source.size(0)-1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model_copy(data, hidden)
        total_loss += len(data) * criterion(model_copy.decoder.weight, model_copy.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #6
def valid_model(epoch, best_acc, learning_rate):
    model.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()

    end = time.time()

    ntokens = len(corpus.dictionary)
    hidden = (torch.zeros(2, eval_batch_size, args.lstm_dim).to(device),
              torch.zeros(2, eval_batch_size, args.lstm_dim).to(device))

    with torch.no_grad():
        for batch, i in enumerate(
                range(0,
                      valid_inputs.size(0) - 1, args.seq_length)):
            data, targets = get_batch(valid_inputs, valid_targets, i, args)
            data = data.to(device)
            targets = targets.to(device)

            hidden = [state.detach() for state in hidden]
            output, hidden = model(data, hidden)

            loss = F.cross_entropy(output, targets)
            losses.update(loss.item(), args.batch_size)

            batch_time.update(time.time() - end)
            end = time.time()

            if batch % args.print_freq == 0:
                print(
                    'Test Epoch: {} [{}]| Loss: {:.3f} | perplexity: {:.3f} | batch time: {:.3f}'
                    .format(epoch, batch, losses.avg, np.exp(losses.avg),
                            batch_time.avg))

    # acc = 100.0 * (correct / total)
    writer.add_scalar('log/test loss', losses.avg, epoch)
    writer.add_scalar('log/test perplexity', np.exp(losses.avg), epoch)

    if abs(np.exp(losses.avg) - best_acc) < 1 and learning_rate > 0.001:
        learning_rate *= 0.5

    if np.exp(losses.avg) < best_acc:
        print('==> Saving model..')
        if not os.path.isdir('save_model'):
            os.mkdir('save_model')
        torch.save(model.state_dict(), './save_model/' + args.name + '.pth')
        best_acc = np.exp(losses.avg)

    return best_acc, learning_rate
Example #7
def test(test_data):
    print("test the model...")
    model.eval()
    correct = 0
    for j in range(0, len(test_data), config.batch_size):
        batch = test_data[j:j + config.batch_size]
        X_tensor, Y_tensor = utils.get_batch(batch, use_cuda)
        logits = model(X_tensor)
        predict = torch.max(logits, 1)[1]
        for p, g in zip(predict, Y_tensor):
            correct += 1 if p == g else 0
    acc = correct / len(test_data)
    print("test model: accuarcy : %.4f " % acc)
    return acc
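Example #7 expects utils.get_batch to turn a slice of the test set into input and target tensors. One plausible sketch, assuming each item is a (feature-vector, class-index) pair (this signature is an assumption):

import torch

def get_batch(batch, use_cuda):
    # Hypothetical helper: stack (features, label) pairs into tensors.
    X = torch.tensor([x for x, _ in batch], dtype=torch.float)
    Y = torch.tensor([y for _, y in batch], dtype=torch.long)
    if use_cuda:
        X, Y = X.cuda(), Y.cuda()
    return X, Y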
Example #8
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(test_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args_bptt):
            data, targets = utils.get_batch(data_source, i, args_bptt)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = utils.repackage_hidden(hidden)
    return total_loss / (len(data_source) - 1)
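Example #8 (and most of the other language-model snippets) assumes a get_batch that slices a BPTT-length chunk of the batched token tensor together with its one-step-shifted targets. A minimal sketch following the usual word-language-model convention (the exact signature varies between the repos above):

def get_batch(source, i, bptt):
    # source is a (tokens x batch) tensor of word ids.
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target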
Example #9
 def run(self):
     if not self.next_run_required:
         self.state = TaskState.COMPLETED
     if self.state == TaskState.YET_TO_START or self.state == TaskState.IDLE:
         self.state = TaskState.RUNNING
         # here run func with actual input for current batch
         batch = utils.get_batch(self.input.data, self.batch_size)
         if batch is None or len(batch) == 0:
             self.next_run_required = False
         return self.execute(batch)
     elif self.state == TaskState.COMPLETED:
         raise TaskCompleted()
     elif self.state == TaskState.RUNNING:
         raise RunningAlreadyRunningTaskError()
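Example #9 only requires that get_batch return an empty batch once the input is exhausted. A hypothetical list-based version consistent with that behaviour (purely illustrative):

def get_batch(data, batch_size):
    # Pop up to batch_size items from the front of the pending input list.
    batch = data[:batch_size]
    del data[:batch_size]
    return batch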
Example #10
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden, w_e, vt_1 = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden, vt_1 = model(data, hidden, w_e, vt_1)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #11
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, weight, bias, hidden = model(data, hidden)
        pred_targets = torch.mm(output, weight.t()) + bias
        total_loss += len(data) * criterion(pred_targets, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #12
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #13
def demo():
    #############################
    # 1. load dataset
    #############################
    image_dir = os.path.join(DATASET_TRAIN_PATH, 'image')
    mask_dir = os.path.join(DATASET_TRAIN_PATH, 'mask')
    image_path_list = [os.path.join(image_dir, v) for v in os.listdir(image_dir)]
    mask_path_list = [os.path.join(mask_dir, v.replace('.jpg', '.png')) for v in os.listdir(image_dir)]
    image_num = len(image_path_list)
    print("Training image num -> ", image_num)
    for image_path, mask_path in list(zip(image_path_list, mask_path_list))[:3]:
        print(image_path, mask_path)
        image = utils.cv_imread(image_path)
        mask = utils.cv_imread(mask_path, 1)
        print(image.shape, mask.shape)
        cv2.imshow('Sample', np.hstack([image, mask]))
        cv2.waitKey(1)
        pass

    batch_num = image_num // BATCH_SIZE
    data_idx_list = list(range(image_num))

    if not os.path.exists(OUT_IMAGE_DIR):
        os.makedirs(OUT_IMAGE_DIR)

    #############################
    # 2. Create Model
    #############################
    sess, tf_x, tf_y, tf_lr, tf_train, tf_logit, tf_predict, tf_cost, tf_optimizer, tf_saver = unet.model(H_IN, W_IN, C_IN, 8)
    global_step = 0

    for epoch in range(MAX_EPOCH):
        np.random.shuffle(data_idx_list)
        for step in range(batch_num):
            idx_list = data_idx_list[step * BATCH_SIZE: (step + 1) * BATCH_SIZE]
            image_batch, mask_batch = utils.get_batch(idx_list, image_path_list, mask_path_list, H_IN, W_IN)
            _, cost = sess.run([tf_optimizer, tf_cost],
                               feed_dict={tf_x: image_batch, tf_y: mask_batch, tf_train: True, tf_lr: LEARNING_RATE})
            if global_step % 10 == 0:
                print("Epoch %d: Step %d -> loss: %.5g" % (epoch, step, cost))
                predict_mask = sess.run(tf_predict, feed_dict={tf_x: image_batch, tf_train: False})
                compare_result_image = utils.create_compare_image(image_batch[0], mask_batch[0], predict_mask[0])
                cv2.imshow('Sample', compare_result_image)
                cv2.waitKey(1)
                cv2.imwrite(os.path.join(OUT_IMAGE_DIR, "train_step_%d.png" % global_step), compare_result_image)

            global_step += 1
    print("Finished. Save model to %s ..." % MODEL_SAVE_PATH)
    tf_saver.save(sess, MODEL_SAVE_PATH)
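Example #13 expects utils.get_batch to load and resize the image/mask pairs selected for the step. A rough sketch under that assumption (cv2-based; helper name and argument order taken from the call above, internals assumed):

import cv2
import numpy as np

def get_batch(idx_list, image_path_list, mask_path_list, h_in, w_in):
    # Hypothetical helper: read each selected image/mask pair and
    # resize it to the network input size.
    images, masks = [], []
    for idx in idx_list:
        image = cv2.resize(cv2.imread(image_path_list[idx]), (w_in, h_in))
        mask = cv2.resize(cv2.imread(mask_path_list[idx], 0), (w_in, h_in))
        images.append(image)
        masks.append(mask)
    return np.asarray(images), np.asarray(masks)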
Example #14
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            targets = targets.view(-1)
            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #15
def evaluate(data_source, tokens):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    hidden = model.init_hidden(eval_batch_size)

    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, seq_len):
            list_len = min(seq_len, len(tokens)-1-i)
            batch_tokens = tokens[i:i+list_len]
            # keep continuous hidden state across all sentences in the input file
            data, targets = get_batch(data_source, i, seq_len)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, vocab_size)
            output_surprisal(output_flat, targets, batch_tokens)
            hidden = repackage_hidden(hidden)
Example #16
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size, args.cuda)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden, optimizer)
        total_loss += len(data) * criterion(output, targets).data
        if args.no_warm_start:
            hidden = model.init_hidden(batch_size)
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #17
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()

    with torch.no_grad():
        total_loss = 0
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            output, hidden = model(data, hidden)
            total_loss += len(data) * criterion(output.view(-1, ntokens),
                                                targets).data
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #18
def evaluate(data_source, mask):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0

    hidden = model.init_hidden(eval_batch_size)

    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, seq_len):
            # keep continuous hidden state across all sentences in the input file
            data, targets = get_batch(data_source, i, seq_len)
            print(data)
            print(targets)
            _, targets_mask = get_batch(mask, i, seq_len)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, vocab_size)
            total_loss += len(data) * nn.CrossEntropyLoss()(output_flat,
                                                            targets)

            output_candidates_probs(output_flat, targets, targets_mask)

            hidden = repackage_hidden(hidden)

    return total_loss.item() / (len(data_source) - 1)
Example #19
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    with torch.no_grad():
        hidden = model.init_hidden(batch_size)
        c_hidden = model.init_c_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, _, targets = get_batch(data_source, i, args=args)
            hidden = repackage_hidden(hidden)
            c_hidden = repackage_hidden(c_hidden)
            output, _, hidden, c_hidden = model(data, hidden, c_hidden)
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
        return total_loss.item() / len(data_source)
Example #20
def evaluate(_model, criterion, valid_data, eval_batch_size):
    _model.eval()
    total_loss = .0
    hidden = _model.init_hidden(eval_batch_size)

    with torch.no_grad():
        for i in range(0, valid_data.size(0) - 1, args.sequence_length):
            data, targets = utils.get_batch(
                valid_data, i,
                min(args.sequence_length,
                    len(valid_data) - 1 - i))
            output, hidden = _model(data, hidden)
            hidden = utils.repackage_hidden(hidden)
            total_loss += len(data) * criterion(output, targets).item()
    return total_loss / (len(valid_data) - 1)
Example #21
    def train(self):
        NUM_EPOCH = []
        self.TRAIN_COLLECT = 50
        self.TRAIN_PRINT = self.TRAIN_COLLECT * 2

        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            iter = 100
            for e in range(self.EPOCH):
                for batch_x, batch_y in utils.get_batch(
                        self.INPUT, self.LABEL, self.BATCH_SIZE):
                    iter += 1
                    feed = {
                        self.MODEL.inputs: batch_x,
                        self.MODEL.labels: batch_y,
                        self.MODEL.learning_rate: self.LEARNING_RATE,
                        self.MODEL.is_training: True
                    }
                    TRAIN_LOSS, _, TRAIN_ACC = sess.run([
                        self.MODEL.cost, self.MODEL.optimizer,
                        self.MODEL.accuracy
                    ],
                                                        feed_dict=feed)

                    if iter % self.TRAIN_COLLECT == 0:
                        NUM_EPOCH.append(e)

                        if iter % self.TRAIN_PRINT == 0:
                            print("Epoch: {}/{}".format(e + 1, self.EPOCH),
                                  "Train Loss: {:.4f}".format(TRAIN_LOSS),
                                  "Train Accuracy: {:.4f}".format(TRAIN_ACC))
                        feed = {
                            self.MODEL.inputs: self.VAL_INPUT,
                            self.MODEL.labels: self.VAL_LABEL,
                            self.MODEL.is_training: False
                        }
                        VAL_LOSS, VAL_ACC = sess.run(
                            [self.MODEL.cost, self.MODEL.accuracy],
                            feed_dict=feed)

                        if iter % self.TRAIN_PRINT == 0:
                            print(
                                "Epoch: {}/{}".format(e + 1, self.EPOCH),
                                "Validation Loss: {:.4f}".format(VAL_LOSS),
                                "Validation Accuracy: {:.4f}".format(VAL_ACC))

            saver.save(sess, "checkpoint/porto_pilsa.ckpt")
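Example #21 iterates over utils.get_batch as a generator of (batch_x, batch_y) pairs. A minimal sketch of such a generator (assumed, not the project's actual helper):

def get_batch(inputs, labels, batch_size):
    # Yield successive aligned slices of the training arrays.
    for start in range(0, len(inputs), batch_size):
        end = start + batch_size
        yield inputs[start:end], labels[start:end]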
Example #22
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model_lm.eval()
    # model_mlp.eval()
    if args.model == 'QRNN': model_lm.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_lm.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets, _ = get_batch(data_source, i, args, evaluation=True)
        output, hidden, _, all_outputs = model_lm(data, hidden, return_h=True)
        # output = model_mlp(all_outputs[-1]) + all_outputs[-1]
        # output = output.view(output.size(0)*output.size(1), output.size(2))
        total_loss += len(data) * criterion(model_lm.decoder.weight, model_lm.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #23
def evaluate(model, criterion, data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    model_now = model.module
    criterion_now = criterion.module
    if args.model == 'QRNN': model_now.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_now.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model_now(data, hidden)
        criterion_now.replicate_weight_and_bias(torch.nn.Parameter(model.module.decoder.weight),torch.nn.Parameter(model.module.decoder.bias))
        total_loss += len(data) * criterion_now(hiddens = output, targets = targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #24
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += len(data) * loss
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #25
def evaluate(opt, valid_data, model, criterion):
    accu_loss = 0.0
    model.eval()
    hidden = model.init_hidden(opt.batch_size)
    for i in range(0, valid_data.shape[1] - 1, opt.bptt_len):
        origin, target = get_batch(opt, valid_data, i)
        origin = np2tensor(opt, origin)
        target = np2tensor(opt, target)
        hidden = repackage_hidden(hidden)

        predict, hidden = model(origin, hidden)
        loss = criterion(predict, target)
        accu_loss += loss.data[0]

    accu_loss /= valid_data.shape[1]
    return accu_loss
Example #26
    def test_loss(self, model, args):

        model.eval()

        total_loss = 0
        hidden = model.init_hidden(self.batch_size)
        for i in range(0, self.data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(self.data_source,
                                      i,
                                      args,
                                      evaluation=True)
            output, hidden = model(data, hidden)
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
            hidden = repackage_hidden(hidden)
        return total_loss.item() / len(self.data_source)
Example #27
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    with torch.no_grad():
        if args.model == 'QRNN': model.reset()
        total_loss = 0
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            output, hidden = model(data, hidden)
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
            hidden = repackage_hidden(hidden)
        #return total_loss[0] / len(data_source) # Error under modern PyTorch
    return total_loss / len(data_source)
Example #28
 def rvae_estimate(self, data_x, num_iter):
     for i in range(num_iter):
         b_x, ids = utils.get_batch(data_x, self.params.batch_size)
         _, l, gen_loss, v_loss = self.sess.run(
             (self.optimizer, self.loss, self.gen_loss, self.v_loss),
             feed_dict={
                 self.x: b_x,
                 self.v: self.m_V[ids, :]
             })
         # Display logs per epoch step
         if i % self.print_step == 0 and self.verbose:
             print "Iter:", '%04d' % (i+1), \
                   "loss=", "{:.5f}".format(l), \
                   "genloss=", "{:.5f}".format(gen_loss), \
                   "vloss=", "{:.5f}".format(v_loss)
     return gen_loss
Example #29
    def rvae_estimate(self, data_x, links, num_iter):
        gradBuffer = self.sess.run(self.tvars)
        for ix, grad in enumerate(gradBuffer):
            gradBuffer[ix] = grad * 0

        for i in range(num_iter):
            b_x, ids = utils.get_batch(data_x, self.params.batch_size)
            num = 0
            gen_loss = 0
            for j in range(self.params.batch_size):
                x = b_x[j].reshape((1, -1))
                id = ids[j]
                link_ids = links[id]
                if len(link_ids) == 0:
                    continue
                link_v = self.m_V[link_ids]
                tGrad, gen_loss_ins = self.sess.run(
                    (self.newGrads, self.gen_loss),
                    feed_dict={
                        self.x: x,
                        self.linked_v: link_v,
                        self.eta_vae: self.eta
                    })
                gen_loss += gen_loss_ins
                for ix, grad in enumerate(tGrad):
                    gradBuffer[ix] += grad
                num += 1
            gen_loss = gen_loss / num
            tGrad = self.sess.run(self.regGrads)
            for ix, grad in enumerate(tGrad):
                gradBuffer[
                    ix] += gradBuffer[ix] / num + grad * self.params.lambda_w

            feed_dict = {}
            for j in range(len(self.batchGrad)):
                feed_dict[self.batchGrad[j]] = gradBuffer[j]
            self.sess.run(self.updateGrads, feed_dict=feed_dict)
            for ix, grad in enumerate(gradBuffer):
                gradBuffer[ix] = grad * 0

            # Display logs per epoch step
            if i % self.print_step == 0 and self.verbose:
                print "Iter:", '%04d' % (i+1), \
                      "loss=", "{:.5f}".format(l), \
                      "genloss=", "{:.5f}".format(gen_loss), \
                      "vloss=", "{:.5f}".format(v_loss)
        return gen_loss
Example #30
def train(cur_epoch):
    # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    final_hidden_states = my_model.get_first_hidden(env.batch_size, env)
    batch, i = 0, 0
    seq_len = env.seq_len
    batches_in_epoch = len(train_data) // env.seq_len
    total_batches = batches_in_epoch * env.epochs
    while i < train_data.size(0) - 1 - 1:
        cur_total_batch = (cur_epoch - 1) * batches_in_epoch + batch
        optimizer.param_groups[0]['lr'] = lr_start * (math.exp(
            -cur_total_batch / total_batches))
        my_model.train()
        data, targets = get_batch(train_data, i, env, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        initial_hidden_states = repackage_hidden(final_hidden_states)
        optimizer.zero_grad()

        output, final_hidden_states = my_model(data, initial_hidden_states)

        loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if env.clip: torch.nn.utils.clip_grad_norm_(params, env.clip)
        optimizer.step()

        total_loss += loss.data
        #optimizer.param_groups[0]['lr'] = lr2
        if batch % env.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / env.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                                     'ppl {:8.2f}'.format(cur_epoch, batch,
                                     len(train_data) // env.seq_len,
                                     optimizer.param_groups[0]['lr'],
                                     elapsed * 1000 / env.log_interval,
                                     math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
Example #31
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN' and getattr(model, 'reset', None): model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = None
    mems = None
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            #output, hidden = model(data, hidden)
            output, hidden, mems = model(data, hidden, mems=mems, return_h=False)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets.view(-1)).data
            if hidden is not None:
                hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #32
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    #m, batch_len = train_data.shape
    #n_batches = (batch_len -1) // seq_len
    data_len = len(train_data)
    b_n = 0
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * bptt / args.bptt
        data, targets = get_batch(
            train_data, i,
            args)  # data size SEQ X BATCH_SIZE, targets: SEQ X BATCH_SIZE, 1
        optimizer.zero_grad()
        output, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)
        loss = raw_loss
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                          for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                          for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        #print (total_loss)
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if b_n % args.log_interval == 0 and b_n > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, b_n,
                    len(train_data) // args.batch_size,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
Example #33
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        logits = model.decoder(output)
        # logProba = nn.functional.log_softmax(logits, dim=1)
        # pred_idxs = torch.argmax(logProba, dim=1)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #34
    def train_gan(self, epochs, batch_size, sample_interval, train_data):

        # Create labels for real and fake data
        real = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # Get batch of real data
            real_seqs = get_batch(train_data, batch_size)
            # Generate batch of fake data using random noise
            noise = np.random.normal(0, 1, (batch_size, self.model.latent_dim))
            gen_seqs = self.model.generator.predict(noise)

            # Train the discriminator to accept real data and reject fake data
            d_loss_real = self.model.discriminator.train_on_batch(
                real_seqs, real)
            d_loss_fake = self.model.discriminator.train_on_batch(
                gen_seqs, fake)

            # Train the generator such that when it takes random noise as an
            # input, it will produce fake data which the discriminator accepts
            # as real

            noise = np.random.normal(0, 1, (batch_size, self.model.latent_dim))
            g_loss = self.model.gan.train_on_batch(noise, real)

            if epoch % sample_interval == 0:
                print("""%d [DiscLoss/Acc Real: (%10f, %10f)] 
                       [DiscLoss/Acc Fake: (%10f, %10f)] 
                       [DiscAcc %10f][GenLoss = %10f]""" %
                      (epoch, d_loss_real[0], d_loss_real[1], d_loss_fake[0],
                       d_loss_fake[1], 0.5 *
                       (d_loss_real[1] + d_loss_fake[1]), g_loss))

                self.disc_loss_r.append(d_loss_real)
                self.disc_loss_f.append(d_loss_fake)

                self.gen_loss.append(g_loss)
                sample_image(self.model, epoch, real_seqs, self.path)
            if (epoch % 1000 == 0):
                self.save_models(self.path, epoch, self.model.generator,
                                 self.model.discriminator)

        self.savedata(self.path, train_data)
        self.showLoss(self.path, save=True)
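Example #34 draws a fresh batch of real sequences each step with get_batch(train_data, batch_size). A plausible sketch, assuming train_data is a NumPy array with one sequence per row:

import numpy as np

def get_batch(train_data, batch_size):
    # Sample batch_size random rows of real data for the discriminator.
    idx = np.random.randint(0, train_data.shape[0], batch_size)
    return train_data[idx]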
Example #35
def test(step,verbose=None):
    N_test = len(q_test)
    n_batches = N_test // batch_size
    acc = []
    for idx in range(n_batches):
        if verbose:
            if idx%20==0:
                print("%d/%d - accuracy = %1.3f"%(idx,n_batches, np.mean(acc)))
        begin = idx*batch_size
        end = min((idx+1)*batch_size, N_test)
        Q, mask, A = get_batch(begin,end,q_test,a_test,batch_size,max_q,Na)
        a_pred = sess.run(model_outputs['answer_pred'], 
                          feed_dict={model_outputs['question']:Q,
                                     model_outputs['mask']:mask, 
                                     model_outputs['answer']:A})
        equals = 1*np.equal(A.argmax(axis=1),a_pred)
        equals = list(equals[:end-begin])
        acc += equals
    acc = tf.reduce_mean(tf.to_float(acc))
    acc_s = tf.scalar_summary("acc_tf",acc,name="acc_tf")
    acc,acc_s = sess.run([acc,acc_s])
    writer.add_summary(acc_s,step)
    return acc
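Example #35 (and #36 below) calls get_batch(begin, end, questions, answers, batch_size, max_q, Na) and expects padded question ids, a validity mask, and one-hot answers back. A rough sketch under those assumptions (the real helper may differ):

import numpy as np

def get_batch(begin, end, questions, answers, batch_size, max_q, Na):
    # Hypothetical helper: pad question token ids to max_q, build a
    # length mask, and one-hot encode the answer indices.
    Q = np.zeros((batch_size, max_q), dtype=np.int32)
    mask = np.zeros((batch_size, max_q), dtype=np.float32)
    A = np.zeros((batch_size, Na), dtype=np.float32)
    for row, j in enumerate(range(begin, end)):
        q = questions[j][:max_q]
        Q[row, :len(q)] = q
        mask[row, :len(q)] = 1.0
        A[row, answers[j]] = 1.0
    return Q, mask, A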
Example #36
 n_batches = N_train // batch_size + 1
 for epoch in range(n_epochs):
     epoch_loss = []
     times = 0.
     indexes = np.arange(N_train)
     np.random.shuffle(indexes)
     q_train = q_train[indexes]
     a_train = a_train[indexes]
     for idx in range(n_batches):
         tic = time()
         if idx%(n_batches//10)==0:
             print("Epoch %d - %d/%d : loss = %1.4f - time = %1.3fs"%(epoch,idx,
                                                                      n_batches,np.mean(epoch_loss),
                                                                       times/((n_batches//10)*batch_size)))
             times = 0.
         begin = idx*batch_size
         end = min((idx+1)*batch_size, N_train)
         Q, mask, A = get_batch(begin,end,q_train,a_train,batch_size,max_q,Na)
         _,l,l_s = sess.run([model_outputs['train_op'],
                             model_outputs['loss'],
                             model_outputs['loss_summary']], 
                            feed_dict={model_outputs['question']:Q,
                                       model_outputs['mask']:mask,
                                       model_outputs['answer']:A})
         epoch_loss.append(l)
         writer.add_summary(l_s,idx+epoch*n_batches)
         times += time() - tic
     with tf.device('/cpu:0'):
         test_acc = test((1+epoch)*n_batches)
         print("Epoch %d - Test accuracy = %1.3f" % (epoch+1, test_acc))
     saver.save(sess, join('/home/hbenyounes/vqa/saved_models/','model'), global_step=epoch)
Example #37
def train():
    assert args.batch_size % args.small_batch_size == 0, 'batch_size must be divisible by small_batch_size'

    # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = [model.init_hidden(args.small_batch_size) for _ in range(args.batch_size // args.small_batch_size)]
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        optimizer.zero_grad()

        start, end, s_id = 0, args.small_batch_size, 0
        while start < args.batch_size:
            cur_data, cur_targets = data[:, start: end], targets[:, start: end].contiguous().view(-1)

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden[s_id] = repackage_hidden(hidden[s_id])

            log_prob, hidden[s_id], rnn_hs, dropped_rnn_hs = parallel_model(cur_data, hidden[s_id], return_h=True)
            raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), cur_targets)

            loss = raw_loss
            # Activation Regularization
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
            # Temporal Activation Regularization (slowness)
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
            loss *= args.small_batch_size / args.batch_size
            total_loss += raw_loss.data * args.small_batch_size / args.batch_size
            loss.backward()

            s_id += 1
            start = end
            end = start + args.small_batch_size

            gc.collect()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        # total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            logging('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len