def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        num_batches = len(dev_batches)
        n_correct, n_total = 0, 0
        for batch_no in range(1, num_batches + 1):
            dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_tensors(
                dev_batches[batch_no - 1], self.dictionary, True)
            if self.config.cuda and torch.cuda.is_available():
                dev_sentences1 = dev_sentences1.cuda()
                dev_sentences2 = dev_sentences2.cuda()
                dev_labels = dev_labels.cuda()

            assert dev_sentences1.size(0) == dev_sentences2.size(0)

            score = self.model(dev_sentences1, sent_len1, dev_sentences2,
                               sent_len2)
            n_correct += (torch.max(score, 1)[1].view(
                dev_labels.size()).data == dev_labels.data).sum()
            n_total += len(dev_batches[batch_no - 1])

        return 100. * n_correct / n_total
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda and torch.cuda.is_available():
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2,
                               sent_len2)
            n_correct = (torch.max(score, 1)[1].view(
                train_labels.size()).data == train_labels.data).sum()

            loss = self.criterion(score, train_labels)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0
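
# A minimal epoch-driver sketch (an assumption, not part of the source repo):
# it assumes a `trainer` object exposing the train()/validate() methods above,
# plus hypothetical train/dev corpus objects, and keeps the best dev checkpoint.
def run_epochs(trainer, train_corpus, dev_corpus, num_epochs):
    best_dev_acc = 0
    for epoch in range(1, num_epochs + 1):
        trainer.train(train_corpus)             # one pass over the training data
        dev_acc = trainer.validate(dev_corpus)  # accuracy in percent
        print('epoch %d: dev accuracy = %.2f' % (epoch, dev_acc))
        if dev_acc > best_dev_acc:
            best_dev_acc = dev_acc
            torch.save(trainer.model.state_dict(), 'model_best.pt')
    return best_dev_acc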
def evaluate(model, batches, batch_label, dictionary, outfile=None):
    """Evaluate question classifier model on test data."""
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds = []
    y_true = []
    output = []
    for batch_no in range(len(batches)):
        if args.use_elmo:
            test_sentences1, sent_len1, test_sentences2, sent_len2, test_labels = helper.batch_to_elmo_input(
                batches[batch_no], dictionary, iseval=True)
        else:
            test_sentences1, sent_len1, test_sentences2, sent_len2, test_labels = helper.batch_to_tensors(
                batches[batch_no], dictionary, iseval=True)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        assert test_sentences1.size(0) == test_sentences2.size(0)

        if model.config.adversarial:
            softmax_prob, adv_loss, diff_loss = model(test_sentences1, sent_len1, test_sentences2, sent_len2,
                                                      batch_label)
        else:
            softmax_prob = model(test_sentences1, sent_len1, test_sentences2, sent_len2, batch_label)

        preds = torch.max(softmax_prob, 1)[1]
        y_preds.extend(preds.data.cpu().tolist())
        if not outfile:
            y_true.extend(test_labels.data.cpu().tolist())
            n_correct += (preds.view(test_labels.size()).data == test_labels.data).sum()
            n_total += len(batches[batch_no])
        else:
            current_y_preds = preds.data.cpu().tolist()
            for i in range(len(batches[batch_no])):
                output.append([batches[batch_no][i].id, current_y_preds[i]])

    if batch_label == 'quora':
        target_names = ['non_duplicate', 'duplicate']
    elif batch_label == 'snli' or batch_label == 'multinli':
        target_names = ['entailment', 'neutral', 'contradiction']

    if outfile:
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for item in output:
                f.write(str(item[0]) + ',' + target_names[item[1]] + '\n')
    else:
        print(classification_report(numpy.asarray(y_true), numpy.asarray(y_preds), target_names=target_names))
        return 100. * n_correct / n_total
def evaluate(model, batches, dictionary):
	"""Evaluate question classifier model on test data."""
	model.eval() # Turn on evaluation mode which disables dropout.
	SCORE = 1
	for batch_no in range(len(batches)):
		test_sentences1, sent_len1, test_sentences2, sent_len2, test_labels = helper.batch_to_tensors(batches[batch_no],dictionary)
		if args.cuda:
			test_sentences1 = test_sentences1.cuda()
			test_sentences2 = test_sentences2.cuda()
			# test_labels = test_labels.cuda()
		##
		softmax_prob = model(test_sentences1, sent_len1, test_sentences2, sent_len2)
		SCORE = torch.nn.functional.softmax(softmax_prob).cpu().data.numpy()[:,0] ##numpy() #.data.numpy() # cpu().tolist()	## use exp(x) / exp(x) + exp(y)
	return np.array(SCORE)
def evaluate(model, batches, dictionary, outfile=None):
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    for batch_no in range(len(batches)):
        test_sentences1, sent_len1, test_sentences2, sent_len2, test_labels = helper.batch_to_tensors(
            batches[batch_no], dictionary, True)
        if args.cuda and torch.cuda.is_available():
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        assert test_sentences1.size(0) == test_sentences2.size(0)

        score = model(test_sentences1, sent_len1, test_sentences2, sent_len2)
        preds = torch.max(score, 1)[1]
        if outfile:
            predictions = preds.data.cpu().tolist()
            for i in range(len(batches[batch_no])):
                output.append([batches[batch_no][i].id, predictions[i]])
        else:
            y_preds.extend(preds.data.cpu().tolist())
            y_true.extend(test_labels.data.cpu().tolist())
            n_correct += (preds.view(
                test_labels.size()).data == test_labels.data).sum()
            n_total += len(batches[batch_no])

        if model.config.log_test:
            # Log only the current batch; zipping the cumulative y_true/y_preds
            # lists here would re-write earlier examples on every batch.
            batch_true = test_labels.data.cpu().tolist()
            batch_preds = preds.data.cpu().tolist()
            sent_list = [inst.sentence1_str for inst in batches[batch_no]]
            with open('test_log.txt', 'a') as f:
                for gnd_truth, pred, sent in zip(batch_true, batch_preds,
                                                 sent_list):
                    f.write(
                        str(gnd_truth) + '\t' + str(pred) + '\t' + sent + '\n')

    if outfile:
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for item in output:
                f.write(str(item[0]) + ',' + target_names[item[1]] + '\n')
    else:
        # classification_report needs the collected labels, so build it only
        # in the no-outfile branch (y_true is empty otherwise).
        clf_report = classification_report(np.array(y_true), np.array(y_preds))
        return 100. * n_correct / n_total, 100. * f1_score(
            np.asarray(y_true), np.asarray(y_preds),
            average='weighted'), clf_report
    def validate(self):
        # Turn on evaluation mode which disables dropout.
        self.generator.eval()

        # Splitting the data in batches
        batches, batch_labels = [], []
        for task_name, task in self.dev_corpus.items():
            dev_batches = helper.batchify(task.data, self.config.batch_size)
            batches.extend(dev_batches)
            batch_labels.extend([task_name] * len(dev_batches))

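        # Shuffle the batches and their task labels in unison so every batch
        # stays paired with the task it came from.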
        combined = list(zip(batches, batch_labels))
        numpy.random.shuffle(combined)
        batches[:], batch_labels[:] = zip(*combined)
        print('number of dev batches = ', len(batches))

        num_batches = len(batches)
        n_correct, n_total = 0, 0
        for batch_no in range(1, num_batches + 1):
            if self.config.use_elmo:
                dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_elmo_input(
                    batches[batch_no - 1], self.dictionary, iseval=True)
            else:
                dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_tensors(
                    batches[batch_no - 1], self.dictionary, iseval=True)

            if self.config.cuda:
                dev_sentences1 = dev_sentences1.cuda()
                dev_sentences2 = dev_sentences2.cuda()
                dev_labels = dev_labels.cuda()

            assert dev_sentences1.size(0) == dev_sentences2.size(0)

            if self.config.adversarial:
                scores, adv_loss, diff_loss = self.generator(
                    dev_sentences1, sent_len1, dev_sentences2, sent_len2,
                    batch_labels[batch_no - 1])
            else:
                scores = self.generator(dev_sentences1, sent_len1,
                                        dev_sentences2, sent_len2,
                                        batch_labels[batch_no - 1])

            n_correct += (torch.max(scores, 1)[1].view(
                dev_labels.size()).data == dev_labels.data).sum()
            n_total += len(batches[batch_no - 1])

        return 100. * n_correct / n_total
def evaluate(model, batches, batch_label, dictionary):
    """Evaluate question classifier model on test data."""
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds = []
    y_true = []
    for batch_no in range(len(batches)):
        test_sentences1, sent_len1, test_sentences2, sent_len2, test_labels = helper.batch_to_tensors(
            batches[batch_no], dictionary)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        assert test_sentences1.size(0) == test_sentences2.size(0)

        if model.config.adversarial:
            softmax_prob, adv_loss, diff_loss = model(test_sentences1,
                                                      sent_len1,
                                                      test_sentences2,
                                                      sent_len2, batch_label)
        else:
            softmax_prob = model(test_sentences1, sent_len1, test_sentences2,
                                 sent_len2, batch_label)

        preds = torch.max(softmax_prob, 1)[1]
        y_preds.extend(preds.data.cpu().tolist())
        y_true.extend(test_labels.data.cpu().tolist())
        n_correct += (preds.view(
            test_labels.size()).data == test_labels.data).sum()
        n_total += len(batches[batch_no])

    if batch_label == 'quora':
        target_names = ['non_duplicate', 'duplicate']
    elif batch_label == 'snli' or batch_label == 'multinli':
        target_names = ['entailment', 'neutral', 'contradiction']
    print(
        classification_report(numpy.asarray(y_true),
                              numpy.asarray(y_preds),
                              target_names=target_names))

    return 100. * n_correct / n_total
def evaluate(model, batches, dictionary, outfile=None):
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    for batch_no in range(len(batches)):
        test_sentences1, sent_len1, test_sentences2, sent_len2, test_labels = helper.batch_to_tensors(batches[batch_no],
                                                                                                      dictionary)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        assert test_sentences1.size(0) == test_sentences2.size(0)

        score = model(test_sentences1, sent_len1, test_sentences2, sent_len2)
        preds = torch.max(score, 1)[1]
        if outfile:
            predictions = preds.data.cpu().tolist()
            for i in range(len(batches[batch_no])):
                output.append([batches[batch_no][i].id, predictions[i]])
        else:
            y_preds.extend(preds.data.cpu().tolist())
            y_true.extend(test_labels.data.cpu().tolist())
            n_correct += (preds.view(test_labels.size()).data == test_labels.data).sum()
            n_total += len(batches[batch_no])

    if outfile:
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for item in output:
                f.write(str(item[0]) + ',' + target_names[item[1]] + '\n')
    else:
        return 100. * n_correct / n_total, 100. * f1_score(numpy.asarray(y_true), numpy.asarray(y_preds),
                                                           average='weighted')
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        print_every = self.config.print_every
        start = time.time()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        num_batches = len(dev_batches)
        n_correct, n_total = 0, 0
        for batch_no in range(1, num_batches + 1):
            dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_tensors(
                dev_batches[batch_no - 1], self.dictionary, True)
            if self.config.cuda:
                dev_sentences1 = dev_sentences1.cuda()
                dev_sentences2 = dev_sentences2.cuda()
                dev_labels = dev_labels.cuda()

            assert dev_sentences1.size(0) == dev_sentences2.size(0)

            score = self.model(dev_sentences1, sent_len1, dev_sentences2,
                               sent_len2)
            n_correct += (torch.max(score, 1)[1].view(
                dev_labels.size()).data == dev_labels.data).sum()
            n_total += len(dev_batches[batch_no - 1])

            print_acc = 100. * n_correct / n_total
            if batch_no % print_every == 0 or self.config.debug:
                p = 100.0  # fixed placeholder printed in the progress line
                print('%s (%d %d%%) (%.2f) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, p, print_acc))

        return 100. * n_correct / n_total
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2)
            n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
            loss = self.criterion(score, train_labels)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # Gradient clipping via a temporary learning-rate shrink: when the
            # global gradient norm exceeds config.clip, scale the lr down for
            # this single update (equivalent to rescaling the gradients for
            # SGD-style steps), then restore it.
            shrink_factor = 1
            total_norm = 0

            for p in self.model.parameters():
                if p.requires_grad:
                    p.grad.data.div_(train_sentences1.size(0))  # normalize by the actual batch size
                    total_norm += p.grad.data.norm() ** 2
            total_norm = numpy.sqrt(total_norm)

            if total_norm > self.config.clip:
                shrink_factor = self.config.clip / total_norm
            current_lr = self.optimizer.param_groups[0]['lr']
            self.optimizer.param_groups[0]['lr'] = current_lr * shrink_factor  # shrink for this update only

            self.optimizer.step()
            self.optimizer.param_groups[0]['lr'] = current_lr  # restore the original lr

            print_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' % (
                    helper.show_progress(start, batch_no / num_batches), batch_no,
                    batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0
def evaluate(model, batches, dictionary, outfile=None, selection_time=0.9318):
    # selection_time=0.9318: selection cost for IMDB by the budget model.
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    start = time.time()
    num_batches = len(batches)

    num_tokens_padded = 0
    selected_tokens = 0

    for batch_no in range(len(batches)):
        test_sentences1, sent_len1, test_sentences2, sent_len2, test_labels = helper.batch_to_tensors(batches[batch_no],
                                                                                                      dictionary, True)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        assert test_sentences1.size(0) == test_sentences2.size(0)

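        # Track the tokens the selector actually kept vs. the fully padded
        # budget (two sentences per pair); force_min_sen_len is assumed to be
        # a module-level constant giving the padded sentence length.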
        selected_tokens += sum(sent_len1) + sum(sent_len2)
        num_tokens_padded += 2 * (force_min_sen_len * args.eval_batch_size)

        score = model(test_sentences1, sent_len1, test_sentences2, sent_len2)
        preds = torch.max(score, 1)[1]
        if outfile:
            predictions = preds.data.cpu().tolist()
            for i in range(len(batches[batch_no])):
                output.append([batches[batch_no][i].id, predictions[i]])
        else:
            y_preds.extend(preds.data.cpu().tolist())
            y_true.extend(test_labels.data.cpu().tolist())
            n_correct += (preds.view(test_labels.size()).data == test_labels.data).sum()
            n_total += len(batches[batch_no])

        if (batch_no+1) % args.print_every == 0:
            padded_p = 100.0 * selected_tokens/num_tokens_padded
            print_acc_avg = 100. * n_correct / n_total
            print('%s (%d %d%%) (padded %.2f) %.2f' % (
                helper.show_progress(start, (batch_no+1) / num_batches), (batch_no+1),
                (batch_no+1) / num_batches * 100, padded_p, print_acc_avg))


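    # Estimate full-padded-text evaluation time by scaling the measured time
    # by the padded-to-selected token ratio; selection_time adds the
    # precomputed cost of running the selector itself.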
    now = time.time()
    s = now - start

    estimated_full_text_padded_time = s * num_tokens_padded / selected_tokens
    s += selection_time

    print('estimated full text time padded = %s' % helper.convert_to_minutes(estimated_full_text_padded_time))

    padded_p = 100.0 * selected_tokens / num_tokens_padded
    padded_speed_up = 1.0 * estimated_full_text_padded_time / s

    print_acc_avg = 100. * n_correct / n_total
    print('total: %s (%d %d%%) (padded %.2f) %.2f' % (
        helper.show_progress(start, (batch_no+1) / num_batches), (batch_no+1),
        (batch_no+1) / num_batches * 100, padded_p, print_acc_avg))
    print('estimated padded speed up = %0.2f, selected-text percentage speed up (padded) = %0.2f' % (
        padded_speed_up, 100.0 / padded_p))

    if outfile:
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for item in output:
                f.write(str(item[0]) + ',' + target_names[item[1]] + '\n')
    else:
        return 100. * n_correct / n_total, 100. * f1_score(numpy.asarray(y_true), numpy.asarray(y_preds),
                                                           average='weighted'), s
    def train(self, train_corpus, epoch):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        shuffle = True
        # if self.config.task == 'sst': shuffle = False
        print('shuffle = ', shuffle)

        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size, shuffle)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2,
                               sent_len2)
            n_correct = (torch.max(score, 1)[1].view(
                train_labels.size()).data == train_labels.data).sum()
            loss = self.criterion(score, train_labels)

            ################## custom rationale loss (TODO) ##################
            # Port of the Theano generator objective: penalize the number of
            # selected tokens (zsum) and selection discontinuities (zdiff):
            #   logpz = -binary_crossentropy(probs, z) * masks, reshaped to x
            #   coherent_factor = args.sparsity * args.coherent
            #   cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
            #   cost_logpz = mean(cost_vec * sum(logpz, axis=0))
            ##################################################################

            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0
    def train(self):
        # Turn on training mode which enables dropout.
        self.generator.train()

        # Splitting the data in batches
        batches, batch_labels = [], []
        for task_name, task in self.train_corpus.items():
            train_batches = helper.batchify(task.data, self.config.batch_size)
            batches.extend(train_batches)
            batch_labels.extend([task_name] * len(train_batches))

        combined = list(zip(batches, batch_labels))
        numpy.random.shuffle(combined)
        batches[:], batch_labels[:] = zip(*combined)
        print('number of train batches = ', len(batches))

        start = time.time()
        num_back, print_acc_total, plot_acc_total = 0, 0, 0

        num_batches = len(batches)
        for batch_no in range(1, num_batches + 1):
            if self.config.use_elmo:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_elmo_input(
                    batches[batch_no - 1], self.dictionary)
            else:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                    batches[batch_no - 1], self.dictionary)

            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            if self.config.adversarial:
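                # Adversarial multi-task step (two optimizers): first update
                # the discriminator to recognize the source task from the
                # *detached* shared representations, then update the generator
                # with the task loss plus the adversarial (beta) and
                # diff/orthogonality (gamma) penalties.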
                self.optimizerD.zero_grad()
                scores, diff_loss, shared_rep = self.generator(
                    train_sentences1, sent_len1, train_sentences2, sent_len2,
                    batch_labels[batch_no - 1])
                n_correct = (torch.max(scores, 1)[1].view(
                    train_labels.size()).data == train_labels.data).sum()
                shared_sent_rep1 = shared_rep[0]
                shared_sent_rep2 = shared_rep[1]
                # run the discriminator to identify the source task
                task_prob1 = self.discriminator(
                    shared_sent_rep1.detach())  # B X num_tasks
                task_prob2 = self.discriminator(
                    shared_sent_rep2.detach())  # B X num_tasks
                comb_prob = torch.cat((task_prob1, task_prob2),
                                      0)  # 2B X num_tasks
                task_prob = torch.sum(comb_prob,
                                      0).squeeze()  # size = |num_tasks|
                adv_loss = -1 * task_prob[
                    self.task_ids[batch_labels[batch_no - 1]]]
                adv_loss.backward()
                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
                clip_grad_norm(
                    filter(lambda p: p.requires_grad,
                           self.discriminator.parameters()),
                    self.config.max_norm)
                self.optimizerD.step()

                self.optimizerG.zero_grad()
                cross_entropy_loss = self.criterion(scores, train_labels)
                # run the discriminator to identify the source task
                task_prob1 = self.discriminator(
                    shared_sent_rep1)  # B X num_tasks
                task_prob2 = self.discriminator(
                    shared_sent_rep2)  # B X num_tasks
                comb_prob = torch.cat((task_prob1, task_prob2),
                                      0)  # 2B X num_tasks
                task_prob = torch.sum(comb_prob,
                                      0).squeeze()  # size = |num_tasks|
                adv_loss = -1 * task_prob[
                    self.task_ids[batch_labels[batch_no - 1]]]
                total_loss = cross_entropy_loss + self.config.beta * adv_loss + self.config.gamma * diff_loss
                # Important if we are using nn.DataParallel()
                if total_loss.size(0) > 1:
                    total_loss = total_loss.mean()
                total_loss.backward()
                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
                clip_grad_norm(
                    filter(lambda p: p.requires_grad,
                           self.generator.parameters()), self.config.max_norm)
                self.optimizerG.step()
            else:
                self.optimizerG.zero_grad()
                scores = self.generator(train_sentences1, sent_len1,
                                        train_sentences2, sent_len2,
                                        batch_labels[batch_no - 1])
                n_correct = (torch.max(scores, 1)[1].view(
                    train_labels.size()).data == train_labels.data).sum()
                loss = self.criterion(scores, train_labels)
                # Important if we are using nn.DataParallel()
                if loss.size(0) > 1:
                    loss = loss.mean()
                loss.backward()
                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
                clip_grad_norm(
                    filter(lambda p: p.requires_grad,
                           self.generator.parameters()), self.config.max_norm)
                self.optimizerG.step()

            print_acc_total += 100. * n_correct / len(batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                sys.stdout.write("\b" * num_back)
                sys.stdout.write(" " * num_back)
                sys.stdout.write("\b" * num_back)
                log_info = '%s (%d %d%%) %.2f%%' % (
                    helper.show_progress(start, batch_no / num_batches),
                    batch_no, batch_no / num_batches * 100,
                    print_acc_total / batch_no)
                sys.stdout.write(log_info)
                sys.stdout.flush()
                num_back = len(log_info)

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0

            # Release cached GPU memory so it becomes visible to other
            # applications; tensors still in use are unaffected.
            torch.cuda.empty_cache()
    def train(self, train_corpus, epoch):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        shuffle = True
        # if self.config.task == 'sst': shuffle = False
        print('shuffle = ', shuffle)

        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size, shuffle)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            # This variant feeds only the first sentence (single-sentence
            # tasks such as SST).
            score = self.model(train_sentences1)
            n_correct = (torch.max(score, 1)[1].view(
                train_labels.size()).data == train_labels.data).sum()
            loss = self.criterion(score, train_labels)

            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0
#### testing
file_path = args.output_base_path + args.task + '/' + args.model_file_name
print('loading selector from: ', file_path)
helper.load_model(selector, file_path, 'state_dict', args.cuda)

selector.eval()

dev_batches = helper.batchify(test_corpus.data, args.batch_size)
print('number of dev batches = ', len(dev_batches))

num_batches = len(dev_batches)
n_correct, n_total = 0, 0
with open('../bcn_output/sst/predicted_text_words_dummy.txt', 'w') as wf:
    for batch_no in range(1, num_batches + 1):
        dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_tensors(
            dev_batches[batch_no - 1], dictionary, True)
        if args.cuda:
            dev_sentences1 = dev_sentences1.cuda()
            dev_sentences2 = dev_sentences2.cuda()
            dev_labels = dev_labels.cuda()

        assert dev_sentences1.size(0) == dev_sentences2.size(0)

        score = selector(dev_sentences1)
        n_correct += (torch.max(score, 1)[1].view(
            dev_labels.size()).data == dev_labels.data).sum()
        n_total += len(dev_batches[batch_no - 1])

        for (sent1, sel, tl) in zip(
                dev_sentences1,
                torch.max(score, 1)[1].view(dev_labels.size()).data,
                dev_labels.data):
            # Loop body truncated in the source; a minimal completion (an
            # assumption): log predicted vs. gold label per example.
            wf.write(str(sel) + '\t' + str(tl) + '\n')
    def train(self):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        batches, batch_labels = [], []
        for task_name, task in self.train_corpus.items():
            train_batches = helper.batchify(task.data, self.config.batch_size)
            batches.extend(train_batches)
            batch_labels.extend([task_name] * len(train_batches))

        combined = list(zip(batches, batch_labels))
        numpy.random.shuffle(combined)
        batches[:], batch_labels[:] = zip(*combined)
        print('number of train batches = ', len(batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0
        num_back = 0

        num_batches = len(batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            if self.config.use_elmo:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_elmo_tensors(
                    batches[batch_no - 1], self.dictionary)
            else:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                    batches[batch_no - 1], self.dictionary)

            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()
            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2, batch_labels[batch_no - 1])
            n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
            loss = self.criterion(score, train_labels)

            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()), self.config.max_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                sys.stdout.write("\b" * num_back)
                sys.stdout.write(" " * num_back)
                sys.stdout.write("\b" * num_back)
                log_info = '%s (%d %d%%) %.2f' % (helper.show_progress(start, batch_no / num_batches), batch_no,
                                                  batch_no / num_batches * 100, print_acc_total / batch_no)
                sys.stdout.write(log_info)
                sys.stdout.flush()
                num_back = len(log_info)