Example #1
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss = 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            queries, docs, click_labels = helper.batch_to_tensor(dev_batches[batch_no - 1], self.dictionary,
                                                                 self.config)
            if self.config.cuda:
                # batch_size x max_query_length
                queries = queries.cuda()
                # batch_size x num_clicks_per_query x max_document_length
                docs = docs.cuda()
                # batch_size x num_clicks_per_query
                click_labels = click_labels.cuda()

            score = self.model(queries, docs)
            loss = self.compute_loss(score, click_labels)
            dev_loss += loss.data[0]

        return dev_loss / num_batches
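
All of the snippets on this page index the result of helper.batchify(data, batch_size) as a plain list of batches. The helper itself is not shown; a minimal sketch of the assumed behaviour (chunk the examples into groups of at most batch_size, with optional shuffling as in the snippets that pass a third argument) might look like this:

import random

def batchify(data, batch_size, shuffle=False):
    # Hypothetical sketch for illustration only; the real helper.batchify may
    # differ, e.g. in how it handles the final partial batch.
    examples = list(data)
    if shuffle:
        random.shuffle(examples)
    # Consecutive chunks of at most batch_size examples.
    return [examples[i:i + batch_size] for i in range(0, len(examples), batch_size)]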
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss = 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            videos, video_len, descriptions, des_len = helper.videos_to_tensor(
                dev_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                videos = videos.cuda()  # batch_size x max_images_per_video x num_image_features
                descriptions = descriptions.cuda()  # batch_size x max_description_length
                des_len = des_len.cuda()  # batch_size

            loss = self.model(videos, video_len, descriptions, des_len)
            if loss.size(0) > 1:
                loss = loss.mean()
            dev_loss += loss.data[0]

        return dev_loss / num_batches
Example #3
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        num_batches = len(dev_batches)
        n_correct, n_total = 0, 0
        for batch_no in range(1, num_batches + 1):
            dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels, pos_sentences1, pos_sentences2 = helper.batch_to_tensors(
                dev_batches[batch_no - 1],
                self.dictionary,
                iseval=True,
                pos=self.config.pos)
            if self.config.cuda and torch.cuda.is_available():
                dev_sentences1 = dev_sentences1.cuda()
                dev_sentences2 = dev_sentences2.cuda()
                pos_sentences1 = pos_sentences1.cuda()
                pos_sentences2 = pos_sentences2.cuda()
                dev_labels = dev_labels.cuda()

            assert dev_sentences1.size(0) == dev_sentences2.size(0)

            if self.config.pos:
                score = self.model(dev_sentences1, sent_len1, dev_sentences2,
                                   sent_len2, pos_sentences1, pos_sentences2)
            else:
                score = self.model(dev_sentences1, sent_len1, dev_sentences2,
                                   sent_len2)
            n_correct += (torch.max(score, 1)[1].view(
                dev_labels.size()).data == dev_labels.data).sum()
            n_total += len(dev_batches[batch_no - 1])

        return 100. * n_correct / n_total
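
The sentence-pair snippets unpack helper.batch_to_tensors(...) into two padded word-index tensors, their lengths, and a label tensor. As a rough sketch of that assumed contract (the real helper, its pos/iseval options, and the batch item attributes used below are not shown on this page and are guessed here):

import torch

def batch_to_tensors(batch, dictionary, iseval=False):
    # Hypothetical sketch: each batch item is assumed to expose
    # .sentence1 / .sentence2 (token lists) and an integer .label.
    def pad(sentences):
        lengths = [len(s) for s in sentences]
        out = torch.zeros(len(sentences), max(lengths)).long()
        for i, s in enumerate(sentences):
            out[i, :len(s)] = torch.LongTensor(
                [dictionary.word2idx.get(w, 0) for w in s])
        return out, lengths

    sents1, len1 = pad([item.sentence1 for item in batch])
    sents2, len2 = pad([item.sentence2 for item in batch])
    labels = torch.LongTensor([item.label for item in batch])
    return sents1, len1, sents2, len2, labels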
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda and torch.cuda.is_available():
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2,
                               sent_len2)
            n_correct = (torch.max(score, 1)[1].view(
                train_labels.size()).data == train_labels.data).sum()

            loss = self.criterion(score, train_labels)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0
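
The logging lines format helper.show_progress(start, batch_no / num_batches) into the printed status string. A plausible sketch of such a helper (elapsed time plus an estimate of the remaining time, in the style of the classic PyTorch tutorials; the real implementation is not shown here):

import math
import time

def show_progress(start, fraction):
    # Hypothetical sketch: "elapsed (- estimated remaining)" given the fraction done.
    def as_minutes(seconds):
        minutes = math.floor(seconds / 60)
        return '%dm %ds' % (minutes, seconds - minutes * 60)
    elapsed = time.time() - start
    remaining = elapsed / fraction - elapsed if fraction > 0 else 0.0
    return '%s (- %s)' % (as_minutes(elapsed), as_minutes(remaining))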
    def train(self):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        batches, batch_labels = [], []
        for task_name, task in self.train_corpus.items():
            train_batches = helper.batchify(task.data, self.config.batch_size)
            batches.extend(train_batches)
            batch_labels.extend([task_name] * len(train_batches))

        combined = list(zip(batches, batch_labels))
        numpy.random.shuffle(combined)
        batches[:], batch_labels[:] = zip(*combined)
        print('number of train batches = ', len(batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0
        num_back = 0

        num_batches = len(batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            if self.config.use_elmo:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_elmo_tensors(
                    batches[batch_no - 1], self.dictionary)
            else:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                    batches[batch_no - 1], self.dictionary)

            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()
            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2, batch_labels[batch_no - 1])
            n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
            loss = self.criterion(score, train_labels)

            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()), self.config.max_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                sys.stdout.write("\b" * num_back)
                sys.stdout.write(" " * num_back)
                sys.stdout.write("\b" * num_back)
                log_info = '%s (%d %d%%) %.2f' % (helper.show_progress(start, batch_no / num_batches), batch_no,
                                                  batch_no / num_batches * 100, print_acc_total / batch_no)
                sys.stdout.write(log_info)
                sys.stdout.flush()
                num_back = len(log_info)
Example #6
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # splitting the data in batches
        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_loss_total = 0
        plot_loss_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = helper.session_to_tensor(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                # batch_size x session_length x max_query_length
                session_queries = session_queries.cuda()
                # batch_size x session_length
                session_query_length = session_query_length.cuda()
                # batch_size x session_length x num_rel_docs_per_query x max_doc_length
                rel_docs = rel_docs.cuda()
                # batch_size x session_length x num_rel_docs_per_query
                rel_docs_length = rel_docs_length.cuda()
                # batch_size x session_length x num_rel_docs_per_query
                doc_labels = doc_labels.cuda()

            loss = self.model(session_queries, session_query_length, rel_docs,
                              rel_docs_length, doc_labels)
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            self.optimizer.step()

            print_loss_total += loss.data[0]
            plot_loss_total += loss.data[0]

            if batch_no % self.config.print_every == 0:
                print_loss_avg = print_loss_total / self.config.print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_loss_avg))

            if batch_no % self.config.plot_every == 0:
                plot_loss_avg = plot_loss_total / self.config.plot_every
                self.train_losses.append(plot_loss_avg)
                plot_loss_total = 0
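
These snippets are written against the PyTorch 0.3-era API: loss.data[0] for reading a scalar loss and the old clip_grad_norm function. On PyTorch 0.4 and later the equivalents are loss.item() and torch.nn.utils.clip_grad_norm_. A minimal sketch of the same training step in the newer API, assuming the model, optimizer and batch tensors from the snippets above:

import torch
from torch.nn.utils import clip_grad_norm_

def train_step(model, optimizer, batch_tensors, max_norm):
    optimizer.zero_grad()
    loss = model(*batch_tensors)
    # Mean-reduce if the model returned one loss per replica (nn.DataParallel).
    if loss.dim() > 0 and loss.size(0) > 1:
        loss = loss.mean()
    loss.backward()
    clip_grad_norm_((p for p in model.parameters() if p.requires_grad), max_norm)
    optimizer.step()
    return loss.item()  # replaces loss.data[0]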
def getScoreSet1Toward2(set1, set2, model, dictionary, goAnnotation):
    def1, def2 = getDefinitions(set1, set2, goAnnotation, isJoin=1)
    # reverse the sentence ordering
    arrString = prepareBatchReverse(def1, def2)
    test_corpus = data.Corpus2(dictionary)
    test_corpus.parse(arrString, args.tokenize)
    test_batches = helper.batchify(test_corpus.data, 2)  # batch size hard-coded to 2 here
    # print(test_batches)
    # print(test_batches[0])
    # @score is the distance of set 1 toward set 2; the distance is not symmetric
    score = evaluate(model, test_batches, dictionary)
    return score
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # splitting the data in batches
        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_loss_total = 0
        plot_loss_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            videos, video_len, descriptions, des_len = helper.videos_to_tensor(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                videos = videos.cuda()  # batch_size x max_images_per_video x num_image_features
                descriptions = descriptions.cuda()  # batch_size x max_description_length
                des_len = des_len.cuda()  # batch_size

            loss = self.model(videos, video_len, descriptions, des_len)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            self.optimizer.step()

            print_loss_total += loss.data[0]
            plot_loss_total += loss.data[0]

            if batch_no % self.config.print_every == 0:
                print_loss_avg = print_loss_total / self.config.print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_loss_avg))

            if batch_no % self.config.plot_every == 0:
                plot_loss_avg = plot_loss_total / self.config.plot_every
                self.train_losses.append(plot_loss_avg)
                plot_loss_total = 0
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # splitting the data in batches
        train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_loss_total = 0
        plot_loss_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_queries, query_len, train_clicks, doc_len, click_labels = helper.batch_to_tensor(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                # batch_size x max_query_length
                train_queries = train_queries.cuda()
                # batch_size x num_clicks_per_query x max_document_length
                train_clicks = train_clicks.cuda()
                # batch_size x num_clicks_per_query
                click_labels = click_labels.cuda()

            score = self.model(train_queries, query_len, train_clicks, doc_len)
            # loss = self.compute_loss(score, click_labels)
            loss = f.binary_cross_entropy_with_logits(score, click_labels)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()), self.config.max_norm)
            self.optimizer.step()

            print_loss_total += loss.data[0]
            plot_loss_total += loss.data[0]

            if batch_no % self.config.print_every == 0:
                print_loss_avg = print_loss_total / self.config.print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (
                    helper.show_progress(start, batch_no / num_batches), batch_no,
                    batch_no / num_batches * 100, print_loss_avg))

            if batch_no % self.config.plot_every == 0:
                plot_loss_avg = plot_loss_total / self.config.plot_every
                self.train_losses.append(plot_loss_avg)
                plot_loss_total = 0
Example #10
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # splitting the data in batches
        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_loss_total = 0
        plot_loss_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sessions, length, train_clicks, click_labels = helper.session_to_tensor(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                # batch_size x session_length x max_query_length
                train_sessions = train_sessions.cuda()
                # batch_size x session_length x num_clicks_per_query x max_document_length
                train_clicks = train_clicks.cuda()
                # batch_size x session_length
                length = length.cuda()
                # batch_size x session_length x num_clicks_per_query
                click_labels = click_labels.cuda()

            loss = self.model(train_sessions, length, train_clicks,
                              click_labels)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()
            self.optimizer.step()

            print_loss_total += loss.data[0]
            plot_loss_total += loss.data[0]

            if batch_no % self.config.print_every == 0:
                print_loss_avg = print_loss_total / self.config.print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_loss_avg))

            if batch_no % self.config.plot_every == 0:
                plot_loss_avg = plot_loss_total / self.config.plot_every
                self.train_losses.append(plot_loss_avg)
                plot_loss_total = 0
Example #11
    def validate(self):
        # Turn on evaluation mode which disables dropout.
        self.generator.eval()

        # Splitting the data in batches
        batches, batch_labels = [], []
        for task_name, task in self.dev_corpus.items():
            dev_batches = helper.batchify(task.data, self.config.batch_size)
            batches.extend(dev_batches)
            batch_labels.extend([task_name] * len(dev_batches))

        combined = list(zip(batches, batch_labels))
        numpy.random.shuffle(combined)
        batches[:], batch_labels[:] = zip(*combined)
        print('number of dev batches = ', len(batches))

        num_batches = len(batches)
        n_correct, n_total = 0, 0
        for batch_no in range(1, num_batches + 1):
            if self.config.use_elmo:
                dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_elmo_input(
                    batches[batch_no - 1], self.dictionary, iseval=True)
            else:
                dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_tensors(
                    batches[batch_no - 1], self.dictionary, iseval=True)

            if self.config.cuda:
                dev_sentences1 = dev_sentences1.cuda()
                dev_sentences2 = dev_sentences2.cuda()
                dev_labels = dev_labels.cuda()

            assert dev_sentences1.size(0) == dev_sentences2.size(0)

            if self.config.adversarial:
                scores, adv_loss, diff_loss = self.generator(
                    dev_sentences1, sent_len1, dev_sentences2, sent_len2,
                    batch_labels[batch_no - 1])
            else:
                scores = self.generator(dev_sentences1, sent_len1,
                                        dev_sentences2, sent_len2,
                                        batch_labels[batch_no - 1])

            n_correct += (torch.max(scores, 1)[1].view(
                dev_labels.size()).data == dev_labels.data).sum()
            n_total += len(batches[batch_no - 1])

        return 100. * n_correct / n_total
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # splitting the data in batches
        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_loss_total = 0
        plot_loss_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_queries, train_docs, click_labels = helper.batch_to_tensor(
                train_batches[batch_no - 1], self.dictionary,
                self.config.max_query_length, self.config.max_doc_length)
            if self.config.cuda:
                # batch_size x max_query_length x vocab_size
                train_queries = train_queries.cuda()
                # batch_size x num_rel_docs_per_query x max_doc_length x vocab_size
                train_docs = train_docs.cuda()
                # batch_size x num_rel_docs_per_query
                click_labels = click_labels.cuda()

            softmax_prob = self.model(train_queries, train_docs)
            loss = self.compute_loss(softmax_prob, click_labels)
            loss.backward()
            self.optimizer.step()

            print_loss_total += loss.data[0]
            plot_loss_total += loss.data[0]

            if batch_no % self.config.print_every == 0:
                print_loss_avg = print_loss_total / self.config.print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_loss_avg))

            if batch_no % self.config.plot_every == 0:
                plot_loss_avg = plot_loss_total / self.config.plot_every
                self.train_losses.append(plot_loss_avg)
                plot_loss_total = 0
def get_trained_model2(c, corpus, dictionary, non_zero_indices):
    model = LogisticRegression(penalty='l1', tol=0.0001, C=c, fit_intercept=True,
                               intercept_scaling=1, solver='liblinear',
                               max_iter=args.epochs, multi_class='ovr', verbose=0)
    train_batches = helper.batchify(corpus.data, args.batch_size)
    print('number of train batches = ', len(train_batches))

    num_batches = len(train_batches)
    n_correct, n_total = 0, 0
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' training batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        train_sentences1, train_labels = helper.batch_to_one_hot_encoded(
            train_batches[batch_no - 1],
            dictionary,
            non_zero_indices=non_zero_indices)
        model.fit(train_sentences1, train_labels)
    return model
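
Note that scikit-learn's LogisticRegression.fit is not incremental: with the liblinear solver each call retrains from scratch, so looping over batches as above effectively keeps only the model fitted on the last batch. If true mini-batch training is intended, an estimator with partial_fit would be needed; a sketch under that assumption (the classes argument and the reuse of the helpers above are hypothetical here):

from sklearn.linear_model import SGDClassifier

def get_trained_model_incremental(corpus, dictionary, non_zero_indices, classes):
    # SGDClassifier with logistic loss approximates logistic regression and
    # supports genuine incremental fitting via partial_fit.
    model = SGDClassifier(loss='log_loss', penalty='l1')  # use loss='log' on scikit-learn < 1.1
    train_batches = helper.batchify(corpus.data, args.batch_size)
    for i, batch in enumerate(train_batches):
        X, y = helper.batch_to_one_hot_encoded(batch, dictionary,
                                               non_zero_indices=non_zero_indices)
        # `classes` must enumerate every possible label; it is required on the first call.
        model.partial_fit(X, y, classes=classes if i == 0 else None)
    return model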
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss = 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            session_queries, session_query_length = helper.session_to_tensor(
                dev_batches[batch_no - 1], self.dictionary, True)
            if self.config.cuda:
                session_queries = session_queries.cuda()
                session_query_length = session_query_length.cuda()

            loss = self.model(session_queries, session_query_length)
            dev_loss += loss.data[0]

        return dev_loss / num_batches
Example #15
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss = 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            dev_queries, dev_clicks, click_labels = helper.batch_to_tensor(dev_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                dev_queries = dev_queries.cuda()
                dev_clicks = dev_clicks.cuda()
                click_labels = click_labels.cuda()

            score = self.model(dev_queries, dev_clicks)
            loss = self.compute_loss(score, click_labels)
            dev_loss += loss.data[0]

        return dev_loss / num_batches
Example #16
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        print_every = self.config.print_every
        start = time.time()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        num_batches = len(dev_batches)
        n_correct, n_total = 0, 0
        for batch_no in range(1, num_batches + 1):
            dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_tensors(
                dev_batches[batch_no - 1], self.dictionary, True)
            if self.config.cuda:
                dev_sentences1 = dev_sentences1.cuda()
                dev_sentences2 = dev_sentences2.cuda()
                dev_labels = dev_labels.cuda()

            assert dev_sentences1.size(0) == dev_sentences2.size(0)

            score = self.model(dev_sentences1, sent_len1, dev_sentences2,
                               sent_len2)
            n_correct += (torch.max(score, 1)[1].view(
                dev_labels.size()).data == dev_labels.data).sum()
            n_total += len(dev_batches[batch_no - 1])

            print_acc = 100. * n_correct / n_total
            if batch_no % print_every == 0 or self.config.debug:
                p = 100.0
                print('%s (%d %d%%) (%.2f) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, p, print_acc))

        return 100. * n_correct / n_total
def eval_routine(corpus, dictionary, model, non_zero_indices=None):
    nexamples = len(corpus.data)
    dev_batches = helper.batchify(corpus.data, args.batch_size)
    print('number of dev batches = ', len(dev_batches))
    total_acc = 0.0
    correct = 0.0

    num_batches = len(dev_batches)
    n_correct, n_total = 0, 0
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' validation batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        eval_sentences, eval_labels = helper.batch_to_one_hot_encoded(
            dev_batches[batch_no - 1],
            dictionary,
            non_zero_indices=non_zero_indices)
        acc = model.score(eval_sentences, eval_labels)
        correct += acc * len(eval_labels)
        total_acc += acc
        # if batch_no%500 == 0 :print(' for this minibatch score: ', acc, ' correct: ', acc*len(eval_labels), ' of ', len(eval_labels), 'total accc: ', total_acc, ' total correct: ', correct)
    print(' Correct: ', correct, ' acc: ', correct / nexamples,
          ' sanity check: ', total_acc / num_batches)
    return correct / nexamples
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss = 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            dev_queries, q_len, dev_clicks, d_len, click_labels = helper.batch_to_tensor(dev_batches[batch_no - 1],
                                                                                         self.dictionary, True)
            if self.config.cuda:
                dev_queries = dev_queries.cuda()
                dev_clicks = dev_clicks.cuda()
                click_labels = click_labels.cuda()

            score = self.model(dev_queries, q_len, dev_clicks, d_len)
            loss = f.binary_cross_entropy_with_logits(score, click_labels)
            if loss.size(0) > 1:
                loss = loss.mean()
            dev_loss += loss.data[0]

        return dev_loss / num_batches
Example #19
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss = 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            dev_sessions, length, dev_clicks, click_labels = helper.session_to_tensor(
                dev_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                dev_sessions = dev_sessions.cuda()
                dev_clicks = dev_clicks.cuda()
                length = length.cuda()
                click_labels = click_labels.cuda()

            loss = self.model(dev_sessions, length, dev_clicks, click_labels)
            if loss.size(0) > 1:
                loss = loss.mean()
            dev_loss += loss.data[0]

        return dev_loss / num_batches
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss = 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            q1_var, q1_len, q2_var, q2_len = helper.batch_to_tensor(
                dev_batches[batch_no - 1],
                self.dictionary,
                reverse=self.config.reverse,
                iseval=True)
            if self.config.cuda:
                q1_var = q1_var.cuda()  # batch_size x max_len
                q2_var = q2_var.cuda()  # batch_size x max_len
                q2_len = q2_len.cuda()  # batch_size

            loss = self.model(q1_var, q1_len, q2_var, q2_len)
            dev_loss += loss.data[0]

        return dev_loss / num_batches
    def validate(self, dev_corpus):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()

        dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
        print('number of dev batches = ', len(dev_batches))

        dev_loss, dev_click_loss, dev_decoding_loss = 0, 0, 0
        num_batches = len(dev_batches)
        for batch_no in range(1, num_batches + 1):
            session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = helper.session_to_tensor(
                dev_batches[batch_no - 1], self.dictionary, True)
            if self.config.cuda:
                session_queries = session_queries.cuda()
                session_query_length = session_query_length.cuda()
                rel_docs = rel_docs.cuda()
                rel_docs_length = rel_docs_length.cuda()
                doc_labels = doc_labels.cuda()

            click_loss, decoding_loss = self.model(session_queries,
                                                   session_query_length,
                                                   rel_docs, rel_docs_length,
                                                   doc_labels)
            if click_loss.size(0) > 1:
                click_loss = click_loss.mean()
            if decoding_loss.size(0) > 1:
                decoding_loss = decoding_loss.mean()

            dev_click_loss += click_loss.data[0]
            dev_decoding_loss += decoding_loss.data[0]
            dev_loss += click_loss.data[0] + decoding_loss.data[0]

        print('validation loss = %.4f %.4f' %
              ((dev_click_loss / num_batches),
               (dev_decoding_loss / num_batches)))
        return dev_loss / num_batches
print('Train set size = ', len(train_corpus.data))
print('Dev set size = ', len(dev_corpus.data))
print('Vocabulary size = ', len(dictionary))

# save the dictionary object to use during testing
helper.save_object(dictionary, args.save_path + 'dictionary.p')

# embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, args.word_vectors_file)
# helper.save_word_embeddings('../data/glove/', 'glove.840B.300d.q2q.txt', embeddings_index, dictionary.idx2word)

embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                               'glove.840B.300d.q2q.txt')
print('Number of OOV words = ', len(dictionary) - len(embeddings_index))

# Splitting the data in batches
train_batches = helper.batchify(train_corpus.data, args.batch_size)
print('Number of train batches = ', len(train_batches))
dev_batches = helper.batchify(dev_corpus.data, args.batch_size)
print('Number of dev batches = ', len(dev_batches))

# ###############################################################################
# # Build the model
# ###############################################################################

model = Sequence2Sequence(dictionary, embeddings_index, args)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       args.lr)
best_loss = -1

# for training on multiple GPUs. set multiple GPUs by setting CUDA_VISIBLE_DEVICES, ex., CUDA_VISIBLE_DEVICES=0,1
if 'CUDA_VISIBLE_DEVICES' in os.environ:
Example #23
        if 'imdb' in args.task:
            ###############################################################################
            # Load Learning to Skim paper's Pickle file
            ###############################################################################
            train_d, dev_d, test_d = helper.get_splited_imdb_data(
                args.save_path + 'data/' + 'imdb.p')
            test_corpus.parse(test_d, task, args.max_example)

        elif task == 'multinli' and args.test != 'train':
            for partition in ['_matched', '_mismatched']:
                test_corpus.parse(
                    args.data + task + '/' + args.test + partition + '.txt',
                    task, args.max_example)
                print('[' + partition[1:] + '] dataset size = ',
                      len(test_corpus.data))
                test_batches = helper.batchify(test_corpus.data,
                                               args.batch_size)
                if args.test == 'test':
                    evaluate(model, test_batches, dictionary,
                             args.save_path + args.task + partition + '.csv')
                else:
                    test_accuracy, test_f1 = evaluate(model, test_batches,
                                                      dictionary)
                    print('[' + partition[1:] +
                          '] accuracy: %.2f%%' % test_accuracy)
                    print('[' + partition[1:] + '] f1: %.2f%%' % test_f1)
        else:
            test_corpus.parse(args.data + task + '/' + args.test + '.txt',
                              task, args.max_example)
        print('dataset size = ', len(test_corpus.data))
        test_batches = helper.batchify(test_corpus.data, args.batch_size)
        test_accuracy, test_f1, clf_report = evaluate(model, test_batches,
Example #24
    def train(self):
        # Turn on training mode which enables dropout.
        self.generator.train()

        # Splitting the data in batches
        batches, batch_labels = [], []
        for task_name, task in self.train_corpus.items():
            train_batches = helper.batchify(task.data, self.config.batch_size)
            batches.extend(train_batches)
            batch_labels.extend([task_name] * len(train_batches))

        combined = list(zip(batches, batch_labels))
        numpy.random.shuffle(combined)
        batches[:], batch_labels[:] = zip(*combined)
        print('number of train batches = ', len(batches))

        start = time.time()
        num_back, print_acc_total, plot_acc_total = 0, 0, 0

        num_batches = len(batches)
        for batch_no in range(1, num_batches + 1):
            if self.config.use_elmo:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_elmo_input(
                    batches[batch_no - 1], self.dictionary)
            else:
                train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                    batches[batch_no - 1], self.dictionary)

            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            if self.config.adversarial:
                self.optimizerD.zero_grad()
                scores, diff_loss, shared_rep = self.generator(
                    train_sentences1, sent_len1, train_sentences2, sent_len2,
                    batch_labels[batch_no - 1])
                n_correct = (torch.max(scores, 1)[1].view(
                    train_labels.size()).data == train_labels.data).sum()
                shared_sent_rep1 = shared_rep[0]
                shared_sent_rep2 = shared_rep[1]
                # run the discriminator to distinguish tasks
                task_prob1 = self.discriminator(
                    shared_sent_rep1.detach())  # B X num_tasks
                task_prob2 = self.discriminator(
                    shared_sent_rep2.detach())  # B X num_tasks
                comb_prob = torch.cat((task_prob1, task_prob2),
                                      0)  # 2B X num_tasks
                task_prob = torch.sum(comb_prob,
                                      0).squeeze()  # size = |num_tasks|
                adv_loss = -1 * task_prob[self.task_ids[batch_labels[batch_no - 1]]]
                adv_loss.backward()
                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
                clip_grad_norm(
                    filter(lambda p: p.requires_grad,
                           self.discriminator.parameters()),
                    self.config.max_norm)
                self.optimizerD.step()

                self.optimizerG.zero_grad()
                cross_entropy_loss = self.criterion(scores, train_labels)
                # run the discriminator to distinguish tasks
                task_prob1 = self.discriminator(
                    shared_sent_rep1)  # B X num_tasks
                task_prob2 = self.discriminator(
                    shared_sent_rep2)  # B X num_tasks
                comb_prob = torch.cat((task_prob1, task_prob2),
                                      0)  # 2B X num_tasks
                task_prob = torch.sum(comb_prob,
                                      0).squeeze()  # size = |num_tasks|
                adv_loss = -1 * task_prob[self.task_ids[batch_labels[batch_no - 1]]]
                total_loss = cross_entropy_loss + self.config.beta * adv_loss + self.config.gamma * diff_loss
                # Important if we are using nn.DataParallel()
                if total_loss.size(0) > 1:
                    total_loss = total_loss.mean()
                total_loss.backward()
                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
                clip_grad_norm(
                    filter(lambda p: p.requires_grad,
                           self.generator.parameters()), self.config.max_norm)
                self.optimizerG.step()
            else:
                self.optimizerG.zero_grad()
                scores = self.generator(train_sentences1, sent_len1,
                                        train_sentences2, sent_len2,
                                        batch_labels[batch_no - 1])
                n_correct = (torch.max(scores, 1)[1].view(
                    train_labels.size()).data == train_labels.data).sum()
                loss = self.criterion(scores, train_labels)
                # Important if we are using nn.DataParallel()
                if loss.size(0) > 1:
                    loss = loss.mean()
                loss.backward()
                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
                clip_grad_norm(
                    filter(lambda p: p.requires_grad,
                           self.generator.parameters()), self.config.max_norm)
                self.optimizerG.step()

            print_acc_total += 100. * n_correct / len(batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                sys.stdout.write("\b" * num_back)
                sys.stdout.write(" " * num_back)
                sys.stdout.write("\b" * num_back)
                log_info = '%s (%d %d%%) %.2f%%' % (
                    helper.show_progress(start, batch_no / num_batches),
                    batch_no, batch_no / num_batches * 100,
                    print_acc_total / batch_no)
                sys.stdout.write(log_info)
                sys.stdout.flush()
                num_back = len(log_info)

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0

            # this releases all cache memory and becomes visible to other applications
            torch.cuda.empty_cache()
        multinli_test_mismatched = data.Corpus(args.data + 'multinli/', dictionary)
        multinli_test_mismatched.parse('test_mismatched.txt', 'multinli', args.tokenize, is_test_corpus=True)
        print('multinli test [mismatched] set size = ', len(multinli_test_mismatched.data))
        tasks.append(('multinli', 3))

    if tasks:
        model = Generator(dictionary, embeddings_index, args, tasks)
        print(model)
        if args.cuda:
            model = model.cuda()
        helper.load_model_states_from_checkpoint(model, args.save_path + 'model_best.pth.tar', 'state_dict_G',
                                                 args.cuda)
        print('vocabulary size = ', len(dictionary))

        if 'quora' in args.task:
            test_batches = helper.batchify(quora_test.data, args.batch_size)
            test_accuracy = evaluate(model, test_batches, 'quora', dictionary)
            print('quora test accuracy: %f%%' % test_accuracy)

        if 'snli' in args.task:
            test_batches = helper.batchify(snli_test.data, args.batch_size)
            test_accuracy = evaluate(model, test_batches, 'snli', dictionary)
            print('snli test accuracy: %f%%' % test_accuracy)

        if 'multinli' in args.task:
            # test matched part
            test_batches = helper.batchify(multinli_test_matched.data, args.batch_size)
            evaluate(model, test_batches, 'multinli', dictionary, args.save_path + 'multinli_matched.csv')
            # test mismatched part
            test_batches = helper.batchify(multinli_test_mismatched.data, args.batch_size)
            evaluate(model, test_batches, 'multinli', dictionary, args.save_path + 'multinli_mismatched.csv')
Example #26
    def train(self, train_corpus):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2)
            n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
            loss = self.criterion(score, train_labels)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()

            # Gradient shrinking: if the total gradient norm exceeds the clip
            # threshold, scale the learning rate down for this update only.
            shrink_factor = 1
            total_norm = 0

            for p in self.model.parameters():
                if p.requires_grad:
                    p.grad.data.div_(train_sentences1.size(0))  # divide by the actual batch size
                    total_norm += p.grad.data.norm() ** 2
            total_norm = numpy.sqrt(total_norm)

            if total_norm > self.config.clip:
                shrink_factor = self.config.clip / total_norm
            current_lr = self.optimizer.param_groups[0]['lr']  # current lr (no external "lr", for adam)
            self.optimizer.param_groups[0]['lr'] = current_lr * shrink_factor  # just for update

            self.optimizer.step()
            self.optimizer.param_groups[0]['lr'] = current_lr

            print_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' % (
                    helper.show_progress(start, batch_no / num_batches), batch_no,
                    batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0
    def train(self, train_corpus, epoch):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        shuffle = True
        # if self.config.task == 'sst': shuffle = False
        print(shuffle)

        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size, shuffle)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            score = self.model(train_sentences1, sent_len1, train_sentences2,
                               sent_len2)
            n_correct = (torch.max(score, 1)[1].view(
                train_labels.size()).data == train_labels.data).sum()
            # print (' score size ', score.size(), train_labels.size())
            loss = self.criterion(score, train_labels)

            ############################ custom new_loss ############################

            # z2 = z_pred.dimshuffle((0,1,"x"))
            # logpz = - T.nnet.binary_crossentropy(probs, z2) * masks
            # logpz = self.logpz = logpz.reshape(x.shape)
            # probs = self.probs = probs.reshape(x.shape)

            # # batch
            # z = z_pred
            # self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
            # self.zdiff = T.sum(T.abs_(z[1:]-z[:-1]), axis=0, dtype=theano.config.floatX)

            # zsum = generator.zsum
            # zdiff = generator.zdiff
            # logpz = generator.logpz

            # coherent_factor = args.sparsity * args.coherent
            # loss = self.loss = T.mean(loss_vec) #this is not needed as in cost_vec loss_vec is used
            # sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
            #                                      T.mean(zdiff) * coherent_factor
            # cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
            # cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
            # self.obj = T.mean(cost_vec)

            ############################ custom new_loss ############################

            if loss.size(0) > 1:
                loss = loss.mean()
            # print ('loss:', loss)
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            grad_norm = clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            # if epoch==11:
            # print(batch_no, grad_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0
                                      is_test_corpus=True)
        print('multinli dev [mismatched] set size = ',
              len(multinli_dev_mismatched.data))
        tasks.append(('multinli', 3))

    if tasks:
        model = Generator(dictionary, embeddings_index, args, tasks)
        if args.cuda:
            model = model.cuda()
        helper.load_model_states_from_checkpoint(
            model, args.save_path + 'model_best.pth.tar', 'state_dict_G',
            args.cuda)
        print('vocabulary size = ', len(dictionary))

        if 'quora' in args.task:
            dev_batches = helper.batchify(quora_dev.data, args.batch_size)
            dev_accuracy = evaluate(model, dev_batches, 'quora', dictionary)
            print('quora dev accuracy: %f%%' % dev_accuracy)

        if 'snli' in args.task:
            dev_batches = helper.batchify(snli_dev.data, args.batch_size)
            dev_accuracy = evaluate(model, dev_batches, 'snli', dictionary)
            print('snli dev accuracy: %f%%' % dev_accuracy)

        if 'multinli' in args.task:
            # test matched part
            dev_batches = helper.batchify(multinli_dev_matched.data,
                                          args.batch_size)
            dev_accuracy = evaluate(model, dev_batches, 'multinli', dictionary)
            print('multinli [matched] dev accuracy: %f%%' % dev_accuracy)
            # test mismatched part
def main():
    ###############################################################################
    # Load data
    ###############################################################################

    dictionary = data.Dictionary()
    train_corpus = data.Corpus(dictionary)
    dev_corpus = data.Corpus(dictionary)
    test_corpus = data.Corpus(dictionary)

    task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
    for task in task_names:
        skip_first_line = True if task == 'sick' else False
        train_corpus.parse(task,
                           args.data,
                           'train.txt',
                           args.tokenize,
                           num_examples=args.max_example,
                           skip_first_line=skip_first_line)
        if task == 'multinli':
            dev_corpus.parse(task, args.data, 'dev_matched.txt', args.tokenize)
            dev_corpus.parse(task, args.data, 'dev_mismatched.txt',
                             args.tokenize)
            test_corpus.parse(task,
                              args.data,
                              'test_matched.txt',
                              args.tokenize,
                              is_test_corpus=False)
            test_corpus.parse(task,
                              args.data,
                              'test_mismatched.txt',
                              args.tokenize,
                              is_test_corpus=False)
        else:
            dev_corpus.parse(task,
                             args.data,
                             'dev.txt',
                             args.tokenize,
                             skip_first_line=skip_first_line)
            test_corpus.parse(task,
                              args.data,
                              'test.txt',
                              args.tokenize,
                              is_test_corpus=False,
                              skip_first_line=skip_first_line)

    print('train set size = ', len(train_corpus.data))
    print('development set size = ', len(dev_corpus.data))
    print('test set size = ', len(test_corpus.data))
    print('vocabulary size = ', len(dictionary))

    # save the dictionary object to use during testing
    helper.save_object(dictionary,
                       args.save_path + args.task + '_dictionary.pkl')

    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    print('number of OOV words = ', len(dictionary) - len(embeddings_index))

    # ###############################################################################
    # # Build the model
    # ###############################################################################

    model = SentenceClassifier(dictionary, embeddings_index, args)
    optim_fn, optim_params = helper.get_optimizer(args.optimizer)
    optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                         **optim_params)
    best_acc = 0

    if args.cuda:
        model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # ###############################################################################
    # # Train the model
    # ###############################################################################

    train = Train(model, optimizer, dictionary, embeddings_index, args,
                  best_acc)
    bestmodel = train.train_epochs(train_corpus, dev_corpus, args.start_epoch,
                                   args.epochs)
    test_batches = helper.batchify(test_corpus.data, args.batch_size)
    if 'multinli' in task_names:
        print(
            'Skipping evaluating best model. Evaluate using the test script.')
    else:
        test_accuracy, test_f1 = evaluate(bestmodel, test_batches, dictionary)
        print('accuracy: %.2f%%' % test_accuracy)
        print('f1: %.2f%%' % test_f1)
Example #30
    def train(self, train_corpus, epoch):
        # Turn on training mode which enables dropout.
        self.model.train()

        # Splitting the data in batches
        shuffle = True
        # if self.config.task == 'sst': shuffle = False
        print(shuffle)

        train_batches = helper.batchify(train_corpus.data,
                                        self.config.batch_size, shuffle)
        print('number of train batches = ', len(train_batches))

        start = time.time()
        print_acc_total = 0
        plot_acc_total = 0

        num_batches = len(train_batches)
        for batch_no in range(1, num_batches + 1):
            # Clearing out all previous gradient computations.
            self.optimizer.zero_grad()
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = helper.batch_to_tensors(
                train_batches[batch_no - 1], self.dictionary)
            if self.config.cuda:
                train_sentences1 = train_sentences1.cuda()
                train_sentences2 = train_sentences2.cuda()
                train_labels = train_labels.cuda()

            assert train_sentences1.size(0) == train_sentences2.size(0)

            # print(' train label size: ', train_labels.size(), ' train data size: ', train_sentences1.size())
            # print(' labels: ', train_labels)
            score = self.model(train_sentences1)
            n_correct = (torch.max(score, 1)[1].view(
                train_labels.size()).data == train_labels.data).sum()
            # print (' score size ', score.size(), train_labels.size())
            loss = self.criterion(score, train_labels)

            if loss.size(0) > 1:
                loss = loss.mean()
            # print ('loss:', loss)
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            grad_norm = clip_grad_norm(
                filter(lambda p: p.requires_grad, self.model.parameters()),
                self.config.max_norm)
            # if epoch==11:
            # print(batch_no, grad_norm)
            self.optimizer.step()

            print_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])
            plot_acc_total += 100. * n_correct / len(
                train_batches[batch_no - 1])

            if batch_no % self.config.print_every == 0:
                print_acc_avg = print_acc_total / self.config.print_every
                print_acc_total = 0
                print('%s (%d %d%%) %.2f' %
                      (helper.show_progress(start, batch_no / num_batches),
                       batch_no, batch_no / num_batches * 100, print_acc_avg))

            if batch_no % self.config.plot_every == 0:
                plot_acc_avg = plot_acc_total / self.config.plot_every
                self.train_accuracies.append(plot_acc_avg)
                plot_acc_total = 0