Example #1
0
def evaluate_split(model, processor, args, split='dev'):
    """Score ``model`` on one split and print the timing plus a metrics row.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        args: Run configuration handed to ``BertEvaluator``.
        split: Name of the split to evaluate; upper-cased in the printed row.
    """
    scorer = BertEvaluator(model, processor, args, split)
    began = time.time()
    # Only the first element of the get_scores() result is used here; it is
    # unpacked as exactly five values.
    metrics = scorer.get_scores(silent=True)[0]
    accuracy, precision, recall, f1, avg_loss = metrics
    print("Inference time", time.time() - began)
    print('\n' + LOG_HEADER)
    row = LOG_TEMPLATE.format(split.upper(), accuracy, precision, recall, f1,
                              avg_loss)
    print(row)
Example #2
0
    def train(self):
        """Fine-tune the model, evaluating on the dev split after every epoch.

        Converts ``self.train_examples`` to tensors, wraps them in a
        ``DataLoader`` and runs ``self.args.epochs`` epochs of
        ``self.train_epoch``. After each epoch the dev scores are logged. The
        model is saved to ``self.snapshot_path`` whenever dev F1 beats
        ``self.best_dev_f1``; training stops early once
        ``self.args.patience`` consecutive epochs bring no improvement.
        """
        train_features = convert_examples_to_features(self.train_examples,
                                                      self.args.max_seq_length,
                                                      self.tokenizer)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)

        # local_rank == -1 means a single-process run.
        distributed = self.args.local_rank != -1
        if distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            (dev_acc, dev_precision, dev_recall, dev_f1,
             dev_loss) = dev_evaluator.get_scores()[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss))

            # Update validation results
            if dev_f1 > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    break
Example #3
0
def evaluate_split(model, processor, tokenizer, args, split='dev'):
    """Score ``model`` on one split and print a single metrics row.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        tokenizer: Tokenizer handed to ``BertEvaluator``.
        args: Run configuration handed to ``BertEvaluator``.
        split: Name of the split to evaluate; upper-cased in the printed row.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    # Only the first element of the get_scores() result is used here; it is
    # unpacked as exactly five values.
    metrics = scorer.get_scores(silent=True)[0]
    accuracy, precision, recall, f1, avg_loss = metrics
    print('\n' + LOG_HEADER)
    row = LOG_TEMPLATE.format(split.upper(), accuracy, precision, recall, f1,
                              avg_loss)
    print(row)
Example #4
0
def evaluate_split(model, processor, args, split='dev'):
    """Evaluate one split and run the personality analysis on its predictions.

    The predicted and target labels returned by the evaluator are passed to
    ``do_personality_analysis`` together with the split name and the output
    directory ``out/bert``.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        args: Run configuration handed to ``BertEvaluator``.
        split: Name of the split to evaluate.
    """
    root = Path('out/bert')
    # parents=True: without it mkdir raises FileNotFoundError whenever the
    # top-level 'out' directory does not exist yet.
    root.mkdir(parents=True, exist_ok=True)

    evaluator = BertEvaluator(model, processor, args, split)
    # The first element of the get_scores() result is unpacked as ten values;
    # only the last two (predictions and targets) are used here.
    (accuracy, precision, recall, f1, avg_loss, f1_mac, hamming_loss, jacc,
     predicted_labels, target_labels) = evaluator.get_scores(silent=True)[0]

    do_personality_analysis([predicted_labels, target_labels], split, root)
Example #5
0
def ensemble_acc(model,
                 processor,
                 tokenizer,
                 args,
                 final_pred,
                 label,
                 split='dev'):
    """Score ensemble predictions against their labels.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        tokenizer: Tokenizer handed to ``BertEvaluator``.
        args: Run configuration handed to ``BertEvaluator``.
        final_pred: Predictions passed to ``get_accuracy``.
        label: Reference labels passed to ``get_accuracy``.
        split: Name of the split the evaluator is built for.

    Returns:
        The tuple ``(accuracy, precision, recall, f1)`` produced by
        ``BertEvaluator.get_accuracy``.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    acc, prec, rec, f_score = scorer.get_accuracy(final_pred,
                                                  label,
                                                  silent=True)
    return acc, prec, rec, f_score
Example #6
0
def evaluate(model, processor, args, last_bert_layers=-1, ngram_range=(1, 1)):
    """Fit and score a downstream classifier on features from three splits.

    Extracts BERT layer outputs and n-gram feature vectors for the train, dev
    and test splits, plots the selected feature set with ``scatter_plot``,
    then runs ``classification`` for train->test and train->dev and prints
    one row per result.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        args: Run configuration; ``train_name``, ``dev_name``, ``test_name``
            and ``max_seq_length`` are read here.
        last_bert_layers: Forwarded to ``get_bert_layers``.
        ngram_range: Forwarded to ``get_feature_vector``.
    """
    train_evaluator = BertEvaluator(model, processor, args, args.train_name)
    dev_evaluator = BertEvaluator(model, processor, args, args.dev_name)
    tst_evaluator = BertEvaluator(model, processor, args, args.test_name)

    began = time.time()
    train_bert, train_labels = train_evaluator.get_bert_layers(
        silent=True, last_bert_layers=last_bert_layers)
    dev_bert, dev_labels = dev_evaluator.get_bert_layers(
        silent=True, last_bert_layers=last_bert_layers)
    test_bert, test_labels = tst_evaluator.get_bert_layers(
        silent=True, last_bert_layers=last_bert_layers)

    evaluators = (train_evaluator, dev_evaluator, tst_evaluator)
    train_ngrams, dev_ngrams, test_ngrams = get_feature_vector(
        evaluators, ngram_range=ngram_range, max_seq_len=args.max_seq_length)

    # Densify the n-gram features and move the BERT features to numpy. The
    # concatenated matrices below are currently unused: they belong to the
    # alternative "BERT + n-gram" feature set, which is not selected.
    train_ngrams = train_ngrams.toarray()
    train_bert = train_bert.cpu().data.numpy()
    train_x = np.concatenate((train_bert, train_ngrams), axis=1)

    dev_ngrams = dev_ngrams.toarray()
    dev_bert = dev_bert.cpu().data.numpy()
    dev_x = np.concatenate((dev_bert, dev_ngrams), axis=1)

    test_ngrams = test_ngrams.toarray()
    test_bert = test_bert.cpu().data.numpy()
    test_x = np.concatenate((test_bert, test_ngrams), axis=1)

    # Active feature set: n-gram features only.
    train = (train_ngrams, train_labels)
    tst = (test_ngrams, test_labels)
    dev = (dev_ngrams, dev_labels)
    scatter_plot(train, dev, tst)

    print('train, test shape : ', train[0].shape, tst[0].shape)
    print("Inference time", time.time() - began)
    r_test = classification(train, tst)
    r_dev = classification(train, dev)
    print('\n' + LOG_HEADER)
    dev_row = LOG_TEMPLATE.format("DEV", r_dev['acc'], r_dev['pr'],
                                  r_dev['rc'], r_dev['f1'])
    print(dev_row)
    test_row = LOG_TEMPLATE.format("TEST", r_test['acc'], r_test['pr'],
                                   r_test['rc'], r_test['f1'])
    print(test_row)
Example #7
0
def evaluate_split(model, processor, args, split='dev'):
    """Evaluate one split, print its metrics and save predictions to disk.

    The predicted and target values are written as ``.npy`` files into a
    directory derived from ``args.save_path`` (with ``model_checkpoints``
    replaced by ``out``) and ``args.dataset``.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        args: Run configuration; ``save_path`` and ``dataset`` are read here.
        split: Name of the split to evaluate; used in the printed row and in
            the output file names.
    """
    evaluator = BertEvaluator(model, processor, args, split)
    start_time = time.time()
    (accuracy, precision, recall, f1, avg_loss, f1_mac, hamming_loss, jacc,
     predicted_values, target_values) = evaluator.get_scores(silent=True)[0]
    print("Inference time", time.time() - start_time)
    print('\n' + LOG_HEADER)
    print(
        LOG_TEMPLATE.format(split.upper(), accuracy, precision, recall, f1,
                            avg_loss, f1_mac, hamming_loss, jacc))

    model_name = args.save_path.replace('model_checkpoints/', '')
    path = Path(args.save_path.replace('model_checkpoints', 'out'))
    path = path / args.dataset
    # parents=True: the output tree mirrors the checkpoint tree and usually
    # does not exist yet, so a plain mkdir would raise FileNotFoundError.
    path.mkdir(parents=True, exist_ok=True)
    print('Saving prediction files in ', path)
    np.save(path / f'predicted_{model_name}_{split}.npy', predicted_values)
    np.save(path / f'target_{model_name}_{split}.npy', target_values)
Example #8
0
def evaluate_split(model, processor, tokenizer, args, save_file, split='dev'):
    """Evaluate one split, print its metrics and dump all scores as JSON.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        tokenizer: Tokenizer handed to ``BertEvaluator``.
        args: Run configuration; ``is_regression`` selects which metric
            layout is printed.
        save_file: Path of the JSON file that receives the name->score map.
        split: Name of the split to evaluate; upper-cased in the printed row.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    scores, score_names = scorer.get_scores(silent=True)

    if not args.is_regression:
        # Classification: the first five scores, in the evaluator's order.
        precision, recall, f1, accuracy, avg_loss = scores[:5]
        print('\n' + LOG_HEADER_CLASS)
        row = LOG_TEMPLATE_CLASS.format(split.upper(), accuracy, precision,
                                        recall, f1, avg_loss)
        print(row)
    else:
        # Regression: the first six scores, in the evaluator's order.
        (rmse, kendall, pearson, spearman, pearson_spearman,
         avg_loss) = scores[:6]
        print('\n' + LOG_HEADER_REG)
        row = LOG_TEMPLATE_REG.format(split.upper(), rmse, kendall, pearson,
                                      spearman, pearson_spearman, avg_loss)
        print(row)

    # Every score is persisted, not only the ones printed above.
    payload = dict(zip(score_names, scores))
    with open(save_file, 'w') as out:
        out.write(json.dumps(payload))
Example #9
0
    def train_layer_qroup(self, dataloader, to_freeze_layer, model_path,
                          reset_lr=2e-5):
        """Train one epoch for a single layer group, evaluate and checkpoint.

        Runs ``self.train_epoch`` with ``freez_layer=to_freeze_layer``, logs
        the dev scores, saves the model to ``model_path/<to_freeze_layer>.pt``
        and then sets the learning rate of every optimizer parameter group.

        Args:
            dataloader: Training batches forwarded to ``self.train_epoch``.
            to_freeze_layer: Layer-group name forwarded to ``train_epoch``;
                also used as the checkpoint file stem.
            model_path: Directory (path-like supporting ``/``) that receives
                the checkpoint.
            reset_lr: Learning rate written to every parameter group after
                the epoch. Defaults to the previously hard-coded ``2e-5``.
        """
        self.train_epoch(dataloader, freez_layer=to_freeze_layer)
        dev_evaluator = BertEvaluator(self.model,
                                      self.processor,
                                      self.args,
                                      split='dev')
        # The first element of the get_scores() result is unpacked as ten
        # values; the predicted/target labels are not used here.
        (dev_acc, dev_precision, dev_recall, dev_f1, dev_loss, dev_f1_macro,
         dev_hamming_loss, dev_jaccard_score, dev_predicted_labels,
         dev_target_labels) = dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(
            self.log_template.format(1, self.iterations, 1, self.args.epochs,
                                     dev_acc, dev_precision, dev_recall,
                                     dev_f1, dev_loss, dev_f1_macro,
                                     dev_hamming_loss, dev_jaccard_score))

        torch.save(self.model, model_path / f'{to_freeze_layer}.pt')

        # Update the learning rate for the next training stage.
        for group in self.optimizer.param_groups:
            group['lr'] = reset_lr
Example #10
0
def ensemble_cal(model, processor, tokenizer, args, split='dev'):
    """Collect labels and predictions for one split.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        tokenizer: Tokenizer handed to ``BertEvaluator``.
        args: Run configuration handed to ``BertEvaluator``.
        split: Name of the split to run on.

    Returns:
        The ``(label, prediction)`` pair produced by
        ``BertEvaluator.get_pred``.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    gold, predicted = scorer.get_pred(silent=True)
    return gold, predicted
Example #11
0
class BertTrainer(object):
    """Training loop for a BERT classifier with per-epoch dev evaluation.

    Holds the model, optimizer and data processor, and tracks the iteration
    counters, the accumulated training loss and the early-stopping state.
    """

    def __init__(self, model, optimizer, processor, args):
        """Load the training examples and set up bookkeeping.

        Args:
            model: Model to train.
            optimizer: Optimizer stepped in ``train_epoch``.
            processor: Dataset processor; supplies the training examples and
                the ``NAME`` used in the snapshot path.
            args: Run configuration (data paths, batch size, epochs, ...).
        """
        self.args = args
        self.model = model
        self.optimizer = optimizer
        self.processor = processor
        self.train_examples = self.processor.get_train_examples(args.data_dir)
        self.tokenizer = BertTokenizer.from_pretrained(
            args.model, is_lowercase=args.is_lowercase)

        # Snapshots are named after the time the trainer was created.
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.snapshot_path = os.path.join(self.args.save_path,
                                          self.processor.NAME,
                                          '%s.pt' % timestamp)

        self.num_train_optimization_steps = int(
            len(self.train_examples) / args.batch_size /
            args.gradient_accumulation_steps) * args.epochs
        if args.local_rank != -1:
            # Divide the step count computed above by the world size. The
            # original read `args.num_train_optimization_steps`, which is not
            # the value computed here.
            self.num_train_optimization_steps = (
                self.num_train_optimization_steps //
                torch.distributed.get_world_size())
        self.log_header = 'Epoch Iteration Progress   Dev/Acc.  Dev/Hamm.  Dev/Jacc.   Dev/Prec Dev/Rec Dev/micro-F1 Dev/F1  Dev/Loss'
        self.log_template = ' '.join(
            '{:>5.0f},{:>9.0f},{:>6.0f}/{:<5.0f} {:>6.4f},{:>8.4f},{:8.4f},{:8.4f},{:>8.4f},{:8.4f},{:8.4f},{:10.4f}'
            .split(','))

        self.iterations, self.nb_tr_steps, self.tr_loss = 0, 0, 0
        self.best_dev_f1, self.unimproved_iters = 0, 0
        self.early_stop = False

    def train_epoch(self, train_dataloader):
        """Run one pass over ``train_dataloader``, updating the model.

        Gradients are accumulated over ``args.gradient_accumulation_steps``
        batches before each optimizer step. ``self.tr_loss``,
        ``self.nb_tr_steps`` and ``self.iterations`` are updated as a side
        effect.
        """
        for step, batch in enumerate(tqdm(train_dataloader, desc="Training")):
            self.model.train()
            batch = tuple(t.to(self.args.device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            logits = self.model(input_ids, segment_ids, input_mask)

            if self.args.is_multilabel:
                # BCE needs floating-point targets; the precision follows the
                # fp16 flag.
                if self.args.fp16:
                    targets = label_ids.half()
                else:
                    targets = label_ids.float()
                loss = F.binary_cross_entropy_with_logits(logits, targets)
            else:
                # Cross-entropy takes a class index per row, taken here as
                # the argmax over dim 1 of the label tensor.
                loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps

            if self.args.fp16:
                self.optimizer.backward(loss)
            else:
                loss.backward()

            self.tr_loss += loss.item()
            self.nb_tr_steps += 1
            if (step + 1) % self.args.gradient_accumulation_steps == 0:
                if self.args.fp16:
                    # Under fp16 the warmup schedule is applied by hand.
                    lr_this_step = self.args.lr * warmup_linear(
                        self.iterations / self.num_train_optimization_steps,
                        self.args.warmup_proportion)
                    for param_group in self.optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                self.optimizer.step()
                self.optimizer.zero_grad()
                self.iterations += 1

    def train(self):
        """Train for up to ``args.epochs`` epochs with early stopping.

        After every epoch the dev split is evaluated and logged. The model is
        saved to ``self.snapshot_path`` whenever dev micro-F1 beats
        ``self.best_dev_f1``; training stops once ``args.patience``
        consecutive epochs bring no improvement.
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            # Return values are ignored, so pad_input_matrix presumably pads
            # the lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        # local_rank == -1 means a single-process run.
        distributed = self.args.local_rank != -1
        if distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            self.dev_evaluator = BertEvaluator(self.model,
                                               self.processor,
                                               self.args,
                                               split='dev')
            (dev_acc, dev_hamming, dev_jaccard, dev_precision, dev_recall,
             dev_f1_micro, dev_f1_macro,
             dev_loss) = self.dev_evaluator.get_scores()[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_hamming, dev_jaccard,
                                         dev_precision, dev_recall,
                                         dev_f1_micro, dev_f1_macro, dev_loss))

            # Update validation results
            if dev_f1_micro > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1_micro
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    break
Example #12
0
def evaluate_split(model, processor, tokenizer, args, split='dev'):
    """Evaluate one split and print the metric names and their scores.

    Args:
        model: Model handed to ``BertEvaluator``.
        processor: Dataset processor handed to ``BertEvaluator``.
        tokenizer: Tokenizer handed to ``BertEvaluator``.
        args: Run configuration handed to ``BertEvaluator``.
        split: Name of the split to evaluate; shown upper-cased in the output.
    """
    evaluator = BertEvaluator(model, processor, tokenizer, args, split)
    scores, metric_names = evaluator.get_scores(silent=True)
    # The original message stopped at "for" and never named the split, so
    # consecutive dev/test runs were indistinguishable in the output.
    print('Evaluation metrics for', split.upper())
    print(metric_names)
    print(scores)
Example #13
0
    def train_gradually(self):
        """Train the classifier head first, then the whole model.

        Stage one calls ``self.train_layer_qroup`` for the ``'classifier'``
        layer group. Stage two calls ``self.unfreez_all()`` and runs the
        regular epoch loop with dev evaluation, best-F1 checkpointing to
        ``self.snapshot_path`` and early stopping after ``args.patience``
        epochs without improvement.
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            # Return values are ignored, so pad_input_matrix presumably pads
            # the lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        # local_rank == -1 means a single-process run.
        distributed = self.args.local_rank != -1
        if distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        # Per-stage checkpoints go next to the main snapshot. Path.parent
        # replaces the manual split('/'), which returned '.' for a file
        # directly under '/' and broke on non-'/' separators.
        model_path = Path(self.snapshot_path).parent

        # Stage one: train with the 'classifier' layer group selected.
        self.train_layer_qroup(train_dataloader,
                               to_freeze_layer='classifier',
                               model_path=model_path)

        # Gradual unfreezing of 'bert.pooler' and of the encoder layers
        # 'bert.encoder.layer.11' down to '.0' is currently disabled.

        self.unfreez_all()

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            (dev_acc, dev_precision, dev_recall, dev_f1, dev_loss,
             dev_f1_macro, dev_hamming_loss, dev_jaccard_score,
             dev_predicted_labels,
             dev_target_labels) = dev_evaluator.get_scores()[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss, dev_f1_macro,
                                         dev_hamming_loss, dev_jaccard_score))

            # Update validation results
            if dev_f1 > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    break
Example #14
0
    def train(self):
        """Train with early stopping on either dev F1 or dev accuracy.

        After every epoch the dev split is evaluated and logged together with
        the value returned by ``self.train_epoch``. The monitored measure is
        dev F1 when ``args.early_on_f1`` is set (forced to 0 when dev recall
        equals 1), otherwise the evaluator's accuracy value. The model is
        saved to ``self.snapshot_path`` whenever the measure beats
        ``self.best_dev_measure``; training stops once ``args.patience``
        consecutive epochs bring no improvement.
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            # Return values are ignored, so pad_input_matrix presumably pads
            # the lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        # local_rank == -1 means a single-process run.
        distributed = self.args.local_rank != -1
        if distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # ordering.
                train_sampler.set_epoch(epoch)

            loss_epoch = self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            (dev_acc, dev_precision, dev_recall, dev_f1, dev_loss,
             dev_f1_macro, dev_hamming_loss, dev_jaccard_score,
             dev_predicted_labels,
             dev_target_labels) = dev_evaluator.get_scores()[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss, dev_f1_macro,
                                         dev_hamming_loss, dev_jaccard_score,
                                         loss_epoch))

            if self.args.early_on_f1:
                # A recall of exactly 1 is scored as 0 so such an epoch can
                # never become the best checkpoint.
                dev_measure = dev_f1 if dev_recall != 1 else 0
                measure_name = 'F1'
            else:
                dev_measure = dev_acc
                measure_name = 'Balanced Acc'

            # Update validation results
            if dev_measure > self.best_dev_measure:
                self.unimproved_iters = 0
                self.best_dev_measure = dev_measure
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    # tqdm.write keeps the message from corrupting the
                    # active progress bars, matching the other log lines.
                    tqdm.write("Early Stopping. Epoch: {}, Best {}: {}".format(
                        epoch, measure_name, self.best_dev_measure))
                    break
Example #15
0
    def train(self):
        """Train with early stopping and save learning-curve plots.

        After every epoch the dev split is evaluated, logged and recorded.
        The model is saved to ``self.snapshot_path`` whenever dev F1 beats
        ``self.best_dev_f1``; training stops once ``args.patience``
        consecutive epochs bring no improvement. When at least one epoch ran,
        the recorded dev metrics are plotted to ``accuracy_curves.png`` and
        ``loss_curves.png`` in the current working directory.
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            # Return values are ignored, so pad_input_matrix presumably pads
            # the lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        # local_rank == -1 means a single-process run.
        distributed = self.args.local_rank != -1
        if distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        # results for graphing learning curves
        results = []
        iterator = trange(int(self.args.epochs), desc="Epoch")
        for epoch in iterator:
            if distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            (dev_acc, dev_precision, dev_recall, dev_f1,
             dev_loss) = dev_evaluator.get_scores()[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss))

            results.append([
                epoch + 1, dev_acc, dev_precision, dev_recall, dev_f1, dev_loss
            ])

            # Update validation results
            if dev_f1 > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    # Close the bar explicitly because the loop is left
                    # before the iterator is exhausted.
                    iterator.close()
                    break

        if not results:
            # No epoch ran (e.g. epochs == 0): there is nothing to plot, and
            # the plotting code below is not written for an empty result set.
            return

        # create learning curves
        results_frame = pd.DataFrame(
            data=np.array(results),
            columns=['Epoch', 'Accuracy', 'Precision', 'Recall', 'F1',
                     'Loss']).set_index('Epoch')

        ax_acc = results_frame[['Accuracy', 'Precision', 'Recall',
                                'F1']].plot()
        ax_loss = results_frame[['Loss']].plot()

        ax_acc.get_figure().savefig('accuracy_curves.png')
        ax_loss.get_figure().savefig('loss_curves.png')
Example #16
0
    def train(self):
        """Train for classification or regression with optional dev/test modes.

        With ``args.evaluate_dev`` the dev split is scored after every epoch,
        the model is saved to ``self.snapshot_path`` whenever
        ``self.check_dev_improved`` reports an improvement of
        ``args.eval_metric``, and training stops after ``args.patience``
        epochs without one. With ``args.evaluate_test`` the relative drop in
        ``self.tr_loss`` is checked at epoch ``self.patience_training`` and
        training is aborted if it is not above
        ``self.minimum_loss_percent_decrease``; the model is then saved once
        after the loop. Start/end timestamps and elapsed time are printed.
        """
        if self.args.is_hierarchical:
            features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            features = convert_examples_to_features(
                self.train_examples,
                self.args.max_seq_length,
                self.tokenizer,
                use_guid=True,
                is_regression=self.args.is_regression)

        raw_input_ids = [feat.input_ids for feat in features]
        raw_input_mask = [feat.input_mask for feat in features]
        raw_segment_ids = [feat.segment_ids for feat in features]

        if self.args.is_hierarchical:
            # Return values are ignored, so pad_input_matrix presumably pads
            # the lists in place -- TODO confirm against its definition.
            for matrix in (raw_input_ids, raw_input_mask, raw_segment_ids):
                pad_input_matrix(matrix, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        input_ids_t = torch.tensor(raw_input_ids, dtype=torch.long)
        input_mask_t = torch.tensor(raw_input_mask, dtype=torch.long)
        segment_ids_t = torch.tensor(raw_segment_ids, dtype=torch.long)

        # Regression targets are float tensors; class labels are long.
        label_dtype = torch.float if self.args.is_regression else torch.long
        labels_t = torch.tensor([feat.label_id for feat in features],
                                dtype=label_dtype)

        dataset = TensorDataset(input_ids_t, input_mask_t, segment_ids_t,
                                labels_t)
        loader = DataLoader(dataset,
                            sampler=RandomSampler(dataset),
                            batch_size=self.args.batch_size)

        stamp_format = "%Y-%m-%d_%H-%M-%S"
        print('Begin training: ',
              datetime.datetime.now().strftime(stamp_format))
        started = time.monotonic()
        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            self.train_epoch(loader)
            print('Train loss: ', self.tr_loss)
            if epoch == 0:
                # Baseline for the convergence check further down.
                self.initial_tr_loss = self.tr_loss

            if self.args.evaluate_dev:
                dev_evaluator = BertEvaluator(self.model,
                                              self.processor,
                                              self.tokenizer,
                                              self.args,
                                              split='dev')
                dev_scores, dev_score_names = dev_evaluator.get_scores()
                metric_position = dev_score_names.index(self.args.eval_metric)
                dev_metric = dev_scores[metric_position]

                if not self.args.is_regression:
                    (dev_precision, dev_recall, dev_f1, dev_acc,
                     dev_loss) = dev_scores[:5]

                    # Print validation results
                    tqdm.write(self.log_header_classification)
                    tqdm.write(
                        self.log_template_classification.format(
                            epoch + 1, self.iterations, epoch + 1,
                            self.args.epochs, dev_acc, dev_precision,
                            dev_recall, dev_f1, dev_loss))
                else:
                    (dev_rmse, dev_kendall, dev_pearson, dev_spearman,
                     dev_pearson_spearman, dev_loss) = dev_scores[:6]

                    # Print validation results
                    tqdm.write(self.log_header_regression)
                    tqdm.write(
                        self.log_template_regression.format(
                            epoch + 1, self.iterations, epoch + 1,
                            self.args.epochs, dev_rmse, dev_kendall,
                            dev_pearson, dev_spearman, dev_pearson_spearman,
                            dev_loss))

                # Update validation results
                if self.check_dev_improved(dev_metric):
                    self.unimproved_iters = 0
                    self.best_dev_metric = dev_metric
                    torch.save(self.model, self.snapshot_path)
                else:
                    self.unimproved_iters += 1
                    if self.unimproved_iters >= self.args.patience:
                        self.early_stop = True
                        tqdm.write(
                            "Early Stopping. Epoch: {}, Best Dev {}: {}".
                            format(epoch, self.args.eval_metric,
                                   self.best_dev_metric))
                        break

            if self.args.evaluate_test and epoch == self.patience_training:
                # Relative drop of the training loss since the first epoch.
                loss_percent = (self.initial_tr_loss -
                                self.tr_loss) / self.initial_tr_loss
                if loss_percent <= self.minimum_loss_percent_decrease:
                    self.training_converged = False
                    tqdm.write(
                        "Training failed to converge. Epoch: {}, Loss percent: {}"
                        .format(epoch, loss_percent))
                    break
        finished = time.monotonic()

        # When evaluating on test there is no dev-driven checkpointing, so the
        # model is saved once at the end of training.
        if self.args.evaluate_test:
            torch.save(self.model, self.snapshot_path)
        print('End training: ',
              datetime.datetime.now().strftime(stamp_format))
        print('Time elapsed: ', finished - started)