예제 #1
0
    def train(self):
        """Train the model, scoring the dev split after every epoch.

        ``self.train_examples`` are converted to features, packed into a
        ``TensorDataset`` and iterated through a ``DataLoader``. After each
        call to ``self.train_epoch`` a fresh ``BertEvaluator`` is run on the
        ``'dev'`` split. Whenever dev F1 beats ``self.best_dev_f1`` the model
        is saved to ``self.snapshot_path``; otherwise
        ``self.unimproved_iters`` is incremented and training stops once it
        reaches ``self.args.patience`` (``self.early_stop`` is then set).
        """
        train_features = convert_examples_to_features(self.train_examples,
                                                      self.args.max_seq_length,
                                                      self.tokenizer)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)

        # local_rank == -1 means a single-process run; anything else uses the
        # distributed sampler.
        is_distributed = self.args.local_rank != -1
        if is_distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if is_distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # sample ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            # get_scores() returns (values, names); only the values are used.
            dev_acc, dev_precision, dev_recall, dev_f1, dev_loss = dev_evaluator.get_scores(
            )[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss))

            # Update validation results
            if dev_f1 > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    # NOTE(review): this reports the zero-based epoch while
                    # the log line above prints epoch + 1.
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    break
예제 #2
0
    def get_scores(self, silent=False):
        """Evaluate the model on ``self.eval_examples``.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            A pair of parallel lists: the values
            ``[accuracy, precision, recall, f1, avg_loss]`` and their names.
            Precision, recall and F1 are micro-averaged; ``avg_loss`` is the
            summed per-batch loss divided by the number of batches.
        """
        eval_features = convert_examples_to_features(self.eval_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)
        eval_sampler = SequentialSampler(eval_data)
        # NOTE(review): drop_last=True discards a trailing partial batch, so
        # those examples are never scored - confirm this is intended.
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=self.args.train_batch_size, drop_last=True)

        self.model.eval()

        total_loss = 0
        nb_eval_steps = 0
        predicted_labels, target_labels = list(), list()

        for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids = label_ids.to(self.args.device)

            with torch.no_grad():
                # NOTE(review): the width 256 is hard-coded here rather than
                # taken from self.args.max_seq_length - verify they agree.
                logits = self.model(input_ids.view(-1, 256), segment_ids.view(-1, 256), input_mask.view(-1, 256))

            if self.args.is_multilabel:
                # torch.sigmoid replaces the deprecated F.sigmoid.
                predicted_labels.extend(torch.sigmoid(logits).round().long().cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                # reduction='sum' is the current spelling of the deprecated
                # size_average=False.
                loss = F.binary_cross_entropy_with_logits(logits, label_ids.float(), reduction='sum')
            else:
                # Gold labels are reduced with argmax over dim 1 for both the
                # metrics and the loss.
                predicted_labels.extend(torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(torch.argmax(label_ids, dim=1).cpu().detach().numpy())
                loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps
            total_loss += loss.item()

            nb_eval_steps += 1

        predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)
        accuracy = metrics.accuracy_score(target_labels, predicted_labels)
        precision = metrics.precision_score(target_labels, predicted_labels, average='micro')
        recall = metrics.recall_score(target_labels, predicted_labels, average='micro')
        f1 = metrics.f1_score(target_labels, predicted_labels, average='micro')
        avg_loss = total_loss / nb_eval_steps

        return [accuracy, precision, recall, f1, avg_loss], ['accuracy', 'precision', 'recall', 'f1', 'avg_loss']
예제 #3
0
    def get_bert_layers(self, silent=False, last_bert_layers=-1):
        """Collect BERT embeddings and gold label indices for the eval set.

        Args:
            silent: When true, the tqdm progress bar is disabled.
            last_bert_layers: Forwarded unchanged to
                ``self.model.get_bert_embedding``.

        Returns:
            A pair ``(embeddings, labels)``: the per-item outputs of
            ``get_bert_embedding`` stacked along a new first dimension, and a
            plain list of label indices (argmax over dim 1 of the label ids).
        """
        hierarchical = self.args.is_hierarchical
        to_features = (convert_examples_to_hierarchical_features
                       if hierarchical else convert_examples_to_features)
        features = to_features(self.eval_examples, self.args.max_seq_length,
                               self.tokenizer)

        # One list per model input, kept in the order ids / mask / segments.
        columns = [
            [feat.input_ids for feat in features],
            [feat.input_mask for feat in features],
            [feat.segment_ids for feat in features],
        ]

        if hierarchical:
            # pad_input_matrix is called for its effect on the list it is
            # given; its return value is not used.
            for column in columns:
                pad_input_matrix(column, self.args.max_doc_length)

        ids_tensor, mask_tensor, segment_tensor = [
            torch.tensor(column, dtype=torch.long) for column in columns
        ]
        label_tensor = torch.tensor([feat.label_id for feat in features],
                                    dtype=torch.long)

        dataset = TensorDataset(ids_tensor, mask_tensor, segment_tensor,
                                label_tensor)
        loader = DataLoader(dataset,
                            sampler=SequentialSampler(dataset),
                            batch_size=self.args.batch_size)

        self.model.eval()

        collected_layers = []
        collected_labels = []
        progress = tqdm(loader, desc="Evaluating", disable=silent)
        for input_ids, input_mask, segment_ids, label_ids in progress:
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids = label_ids.to(self.args.device)

            with torch.no_grad():
                batch_layers = self.model.get_bert_embedding(
                    input_ids,
                    segment_ids,
                    input_mask,
                    last_bert_layers=last_bert_layers)

            batch_labels = torch.argmax(label_ids, dim=1)
            batch_labels = batch_labels.cpu().detach().numpy()
            collected_layers.extend(batch_layers)
            collected_labels.extend(batch_labels)

        # Labels stay a Python list; only the embeddings are stacked.
        return torch.stack(collected_layers, dim=0), collected_labels
예제 #4
0
    def get_scores(self, silent=False):
        """Run the model over ``self.eval_examples`` and compute scores.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            A pair ``(score_values, score_names)`` of parallel lists. The
            layout depends on the configuration:

            * ``self.args.is_regression``: RMSE, Kendall, Pearson, Spearman,
              Pearson/Spearman, ``avg_loss`` and the per-document info.
            * ``self.args.num_labels == 2``: binary precision, recall and F1,
              accuracy, ``avg_loss``, hamming loss, confusion matrix and the
              per-document info.
            * otherwise: macro precision/recall/F1, accuracy, ``avg_loss``,
              hamming loss, micro precision/recall/F1, per-class
              precision/recall/F1/support, confusion matrix and the
              per-document info.

            The per-document info is always the last entry: a list of
            ``(doc_id, gold, predicted)`` tuples.

        Raises:
            ValueError: If ``self.args.is_multilabel`` is set and
                ``self.args.loss`` is neither ``'cross-entropy'`` nor
                ``'mse'``.
        """
        if self.args.is_hierarchical:
            eval_features = convert_examples_to_hierarchical_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        else:
            eval_features = convert_examples_to_features(
                self.eval_examples,
                self.args.max_seq_length,
                self.tokenizer,
                use_guid=True,
                is_regression=self.args.is_regression)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        if self.args.is_hierarchical:
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        # Regression targets are kept as floats, class labels as integers.
        if self.args.is_regression:
            label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.float)
        else:
            label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        # NOTE(review): f.guid is read for every feature, but use_guid=True
        # is only passed on the non-hierarchical path - confirm hierarchical
        # features also carry a numeric guid.
        doc_ids = torch.tensor([f.guid for f in eval_features],
                               dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                                  padded_segment_ids, label_ids, doc_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        self.model.eval()

        total_loss = 0
        nb_eval_steps = 0
        predicted_labels, target_labels, target_doc_ids = list(), list(), list(
        )

        for input_ids, input_mask, segment_ids, label_ids, doc_ids in tqdm(
                eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids = label_ids.to(self.args.device)
            target_doc_ids.extend(doc_ids.tolist())

            with torch.no_grad():
                # The first element of the model output is used as logits.
                logits = self.model(input_ids=input_ids,
                                    attention_mask=input_mask,
                                    token_type_ids=segment_ids)[0]

            if self.args.is_multilabel:
                # torch.sigmoid replaces the deprecated F.sigmoid.
                predicted_labels.extend(
                    torch.sigmoid(logits).round().long().cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                # reduction='sum' is the current spelling of the deprecated
                # size_average=False.
                if self.args.loss == 'cross-entropy':
                    criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')
                    loss = criterion(logits.cpu(), label_ids.float().cpu())
                elif self.args.loss == 'mse':
                    criterion = torch.nn.MSELoss(reduction='sum')
                    loss = criterion(torch.sigmoid(logits.cpu()),
                                     label_ids.float().cpu())
                else:
                    # Previously this fell through with `loss` unbound on the
                    # first batch, or stale from the previous batch.
                    raise ValueError(
                        "Unsupported multilabel loss: {!r} (expected "
                        "'cross-entropy' or 'mse')".format(self.args.loss))
            else:
                if self.args.num_labels > 2:
                    predicted_labels.extend(
                        torch.argmax(logits, dim=1).cpu().detach().numpy())
                    # NOTE(review): predictions are argmax indices while the
                    # targets stored here are the raw label rows (the loss
                    # below takes their argmax) - confirm the metrics at the
                    # end receive matching formats.
                    target_labels.extend(label_ids.cpu().detach().numpy())
                    loss = F.cross_entropy(logits,
                                           torch.argmax(label_ids, dim=1))
                else:
                    if self.args.is_regression:
                        predicted_labels.extend(
                            logits.view(-1).cpu().detach().numpy())
                        target_labels.extend(
                            label_ids.view(-1).cpu().detach().numpy())
                        criterion = torch.nn.MSELoss()
                        loss = criterion(
                            logits.view(-1).cpu(),
                            label_ids.view(-1).cpu())
                    else:
                        predicted_labels.extend(
                            torch.argmax(logits, dim=1).cpu().detach().numpy())
                        target_labels.extend(label_ids.cpu().detach().numpy())
                        loss_fct = torch.nn.CrossEntropyLoss()
                        loss = loss_fct(logits.view(-1, self.args.num_labels),
                                        label_ids.view(-1))

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps
            total_loss += loss.item()

            nb_eval_steps += 1

        avg_loss = total_loss / nb_eval_steps
        # Plain-Python copies of the per-example labels for the report entry.
        predicted_label_sets = [
            predicted_label.tolist() for predicted_label in predicted_labels
        ]
        target_label_sets = [
            target_label.tolist() for target_label in target_labels
        ]

        if self.args.is_regression:

            rmse, kendall, pearson, spearman, pearson_spearman = evaluate_for_regression(
                target_labels, predicted_labels)
            score_values = [
                rmse.tolist(), kendall, pearson, spearman, pearson_spearman,
                avg_loss,
                list(
                    zip(target_doc_ids, target_label_sets,
                        predicted_label_sets))
            ]
            score_names = [
                METRIC_RMSE, METRIC_KENDALL, METRIC_PEARSON, METRIC_SPEARMAN,
                METRIC_PEARSON_SPEARMAN, 'avg_loss',
                'label_set_info (id/gold/pred)'
            ]

        else:
            hamming_loss = metrics.hamming_loss(target_labels,
                                                predicted_labels)

            predicted_labels, target_labels = np.array(
                predicted_labels), np.array(target_labels)
            cm = metrics.multilabel_confusion_matrix(target_labels,
                                                     predicted_labels)
            accuracy = metrics.accuracy_score(target_labels, predicted_labels)

            if self.args.num_labels == 2:
                precision = metrics.precision_score(target_labels,
                                                    predicted_labels,
                                                    average='binary')
                recall = metrics.recall_score(target_labels,
                                              predicted_labels,
                                              average='binary')
                f1 = evaluate_with_metric(target_labels, predicted_labels,
                                          METRIC_F1_BINARY)
            else:
                precision_micro = metrics.precision_score(target_labels,
                                                          predicted_labels,
                                                          average='micro')
                recall_micro = metrics.recall_score(target_labels,
                                                    predicted_labels,
                                                    average='micro')
                f1_micro = metrics.f1_score(target_labels,
                                            predicted_labels,
                                            average='micro')
                f1_macro = evaluate_with_metric(target_labels,
                                                predicted_labels,
                                                METRIC_F1_MACRO)
                precision_macro = metrics.precision_score(target_labels,
                                                          predicted_labels,
                                                          average='macro')
                recall_macro = metrics.recall_score(target_labels,
                                                    predicted_labels,
                                                    average='macro')

                precision_class, recall_class, f1_class, support_class = metrics.precision_recall_fscore_support(
                    target_labels, predicted_labels)

            if self.args.num_labels == 2:
                score_values = [
                    precision, recall, f1, accuracy, avg_loss, hamming_loss,
                    cm.tolist(),
                    list(
                        zip(target_doc_ids, target_label_sets,
                            predicted_label_sets))
                ]
                score_names = [
                    'precision', 'recall', 'f1', 'accuracy', 'avg_loss',
                    'hamming_loss', 'confusion_matrix',
                    'label_set_info (id/gold/pred)'
                ]
            else:
                score_values = [
                    precision_macro, recall_macro, f1_macro, accuracy,
                    avg_loss, hamming_loss, precision_micro, recall_micro,
                    f1_micro,
                    precision_class.tolist(),
                    recall_class.tolist(),
                    f1_class.tolist(),
                    support_class.tolist(),
                    cm.tolist(),
                    list(
                        zip(target_doc_ids, target_label_sets,
                            predicted_label_sets))
                ]
                score_names = [
                    'precision_macro', 'recall_macro', METRIC_F1_MACRO,
                    'accuracy', 'avg_loss', 'hamming_loss', 'precision_micro',
                    'recall_micro', 'f1_micro', 'precision_class',
                    'recall_class', 'f1_class', 'support_class',
                    'confusion_matrix', 'label_set_info (id/gold/pred)'
                ]
        return score_values, score_names
예제 #5
0
    def train_gradually(self):
        """Train one layer group first, then the whole model.

        Builds the training ``DataLoader`` and calls
        ``self.train_layer_qroup`` once with ``to_freeze_layer='classifier'``
        and the directory part of ``self.snapshot_path``. It then calls
        ``self.unfreez_all()`` and runs the regular epoch loop: train, score
        the ``'dev'`` split, save the model whenever dev F1 improves, and
        stop once F1 has not improved for ``self.args.patience`` consecutive
        epochs (``self.early_stop`` is then set).
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        # local_rank == -1 means a single-process run; anything else uses the
        # distributed sampler.
        is_distributed = self.args.local_rank != -1
        if is_distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        # train gradually
        # Directory part of the snapshot path (everything before the last '/').
        model_path = self.snapshot_path.split('/')[0:-1]
        model_path = Path('/'.join(model_path))

        # First stage: only the 'classifier' layer group.
        self.train_layer_qroup(train_dataloader,
                               to_freeze_layer='classifier',
                               model_path=model_path)

        # Disabled further stages (pooler, then encoder layers 11 down to 0):
        # self.train_layer_qroup(train_dataloader, to_freeze_layer='bert.pooler', model_path=model_path)
        # for i in range(11,-1, -1):
        #     self.train_layer_qroup(train_dataloader, to_freeze_layer='bert.encoder.layer.'+str(i), model_path=model_path)

        self.unfreez_all()

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if is_distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # sample ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            # get_scores() returns (values, names); the last two values are
            # unpacked but not used here.
            dev_acc, dev_precision, dev_recall, dev_f1, dev_loss, dev_f1_macro, dev_hamming_loss, dev_jaccard_score, dev_predicted_labels, dev_target_labels = dev_evaluator.get_scores(
            )[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss, dev_f1_macro,
                                         dev_hamming_loss, dev_jaccard_score))

            # Update validation results
            if dev_f1 > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    break
예제 #6
0
    def train(self):
        """Train the model with early stopping on a configurable dev measure.

        After every ``self.train_epoch`` call the ``'dev'`` split is scored
        with a fresh ``BertEvaluator``. The early-stopping measure is dev F1
        when ``self.args.early_on_f1`` is set (forced to 0 when dev recall is
        exactly 1), otherwise ``dev_acc``. The model is saved to
        ``self.snapshot_path`` whenever the measure beats
        ``self.best_dev_measure``; training stops once it has not improved
        for ``self.args.patience`` consecutive epochs (``self.early_stop`` is
        then set).
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        # local_rank == -1 means a single-process run; anything else uses the
        # distributed sampler.
        is_distributed = self.args.local_rank != -1
        if is_distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if is_distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # sample ordering.
                train_sampler.set_epoch(epoch)

            loss_epoch = self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            # get_scores() returns (values, names); the last two values are
            # unpacked but not used here.
            dev_acc, dev_precision, dev_recall, dev_f1, dev_loss, dev_f1_macro, dev_hamming_loss, dev_jaccard_score, dev_predicted_labels, dev_target_labels = dev_evaluator.get_scores(
            )[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss, dev_f1_macro,
                                         dev_hamming_loss, dev_jaccard_score,
                                         loss_epoch))

            if self.args.early_on_f1:
                # A dev recall of exactly 1 is scored as 0 so that such an
                # epoch never becomes the best snapshot.
                # NOTE(review): presumably guards against a model that
                # predicts the positive class everywhere - confirm.
                if dev_recall != 1:
                    dev_measure = dev_f1
                else:
                    dev_measure = 0
                measure_name = 'F1'
            else:
                dev_measure = dev_acc
                measure_name = 'Balanced Acc'

            # Update validation results
            if dev_measure > self.best_dev_measure:
                self.unimproved_iters = 0
                self.best_dev_measure = dev_measure
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    print("Early Stopping. Epoch: {}, Best {}: {}".format(
                        epoch, measure_name, self.best_dev_measure))
                    break
예제 #7
0
    def train(self):
        """Train the model and save learning-curve plots of the dev scores.

        After every ``self.train_epoch`` call the ``'dev'`` split is scored
        with a fresh ``BertEvaluator`` and the scores are recorded. The model
        is saved to ``self.snapshot_path`` whenever dev F1 improves, and
        training stops once F1 has not improved for ``self.args.patience``
        consecutive epochs (``self.early_stop`` is then set). When at least
        one epoch ran, the recorded scores are plotted to
        ``accuracy_curves.png`` and ``loss_curves.png``.
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        # local_rank == -1 means a single-process run; anything else uses the
        # distributed sampler.
        is_distributed = self.args.local_rank != -1
        if is_distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)

        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        # results for graphing learning curves
        results = []
        iterator = trange(int(self.args.epochs), desc="Epoch")
        for epoch in iterator:
            if is_distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this call every epoch replays the same
                # sample ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            dev_evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.args,
                                          split='dev')
            # get_scores() returns (values, names); only the values are used.
            dev_acc, dev_precision, dev_recall, dev_f1, dev_loss = dev_evaluator.get_scores(
            )[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                         self.args.epochs, dev_acc,
                                         dev_precision, dev_recall, dev_f1,
                                         dev_loss))

            results.append([
                epoch + 1, dev_acc, dev_precision, dev_recall, dev_f1, dev_loss
            ])

            # Update validation results
            if dev_f1 > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1
                torch.save(self.model, self.snapshot_path)

            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    # Close the progress bar explicitly before leaving early.
                    iterator.close()
                    break

        # No epoch ran (e.g. zero epochs requested), so there are no scores
        # to build the six-column frame from and nothing to plot.
        if not results:
            return

        # create learning curves
        results_frame = pd.DataFrame(data=np.array(results),
                                     columns=['Epoch', 'Accuracy', 'Precision', 'Recall', 'F1', 'Loss']) \
            .set_index('Epoch')

        ax_acc = results_frame[['Accuracy', 'Precision', 'Recall',
                                'F1']].plot()
        ax_loss = results_frame[['Loss']].plot()

        # Both files are written relative to the current working directory.
        ax_acc.get_figure().savefig('accuracy_curves.png')
        ax_loss.get_figure().savefig('loss_curves.png')
예제 #8
0
    def get_pred(self, silent=False):
        """Run the model over the evaluation examples and collect raw outputs.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            A ``(target_labels, output_preds)`` tuple of numpy arrays.
            ``output_preds`` is the first element of the model output for
            every batch, concatenated along axis 0. ``target_labels`` is the
            stored label ids when ``self.args.is_multilabel`` is set, and the
            argmax over dim 1 of the label ids otherwise. When the dataloader
            yields no batch at all, ``([], None)`` is returned.
        """
        if self.args.is_hierarchical:
            eval_features = convert_examples_to_hierarchical_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        else:
            eval_features = convert_examples_to_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        if self.args.is_hierarchical:
            # The return value is ignored, so pad_input_matrix presumably pads
            # the nested lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                                  padded_segment_ids, label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        self.model.eval()

        # Collect one array per batch and concatenate once at the end; growing
        # an array with np.append on every step copies it again each time.
        logit_batches, target_batches = [], []
        for input_ids, input_mask, segment_ids, label_ids in tqdm(
                eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)

            with torch.no_grad():
                logits = self.model(input_ids, input_mask, segment_ids)[0]

            logit_batches.append(logits.cpu().detach().numpy())

            # Labels are only converted to numpy, so they never need to visit
            # the compute device.
            if self.args.is_multilabel:
                target_batches.append(label_ids.cpu().detach().numpy())
            else:
                target_batches.append(
                    torch.argmax(label_ids, dim=1).cpu().detach().numpy())

        if not logit_batches:
            # Same result the method has always produced for an empty loader.
            return list(), None

        # The label ids and logits of every batch are concatenated here.
        target_labels = np.concatenate(target_batches, axis=0)
        output_preds = np.concatenate(logit_batches, axis=0)
        return target_labels, output_preds
    def get_scores(self, silent=False):
        """Evaluate coarse and fine predictions on the evaluation examples.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            Two ``[metrics, metric_names]`` lists, the first for the fine
            labels and the second for the coarse labels. ``metrics`` is
            whatever ``get_metrics`` returns; the names are the fixed list
            below with a ``_fine`` / ``_coarse`` suffix (presumably in the
            same order as the ``get_metrics`` output -- TODO confirm).

        Raises:
            ValueError: If ``self.args.loss`` is neither ``'cross-entropy'``
                nor ``'mse'``.
        """
        # Pick the loss once. Previously an unsupported name only surfaced as
        # an UnboundLocalError in the middle of the evaluation loop.
        if self.args.loss == 'cross-entropy':
            criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')
            squash = None  # BCEWithLogitsLoss consumes raw logits
        elif self.args.loss == 'mse':
            criterion = torch.nn.MSELoss(reduction='sum')
            squash = torch.nn.Sigmoid()
        else:
            raise ValueError(
                "Unsupported loss for evaluation: {!r}".format(self.args.loss))

        eval_features = convert_examples_to_features(self.eval_examples,
                                                     self.args.max_seq_length,
                                                     self.tokenizer,
                                                     use_guid=True)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids_fine = torch.tensor([f.label_id for f in eval_features],
                                      dtype=torch.long)
        doc_ids = torch.tensor([f.guid for f in eval_features],
                               dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                                  padded_segment_ids, label_ids_fine, doc_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        self.model.eval()

        total_loss_fine, total_loss_coarse = 0, 0
        nb_eval_steps = 0
        predicted_labels_coarse, predicted_labels_fine = list(), list()
        target_labels_coarse, target_labels_fine = list(), list()
        target_doc_ids = list()

        for input_ids, input_mask, segment_ids, label_ids_fine, doc_ids in tqdm(
                eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids_fine = label_ids_fine.to(self.args.device)
            target_doc_ids.extend(doc_ids.tolist())

            with torch.no_grad():
                logits_coarse, logits_fine = self.model(
                    input_ids=input_ids,
                    attention_mask=input_mask,
                    token_type_ids=segment_ids)

            # Threshold the sigmoid at 0.5 to get binary coarse predictions.
            preds_coarse = torch.sigmoid(
                logits_coarse).round().long().cpu().detach().numpy()
            predicted_labels_coarse.extend(preds_coarse)

            # get coarse labels from the fine labels
            label_ids_coarse = get_coarse_labels(
                label_ids_fine, self.args.num_coarse_labels,
                self.args.parent_to_child_index_map, self.args.device)
            target_labels_coarse.extend(
                label_ids_coarse.cpu().detach().numpy())

            # mask fine predictions using coarse predictions: every fine
            # position outside the mask is forced to 0
            preds_fine = torch.sigmoid(
                logits_fine).round().long().cpu().detach().numpy()
            mask_fine = get_fine_mask(torch.Tensor(preds_coarse),
                                      self.args.parent_to_child_index_map)
            preds_fine[~mask_fine] = 0
            predicted_labels_fine.extend(preds_fine)

            target_labels_fine.extend(label_ids_fine.cpu().detach().numpy())

            # Losses are computed on CPU from the unmasked logits.
            scores_fine = logits_fine.cpu()
            scores_coarse = logits_coarse.cpu()
            if squash is not None:
                scores_fine = squash(scores_fine)
                scores_coarse = squash(scores_coarse)
            loss_fine = criterion(scores_fine, label_ids_fine.float().cpu())
            loss_coarse = criterion(scores_coarse,
                                    label_ids_coarse.float().cpu())

            total_loss_fine += loss_fine.item()
            total_loss_coarse += loss_coarse.item()
            nb_eval_steps += 1

        metrics_fine = get_metrics(target_labels_fine, predicted_labels_fine,
                                   target_doc_ids, total_loss_fine,
                                   nb_eval_steps)
        metrics_coarse = get_metrics(target_labels_coarse,
                                     predicted_labels_coarse, target_doc_ids,
                                     total_loss_coarse, nb_eval_steps)

        metric_names = [
            'precision_macro', 'recall_macro', 'f1_macro', 'accuracy',
            'avg_loss', 'hamming_loss', 'precision_micro', 'recall_micro',
            'f1_micro', 'precision_class', 'recall_class', 'f1_class',
            'support_class', 'confusion_matrix', 'id_gold_pred'
        ]

        metric_names_fine = [name + '_fine' for name in metric_names]
        metric_names_coarse = [name + '_coarse' for name in metric_names]
        return [metrics_fine,
                metric_names_fine], [metrics_coarse, metric_names_coarse]
예제 #10
0
    def get_scores(self, silent=False):
        """Evaluate the model on the evaluation examples.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            A ``(values, names)`` tuple of two parallel lists. ``values``
            holds balanced accuracy, precision, recall, F1 (``f1_micro``),
            average loss per batch, F1 (``f1_macro``), Hamming loss, Jaccard
            score, and the predicted and target label arrays. In the
            multi-label case the scores use ``'micro'`` averaging and
            ``f1_macro`` uses ``'macro'``; otherwise everything uses
            ``'binary'``.
        """
        if self.args.is_hierarchical:
            eval_features = convert_examples_to_hierarchical_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        else:
            eval_features = convert_examples_to_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        if self.args.is_hierarchical:
            # The return value is ignored, so pad_input_matrix presumably pads
            # the nested lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                                  padded_segment_ids, label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        self.model.eval()

        # The averaging mode depends only on the task type, so it is chosen
        # once here instead of being re-bound (or left unbound when there are
        # no batches) inside the loop.
        if self.args.is_multilabel:
            average, average_mac = 'micro', 'macro'
        else:
            average, average_mac = 'binary', 'binary'

        total_loss = 0
        nb_eval_steps = 0
        predicted_labels, target_labels = list(), list()

        for input_ids, input_mask, segment_ids, label_ids in tqdm(
                eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids = label_ids.to(self.args.device)

            with torch.no_grad():
                # NOTE(review): segment ids are passed before the mask here;
                # confirm this matches the model's forward() signature.
                logits = self.model(input_ids, segment_ids, input_mask)
                if isinstance(logits, tuple):
                    logits, _ = logits

            if self.args.is_multilabel:
                # torch.sigmoid replaces the deprecated F.sigmoid.
                predicted_labels.extend(
                    torch.sigmoid(logits).round().long().cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                # reduction='sum' is the supported spelling of the deprecated
                # size_average=False.
                loss = F.binary_cross_entropy_with_logits(logits,
                                                          label_ids.float(),
                                                          reduction='sum')
            else:
                gold = torch.argmax(label_ids, dim=1)
                predicted_labels.extend(
                    torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(gold.cpu().detach().numpy())
                loss = F.cross_entropy(logits, gold)

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps
            total_loss += loss.item()
            nb_eval_steps += 1

        predicted_labels, target_labels = np.array(predicted_labels), np.array(
            target_labels)
        accuracy = metrics.balanced_accuracy_score(target_labels,
                                                   predicted_labels)
        precision = metrics.precision_score(target_labels,
                                            predicted_labels,
                                            average=average)
        recall = metrics.recall_score(target_labels,
                                      predicted_labels,
                                      average=average)
        avg_loss = total_loss / nb_eval_steps

        hamming_loss = metrics.hamming_loss(target_labels, predicted_labels)
        jaccard_score = metrics.jaccard_score(target_labels,
                                              predicted_labels,
                                              average=average)
        f1_micro = metrics.f1_score(target_labels,
                                    predicted_labels,
                                    average=average)
        f1_macro = metrics.f1_score(target_labels,
                                    predicted_labels,
                                    average=average_mac)

        return [accuracy, precision, recall, f1_micro, avg_loss, f1_macro, hamming_loss, jaccard_score, predicted_labels, target_labels],\
               ['accuracy', 'precision', 'recall', 'f1_micro', 'avg_loss', 'f1_macro', 'hamming_loss', 'jaccard', 'predicted_labels', 'target_labels']
예제 #11
0
    def get_scores(self, silent=False):
        """Evaluate the model and optionally pickle predictions and metrics.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            A ``(values, names)`` tuple of two parallel lists holding
            accuracy, micro-averaged precision, recall and F1, and the average
            loss per batch.

        Side effects:
            When ``self.dump_predictions`` is set, writes
            ``<split>_<model>_<training_file>_<max_seq_length>_predictions.p``
            and ``..._metrics.p`` under ``args.data_dir/args.dataset``.
        """

        def dump_artifact(payload, name_template):
            """Pickle ``payload`` to the dataset directory and close the file."""
            file_name = name_template.format(self.split, self.args.model,
                                             self.args.training_file,
                                             self.args.max_seq_length)
            target = os.path.join(self.args.data_dir, self.args.dataset,
                                  file_name)
            # A context manager guarantees the handle is flushed and closed;
            # the bare open() used before leaked it.
            with open(target, 'wb') as handle:
                pickle.dump(payload, handle)

        if self.args.is_hierarchical:
            eval_features = convert_examples_to_hierarchical_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        elif 'longformer' in self.args.model:
            eval_features = convert_examples_to_features_long(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        elif 'reformer' in self.args.model:
            eval_features = convert_examples_to_features_long(
                self.eval_examples, self.args.max_seq_length, self.tokenizer,
                'reformer')
        else:
            eval_features = convert_examples_to_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        if self.args.is_hierarchical:
            # The return value is ignored, so pad_input_matrix presumably pads
            # the nested lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                                  padded_segment_ids, label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        self.model.eval()

        total_loss = 0
        nb_eval_steps = 0
        predicted_labels, target_labels = list(), list()

        for input_ids, input_mask, segment_ids, label_ids in tqdm(
                eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids = label_ids.to(self.args.device)

            with torch.no_grad():
                logits = self.model(input_ids, input_mask, segment_ids)[0]

            if self.args.is_multilabel:
                # torch.sigmoid replaces the deprecated F.sigmoid.
                predicted_labels.extend(
                    torch.sigmoid(logits).round().long().cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                # reduction='sum' is the supported spelling of the deprecated
                # size_average=False.
                loss = F.binary_cross_entropy_with_logits(logits,
                                                          label_ids.float(),
                                                          reduction='sum')
            else:
                gold = torch.argmax(label_ids, dim=1)
                predicted_labels.extend(
                    torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(gold.cpu().detach().numpy())
                loss = F.cross_entropy(logits, gold)

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps
            total_loss += loss.item()
            nb_eval_steps += 1

        predicted_labels, target_labels = np.array(predicted_labels), np.array(
            target_labels)

        # Predictions are written before the metrics are computed so they
        # survive even if a metric call below raises.
        if self.dump_predictions:
            dump_artifact((predicted_labels, target_labels),
                          '{}_{}_{}_{}_predictions.p')

        accuracy = metrics.accuracy_score(target_labels, predicted_labels)
        precision = metrics.precision_score(target_labels,
                                            predicted_labels,
                                            average='micro')
        recall = metrics.recall_score(target_labels,
                                      predicted_labels,
                                      average='micro')
        f1 = metrics.f1_score(target_labels, predicted_labels, average='micro')
        avg_loss = total_loss / nb_eval_steps

        scores = [accuracy, precision, recall, f1, avg_loss]
        score_names = ['accuracy', 'precision', 'recall', 'f1', 'avg_loss']

        if self.dump_predictions:
            dump_artifact((scores, score_names), '{}_{}_{}_{}_metrics.p')

        return scores, score_names
예제 #12
0
    def get_scores(self, silent=False):
        """Evaluate the model on the evaluation examples and time the run.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            A ``(values, names)`` tuple of two parallel lists holding
            accuracy, micro-averaged precision, recall and F1, the average
            loss per batch, and the mean squared error between target and
            predicted labels.

        Side effects:
            Prints the elapsed evaluation time (batch loop plus metric
            computation) to stdout.
        """
        if self.args.is_hierarchical:
            eval_features = convert_examples_to_hierarchical_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        else:
            eval_features = convert_examples_to_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        if self.args.is_hierarchical:
            # The return value is ignored, so pad_input_matrix presumably pads
            # the nested lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=self.args.batch_size)

        self.model.eval()

        total_loss = 0
        nb_eval_steps = 0
        predicted_labels, target_labels = list(), list()
        start_time = time.time()
        for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids = label_ids.to(self.args.device)

            with torch.no_grad():
                logits = self.model(input_ids, input_mask, segment_ids)[0]

            if self.args.is_multilabel:
                # torch.sigmoid replaces the deprecated F.sigmoid.
                predicted_labels.extend(torch.sigmoid(logits).round().long().cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                # reduction='sum' is the supported spelling of the deprecated
                # size_average=False.
                loss = F.binary_cross_entropy_with_logits(logits, label_ids.float(), reduction='sum')
            else:
                gold = torch.argmax(label_ids, dim=1)
                predicted_labels.extend(torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(gold.cpu().detach().numpy())
                loss = F.cross_entropy(logits, gold)

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps
            total_loss += loss.item()
            nb_eval_steps += 1

        predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)
        # Only the exact-match accuracy is reported; the off-by-one accuracies
        # that used to be computed here were never returned or used.
        accuracy_real = metrics.accuracy_score(target_labels, predicted_labels)
        precision = metrics.precision_score(target_labels, predicted_labels, average='micro')
        recall = metrics.recall_score(target_labels, predicted_labels, average='micro')
        f1 = metrics.f1_score(target_labels, predicted_labels, average='micro')
        mse = metrics.mean_squared_error(target_labels, predicted_labels)

        avg_loss = total_loss / nb_eval_steps
        print("Evaluation Time: {}".format(time.time()-start_time))
        return [accuracy_real, precision, recall, f1, avg_loss, mse], ['accuracy', 'precision', 'recall', 'f1', 'avg_loss', 'mse']
예제 #13
0
    def get_scores(self, silent=False):
        """Evaluate the model and write predictions to a CSV-formatted file.

        In the multi-label case each prediction row is the softmax over the
        logits, turned into a 0/1 indicator marking the row maximum (ties mark
        every maximal position). Otherwise predictions and targets are class
        indices obtained with argmax.

        Args:
            silent: When true, the tqdm progress bar is disabled.

        Returns:
            A ``(values, names)`` tuple of two parallel lists holding
            accuracy, precision, recall, F1 and the average loss per batch.
            Scores use ``'weighted'`` averaging for multi-label runs and
            ``'binary'`` with ``pos_label=1`` otherwise.

        Side effects:
            Writes ``predictions_<k>.txt`` in the working directory, where
            ``k`` is a random integer below 1000, containing the predicted and
            target labels as strings.
        """
        if self.args.is_hierarchical:
            eval_features = convert_examples_to_hierarchical_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        else:
            eval_features = convert_examples_to_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        if self.args.is_hierarchical:
            # The return value is ignored, so pad_input_matrix presumably pads
            # the nested lists in place -- TODO confirm against its definition.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=self.args.batch_size)

        self.model.eval()

        total_loss = 0
        nb_eval_steps = 0
        predicted_labels, target_labels = list(), list()

        for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.args.device)
            input_mask = input_mask.to(self.args.device)
            segment_ids = segment_ids.to(self.args.device)
            label_ids = label_ids.to(self.args.device)

            with torch.no_grad():
                # NOTE(review): segment ids are passed before the mask here;
                # confirm this matches the model's forward() signature.
                logits = self.model(input_ids, segment_ids, input_mask)

            if self.args.is_multilabel:
                predicted_labels.extend(F.softmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                # reduction='sum' is the supported spelling of the deprecated
                # size_average=False.
                loss = F.binary_cross_entropy_with_logits(logits, label_ids.float(), reduction='sum')
            else:
                gold = torch.argmax(label_ids, dim=1)
                predicted_labels.extend(torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(gold.cpu().detach().numpy())
                loss = F.cross_entropy(logits, gold)

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                loss = loss / self.args.gradient_accumulation_steps
            total_loss += loss.item()
            nb_eval_steps += 1

        if self.args.is_multilabel:
            score_method = 'weighted'
            pos_label = None
        else:
            score_method = 'binary'
            pos_label = 1

        predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)
        # Probability rows become 0/1 indicators of their maximum. Single-label
        # predictions are already 1-D class indices; reducing them over axis 1
        # used to raise an AxisError, so they are left untouched.
        if predicted_labels.ndim > 1:
            predicted_labels = (predicted_labels == predicted_labels.max(axis=1, keepdims=True)).astype(int)

        accuracy = metrics.accuracy_score(target_labels, predicted_labels)
        precision = metrics.precision_score(target_labels, predicted_labels, average=score_method, pos_label=pos_label)
        recall = metrics.recall_score(target_labels, predicted_labels, average=score_method, pos_label=pos_label)
        f1 = metrics.f1_score(target_labels, predicted_labels, average=score_method, pos_label=pos_label)
        avg_loss = total_loss / nb_eval_steps

        def as_strings(labels):
            """Render labels as strings, joining each 2-D row into one token."""
            text = labels.astype(str)
            if text.ndim > 1:
                return np.apply_along_axis(lambda row: ''.join(row), 1, text)
            return text

        predicted_labels = as_strings(predicted_labels)
        target_labels = as_strings(target_labels)

        # NOTE(review): the suffix is random, so repeated runs may overwrite
        # an earlier predictions file that drew the same number.
        x = np.random.randint(1000)
        with open('predictions_{}.txt'.format(x), 'w') as f:
            pred = pd.DataFrame(
                {
                    'predicted': predicted_labels,
                    'target': target_labels
                }
            )
            pred.to_csv(f)

        return [accuracy, precision, recall, f1, avg_loss], ['accuracy', 'precision', 'recall', 'f1', 'avg_loss']
    def train(self):
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples,
                self.args.max_seq_length,
                self.tokenizer,
                use_guid=True)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids,
                                          dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)

        train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        print('Begin training: ',
              datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
        start_time = time.monotonic()
        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            self.train_epoch(train_dataloader)
            print('COARSE Train loss: ', self.tr_loss_coarse)
            print('FINE Train loss: ', self.tr_loss_fine)
            if epoch == 0:
                self.initial_tr_loss_fine = self.tr_loss_fine
            if self.args.evaluate_dev:
                dev_evaluator = BertHierarchicalEvaluator(self.model,
                                                          self.processor,
                                                          self.tokenizer,
                                                          self.args,
                                                          split='dev')
                scores_fine, scores_coarse = dev_evaluator.get_scores(
                    silent=True)
                dev_precision_fine, dev_recall_fine, dev_f1_fine, dev_acc_fine, dev_loss_fine = scores_fine[
                    0][:5]
                dev_precision_coarse, dev_recall_coarse, dev_f1_coarse, dev_acc_coarse, dev_loss_coarse = scores_coarse[
                    0][:5]

                # Print validation results
                tqdm.write('COARSE: ' + self.log_header)
                tqdm.write(
                    self.log_template.format(epoch + 1, self.iterations,
                                             epoch + 1, self.args.epochs,
                                             dev_acc_coarse,
                                             dev_precision_coarse,
                                             dev_recall_coarse, dev_f1_coarse,
                                             dev_loss_coarse))
                tqdm.write('FINE: ' + self.log_header)
                tqdm.write(
                    self.log_template.format(epoch + 1, self.iterations,
                                             epoch + 1, self.args.epochs,
                                             dev_acc_fine, dev_precision_fine,
                                             dev_recall_fine, dev_f1_fine,
                                             dev_loss_fine))

                # Update validation results
                if dev_f1_fine > self.best_dev_f1:
                    self.unimproved_iters = 0
                    self.best_dev_f1 = dev_f1_fine
                    torch.save(self.model, self.snapshot_path)

                else:
                    self.unimproved_iters += 1
                    if self.unimproved_iters >= self.args.patience:
                        self.early_stop = True
                        tqdm.write(
                            "Early Stopping. Epoch: {}, Best Dev {}: {}".
                            format(epoch, self.args.eval_metric,
                                   self.best_dev_f1))
                        break
            if self.args.evaluate_test:
                # when evaluating on test, we can't use dev
                # so check train loss is converging
                if epoch == self.patience_training:
                    loss_percent = (
                        self.initial_tr_loss_fine -
                        self.tr_loss_fine) / self.initial_tr_loss_fine
                    if loss_percent <= self.minimum_loss_percent_decrease:
                        self.training_converged = False
                        tqdm.write(
                            "Training failed to converge. Epoch: {}, Loss percent: {}"
                            .format(epoch, loss_percent))
                        break
        end_time = time.monotonic()
        # save model at end of training
        # when evaluating on test
        if self.args.evaluate_test:
            torch.save(self.model, self.snapshot_path)
        print('End training: ',
              datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
        print('Time elapsed: ', end_time - start_time)
예제 #15
0
    def train(self):
        """Run the training loop over ``self.train_examples``.

        The examples are converted to features (hierarchical or flat,
        depending on ``self.args.is_hierarchical``), packed into a
        ``TensorDataset`` and served through a randomly sampled
        ``DataLoader``.  ``self.train_epoch`` is then called once per epoch.

        After every epoch:

        * if ``self.args.evaluate_dev`` is set, a ``BertEvaluator`` is run on
          the ``'dev'`` split, its scores are logged, and the model is saved
          to ``self.snapshot_path`` whenever ``self.check_dev_improved``
          accepts the selected metric; otherwise ``self.unimproved_iters`` is
          incremented and training stops once it reaches
          ``self.args.patience``;
        * if ``self.args.evaluate_test`` is set, the relative drop in training
          loss is checked once, at epoch index ``self.patience_training``;
          training stops with ``self.training_converged = False`` when the
          drop does not exceed ``self.minimum_loss_percent_decrease``.

        When ``self.args.evaluate_test`` is set the model is also saved once
        the loop has finished.  Start/end timestamps and the elapsed time are
        printed.
        """
        args = self.args

        # Choose the featurizer that matches the model layout.
        if args.is_hierarchical:
            features = convert_examples_to_hierarchical_features(
                self.train_examples, args.max_seq_length, self.tokenizer)
        else:
            features = convert_examples_to_features(
                self.train_examples,
                args.max_seq_length,
                self.tokenizer,
                use_guid=True,
                is_regression=args.is_regression)

        # One list per model input, in the order the dataset stores them.
        input_columns = [[getattr(feature, field) for feature in features]
                         for field in ("input_ids", "input_mask",
                                       "segment_ids")]

        if args.is_hierarchical:
            # The return value is not used, so the helper presumably pads
            # each list in place -- confirm against pad_input_matrix.
            for column in input_columns:
                pad_input_matrix(column, args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        input_tensors = [torch.tensor(column, dtype=torch.long)
                         for column in input_columns]

        # Regression targets are stored as floats, class labels as longs.
        label_dtype = torch.float if args.is_regression else torch.long
        label_ids = torch.tensor([feature.label_id for feature in features],
                                 dtype=label_dtype)

        train_data = TensorDataset(*input_tensors, label_ids)
        train_dataloader = DataLoader(train_data,
                                      sampler=RandomSampler(train_data),
                                      batch_size=args.batch_size)

        timestamp_format = "%Y-%m-%d_%H-%M-%S"
        print('Begin training: ',
              datetime.datetime.now().strftime(timestamp_format))
        start_time = time.monotonic()

        for epoch in trange(int(args.epochs), desc="Epoch"):
            self.train_epoch(train_dataloader)
            print('Train loss: ', self.tr_loss)
            if epoch == 0:
                # Baseline for the convergence check further down.
                self.initial_tr_loss = self.tr_loss

            if args.evaluate_dev:
                evaluator = BertEvaluator(self.model,
                                          self.processor,
                                          self.tokenizer,
                                          args,
                                          split='dev')
                scores, score_names = evaluator.get_scores()
                metric_position = score_names.index(args.eval_metric)
                dev_metric = scores[metric_position]

                # Leading columns shared by both log templates.
                progress = (epoch + 1, self.iterations, epoch + 1,
                            args.epochs)

                # Report the validation scores.
                if args.is_regression:
                    (rmse, kendall, pearson, spearman, pearson_spearman,
                     loss) = scores[:6]
                    tqdm.write(self.log_header_regression)
                    tqdm.write(
                        self.log_template_regression.format(
                            *progress, rmse, kendall, pearson, spearman,
                            pearson_spearman, loss))
                else:
                    precision, recall, f1, accuracy, loss = scores[:5]
                    tqdm.write(self.log_header_classification)
                    tqdm.write(
                        self.log_template_classification.format(
                            *progress, accuracy, precision, recall, f1,
                            loss))

                # Track the best dev result and apply early stopping.
                if not self.check_dev_improved(dev_metric):
                    self.unimproved_iters += 1
                    if self.unimproved_iters >= args.patience:
                        self.early_stop = True
                        tqdm.write(
                            "Early Stopping. Epoch: {}, Best Dev {}: {}".
                            format(epoch, args.eval_metric,
                                   self.best_dev_metric))
                        break
                else:
                    self.unimproved_iters = 0
                    self.best_dev_metric = dev_metric
                    torch.save(self.model, self.snapshot_path)

            # In test mode the training loss is checked for convergence,
            # once, at the configured epoch index.
            if args.evaluate_test and epoch == self.patience_training:
                initial_loss = self.initial_tr_loss
                loss_percent = (initial_loss - self.tr_loss) / initial_loss
                if loss_percent <= self.minimum_loss_percent_decrease:
                    self.training_converged = False
                    tqdm.write(
                        "Training failed to converge. Epoch: {}, Loss percent: {}"
                        .format(epoch, loss_percent))
                    break

        end_time = time.monotonic()

        # In test mode, keep whatever the model looks like when the loop ends.
        if args.evaluate_test:
            torch.save(self.model, self.snapshot_path)
        print('End training: ',
              datetime.datetime.now().strftime(timestamp_format))
        print('Time elapsed: ', end_time - start_time)