Python BertForTokenClassification.eval 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pytorch_pretrained_bert

메소드/함수: eval

hotexamples.com에서의 예제들: 5

Python BertForTokenClassification.eval - 5개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한, pytorch_pretrained_bert.BertForTokenClassification.eval의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

from_pretrained(30)

BertForTokenClassification(7)

eval(5)

to(5)

load_state_dict(4)

cuda(2)

half(2)

예제 #1

파일 보기

def load_model():
    """Build the token-classification model and its tokenizer for inference.

    The model is constructed from the JSON config at ``MODEL_CONFIG_PATH``
    with one output label per entry in ``LABELS``, its weights are restored
    from ``MODEL_PATH``, and it is switched to evaluation mode.

    Returns:
        A ``(model, tokenizer)`` tuple; the tokenizer is the lower-casing
        ``bert-base-uncased`` ``BertTokenizer``.
    """
    config = BertConfig.from_json_file(MODEL_CONFIG_PATH)
    model = BertForTokenClassification(config, num_labels=len(LABELS))
    # Map the stored tensors to CPU so a checkpoint saved from a GPU process
    # can still be loaded on a machine without CUDA; the freshly built model
    # is not moved to any other device in this function.
    state_dict = torch.load(MODEL_PATH, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
    return model, tokenizer

예제 #2

파일 보기

파일: modeling_test.py 프로젝트: arjunnlp/hedwig-anlp

 def create_bert_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
     """Run a fresh BertForTokenClassification in eval mode, with and without labels.

     The model is built from ``config`` with ``self.num_labels`` labels and
     called twice on the same inputs: first with ``token_labels`` and then
     without them. ``sequence_labels`` and ``choice_labels`` are accepted but
     not used here.

     Returns:
         A dict whose ``"loss"`` entry is the output of the call that received
         ``token_labels`` and whose ``"logits"`` entry is the output of the
         call that did not.
     """
     classifier = BertForTokenClassification(config=config, num_labels=self.num_labels)
     classifier.eval()
     # Dict values are evaluated top to bottom, so the labelled call still
     # runs before the unlabelled one.
     return {
         "loss": classifier(input_ids, token_type_ids, input_mask, token_labels),
         "logits": classifier(input_ids, token_type_ids, input_mask),
     }

예제 #3

파일 보기

def evaluate(args:Dict):
    """Evaluate a saved BERT token-classification model on a test set.

    The model weights, config and vocabulary are read from
    ``args['--model-root']`` (``./models`` when that entry is falsy). The test
    sentences and labels come from ``sentence.Sentence(args['--test-src'])``.
    Loss, accuracy, F1 score, a classification report and the number of test
    sentences are printed to stdout.

    Args:
        args: Option mapping. The keys read here are ``--model-root``,
            ``--test-src``, ``--cuda``, ``--max-len`` and ``--batch-size``.

    Returns:
        None. If the test set yields no batches, a message is written to
        stderr and no metrics are printed.
    """
    model_root = args['--model-root'] if args['--model-root'] else './models'
    print("load model from {}".format(model_root), file=sys.stderr)

    dataLoader = sentence.Sentence(args['--test-src'])

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")

    output_model_file = os.path.join(model_root, "model_file.bin")
    output_config_file = os.path.join(model_root, "config_file.bin")
    output_vocab_file = os.path.join(model_root, "vocab.txt")
    config = BertConfig.from_json_file(output_config_file)
    model = BertForTokenClassification(config, num_labels=len(dataLoader.tag2idx))
    # map_location lets a checkpoint saved on a GPU be restored when running
    # on CPU (and vice versa) instead of failing while deserializing.
    state_dict = torch.load(output_model_file, map_location=device)
    model.load_state_dict(state_dict)
    # Single source of truth for the target device; a no-op when on CPU.
    model = model.to(device)
    tokenizer = BertTokenizer(output_vocab_file, do_lower_case=False)

    tokenized_texts = [tokenizer.tokenize(sent) for sent in dataLoader.sentences]

    MAX_LEN = int(args['--max-len'])

    input_ids_test = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                              maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")

    tags_test = pad_sequences([[dataLoader.tag2idx.get(l) for l in lab] for lab in dataLoader.labels],
                         maxlen=MAX_LEN, value=dataLoader.tag2idx["O"], padding="post",
                         dtype="long", truncating="post")

    # Non-zero ids are attended to. The masks are computed before the
    # end-marker patch below, exactly as in the original ordering.
    attention_masks_test = [[float(i > 0) for i in ii] for ii in input_ids_test]

    # Sequences that do not contain id 102 get it written into their last
    # position, and the matching tag is forced to "O".
    # NOTE(review): 102 is presumably the [SEP] id of the vocabulary in use;
    # confirm against vocab.txt.
    for i, inp in enumerate(input_ids_test):
        if 102 not in inp:
            inp[-1] = 102
            tags_test[i][-1] = dataLoader.tag2idx.get("O")

    te_inputs = torch.tensor(input_ids_test).to(torch.int64)
    te_tags = torch.tensor(tags_test).to(torch.int64)
    te_masks = torch.tensor(attention_masks_test)

    test_data = TensorDataset(te_inputs, te_masks, te_tags)
    test_sampler = SequentialSampler(test_data)
    test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=int(args['--batch-size']))

    model.eval()
    predictions = []
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps = 0
    for batch in test_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            # The model is called twice: with labels (output used as the
            # loss) and without labels (output used as the logits).
            tmp_eval_loss = model(b_input_ids, token_type_ids=None,
                                  attention_mask=b_input_mask, labels=b_labels)
            logits = model(b_input_ids, token_type_ids=None,
                           attention_mask=b_input_mask)

        logits = logits.detach().cpu().numpy()
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        label_ids = b_labels.to('cpu').numpy()
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)

        eval_loss += tmp_eval_loss.mean().item()
        eval_accuracy += tmp_eval_accuracy

        nb_eval_steps += 1

    if nb_eval_steps == 0:
        # An empty test set would otherwise end in a ZeroDivisionError below.
        print("no test batches to evaluate", file=sys.stderr)
        return

    pred_tags = [[dataLoader.tags_vals[p_i] for p_i in p] for p in predictions]
    # Gold tags are taken from the padded label matrix, one row per sentence.
    tags_test_fin = [[dataLoader.tags_vals[l_i] for l_i in l] for l in tags_test]

    print("Test loss: {}".format(eval_loss / nb_eval_steps))
    print("Test Accuracy: {}".format(eval_accuracy / nb_eval_steps))
    print("Test F1-Score: {}".format(f1_score(tags_test_fin, pred_tags)))

    print(classification_report(tags_test_fin, pred_tags))

    print("Number of Test sentences: ", len(tags_test_fin))

예제 #4

파일 보기

파일: run.py 프로젝트: artkh24/allnews-am

def evaluate(args: Dict):
    """Evaluate a saved BERT token-classification model on a test split.

    The model weights, config and vocabulary are read from
    ``args['--model-root']`` (``./models`` when that entry is falsy). Labels
    are read from ``labels.txt`` under ``args['--test-src']`` and the test
    data is produced by ``loadData``. Loss, accuracy and F1 score are printed
    to stdout.

    Args:
        args: Option mapping. The keys read here are ``--model-root``,
            ``--test-src``, ``--cuda``, ``--max-len`` and ``--batch-size``.

    Returns:
        None. If the test set yields no batches, a message is written to
        stderr and no metrics are printed.
    """
    model_root = args['--model-root'] if args['--model-root'] else './models'
    print("load model from {}".format(model_root), file=sys.stderr)

    labels = get_labels(args['--test-src'] + "/labels.txt")

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")

    output_model_file = os.path.join(model_root, "model_file.bin")
    output_config_file = os.path.join(model_root, "config_file.bin")
    output_vocab_file = os.path.join(model_root, "vocab.txt")
    config = BertConfig.from_json_file(output_config_file)
    model = BertForTokenClassification(config, num_labels=len(labels))
    # map_location lets a checkpoint saved on a GPU be restored when running
    # on CPU (and vice versa) instead of failing while deserializing.
    state_dict = torch.load(output_model_file, map_location=device)
    model.load_state_dict(state_dict)
    # Single source of truth for the target device; a no-op when on CPU.
    model = model.to(device)
    tokenizer = BertTokenizer(output_vocab_file, do_lower_case=False)

    MAX_LEN = int(args['--max-len'])
    testdataset = loadData(tokenizer, args['--test-src'], 'test', MAX_LEN,
                           labels)

    test_sampler = SequentialSampler(testdataset)
    test_dataloader = DataLoader(testdataset,
                                 sampler=test_sampler,
                                 batch_size=int(args['--batch-size']))

    model.eval()
    predictions = []
    true_labels = []
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps = 0
    for batch in test_dataloader:
        batch = tuple(t.to(device) for t in batch)
        # The third element of each batch is not used during evaluation.
        b_input_ids, b_input_mask, _, b_labels = batch

        with torch.no_grad():
            # The model is called twice: with labels (output used as the
            # loss) and without labels (output used as the logits).
            tmp_eval_loss = model(b_input_ids,
                                  token_type_ids=None,
                                  attention_mask=b_input_mask,
                                  labels=b_labels)
            logits = model(b_input_ids,
                           token_type_ids=None,
                           attention_mask=b_input_mask)

        logits = logits.detach().cpu().numpy()
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        label_ids = b_labels.to('cpu').numpy()
        true_labels.append(label_ids)
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)

        eval_loss += tmp_eval_loss.mean().item()
        eval_accuracy += tmp_eval_accuracy

        nb_eval_steps += 1

    if nb_eval_steps == 0:
        # An empty test set would otherwise end in a ZeroDivisionError below.
        print("no test batches to evaluate", file=sys.stderr)
        return

    # Index -> label string, in the order the labels were read.
    label_map = dict(enumerate(labels))
    pred_tags = [[label_map[p_i] for p_i in p] for p in predictions]
    # Flatten the per-batch label arrays into one tag list per sentence.
    test_tags = [[label_map[l_ii] for l_ii in l_i] for l in true_labels
                 for l_i in l]

    print("Test loss: {}".format(eval_loss / nb_eval_steps))
    print("Test Accuracy: {}".format(eval_accuracy / nb_eval_steps))
    print("Test F1-Score: {}".format(f1_score(test_tags, pred_tags)))

예제 #5

파일 보기

class DimensionBertNer(object):
    """BERT token classifier used to extract dimensions from text lines.

    Wraps a ``BertForTokenClassification`` model and a ``bert-base-uncased``
    tokenizer; ``predict`` turns raw text lines into the per-line dimension
    values produced by ``DimensionDataset.get_item_dimension``.
    """

    def __init__(self, model_weight_filename=None):
        """
        Load an instance of BERT model for dimension classification.

        :param model_weight_filename: optional path to saved model weights.
            When given, the model is built from a fixed BERT config and the
            weights are loaded from this file; otherwise the pretrained
            ``bert-base-uncased`` weights are used.
        """
        self.num_labels = len(DimensionDataset.label2idx)
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        logging.info('*** Instantiate model ***')
        if model_weight_filename:
            config = BertConfig(vocab_size_or_config_json_file=30522,
                                hidden_size=768,
                                num_hidden_layers=12,
                                num_attention_heads=12,
                                intermediate_size=3072)

            self.model = BertForTokenClassification(config, self.num_labels)

            logging.info('*** Loading model weights ***')
            self.model.load_state_dict(
                torch.load(model_weight_filename, map_location=self.device))
        else:
            # load bert pretrained with empty token classification top layers
            self.model = BertForTokenClassification.from_pretrained(
                "bert-base-uncased", num_labels=self.num_labels)

        logging.info('*** Loading tokenizer ***')
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # Declared static because it takes no ``self``: without the decorator,
    # calling it on an instance would pass the instance as ``preds`` and
    # raise a TypeError. Calls through the class keep working unchanged.
    @staticmethod
    def flat_accuracy(preds, labels):
        """
        Simple accuracy on a token level comparable to the accuracy in keras.

        :param preds: array of scores; the predicted class is the argmax
            over axis 2
        :param labels: array of true class ids with one entry per prediction
        :return: fraction of flattened positions where prediction and label match
        """
        pred_flat = np.argmax(preds, axis=2).flatten()
        labels_flat = labels.flatten()
        return np.sum(pred_flat == labels_flat) / len(labels_flat)

    def predict(self, lines_to_predict, max_tokens=24):
        """
        Returns the predicted dimension for each text line of the given lines_to_predict param.

        :param lines_to_predict: list of text to decode
        :param max_tokens: maximum tokens per sentence
        :return: list with one entry per input line, each being the value of
            ``dataset.get_item_dimension(i)`` (ex: {'W': 640, 'H':480}).
            An empty input yields an empty list.
        """
        # Nothing to predict: a batch size of 0 would be rejected by the
        # DataLoader, so short-circuit here.
        if len(lines_to_predict) == 0:
            return []

        # build the data loader
        bs = min(64, len(lines_to_predict))
        dataset = DimensionDataset(self.tokenizer,
                                   lines_to_predict=lines_to_predict,
                                   max_tokens=max_tokens)
        dataset_tensor = torch.tensor(dataset).type(torch.LongTensor)
        dataloader = data.DataLoader(dataset_tensor,
                                     batch_size=bs,
                                     shuffle=False)

        self.model.to(self.device)
        self.model.eval()

        predictions_ids = []

        for batch in dataloader:
            # permute the tensor to go from shape (batch size, 3, max_tokens) to (3, batch size, max tokens)
            batch = batch.permute(1, 0, 2)

            # add batch to gpu
            batch = tuple(t.to(self.device) for t in batch)
            # the third slice is not needed for inference
            batch_input_ids, batch_input_mask, _ = batch

            with torch.no_grad():
                logits = self.model(batch_input_ids,
                                    token_type_ids=None,
                                    attention_mask=batch_input_mask)

            logits = logits.detach().cpu().numpy()

            predictions_ids.extend(
                [list(p) for p in np.argmax(logits, axis=2)])

        # convert prediction indexes in labels. Resulting in a list of shape [nb_samples, max_tokens]
        predictions_labels = [[
            DimensionDataset.labels[class_idx] for class_idx in pred
        ] for pred in predictions_ids]

        # set the predicted labels (class id and label)
        dataset.set_labels(predictions_ids, predictions_labels)

        predicted_dim = [
            dataset.get_item_dimension(i) for i in range(len(dataset))
        ]

        return predicted_dim