Example #1
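    # Compute per-entity-type and overall precision/recall/F1/accuracy from the report
    # dict; assumes seqeval-style classification_report and accuracy_score are imported
    # (e.g. from seqeval.metrics import classification_report, accuracy_score).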
    def _compute(self, predictions, references, suffix=False):
        report = classification_report(y_true=references,
                                       y_pred=predictions,
                                       suffix=suffix,
                                       output_dict=True)
        report.pop("macro avg")
        report.pop("weighted avg")
        overall_score = report.pop("micro avg")

        scores = {}
        for type_name, score in report.items():
            scores[type_name] = {
                "precision": score["precision"],
                "recall": score["recall"],
                "f1": score["f1-score"],
                "number": score["support"],
            }

        scores["overall_precision"] = overall_score["precision"]
        scores["overall_recall"] = overall_score["recall"]
        scores["overall_f1"] = overall_score["f1-score"]
        scores["overall_accuracy"] = accuracy_score(y_true=references,
                                                    y_pred=predictions)

        return scores
Example #2
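# Evaluate a RoBERTa token-classification model: take the argmax over logits, map ids
# back to tag strings, then score; precision_score/recall_score/f1_score and
# classification_report are assumed to be the seqeval versions, which accept lists of
# tag sequences.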
def model_evaluate_roberta(model, data, label, tag2id, batch_size,
                           seq_len_list):
    id2tag = {value: key for key, value in tag2id.items()}
    pred_logits = model.predict(data, batch_size=batch_size)[0]
    # pred shape [batch_size, max_len]
    preds = np.argmax(pred_logits, axis=2).tolist()

    assert len(preds) == len(seq_len_list)
    # get predicted labels
    predict_label = []
    target_label = []
    for i in range(len(preds)):
        pred = preds[i][1:]
        temp = []
        true_label = label[i][:min(seq_len_list[i], len(pred))]
        for j in range(min(seq_len_list[i], len(pred))):
            temp.append(id2tag[pred[j]])
        assert len(temp) == len(true_label)
        target_label.append(true_label)
        predict_label.append(temp)

    # compute precision, recall, f1_score
    precision = precision_score(target_label,
                                predict_label,
                                average="macro",
                                zero_division=0)
    recall = recall_score(target_label,
                          predict_label,
                          average="macro",
                          zero_division=0)
    f1 = f1_score(target_label,
                  predict_label,
                  average="macro",
                  zero_division=0)
    logger.info(classification_report(target_label, predict_label))
    return precision, recall, f1
Example #3
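    # Evaluate an NER model on tokenized texts; `metrics` is assumed to be the
    # seqeval.metrics module, which scores entity-level precision/recall/F1 on
    # lists of label sequences.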
    def evaluate(self, data, labels):
        """Evaluate the performance of ner model.

        Args:
            data: list of tokenized texts, e.g. ``[['我', '是', '中', '国', '人']]``
            labels: list of list of str, the corresponding label strings

        """
        features, y = self.preprocessor.prepare_input(data, labels)
        pred_probs = self.model.predict(features)

        lengths = [
            min(len(label), pred_prob.shape[0])
            for label, pred_prob in zip(labels, pred_probs)
        ]
        y_pred = self.preprocessor.label_decode(pred_probs, lengths)

        r = metrics.recall_score(labels, y_pred)
        p = metrics.precision_score(labels, y_pred)
        f1 = metrics.f1_score(labels, y_pred)

        print('Recall: {}, Precision: {}, F1: {}'.format(r, p, f1))
        print(metrics.classification_report(labels, y_pred))
        return f1
Example #4
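# Benchmark a Flair NER tagger on pre-tokenized sentences; sentences_tokens,
# sentences_entities, num_sentences, num_tokens and remove_miscs are assumed to be
# defined by the surrounding benchmark script.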
def benchmark_flair_mdl():
    tagger = load_flair_ner_model()

    start = time.time()

    flair_sentences = []
    for i, sentence in enumerate(sentences_tokens):
        flair_sentence = Sentence()

        for token_txt in sentence:
            flair_sentence.add_token(Token(token_txt))
        flair_sentences.append(flair_sentence)

    tagger.predict(flair_sentences, verbose=True)
    predictions = [[tok.tags['ner'].value for tok in fs] for fs in flair_sentences]

    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens, time.time() - start)
    )

    assert len(predictions) == num_sentences

    print(classification_report(sentences_entities, remove_miscs(predictions),
                                digits=4))
Example #5
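# Distributed TF2 fine-tuning loop for token classification with gradient accumulation,
# periodic evaluation and checkpointing; `metrics` in the evaluation branch is assumed
# to be seqeval.metrics, and helpers such as create_optimizer, GradientAccumulator and
# evaluate() come from the surrounding script.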
def train(args, strategy, train_dataset, tokenizer, model, num_train_examples,
          labels, train_batch_size, pad_token_label_id):
    if args["max_steps"] > 0:
        num_train_steps = args["max_steps"] * args[
            "gradient_accumulation_steps"]
        args["num_train_epochs"] = 1
    else:
        num_train_steps = (math.ceil(num_train_examples / train_batch_size) //
                           args["gradient_accumulation_steps"] *
                           args["num_train_epochs"])

    writer = tf.summary.create_file_writer("/tmp/mylogs")

    with strategy.scope():
        loss_fct = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
        optimizer = create_optimizer(args["learning_rate"], num_train_steps,
                                     args["warmup_steps"])

        if args["fp16"]:
            optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
                optimizer, "dynamic")

        loss_metric = tf.keras.metrics.Mean(name="loss", dtype=tf.float32)
        gradient_accumulator = GradientAccumulator()

    logging.info("***** Running training *****")
    logging.info("  Num examples = %d", num_train_examples)
    logging.info("  Num Epochs = %d", args["num_train_epochs"])
    logging.info("  Instantaneous batch size per device = %d",
                 args["per_device_train_batch_size"])
    logging.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        train_batch_size * args["gradient_accumulation_steps"],
    )
    logging.info("  Gradient Accumulation steps = %d",
                 args["gradient_accumulation_steps"])
    logging.info("  Total training steps = %d", num_train_steps)

    model.summary()

    @tf.function
    def apply_gradients():
        grads_and_vars = []

        for gradient, variable in zip(gradient_accumulator.gradients,
                                      model.trainable_variables):
            if gradient is not None:
                scaled_gradient = gradient / (
                    args["n_device"] * args["gradient_accumulation_steps"])
                grads_and_vars.append((scaled_gradient, variable))
            else:
                grads_and_vars.append((gradient, variable))

        optimizer.apply_gradients(grads_and_vars, args["max_grad_norm"])
        gradient_accumulator.reset()

    @tf.function
    def train_step(train_features, train_labels):
        def step_fn(train_features, train_labels):
            inputs = {
                "attention_mask": train_features["attention_mask"],
                "training": True
            }

            if "token_type_ids" in train_features:
                inputs["token_type_ids"] = train_features["token_type_ids"]

            with tf.GradientTape() as tape:
                logits = model(train_features["input_ids"], **inputs)[0]
                active_loss = tf.reshape(train_labels,
                                         (-1, )) != pad_token_label_id
                active_logits = tf.boolean_mask(
                    tf.reshape(logits, (-1, len(labels))), active_loss)
                active_labels = tf.boolean_mask(
                    tf.reshape(train_labels, (-1, )), active_loss)
                cross_entropy = loss_fct(active_labels, active_logits)
                loss = tf.reduce_sum(cross_entropy) * (1.0 / train_batch_size)
                grads = tape.gradient(loss, model.trainable_variables)

                gradient_accumulator(grads)

            return cross_entropy

        per_example_losses = strategy.experimental_run_v2(step_fn,
                                                          args=(train_features,
                                                                train_labels))
        mean_loss = strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                    per_example_losses,
                                    axis=0)

        return mean_loss

    current_time = datetime.datetime.now()
    train_iterator = master_bar(range(args["num_train_epochs"]))
    global_step = 0
    logging_loss = 0.0

    for epoch in train_iterator:
        epoch_iterator = progress_bar(train_dataset,
                                      total=num_train_steps,
                                      parent=train_iterator,
                                      display=args["n_device"] > 1)
        step = 1

        with strategy.scope():
            for train_features, train_labels in epoch_iterator:
                loss = train_step(train_features, train_labels)

                if step % args["gradient_accumulation_steps"] == 0:
                    strategy.experimental_run_v2(apply_gradients)

                    loss_metric(loss)

                    global_step += 1

                    if args["logging_steps"] > 0 and global_step % args[
                            "logging_steps"] == 0:
                        # Log metrics
                        if (
                                args["n_device"] == 1
                                and args["evaluate_during_training"]
                        ):  # Only evaluate when single GPU otherwise metrics may not average well
                            y_true, y_pred, eval_loss = evaluate(
                                args,
                                strategy,
                                model,
                                tokenizer,
                                labels,
                                pad_token_label_id,
                                mode="dev")
                            report = metrics.classification_report(y_true,
                                                                   y_pred,
                                                                   digits=4)

                            logging.info("Eval at step " + str(global_step) +
                                         "\n" + report)
                            logging.info("eval_loss: " + str(eval_loss))

                            precision = metrics.precision_score(y_true, y_pred)
                            recall = metrics.recall_score(y_true, y_pred)
                            f1 = metrics.f1_score(y_true, y_pred)

                            with writer.as_default():
                                tf.summary.scalar("eval_loss", eval_loss,
                                                  global_step)
                                tf.summary.scalar("precision", precision,
                                                  global_step)
                                tf.summary.scalar("recall", recall,
                                                  global_step)
                                tf.summary.scalar("f1", f1, global_step)

                        lr = optimizer.learning_rate
                        learning_rate = lr(step)

                        with writer.as_default():
                            tf.summary.scalar("lr", learning_rate, global_step)
                            tf.summary.scalar(
                                "loss", (loss_metric.result() - logging_loss) /
                                args["logging_steps"], global_step)

                        logging_loss = loss_metric.result()

                    with writer.as_default():
                        tf.summary.scalar("loss",
                                          loss_metric.result(),
                                          step=step)

                    if args["save_steps"] > 0 and global_step % args[
                            "save_steps"] == 0:
                        # Save model checkpoint
                        output_dir = os.path.join(
                            args["output_dir"],
                            "checkpoint-{}".format(global_step))

                        if not os.path.exists(output_dir):
                            os.makedirs(output_dir)

                        model.save_pretrained(output_dir)
                        logging.info("Saving model checkpoint to %s",
                                     output_dir)

                train_iterator.child.comment = f"loss : {loss_metric.result()}"
                step += 1

        train_iterator.write(f"loss epoch {epoch + 1}: {loss_metric.result()}")

        loss_metric.reset_states()

    logging.info("  Training took time = {}".format(datetime.datetime.now() -
                                                    current_time))
Example #6
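# Train a SeqSlot tagger in PyTorch, then evaluate the dev split with seqeval's
# classification_report (strict IOB2) plus token accuracy and whole-sentence
# ("join") accuracy.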
def main(args):
    with open(args.cache_dir / "vocab.pkl", "rb") as f:
        vocab: Vocab = pickle.load(f)

    tag_idx_path = args.cache_dir / "tag2idx.json"
    tag2idx: Dict[str, int] = json.loads(tag_idx_path.read_text())

    data_paths = {split: args.data_dir / f"{split}.json" for split in SPLITS}
    data = {
        split: json.loads(path.read_text())
        for split, path in data_paths.items()
    }
    datasets: Dict[str, SeqSlotDataset] = {
        split: SeqSlotDataset(split_data, vocab, tag2idx, args.max_len)
        for split, split_data in data.items()
    }
    # TODO: create DataLoader for train / dev datasets
    dataloaders = {
        split: DataLoader(dataset,
                          batch_size=args.batch_size,
                          shuffle=True,
                          collate_fn=dataset.collate_fn)
        for split, dataset in datasets.items()
    }
    # COMPLETE

    embeddings = torch.load(args.cache_dir / "embeddings.pt")

    # TODO: init model and move model to target device(cpu / gpu)
    model = SeqSlot(embeddings, args.hidden_size, args.num_layers,
                    args.dropout, args.bidirectional,
                    datasets[TRAIN].num_classes)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # COMPLETE

    # TODO: init optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # COMPLETE

    epoch_pbar = trange(args.num_epoch, desc="Epoch")
    for epoch in epoch_pbar:
        # TODO: Training loop - iterate over train dataloader and update model weights
        size = len(dataloaders[TRAIN].dataset)
        loss_fn = torch.nn.CrossEntropyLoss()
        model.train()
        for batch_num, batch in enumerate(dataloaders[TRAIN]):
            encoded = batch["encoded"]
            tag = batch["tag"]
            lens = batch["lens"]
            if torch.cuda.is_available():
                encoded = encoded.cuda()
                tag = tag.cuda()
            pred = model(encoded, lens)
            pred = pred.view(-1, pred.shape[-1])
            tag = tag.reshape(-1)
            loss = loss_fn(pred, tag)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_num % 50 == 0:
                loss, current = loss, batch_num * len(encoded)
                print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
        # COMPLETE
        # TODO: Evaluation loop - calculate accuracy and save model weights
        loss, size = 0, 0
        y_true = []
        y_pred = []
        model.eval()
        with torch.no_grad():
            for batch_num, batch in enumerate(dataloaders[DEV]):
                encoded = batch["encoded"]
                tag = batch["tag"]
                lens = batch["lens"]
                if torch.cuda.is_available():
                    encoded = encoded.cuda()
                    tag = tag.cuda()
                pred = model(encoded, lens)
                pred = pred.view(-1, pred.shape[-1])
                tag = tag.reshape(-1)
                loss += loss_fn(pred, tag)
                pred_tag = torch.argmax(pred, dim=1)
                pred_tag = pred_tag.view(-1, len(encoded))
                tag = tag.view(-1, len(encoded))

                size += len(encoded)
                tran_pred = pred_tag.t()
                tran_true = tag.t()
                tran_pred = [
                    list(
                        map(datasets[DEV].idx2tag,
                            (tran_pred[i][:lens[i]]).tolist()))
                    for i in range(len(tran_pred))
                ]
                tran_true = [
                    list(
                        map(datasets[DEV].idx2tag,
                            (tran_true[i][:lens[i]]).tolist()))
                    for i in range(len(tran_true))
                ]
                for i in range(len(tran_pred)):
                    y_pred.append(tran_pred[i])
                    y_true.append(tran_true[i])

        report = classification_report(y_true,
                                       y_pred,
                                       mode='strict',
                                       scheme=IOB2)
        print(report)
        join_correct = correct = 0
        join_count = count = 0
        for i in range(len(y_pred)):
            join = True
            for j in range(len(y_pred[i])):
                count += 1
                if y_pred[i][j] == y_true[i][j]:
                    correct += 1
                else:
                    join = False
            join_count += 1
            if join:
                join_correct += 1
        accuracy = correct / count
        join_ac = join_correct / join_count
        loss /= size
        print(
            f"Dev Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {loss:>8f} JoinAC: {(100*join_ac):>0.1f}% \n"
        )
    torch.save(model, args.ckpt_dir / "best.pt")
Example #7
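# Evaluate a saved Keras NER model with word and character inputs, printing seqeval
# F1 and a classification report per dataset; flags such as USE_CRF, MULTI_OUT and
# helpers like load_saved_model and flip_dict are assumed to be defined elsewhere.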
def evaluate_on_model(ds_X_word, ds_X_char, ds_y):
    # load padding length
    padding_len = load_dict_after('padding_len.json')
    max_len = min(padding_len['max_len'], MAX_LEN)
    max_len_char = min(padding_len['max_len_char'], MAX_LEN_CHAR)

    # load the model in terms of CRF as output layer or Dense as output layer
    model = load_saved_model()

    # prepare the tags in terms of multiple output model, or single output
    y = []
    if USE_CRF and MULTI_OUT:
        y = convert_to_multi_output(ds_y, max_len)
    else:
        for ds in ds_y:
            y.extend(ds)

    X_word = []
    X_char = []

    for x_word in ds_X_word:
        X_word.extend(x_word)

    for x_char in ds_X_char:
        X_char.extend(x_char)

    X_word = np.array(X_word, dtype="float32")
    X_char = np.array(X_char, dtype="float32")

    print(model.metrics_names)

    if MULTI_OUT:
        scores = model.evaluate([X_word, X_char], [
            np.array(y[0], dtype="float32").reshape(len(X_word), max_len, 1),
            np.array(y[1], dtype="float32").reshape(len(X_word), max_len, 1),
            np.array(y[2], dtype="float32").reshape(len(X_word), max_len, 1),
            np.array(y[3], dtype="float32").reshape(len(X_word), max_len, 1),
            np.array(y[4], dtype="float32").reshape(len(X_word), max_len, 1)
        ],
                                verbose=1)
        print(scores)
    else:  # single output
        # get scores for test sets from each data-set
        for i in range(len(ds_X_word)):
            scores = model.evaluate([ds_X_word[i], ds_X_char[i]],
                                    np.array(ds_y[i], dtype="float32").reshape(
                                        len(ds_X_word[i]), max_len, 1),
                                    verbose=1)
            print(scores)

    if MULTI_OUT:
        test_pred = model.predict([X_word, X_char])

        for i in range(len(ds_X_word)):
            tag2idx = load_dict_after('tag2idx' + str(i) + '.json')
            n_tags = len(tag2idx)
            idx2tag = flip_dict(tag2idx)
            conv_pred = []
            conv_gold = []
            for sentence_tag in test_pred[i]:
                p = np.argmax(sentence_tag, axis=-1)
                p = [idx2tag[tag_idx] for tag_idx in p]
                conv_pred.append(p)
            for sentence_tag in y[i]:
                sentence_tag = [idx2tag[tag_idx] for tag_idx in sentence_tag]
                conv_gold.append(sentence_tag)

            print("F1-score: {:.1%}".format(f1_score(conv_gold, conv_pred)))
            print(classification_report(conv_gold, conv_pred))
    else:
        for i in range(len(ds_X_word)):
            x_word = ds_X_word[i]  # all the sentences in word-indexed form for dataset i
            x_char = ds_X_char[i]  # all the sentences in character-indexed form for dataset i
            y_sen = ds_y[i]  # all the corresponding tags of the sentences
            y_sen = np.array(y_sen)

            #predict
            test_pred = model.predict([
                np.array(x_word, dtype="float32"),
                np.array(x_char, dtype="float32")
            ])
            tag2idx = load_dict_after('tag2idx.json')
            n_tags = len(tag2idx)
            idx2tag = flip_dict(tag2idx)
            conv_pred = []  # list to store the predicted tags, converted from indices
            conv_gold = []  # list to store the actual/gold tags, converted from indices

            for sentence_tag in test_pred:
                # for each word, pick the tag with maximum probability among all possible tags
                p = np.argmax(sentence_tag, axis=-1)
                # convert each tag from index to name
                p = [idx2tag[tag_idx] for tag_idx in p]
                conv_pred.append(p)

            for sentence_tag in y_sen:
                sentence_tag = [idx2tag[tag_idx] for tag_idx in sentence_tag]
                conv_gold.append(sentence_tag)

            print("F1-score: {:.1%}".format(f1_score(conv_gold, conv_pred)))
            print(classification_report(conv_gold, conv_pred))
Example #8
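# BERT NER fine-tuning/evaluation driver (pytorch-pretrained-bert era): weight-decay
# parameter groups, optional fp16/apex, gradient accumulation, and seqeval-based
# evaluation; BertForNer, NerProcessor and convert_examples_to_features are assumed
# to be project-local helpers.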
def run_ner_w_args(args):
    if args.server_ip and args.server_port:
        # Distant debugging - see
        # https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    processors = {"ner": NerProcessor}

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of
        # synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    # if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train:
    # raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    num_labels = len(label_list) + 1

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_optimization_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = (
                num_train_optimization_steps // torch.distributed.get_world_size())

    # Prepare model
    cache_dir = args.cache_dir if args.cache_dir else os.path.join(
        str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(
            args.local_rank))
    model = BertForNer.from_pretrained(args.bert_model,
                                       cache_dir=cache_dir,
                                       config_dir=args.config_dir,
                                       num_labels=num_labels,
                                       config=args.config)

    model_to_save = model.module if hasattr(model, 'module') else model
    # print(model_to_save.config, cache_dir)
    # print(args.config_dir, args.config)
    # exit()

    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )

        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    if args.do_train:
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {
                'params': [p for n, p in param_optimizer
                           if not any(nd in n for nd in no_decay)],
                'weight_decay': 0.01
            },
            {
                'params': [p for n, p in param_optimizer
                           if any(nd in n for nd in no_decay)],
                'weight_decay': 0.0
            },
        ]
        if args.fp16:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=args.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if args.loss_scale == 0:
                optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                optimizer = FP16_Optimizer(optimizer,
                                           static_loss_scale=args.loss_scale)

        else:
            optimizer = BertAdam(optimizer_grouped_parameters,
                                 lr=args.learning_rate,
                                 warmup=args.warmup_proportion,
                                 t_total=num_train_optimization_steps)

    # def resolve_opt(pre_model_path, optimizer):
    #     opt_path = os.path.join(args.bert_model, "opt.pth")
    #     if os.path.exists(opt_path):
    #         optimizer.load_state_dict( torch.load( opt_path ) )
    #     return optimizer

    # optimizer = resolve_opt(args.bert_model, optimizer)

    global_step = 0
    nb_tr_steps = 0
    tr_loss = 0
    label_map = {i: label for i, label in enumerate(label_list, 1)}

    if args.do_train:
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in train_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids,
                                   all_valid_ids, all_lmask_ids)
        # train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        model.train()

        def warmup_linear(progress, warmup):
            if progress < warmup:
                return progress / warmup
            return max((progress - 1.) / (warmup - 1.), 0.)

        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids, valid_ids, l_mask = batch
                loss = model(input_ids, segment_ids, input_mask, label_ids,
                             valid_ids, l_mask)
                # input_ids, input_mask, segment_ids, label_ids = batch
                # loss = model(input_ids, segment_ids, input_mask, label_ids)
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        # modify learning rate with special warm up BERT uses
                        # if args.fp16 is False, BertAdam is used that handles
                        # this automatically
                        lr_this_step = args.learning_rate * \
                            warmup_linear(global_step / num_train_optimization_steps, args.warmup_proportion)
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr_this_step
                    optimizer.step()
                    optimizer.zero_grad()
                    global_step += 1

        # Save a trained model and the associated configuration
        model_to_save = model.module if hasattr(
            model, 'module') else model  # Only save the model it-self
        output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
        torch.save(model_to_save.state_dict(), output_model_file)
        # Save optimizer
        output_optimizer_file = os.path.join(args.output_dir, "opt.pth")
        torch.save(optimizer.state_dict(), output_optimizer_file)
        output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
        tokenizer.save_vocabulary(args.output_dir)
        with open(output_config_file, 'w') as f:
            f.write(model_to_save.config.to_json_string())
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        model_config = {
            "bert_model": args.bert_model,
            "do_lower": args.do_lower_case,
            "max_seq_length": args.max_seq_length,
            "num_labels": len(label_list) + 1,
            "label_map": label_map
        }
        json.dump(
            model_config,
            open(os.path.join(args.output_dir, "model_config.json"), "w"))
        # Load a trained model and config that you have fine-tuned
    else:
        # output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
        # output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
        # config = BertConfig(output_config_file)
        # model = BertForTokenClassification(config, num_labels=num_labels)
        # model.load_state_dict(torch.load(output_model_file))
        model = BertForNer.from_pretrained(args.bert_model,
                                           num_labels=num_labels)
        tokenizer = BertTokenizer.from_pretrained(
            args.bert_model, do_lower_case=args.do_lower_case)
    model.to(device)

    if args.do_eval and (args.local_rank == -1
                         or torch.distributed.get_rank() == 0):
        eval_examples = processor.get_dev_examples(args.data_dir)
        eval_features = convert_examples_to_features(eval_examples, label_list,
                                                     args.max_seq_length,
                                                     tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in eval_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids,
                                  all_valid_ids, all_lmask_ids)
        # eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)
        model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        y_true = []
        y_pred = []
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        # for input_ids, input_mask, segment_ids, label_ids in
        # tqdm(eval_dataloader, desc="Evaluating"):
        for input_ids, input_mask, segment_ids, label_ids, valid_ids, l_mask in tqdm(
                eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            valid_ids = valid_ids.to(device)
            label_ids = label_ids.to(device)
            l_mask = l_mask.to(device)

            with torch.no_grad():
                logits = model(input_ids,
                               segment_ids,
                               input_mask,
                               valid_ids=valid_ids,
                               attention_mask_label=l_mask)

            logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)
            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            input_mask = input_mask.to('cpu').numpy()

            for i, label in enumerate(label_ids):
                temp_1 = []
                temp_2 = []
                for j, m in enumerate(label):
                    if j == 0:
                        continue
                    elif label_ids[i][j] == 11:
                        y_true.append(temp_1)
                        y_pred.append(temp_2)
                        break
                    else:
                        temp_1.append(label_map[label_ids[i][j]])
                        temp_2.append(label_map[logits[i][j]])

        loss = tr_loss / global_step if args.do_train else None
        result = dict()
        result['loss'] = loss
        report = classification_report(y_true, y_pred, digits=4)
        logger.info("\n%s", report)
        print(report)
        result['f1'] = f1_score(y_true, y_pred)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            logger.info("\n%s", report)
            # writer.write(report)
            for key in sorted(result.keys()):
                writer.write("%s = %s\n" % (key, str(result[key])))
        return result
Example #9
    def test_classification_report(self):
        print(classification_report(self.y_true, self.y_pred))
Example #10
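        # Excerpt from a validation loop: accumulate predictions and gold labels, map
        # ids to tag names via tag2name, score with seqeval, and keep the best
        # checkpoint; variables such as label_ids, dev_best_acc and bert_out_address
        # are defined earlier in the original script.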
        val_batch_labels = label_ids.to("cpu").numpy()
        predictions.extend(val_batch_preds)
        true_labels.extend(val_batch_labels)

        tmp_eval_accuracy = flat_accuracy(val_batch_labels, val_batch_preds)

        eval_loss += tmp_eval_loss.mean().item()
        eval_accuracy += tmp_eval_accuracy

        nb_eval_examples += b_input_ids.size(0)
        nb_eval_steps += 1

    # Evaluate loss, acc, conf. matrix, and class. report on devset
    pred_tags = [[tag2name[i] for i in predictions]]
    valid_tags = [[tag2name[i] for i in true_labels]]
    cl_report = classification_report(valid_tags, pred_tags)
    eval_loss = eval_loss / nb_eval_steps
    tmp_accuracy = accuracy_score(valid_tags, pred_tags)
    if tmp_accuracy > dev_best_acc:
        dev_best_acc = tmp_accuracy
        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self

        output_model_file = os.path.join(bert_out_address, "pytorch_model.bin")
        output_config_file = os.path.join(bert_out_address, "config.json")

        torch.save(model_to_save.state_dict(), output_model_file)
        model_to_save.config.to_json_file(output_config_file)

    # Report metrics
    f1 = f1_score(valid_tags, pred_tags)
    if f1 > dev_best_f1:
Example #11
def evaluate(args, model, tokenizer, ngram_dict, processor, label_list):
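    # Token-level evaluation for a BERT-style model with n-gram features: decode
    # predictions, then score with seqeval's classification_report/f1_score for
    # NER/POS, or with cws_evaluate_word_PRF for Chinese word segmentation tasks.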
    num_labels = len(label_list) + 1
    eval_dataset = load_examples(args,
                                 tokenizer,
                                 ngram_dict,
                                 processor,
                                 label_list,
                                 mode="test")
    # Run prediction for full data
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    model.eval()
    y_true = []
    y_pred = []
    label_map = {i: label for i, label in enumerate(label_list, 1)}
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(args.device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids, ngram_ids, ngram_positions, \
        ngram_lengths, ngram_seg_ids, ngram_masks, valid_ids, l_mask = batch

        with torch.no_grad():
            logits = model(input_ids,
                           token_type_ids=None,
                           attention_mask=None,
                           labels=None,
                           valid_ids=valid_ids,
                           attention_mask_label=None,
                           ngram_ids=ngram_ids,
                           ngram_positions=ngram_positions)

        logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.detach().cpu().numpy()

        for i, label in enumerate(label_ids):
            for j, m in enumerate(label):
                if j == 0:
                    continue
                if label_ids[i][j] == num_labels - 1:
                    break
                y_true.append(label_map[label_ids[i][j]])
                y_pred.append(label_map[logits[i][j]])
    if args.task_name == 'cwsmsra' or args.task_name == 'cwspku':
        #evaluating CWS
        result = cws_evaluate_word_PRF(y_pred, y_true)
        logger.info("=======entity level========")
        logger.info(
            "\n%s",
            ', '.join("%s: %s" % (key, val) for key, val in result.items()))
        logger.info("=======entity level========")
    else:
        #evaluating NER, POS
        report = classification_report(y_true, y_pred, digits=4)
        f = f1_score(y_true, y_pred)
        result = {"report": report, "f1": f}
        logger.info("=======entity level========")
        logger.info(report)
        logger.info("=======entity level========")
    return result
Example #12
def show_ner_report(labels, preds):
    return classification_report(labels, preds, suffix=True)
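
A minimal usage sketch (not from the original source), assuming the seqeval import below; with suffix=True seqeval expects tags written as "PER-B"/"PER-I" rather than "B-PER"/"I-PER":

from seqeval.metrics import classification_report

labels = [["PER-B", "PER-I", "O", "LOC-B"]]
preds = [["PER-B", "O", "O", "LOC-B"]]
print(show_ner_report(labels, preds))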
Example #13
def show_ner_report(labels, preds):
    return seqeval_metrics.classification_report(labels, preds, suffix=True)
Example #14
def main():
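    # End-to-end TF2 NER training script: build word/label vocabularies, load GloVe
    # embeddings, train a TFNer model, and evaluate the test split with seqeval's
    # classification_report; split_text_label, createMatrices, padding, TFNer and
    # idx_to_label are assumed to be project-local helpers.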

    logging.basicConfig(format='%(asctime)s - %(levelname)s -   %(message)s',
                        datefmt='%m/%d/%Y ',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)

    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument("--data",
                        default=None,
                        type=str,
                        required=True,
                        help="Directory which has the data files for the task")
    parser.add_argument(
        "--output",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )
    parser.add_argument("--overwrite",
                        default=False,
                        type=bool,
                        help="Set it to True to overwrite output directory")

    args = parser.parse_args()

    if os.path.exists(args.output) and os.listdir(
            args.output) and not args.overwrite:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Set the overwrite flag to overwrite"
            .format(args.output))
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    train_batch_size = 32
    valid_batch_size = 64
    test_batch_size = 64

    # padding sentences and labels to max_length of 128
    max_seq_len = 128
    EMBEDDING_DIM = 100
    epochs = 10

    split_train = split_text_label(os.path.join(args.data, "train.txt"))
    split_valid = split_text_label(os.path.join(args.data, "valid.txt"))
    split_test = split_text_label(os.path.join(args.data, "test.txt"))

    labelSet = set()
    wordSet = set()
    # words and labels
    for data in [split_train, split_valid, split_test]:
        for labeled_text in data:
            for word, label in labeled_text:
                labelSet.add(label)
                wordSet.add(word.lower())

    # Sort the set to ensure '0' is assigned to 0
    sorted_labels = sorted(list(labelSet), key=len)

    # Create mapping for labels
    label2Idx = {}
    for label in sorted_labels:
        label2Idx[label] = len(label2Idx)

    num_labels = len(label2Idx)
    idx2Label = {v: k for k, v in label2Idx.items()}

    pickle.dump(idx2Label,
                open(os.path.join(args.output, "idx2Label.pkl"), 'wb'))
    logger.info("Saved idx2Label pickle file")

    # Create mapping for words
    word2Idx = {}
    if len(word2Idx) == 0:
        word2Idx["PADDING_TOKEN"] = len(word2Idx)
        word2Idx["UNKNOWN_TOKEN"] = len(word2Idx)
    for word in wordSet:
        word2Idx[word] = len(word2Idx)
    logger.info("Total number of words is : %d ", len(word2Idx))

    pickle.dump(word2Idx, open(os.path.join(args.output, "word2Idx.pkl"),
                               'wb'))
    logger.info("Saved word2Idx pickle file")

    # Loading glove embeddings
    embeddings_index = {}
    f = open('embeddings/glove.6B.100d.txt', encoding="utf-8")
    for line in f:
        values = line.strip().split(' ')
        word = values[0]  # the first entry is the word
        coefs = np.asarray(
            values[1:], dtype='float32')  #100d vectors representing the word
        embeddings_index[word] = coefs
    f.close()
    logger.info("Glove data loaded")

    #print(str(dict(itertools.islice(embeddings_index.items(), 2))))

    embedding_matrix = np.zeros((len(word2Idx), EMBEDDING_DIM))

    # Word embeddings for the tokens
    for word, i in word2Idx.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    pickle.dump(embedding_matrix,
                open(os.path.join(args.output, "embedding.pkl"), 'wb'))
    logger.info("Saved Embedding matrix pickle")

    # Interesting - to check how many words were not there in Glove Embedding
    # indices = np.where(np.all(np.isclose(embedding_matrix, 0), axis=1))
    # print(len(indices[0]))

    train_sentences, train_labels = createMatrices(split_train, word2Idx,
                                                   label2Idx)
    valid_sentences, valid_labels = createMatrices(split_valid, word2Idx,
                                                   label2Idx)
    test_sentences, test_labels = createMatrices(split_test, word2Idx,
                                                 label2Idx)

    train_features, train_labels = padding(train_sentences,
                                           train_labels,
                                           max_seq_len,
                                           padding='post')
    valid_features, valid_labels = padding(valid_sentences,
                                           valid_labels,
                                           max_seq_len,
                                           padding='post')
    test_features, test_labels = padding(test_sentences,
                                         test_labels,
                                         max_seq_len,
                                         padding='post')

    logger.info(
        f"Train features shape is {train_features.shape} and labels shape is {train_labels.shape}"
    )
    logger.info(
        f"Valid features shape is {valid_features.shape} and labels shape is {valid_labels.shape}"
    )
    logger.info(
        f"Test features shape is {test_features.shape} and labels shape is {test_labels.shape}"
    )

    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, train_labels))
    valid_dataset = tf.data.Dataset.from_tensor_slices(
        (valid_features, valid_labels))
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_features, test_labels))

    shuffled_train_dataset = train_dataset.shuffle(
        buffer_size=train_features.shape[0], reshuffle_each_iteration=True)

    batched_train_dataset = shuffled_train_dataset.batch(train_batch_size,
                                                         drop_remainder=True)
    batched_valid_dataset = valid_dataset.batch(valid_batch_size,
                                                drop_remainder=True)
    batched_test_dataset = test_dataset.batch(test_batch_size,
                                              drop_remainder=True)

    epoch_bar = master_bar(range(epochs))
    train_pb_max_len = math.ceil(
        float(len(train_features)) / float(train_batch_size))
    valid_pb_max_len = math.ceil(
        float(len(valid_features)) / float(valid_batch_size))
    test_pb_max_len = math.ceil(
        float(len(test_features)) / float(test_batch_size))

    model = TFNer(max_seq_len=max_seq_len,
                  embed_input_dim=len(word2Idx),
                  embed_output_dim=EMBEDDING_DIM,
                  weights=[embedding_matrix],
                  num_labels=num_labels)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
    scce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    train_log_dir = f"{args.output}/logs/train"
    valid_log_dir = f"{args.output}/logs/valid"
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    valid_summary_writer = tf.summary.create_file_writer(valid_log_dir)

    train_loss_metric = tf.keras.metrics.Mean('training_loss',
                                              dtype=tf.float32)
    valid_loss_metric = tf.keras.metrics.Mean('valid_loss', dtype=tf.float32)

    def train_step_fn(sentences_batch, labels_batch):
        with tf.GradientTape() as tape:
            logits = model(
                sentences_batch)  # batchsize, max_seq_len, num_labels
            loss = scce(labels_batch, logits)  #batchsize,max_seq_len
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))
        return loss, logits

    def valid_step_fn(sentences_batch, labels_batch):
        logits = model(sentences_batch)
        loss = scce(labels_batch, logits)
        return loss, logits

    for epoch in epoch_bar:
        with train_summary_writer.as_default():
            for sentences_batch, labels_batch in progress_bar(
                    batched_train_dataset,
                    total=train_pb_max_len,
                    parent=epoch_bar):

                loss, logits = train_step_fn(sentences_batch, labels_batch)
                train_loss_metric(loss)
                epoch_bar.child.comment = f'training loss : {train_loss_metric.result()}'
            tf.summary.scalar('training loss',
                              train_loss_metric.result(),
                              step=epoch)
            train_loss_metric.reset_states()

        with valid_summary_writer.as_default():
            for sentences_batch, labels_batch in progress_bar(
                    batched_valid_dataset,
                    total=valid_pb_max_len,
                    parent=epoch_bar):
                loss, logits = valid_step_fn(sentences_batch, labels_batch)
                valid_loss_metric.update_state(loss)

                epoch_bar.child.comment = f'validation loss : {valid_loss_metric.result()}'

            # Logging after each Epoch !
            tf.summary.scalar('valid loss',
                              valid_loss_metric.result(),
                              step=epoch)
            valid_loss_metric.reset_states()

    model.save_weights(f"{args.output}/model_weights", save_format='tf')
    logger.info(f"Model weights saved")

    #Evaluating on test dataset

    test_model = TFNer(max_seq_len=max_seq_len,
                       embed_input_dim=len(word2Idx),
                       embed_output_dim=EMBEDDING_DIM,
                       weights=[embedding_matrix],
                       num_labels=num_labels)
    test_model.load_weights(f"{args.output}/model_weights")
    logger.info(f"Model weights restored")

    true_labels = []
    pred_labels = []

    for sentences_batch, labels_batch in progress_bar(batched_test_dataset,
                                                      total=test_pb_max_len):

        logits = test_model(sentences_batch)
        temp1 = tf.nn.softmax(logits)
        preds = tf.argmax(temp1, axis=2)
        true_labels.append(np.asarray(labels_batch))
        pred_labels.append(np.asarray(preds))

    label_correct, label_pred = idx_to_label(pred_labels, true_labels,
                                             idx2Label)
    report = classification_report(label_correct, label_pred, digits=4)
    logger.info(f"Results for the test dataset")
    logger.info(f"\n{report}")
Example #15
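    # Joint evaluation of aspect term extraction (ATE) and aspect polarity
    # classification (APC); APC F1 appears to use sklearn's f1_score on polarity
    # logits while ATE uses seqeval's classification_report (an assumption about the
    # surrounding imports), with the ATE score parsed back out of the report text.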
    def evaluate(eval_ATE=True, eval_APC=True):
        # evaluate
        apc_result = {'max_apc_test_acc': 0, 'max_apc_test_f1': 0}
        ate_result = 0
        y_true = []
        y_pred = []
        n_test_correct, n_test_total = 0, 0
        test_apc_logits_all, test_polarities_all = None, None
        model.eval()
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        for input_ids_spc, input_mask, segment_ids, label_ids, polarities, valid_ids, l_mask in eval_dataloader:
            input_ids_spc = input_ids_spc.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            valid_ids = valid_ids.to(device)
            label_ids = label_ids.to(device)
            polarities = polarities.to(device)
            l_mask = l_mask.to(device)

            with torch.no_grad():
                ate_logits, apc_logits = model(input_ids_spc,
                                               segment_ids,
                                               input_mask,
                                               valid_ids=valid_ids,
                                               polarities=polarities,
                                               attention_mask_label=l_mask)
            if eval_APC:
                polarities = model.get_batch_polarities(polarities)
                n_test_correct += (torch.argmax(
                    apc_logits, -1) == polarities).sum().item()
                n_test_total += len(polarities)

                if test_polarities_all is None:
                    test_polarities_all = polarities
                    test_apc_logits_all = apc_logits
                else:
                    test_polarities_all = torch.cat(
                        (test_polarities_all, polarities), dim=0)
                    test_apc_logits_all = torch.cat(
                        (test_apc_logits_all, apc_logits), dim=0)

            if eval_ATE:
                if not args.use_bert_spc:
                    label_ids = model.get_batch_token_labels_bert_base_indices(
                        label_ids)
                ate_logits = torch.argmax(F.log_softmax(ate_logits, dim=2),
                                          dim=2)
                ate_logits = ate_logits.detach().cpu().numpy()
                label_ids = label_ids.to('cpu').numpy()
                input_mask = input_mask.to('cpu').numpy()
                for i, label in enumerate(label_ids):
                    temp_1 = []
                    temp_2 = []
                    for j, m in enumerate(label):
                        if j == 0:
                            continue
                        elif label_ids[i][j] == len(label_list):
                            y_true.append(temp_1)
                            y_pred.append(temp_2)
                            break
                        else:
                            temp_1.append(label_map.get(label_ids[i][j], 'O'))
                            temp_2.append(label_map.get(ate_logits[i][j], 'O'))
        if eval_APC:
            test_acc = n_test_correct / n_test_total
            if args.dataset in {'camera', 'car', 'phone', 'notebook'}:
                test_f1 = f1_score(torch.argmax(test_apc_logits_all, -1).cpu(),
                                   test_polarities_all.cpu(),
                                   labels=[0, 1],
                                   average='macro')
            else:
                test_f1 = f1_score(torch.argmax(test_apc_logits_all, -1).cpu(),
                                   test_polarities_all.cpu(),
                                   labels=[0, 1, 2],
                                   average='macro')
            test_acc = round(test_acc * 100, 2)
            test_f1 = round(test_f1 * 100, 2)
            apc_result = {
                'max_apc_test_acc': test_acc,
                'max_apc_test_f1': test_f1
            }

        if eval_ATE:
            report = classification_report(y_true, y_pred, digits=4)
            tmps = report.split()
            ate_result = round(float(tmps[7]) * 100, 2)
        return apc_result, ate_result
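Exemple #15 recovers the ATE score by splitting the textual report and reading the eighth token, which breaks if the report layout ever changes. A minimal sketch, assuming the seqeval package and purely illustrative B-ASP/I-ASP tags, of computing the same micro-averaged span F1 directly:

from seqeval.metrics import f1_score

# Illustrative gold and predicted tag sequences (aspect-term spans).
y_true = [['B-ASP', 'I-ASP', 'O'], ['O', 'B-ASP']]
y_pred = [['B-ASP', 'I-ASP', 'O'], ['O', 'O']]

# seqeval's f1_score averages over spans (micro) by default, which matches the
# "micro avg" row that the snippet above extracts from the text report.
ate_result = round(f1_score(y_true, y_pred) * 100, 2)
print(ate_result)  # 66.67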
Exemple #16
0
X_train = [sent2features(s) for s in data[996:]]
y_train = [sent2labels(s) for s in data[996:]]

X_test = [sent2features(s) for s in data[:996]]
y_test = [sent2labels(s) for s in data[:996]]

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=20,
    all_possible_transitions=False,
)

if __name__ == '__main__':
    crf.fit(X_train, y_train)

    y_pred = crf.predict(X_test)

    y_p, y_t = [], []
    for i in range(len(y_pred)):
        for j in range(len(y_pred[i])):
            y_p.append(y_pred[i][j])
            y_t.append(y_test[i][j])

    print(
        metrics.flat_classification_report(y_test,
                                           y_pred,
                                           labels=corpus.labels))
    print(classification_report(y_t, y_p))
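The CRF snippet above assumes sent2features and sent2labels helpers that are not shown. A hedged sketch of one common shape for them, assuming each sentence in data is a list of (token, tag) pairs and following the usual sklearn_crfsuite feature-dict convention:

def word2features(sent, i):
    # Hand-crafted features for the i-th token of a sentence.
    word = sent[i][0]
    return {
        'bias': 1.0,
        'word.lower()': word.lower(),
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'BOS': i == 0,
        'EOS': i == len(sent) - 1,
    }

def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [tag for _, tag in sent]

Any extra signals (POS tags, affixes, neighbouring words) would simply be added to the same feature dict.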
Exemple #17
0
    def get_classification_report(self,
                                  index2label: Union[List[str], Dict[int, str]]):
        golds, preds = self._map_to_labels(index2label)

        cr = classification_report(golds, preds, digits=5)
        return report2dict(cr)
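The report2dict helper used above is not shown; it presumably parses the textual report back into a dictionary. If the classification_report in scope supports it (scikit-learn does, and recent seqeval releases do as well), the same structure can be obtained directly — a minimal sketch using scikit-learn on token-level labels:

from sklearn.metrics import classification_report

golds = ['B-PER', 'O', 'B-LOC', 'O']
preds = ['B-PER', 'O', 'O', 'O']

# output_dict=True returns per-label precision/recall/f1/support as a dict,
# so no text parsing is needed.
cr = classification_report(golds, preds, output_dict=True, zero_division=0)
print(cr['B-PER']['precision'])  # 1.0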
Exemple #18
0
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )
    parser.add_argument(
        "--bert_model",
        default=None,
        type=str,
        required=True,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.",
    )
    parser.add_argument(
        "--task_name",
        default=None,
        type=str,
        required=True,
        help="The name of the task to train.",
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written.",
    )

    ## Other parameters
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.",
    )
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run eval or not.")
    parser.add_argument(
        "--eval_on",
        default="dev",
        help="Whether to run eval on the dev set or test set.",
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.",
    )
    parser.add_argument(
        "--train_batch_size",
        default=32,
        type=int,
        help="Total batch size for training.",
    )
    parser.add_argument(
        "--eval_batch_size",
        default=8,
        type=int,
        help="Total batch size for eval.",
    )
    parser.add_argument(
        "--learning_rate",
        default=5e-5,
        type=float,
        help="The initial learning rate for Adam.",
    )
    parser.add_argument(
        "--num_train_epochs",
        default=3.0,
        type=float,
        help="Total number of training epochs to perform.",
    )
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.",
    )
    parser.add_argument(
        "--weight_decay",
        default=0.01,
        type=float,
        help="Weight deay if we apply some.",
    )
    parser.add_argument(
        "--adam_epsilon",
        default=1e-8,
        type=float,
        help="Epsilon for Adam optimizer.",
    )
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument(
        "--no_cuda",
        action="store_true",
        help="Whether not to use CUDA when available",
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="local_rank for distributed training on gpus",
    )
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit float precision instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument(
        "--loss_scale",
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n",
    )
    parser.add_argument(
        "--server_ip",
        type=str,
        default="",
        help="Can be used for distant debugging.",
    )
    parser.add_argument(
        "--server_port",
        type=str,
        default="",
        help="Can be used for distant debugging.",
    )
    args = parser.parse_args()

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    processors = {"ner": NerProcessor}

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend="nccl")
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = (args.train_batch_size //
                             args.gradient_accumulation_steps)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train):
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(
                args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    num_labels = len(label_list) + 1

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_optimization_steps = 0
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_optimization_steps = (int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps) * args.num_train_epochs)
        if args.local_rank != -1:
            num_train_optimization_steps = (num_train_optimization_steps //
                                            torch.distributed.get_world_size())

    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    # Prepare model
    config = BertConfig.from_pretrained(args.bert_model,
                                        num_labels=num_labels,
                                        finetuning_task=args.task_name)
    model = Ner.from_pretrained(args.bert_model, from_tf=False, config=config)

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay":
            0.0,
        },
    ]
    warmup_steps = int(args.warmup_proportion * num_train_optimization_steps)
    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr=args.learning_rate,
        eps=args.adam_epsilon,
    )
    scheduler = WarmupLinearSchedule(
        optimizer,
        warmup_steps=warmup_steps,
        t_total=num_train_optimization_steps,
    )
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True,
        )

    global_step = 0
    nb_tr_steps = 0
    tr_loss = 0
    label_map = {i: label for i, label in enumerate(label_list, 1)}
    if args.do_train:
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in train_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(
            all_input_ids,
            all_input_mask,
            all_segment_ids,
            all_label_ids,
            all_valid_ids,
            all_lmask_ids,
        )
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        model.train()
        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids, valid_ids, l_mask = (
                    batch)
                loss = model(
                    input_ids,
                    segment_ids,
                    input_mask,
                    label_ids,
                    valid_ids,
                    l_mask,
                )
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

        # Save a trained model and the associated configuration
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Only save the model itself
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        model_config = {
            "bert_model": args.bert_model,
            "do_lower": args.do_lower_case,
            "max_seq_length": args.max_seq_length,
            "num_labels": len(label_list) + 1,
            "label_map": label_map,
        }
        json.dump(
            model_config,
            open(os.path.join(args.output_dir, "model_config.json"), "w"),
        )
        # Load a trained model and config that you have fine-tuned
    else:
        # Load a trained model and vocabulary that you have fine-tuned
        model = Ner.from_pretrained(args.output_dir)
        tokenizer = BertTokenizer.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)

    model.to(device)

    if args.do_eval and (args.local_rank == -1
                         or torch.distributed.get_rank() == 0):
        if args.eval_on == "dev":
            eval_examples = processor.get_dev_examples(args.data_dir)
        elif args.eval_on == "test":
            eval_examples = processor.get_test_examples(args.data_dir)
        else:
            raise ValueError("eval on dev or test set only")
        eval_features = convert_examples_to_features(eval_examples, label_list,
                                                     args.max_seq_length,
                                                     tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in eval_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(
            all_input_ids,
            all_input_mask,
            all_segment_ids,
            all_label_ids,
            all_valid_ids,
            all_lmask_ids,
        )
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)
        model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        y_true = []
        y_pred = []
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        for (
                input_ids,
                input_mask,
                segment_ids,
                label_ids,
                valid_ids,
                l_mask,
        ) in tqdm(eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            valid_ids = valid_ids.to(device)
            label_ids = label_ids.to(device)
            l_mask = l_mask.to(device)

            with torch.no_grad():
                logits = model(
                    input_ids,
                    segment_ids,
                    input_mask,
                    valid_ids=valid_ids,
                    attention_mask_label=l_mask,
                )

            logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)
            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to("cpu").numpy()
            input_mask = input_mask.to("cpu").numpy()

            for i, label in enumerate(label_ids):
                temp_1 = []
                temp_2 = []
                for j, m in enumerate(label):
                    if j == 0:
                        continue
                    elif label_ids[i][j] == len(label_map):
                        y_true.append(temp_1)
                        y_pred.append(temp_2)
                        break
                    else:
                        temp_1.append(label_map[label_ids[i][j]])
                        temp_2.append(label_map[logits[i][j]])

        report = classification_report(y_true, y_pred, digits=4)
        logger.info("\n%s", report)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            logger.info("\n%s", report)
            writer.write(report)
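A note on the training/evaluation script above: labels are indexed from 1, index 0 is implicitly reserved for padding, and decoding stops when the id of the last label is seen. A small illustration with a hypothetical label set:

# Hypothetical label set in the same layout the script expects.
label_list = ["O", "B-PER", "I-PER", "B-LOC", "I-LOC", "[CLS]", "[SEP]"]
label_map = {i: label for i, label in enumerate(label_list, 1)}

num_labels = len(label_list) + 1             # +1 keeps index 0 free for padding
assert label_map[len(label_map)] == "[SEP]"  # the id that ends decoding above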
Exemple #19
0
    async def accuracy(self, sources: Sources):
        if not os.path.isfile(
            os.path.join(self.parent.config.output_dir, "tf_model.h5")
        ):
            raise ModelNotTrained("Train model before assessing for accuracy.")
        config = self.parent.config._asdict()
        config["strategy"] = self.parent.config.strategy
        config["n_device"] = self.parent.config.n_device
        self.tokenizer = self.tokenizer_class.from_pretrained(
            config["output_dir"], do_lower_case=config["do_lower_case"]
        )
        eval_batch_size = (
            config["per_device_eval_batch_size"] * config["n_device"]
        )
        data_df = await self._preprocess_data(sources)
        eval_dataset, num_eval_examples = self.get_dataset(
            data_df,
            self.tokenizer,
            self.pad_token_label_id,
            eval_batch_size,
            mode="accuracy",
        )
        eval_dataset = self.parent.config.strategy.experimental_distribute_dataset(
            eval_dataset
        )

        checkpoints = []
        results = []

        if config["eval_all_checkpoints"]:
            checkpoints = list(
                os.path.dirname(c)
                for c in sorted(
                    glob.glob(
                        config["output_dir"] + "/**/" + TF2_WEIGHTS_NAME,
                        recursive=True,
                    ),
                    key=lambda f: int("".join(filter(str.isdigit, f)) or -1),
                )
            )

        if len(checkpoints) == 0:
            checkpoints.append(config["output_dir"])

        self.logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            global_step = (
                checkpoint.split("-")[-1]
                if re.match(".*checkpoint-[0-9]", checkpoint)
                else "final"
            )

            with self.parent.config.strategy.scope():
                self.model = self.model_class.from_pretrained(checkpoint)

            y_true, y_pred, eval_loss = self._custom_accuracy(
                eval_dataset,
                self.tokenizer,
                self.model,
                num_eval_examples,
                eval_batch_size,
            )
            report = classification_report(y_true, y_pred, digits=4)

            if global_step:
                results.append(
                    {
                        global_step + "_report": report,
                        global_step + "_loss": eval_loss,
                    }
                )

        output_eval_file = os.path.join(
            config["output_dir"], "accuracy_results.txt"
        )
        # create the report and save in output_dir
        with self.tf.io.gfile.GFile(output_eval_file, "w") as writer:
            for res in results:
                for key, val in res.items():
                    if "loss" in key:
                        self.logger.debug(key + " = " + str(val))
                        writer.write(key + " = " + str(val))
                        writer.write("\n")
                    else:
                        self.logger.debug(key)
                        self.logger.debug("\n" + report)
                        writer.write(key + "\n")
                        writer.write(report)
                        writer.write("\n")
        # Return accuracy for the last checkpoint
        return Accuracy(f1_score(y_true, y_pred))
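The checkpoint discovery above orders weight files by the integer formed from every digit in their path. A small illustration of that sort key, with made-up paths:

paths = [
    "output/checkpoint-10/tf_model.h5",
    "output/checkpoint-2/tf_model.h5",
    "output/tf_model.h5",
]
# All digits in a path are concatenated and compared as one integer
# ("...-2/tf_model.h5" -> 25, "...-10/tf_model.h5" -> 105, "output/tf_model.h5" -> 5),
# so the final weights come first and numbered checkpoints follow in step order.
ordered = sorted(paths, key=lambda f: int("".join(filter(str.isdigit, f)) or -1))
print(ordered)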
Exemple #20
0
def test(padded_X,
         X_lengths,
         padded_Y,
         model,
         batch_size,
         longest_sent,
         optimizer,
         label_map,
         device,
         upos=None,
         feats=None,
         fixes=None,
         results_dir=None,
         save_file=True,
         results_file="eval_results.txt"):
    y_corr_all = []
    y_pred_all = []
    for example_i in range(0, len(padded_X), batch_size):
        # TODO: handle the final partial batch instead of skipping it.
        # Skip the last batch when it is smaller than batch_size.
        if example_i + batch_size > len(padded_X):
            break
        X_ids = padded_X[example_i:min(example_i + batch_size, len(padded_X))]
        upos_ids, feats_ids, fixes_ids = None, None, None
        if upos is not None:
            upos_ids = upos[example_i:min(example_i + batch_size, len(upos))]
        if feats is not None:
            feats_ids = [
                feat[example_i:min(example_i + batch_size, len(feat))]
                for feat in feats
            ]
        if fixes is not None:
            fixes_ids = fixes[example_i:min(example_i +
                                            batch_size, len(fixes))]

        X_leng = X_lengths[example_i:min(example_i +
                                         batch_size, len(X_lengths))]

        Y_ids = padded_Y[example_i:min(example_i + batch_size, len(padded_Y))]

        if upos is not None:
            if feats is not None:
                if fixes is not None:
                    sorted_data = sorted(zip(X_leng, X_ids, Y_ids, upos_ids,
                                             feats_ids, fixes_ids),
                                         key=lambda pair: pair[0],
                                         reverse=True)

                    X_leng, X_ids, Y_ids, upos_ids, feats_ids, fixes_ids = zip(
                        *sorted_data)
                    X_leng, X_ids, Y_ids, upos_ids, feats_ids, fixes_ids = list(
                        X_leng), list(X_ids), list(Y_ids), list(
                            upos_ids), list(feats_ids), list(fixes_ids)
                else:
                    sorted_data = sorted(zip(X_leng, X_ids, Y_ids, upos_ids,
                                             *feats_ids),
                                         key=lambda pair: pair[0],
                                         reverse=True)

                    X_leng, X_ids, Y_ids, upos_ids, *feats_ids = zip(
                        *sorted_data)
                    X_leng, X_ids, Y_ids, upos_ids = list(X_leng), list(
                        X_ids), list(Y_ids), list(upos_ids)
                    feats_ids = [list(feat_ids) for feat_ids in feats_ids]
            else:
                sorted_data = sorted(zip(X_leng, X_ids, Y_ids, upos_ids),
                                     key=lambda pair: pair[0],
                                     reverse=True)

                X_leng, X_ids, Y_ids, upos_ids = zip(*sorted_data)
                X_leng, X_ids, Y_ids, upos_ids = list(X_leng), list(
                    X_ids), list(Y_ids), list(upos_ids)
        elif feats is not None:
            if fixes is not None:
                sorted_data = sorted(zip(X_leng, X_ids, Y_ids, feats_ids,
                                         fixes_ids),
                                     key=lambda pair: pair[0],
                                     reverse=True)

                X_leng, X_ids, Y_ids, feats_ids, fixes_ids = zip(*sorted_data)
                X_leng, X_ids, Y_ids, feats_ids, fixes_ids = list(
                    X_leng), list(X_ids), list(Y_ids), list(feats_ids), list(
                        fixes_ids)
            else:
                sorted_data = sorted(zip(X_leng, X_ids, Y_ids, *feats_ids),
                                     key=lambda pair: pair[0],
                                     reverse=True)

                X_leng, X_ids, Y_ids, *feats_ids = zip(*sorted_data)
                X_leng, X_ids, Y_ids, = list(X_leng), list(X_ids), list(Y_ids)
                feats_ids = [list(feat_ids) for feat_ids in feats_ids]
        elif fixes is not None:
            # upos and feats are None in this branch, so only fixes_ids is sorted along.
            sorted_data = sorted(zip(X_leng, X_ids, Y_ids, fixes_ids),
                                 key=lambda pair: pair[0],
                                 reverse=True)

            X_leng, X_ids, Y_ids, fixes_ids = zip(*sorted_data)
            X_leng, X_ids, Y_ids, fixes_ids = list(X_leng), list(X_ids), list(
                Y_ids), list(fixes_ids)
        else:
            sorted_data = sorted(zip(X_leng, X_ids, Y_ids),
                                 key=lambda pair: pair[0],
                                 reverse=True)

            X_leng, X_ids, Y_ids = zip(*sorted_data)
            X_leng, X_ids, Y_ids = list(X_leng), list(X_ids), list(Y_ids)

        Y_ids = torch.tensor([index for exam in Y_ids for index in exam],
                             dtype=torch.long) - 1
        Y_ids = Y_ids.to(device)

        X_ids = torch.tensor(X_ids, dtype=torch.float32)
        X_ids = X_ids.to(device)
        if upos is not None:
            upos_ids = torch.tensor(upos_ids, dtype=torch.long)
            upos_ids = upos_ids.to(device)
        if feats is not None:
            for feat_i, feat_ids in enumerate(feats_ids):
                feat_ids = torch.tensor(feat_ids, dtype=torch.long)
                feats_ids[feat_i] = feat_ids.to(device)
        if fixes is not None:
            fixes_ids = torch.tensor(fixes_ids, dtype=torch.long)
            fixes_ids = fixes_ids.to(device)

        with torch.no_grad():
            if fixes is not None:
                y_pred = model(X_ids, X_leng, upos_ids, feats_ids, fixes_ids)
            elif feats is not None:
                y_pred = model(X_ids, X_leng, upos_ids, feats_ids)
            elif upos is not None:
                y_pred = model(X_ids, X_leng, upos_ids)
            else:
                y_pred = model(X_ids, X_leng)

        y_pred = y_pred.detach().cpu()

        # Reshape predictions and gold ids back to [batch, longest_sent], then map
        # each id to its label string so classification_report can consume them.
        y_pred_reshaped = torch.argmax(y_pred, dim=1)

        y_pred_reshaped = y_pred_reshaped.view(-1, longest_sent).numpy()
        y_corr_reshaped = Y_ids.view(-1, longest_sent).cpu().numpy()

        y_corr = []
        y_pred_tags = []

        for i_y in range(batch_size):
            y_corr_row = []
            y_pred_tags_row = []
            for j_y in range(X_leng[i_y]):
                y_corr_row.append(label_map[y_corr_reshaped[i_y][j_y]])
                y_pred_tags_row.append(label_map[y_pred_reshaped[i_y][j_y]])
            y_corr.append(y_corr_row)
            y_pred_tags.append(y_pred_tags_row)

        y_corr_all.extend(y_corr)
        y_pred_all.extend(y_pred_tags)

        optimizer.zero_grad()

    report = classification_report(y_corr_all, y_pred_all, digits=4)
    if save_file:
        if not os.path.exists(results_dir):
            os.mkdir(results_dir)
        output_eval_file = os.path.join(results_dir, results_file)

        with open(output_eval_file, "w") as writer:
            writer.write(report)

    print(report)
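The decode step above maps padded id matrices back to per-sentence tag lists, keeping only the first X_leng[i] positions of each row. A compact illustration with a purely hypothetical tag set:

import numpy as np

label_map = {0: 'O', 1: 'B-ENT', 2: 'I-ENT'}              # hypothetical tags
y_pred_reshaped = np.array([[1, 2, 0, 0], [0, 1, 0, 0]])  # padded to longest_sent = 4
X_leng = [3, 2]                                           # true sentence lengths

decoded = [
    [label_map[y_pred_reshaped[i][j]] for j in range(X_leng[i])]
    for i in range(len(X_leng))
]
print(decoded)  # [['B-ENT', 'I-ENT', 'O'], ['O', 'B-ENT']]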
Exemple #21
0
    def test_inv_classification_report(self):
        print(
            classification_report(self.y_true_inv,
                                  self.y_pred_inv,
                                  suffix=True))
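suffix=True in Exemple #21 tells the report to read tags written type-first, position-last (e.g. 'PER-B' instead of 'B-PER'). A minimal sketch, assuming seqeval:

from seqeval.metrics import classification_report

# Tags in the inverted ('suffix') format: the chunk position comes last.
y_true_inv = [['PER-B', 'PER-I', 'O'], ['LOC-B', 'O']]
y_pred_inv = [['PER-B', 'PER-I', 'O'], ['O', 'O']]

print(classification_report(y_true_inv, y_pred_inv, suffix=True))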
Exemple #22
0
    def evaluate(self, eval_dataset, output_dir):
        """
        Evaluates the model on eval_dataset.

        Utility function to be used by the eval_model() method. Not intended to be used directly.
        """

        device = self.device
        model = self.model
        args = self.args
        pad_token_label_id = self.pad_token_label_id
        eval_output_dir = output_dir

        results = {}

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args["eval_batch_size"])

        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        model.eval()

        for batch in tqdm(eval_dataloader, disable=args["silent"]):
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "labels": batch[3],
                }
                # XLM and RoBERTa don't use segment_ids
                if args["model_type"] in ["bert", "xlnet"]:
                    inputs["token_type_ids"] = batch[2]
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()

            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs["labels"].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        model_outputs = preds
        preds = np.argmax(preds, axis=2)

        label_map = {i: label for i, label in enumerate(self.labels)}

        out_label_list = [[] for _ in range(out_label_ids.shape[0])]
        preds_list = [[] for _ in range(out_label_ids.shape[0])]

        for i in range(out_label_ids.shape[0]):
            for j in range(out_label_ids.shape[1]):
                if out_label_ids[i, j] != pad_token_label_id:
                    out_label_list[i].append(label_map[out_label_ids[i][j]])
                    preds_list[i].append(label_map[preds[i][j]])

        result = {
            "eval_loss": eval_loss,
            "precision": precision_score(out_label_list, preds_list),
            "recall": recall_score(out_label_list, preds_list),
            "f1_score": f1_score(out_label_list, preds_list),
        }

        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            if args["classification_report"]:
                cls_report = classification_report(out_label_list, preds_list)
                writer.write("{}\n".format(cls_report))
            for key in sorted(result.keys()):
                writer.write("{} = {}\n".format(key, str(result[key])))

        return results, model_outputs, preds_list
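The loop above rebuilds per-sentence label lists by dropping every position whose gold id equals pad_token_label_id (typically CrossEntropyLoss's ignore_index, -100). A compact illustration with a hypothetical label map and that assumed pad id:

import numpy as np

pad_token_label_id = -100                      # assumed; matches CrossEntropyLoss().ignore_index
label_map = {0: 'O', 1: 'B-LOC', 2: 'I-LOC'}   # hypothetical

out_label_ids = np.array([[1, 2, -100], [0, -100, -100]])
preds = np.array([[1, 0, 2], [0, 1, 2]])

out_label_list = [[] for _ in range(out_label_ids.shape[0])]
preds_list = [[] for _ in range(out_label_ids.shape[0])]
for i in range(out_label_ids.shape[0]):
    for j in range(out_label_ids.shape[1]):
        if out_label_ids[i, j] != pad_token_label_id:
            out_label_list[i].append(label_map[out_label_ids[i, j]])
            preds_list[i].append(label_map[preds[i, j]])

print(out_label_list)  # [['B-LOC', 'I-LOC'], ['O']]
print(preds_list)      # [['B-LOC', 'O'], ['O']]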
Exemple #23
0
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument(
        "--bert_model",
        default=None,
        type=str,
        required=True,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument("--test_file", default='', type=str, help="Test file")
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )

    ## Other parameters
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument("--validate_per_epoch",
                        default=3,
                        type=int,
                        help="validations number per epoch")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_validation",
                        action='store_true',
                        help="Whether to run validation.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--drop",
                        default=0.1,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--weight_decay",
                        default=0.01,
                        type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--fp16_opt_level',
        type=str,
        default='O1',
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    args = parser.parse_args()

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train:
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(
                args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    if args.do_train:
        logger.addHandler(
            logging.FileHandler(os.path.join(args.output_dir, "train.log"),
                                'w'))
    else:
        logger.addHandler(
            logging.FileHandler(os.path.join(args.output_dir, "eval.log"),
                                'w'))

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    processors = {"ner": NerProcessor}

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_tag_labels(args.data_dir)
    global EVAL_TAGS
    EVAL_TAGS = [
        label for label in label_list if label not in ['O', '[CLS]', '[SEP]']
    ]
    # EVAL_TAGS = [f'{x}-{y}' for x in ['B', 'I'] for y in EVAL_TAGS]
    logger.info(EVAL_TAGS)
    num_labels = len(label_list) + 1
    allowed_tags = set(EVAL_TAGS + ['O'])

    do_lower_case = 'uncased' in args.bert_model
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=do_lower_case)

    train_examples = None
    num_train_optimization_steps = 0
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size(
            )

    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    # Prepare model
    config = BertConfig.from_pretrained(args.bert_model,
                                        num_labels=num_labels,
                                        finetuning_task=args.task_name,
                                        hidden_dropout_prob=args.drop)
    print(config)
    model = Ner.from_pretrained(args.bert_model, from_tf=False, config=config)

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        args.weight_decay
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    warmup_steps = int(args.warmup_proportion * num_train_optimization_steps)
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=warmup_steps,
                                     t_total=num_train_optimization_steps)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    global_step = 0
    nb_tr_steps = 0
    tr_loss = 0
    label_map = {i: label for i, label in enumerate(label_list, 1)}
    best_dev = 0.0
    if args.do_validation:
        dev_examples = processor.get_dev_examples(args.data_dir, label_list)
        dev_features = convert_examples_to_features(dev_examples, label_list,
                                                    args.max_seq_length,
                                                    tokenizer)
        logger.info("***** Dev set *****")
        logger.info("  Num examples = %d", len(dev_examples))
        all_input_ids = torch.tensor([f.input_ids for f in dev_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in dev_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in dev_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in dev_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in dev_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in dev_features],
                                     dtype=torch.long)
        dev_data = TensorDataset(all_input_ids, all_input_mask,
                                 all_segment_ids, all_label_ids, all_valid_ids,
                                 all_lmask_ids)
        dev_sampler = SequentialSampler(dev_data)
        dev_dataloader = DataLoader(dev_data,
                                    sampler=dev_sampler,
                                    batch_size=args.eval_batch_size)
        validation_steps = int(
            len(train_examples) /
            args.train_batch_size) // args.validate_per_epoch
    if args.do_train:
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in train_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids,
                                   all_valid_ids, all_lmask_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        model.train()
        start_time = time.time()
        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids, valid_ids, l_mask = batch
                loss = model(input_ids,
                             segment_ids,
                             input_mask,
                             label_ids,
                             valid_ids,
                             l_mask,
                             device=device)
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                if args.do_validation and (step + 1) % validation_steps == 0:
                    logger.info(
                        'Epoch: {}, Step: {} / {}, used_time = {:.2f}s, loss = {:.6f}'
                        .format(epoch, step + 1, len(train_dataloader),
                                time.time() - start_time,
                                tr_loss / nb_tr_steps))
                    model.eval()
                    y_true = []
                    y_pred = []
                    label_map = {
                        i: label
                        for i, label in enumerate(label_list, 1)
                    }
                    label_map[0] = '[PAD]'
                    for batch in tqdm(dev_dataloader, desc='Validation'):
                        batch = tuple(t.to(device) for t in batch)
                        input_ids, input_mask, segment_ids, label_ids, valid_ids, l_mask = batch
                        with torch.no_grad():
                            logits = model(input_ids,
                                           segment_ids,
                                           input_mask,
                                           None,
                                           valid_ids,
                                           l_mask,
                                           device=device)
                        logits = torch.argmax(F.log_softmax(logits, dim=2),
                                              dim=2)
                        logits = logits.detach().cpu().numpy()
                        label_ids = label_ids.to('cpu').numpy()
                        # input_mask = input_mask.to('cpu').numpy()

                        for i, label in enumerate(label_ids):
                            temp_1 = []
                            temp_2 = []
                            for j, m in enumerate(label):
                                if j == 0:
                                    continue
                                elif label_ids[i][j] == len(label_map) - 1:
                                    y_true.append(temp_1)
                                    y_pred.append(temp_2)
                                    break
                                else:
                                    temp_1.append(label_map[label_ids[i][j]])
                                    temp_2.append(label_map[logits[i][j]])

                    y_true_copy = [[
                        x if x in allowed_tags else 'O' for x in y
                    ] for y in y_true]
                    y_pred_copy = [[
                        x if x in allowed_tags else 'O' for x in y
                    ] for y in y_pred]
                    report = classification_report(y_true_copy,
                                                   y_pred_copy,
                                                   digits=6)
                    # report_dict = classification_report(y_true_copy,
                    #                                     y_pred_copy,
                    #                                     output_dict=True)
                    # report_dict = report
                    logger.info("***** Validation results *****")
                    logger.info("\n%s", report)

                    fscore = float([
                        line.strip().split()[4] for line in report.split('\n')
                        if line.strip().startswith('micro')
                    ][0])

                    if fscore > best_dev:
                        logger.info(f'!!!Best dev: {fscore}')
                        logger.info(f'at epoch: {epoch}')
                        best_dev = fscore
                        model_to_save = model.module if hasattr(
                            model,
                             'module') else model  # Only save the model itself
                        model_to_save.save_pretrained(args.output_dir)
                        tokenizer.save_pretrained(args.output_dir)
                        label_map = {
                            i: label
                            for i, label in enumerate(label_list, 1)
                        }
                        label_map[0] = '[PAD]'
                        model_config = {
                            "bert_model": args.bert_model,
                            "do_lower": args.do_lower_case,
                            "max_seq_length": args.max_seq_length,
                            "num_labels": len(label_list) + 1,
                            "label_map": label_map
                        }
                        json.dump(
                            model_config,
                            open(
                                os.path.join(args.output_dir,
                                             "model_config.json"), "w"))
                    model.train()

    model = Ner.from_pretrained(args.output_dir)
    do_lower_case = 'uncased' in args.bert_model
    tokenizer = BertTokenizer.from_pretrained(args.output_dir,
                                              do_lower_case=do_lower_case)
    model.to(device)

    if args.do_eval and (args.local_rank == -1
                         or torch.distributed.get_rank() == 0):
        args.test_file = os.path.join(
            args.data_dir,
            'test.json') if args.test_file == '' else args.test_file
        eval_examples = processor.get_test_examples(args.test_file)
        eval_features = convert_examples_to_features(eval_examples, label_list,
                                                     args.max_seq_length,
                                                     tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        all_valid_ids = torch.tensor([f.valid_ids for f in eval_features],
                                     dtype=torch.long)
        all_lmask_ids = torch.tensor([f.label_mask for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids,
                                  all_valid_ids, all_lmask_ids)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)
        model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        y_true = []
        y_pred = []
        tag_scores = []
        label_map = {i: label for i, label in enumerate(label_list, 1)}
        label_map[0] = '[PAD]'
        for input_ids, input_mask, segment_ids, label_ids, valid_ids, l_mask in tqdm(
                eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            valid_ids = valid_ids.to(device)
            label_ids = label_ids.to(device)
            l_mask = l_mask.to(device)

            with torch.no_grad():
                logits = model(input_ids,
                               segment_ids,
                               input_mask,
                               valid_ids=valid_ids,
                               attention_mask_label=l_mask,
                               device=device)

            scores = np.max(F.softmax(logits, dim=-1).cpu().numpy(), axis=-1)
            logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)
            logits = logits.detach().cpu().numpy()
            # scores = scores.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            input_mask = input_mask.to('cpu').numpy()

            for i, label in enumerate(label_ids):
                temp_1 = []
                temp_2 = []
                temp_3 = []
                for j, m in enumerate(label):
                    if j == 0:
                        continue
                    elif label_ids[i][j] == len(label_map) - 1:
                        y_true.append(temp_1)
                        y_pred.append(temp_2)
                        tag_scores.append(temp_3)
                        break
                    else:
                        temp_1.append(label_map[label_ids[i][j]])
                        temp_2.append(label_map[logits[i][j]])
                        temp_3.append(scores[i][j])

        y_true_copy = [[x if x in allowed_tags else 'O' for x in y]
                       for y in y_true]
        y_pred_copy = [[x if x in allowed_tags else 'O' for x in y]
                       for y in y_pred]

        report = classification_report(y_true_copy,
                                       y_pred_copy,
                                       digits=4)
        logger.info("\n%s", report)
        output_eval_file = os.path.join(
            args.output_dir,
            f"{args.test_file.split('/')[-1]}_eval_results.txt")
        output_preds_file = os.path.join(
            args.output_dir,
            f"{args.test_file.split('/')[-1]}_predictions.tsv")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            logger.info("\n%s", report)
            writer.write(report)

        prediction_results = {
            'id': [ex.guid for ex in eval_examples],
            'token': [ex.text_a for ex in eval_examples],
            'tag_label': [' '.join(ex.label) for ex in eval_examples],
            'tag_pred': [' '.join(pred) for pred in y_pred],
            'scores':
            [' '.join([str(x) for x in score]) for score in tag_scores]
        }
        pd.DataFrame(prediction_results).to_csv(output_preds_file,
                                                sep='\t',
                                                index=False)
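
# The two list comprehensions above collapse every tag outside `allowed_tags`
# to 'O' before scoring. A minimal, self-contained sketch of that filtering
# with seqeval (the tag set and sequences below are made up for illustration):
from seqeval.metrics import classification_report

allowed_tags = {'B-PER', 'I-PER', 'O'}
y_true = [['B-PER', 'I-PER', 'B-LOC', 'O']]
y_pred = [['B-PER', 'O', 'B-LOC', 'O']]
y_true_copy = [[t if t in allowed_tags else 'O' for t in seq] for seq in y_true]
y_pred_copy = [[t if t in allowed_tags else 'O' for t in seq] for seq in y_pred]
print(classification_report(y_true_copy, y_pred_copy, digits=4))
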
def main():
    args: ModelArguments = get_arguments()
    process = NerProcessor(args.data_dir)

    label_list = process.get_labels()
    num_labels = len(label_list) + 1

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train:
        raise ValueError(
            'Output directory ({}) already exists and is not empty.'.format(
                args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    if args.do_train:
        tokenizer = FullTokenizer(os.path.join(args.bert_model, "vocab.txt"),
                                  args.do_lower_case)

    if args.multi_gpu:
        if len(args.gpu.split(',')) == 1:
            strategy = tf.distribute.MirroredStrategy()
        else:
            # build the gpu device name arr
            gpus = [f'/gpu:{gpu}' for gpu in args.gpu.split(',')]
            strategy = tf.distribute.MirroredStrategy(devices=gpus)
    else:
        strategy = tf.distribute.OneDeviceStrategy(device=args.gpu)

    if args.do_train:
        train_examples = process.get_train_examples()

        # optimization total steps -> learning_rate scheduler, weight decay, warmup learning rate
        num_train_optimization_steps = int(
            len(train_examples) /
            args.train_batch_size) * args.num_train_epochs

        warmup_steps = int(args.warmup_proportion *
                           num_train_optimization_steps)

        # decay the learning rate linearly to zero over training
        learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(
            initial_learning_rate=args.learning_rate,
            decay_steps=num_train_optimization_steps,
            end_learning_rate=0.)

        # layer norm and bias parameters should not get weight decay;
        # the optimizer consumes the decay schedule built above
        optimizer = AdamWeightDecay(
            learning_rate=learning_rate_fn,
            weight_decay_rate=args.weight_decay,
            beta_1=0.9,
            beta_2=0.99,
            epsilon=args.adam_epsilon,
            exclude_from_weight_decay=['layer_norm', 'bias'])

        with strategy.scope():
            ner = BertNer(args.bert_model, tf.float32, args.num_labels,
                          args.max_seq_length)
            # can define the specific meaning of the reduction
            loss_fct = tf.keras.losses.SparseCategoricalCrossentropy(
                reduction=tf.keras.losses.Reduction.NONE)

    label_map = {label: index for index, label in enumerate(label_list, 1)}

    if args.do_train:
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        logger.info('*** Running training ***')
        logger.info('  Num Examples = %d', len(train_examples))
        logger.info('  Batch Size = %d', args.train_batch_size)
        logger.info('  Num Steps = %d', num_train_optimization_steps)
        all_input_ids = tf.data.Dataset.from_tensor_slices(
            np.asarray([f.input_ids for f in train_features]))
        all_input_mask = tf.data.Dataset.from_tensor_slices(
            np.asarray([f.input_mask for f in train_features]))

        all_label_ids = tf.data.Dataset.from_tensor_slices(
            np.asarray([f.label_ids for f in train_features]))
        all_label_mask = tf.data.Dataset.from_tensor_slices(
            np.asarray([f.label_mask for f in train_features]))

        all_valid_ids = tf.data.Dataset.from_tensor_slices(
            np.asarray([f.valid_ids for f in train_features]))
        all_segment_ids = tf.data.Dataset.from_tensor_slices(
            np.asarray([f.segment_ids for f in train_features]))

        train_data = tf.data.Dataset.zip(
            (all_input_ids, all_input_mask, all_segment_ids, all_valid_ids,
             all_label_ids, all_label_mask))

        # set the shuffle buffer size, reshuffle the train data in each iteration
        shuffled_train_data = train_data.shuffle(
            buffer_size=int(len(train_features) * 0.1),
            seed=args.seed,
            reshuffle_each_iteration=True).batch(args.train_batch_size)

        distributed_data = strategy.experimental_distribute_dataset(
            shuffled_train_data)
        loss_metric = tf.keras.metrics.Mean()

        epoch_bar = master_bar(range(1))

        def train_steps(input_ids, input_mask, segment_id, valid_ids,
                        label_ids, label_mask):
            def step_fn(_input_ids, _input_mask, _segment_id, _valid_ids,
                        _label_ids, _label_mask):
                with tf.GradientTape() as tape:
                    # forward pass for this replica's shard of the batch
                    output = ner(_input_ids,
                                 _input_mask,
                                 _segment_id,
                                 _valid_ids,
                                 training=True)

                    # flatten the outputs and keep only the positions that
                    # carry a real label (selected by the label mask)
                    _label_mask = tf.reshape(_label_mask, (-1,))
                    output = tf.reshape(output, (-1, num_labels))
                    output = tf.boolean_mask(output, _label_mask)

                    _label_ids = tf.reshape(_label_ids, (-1, ))
                    _label_ids = tf.boolean_mask(_label_ids, _label_mask)

                    cross_entropy = loss_fct(_label_ids, output)

                    # average the summed token losses over the global batch size
                    loss = tf.reduce_sum(
                        cross_entropy) * 1. / args.train_batch_size

                gradients = tape.gradient(loss, ner.trainable_variables)
                optimizer.apply_gradients(
                    grads_and_vars=zip(gradients, ner.trainable_variables))

                return cross_entropy

            # run the training step on each replica (GPU) in parallel
            per_example_loss = strategy.experimental_run_v2(
                step_fn,
                args=(input_ids, input_mask, segment_id, valid_ids, label_ids,
                      label_mask))
            mean_loss = strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                        per_example_loss,
                                        axis=0)
            return mean_loss

        pb_max_length = math.ceil(len(train_features) / args.train_batch_size)
        for epoch in epoch_bar:
            with strategy.scope():
                for (input_ids, input_mask, segment_ids, valid_ids, label_ids,
                     label_mask) in progress_bar(distributed_data,
                                                 total=pb_max_length,
                                                 parent=epoch_bar):
                    loss = train_steps(input_ids, input_mask, segment_ids,
                                       valid_ids, label_ids, label_mask)
                    loss_metric(loss)
                    epoch_bar.child.comment = f'loss: {loss}'
                loss_metric.reset_states()

        ner.save_weights(os.path.join(args.output_dir, 'model.h5'))

    if args.do_eval:
        tokenizer = FullTokenizer(os.path.join(args.bert_model, 'vocab.txt'),
                                  do_lower_case=args.do_lower_case)
        ner = BertNer(args.bert_model, tf.float32, args.num_labels,
                      args.max_seq_length)

        # run one dummy batch through the model so its variables are built
        # before the saved weights are loaded
        ids = tf.ones((1, args.max_seq_length), dtype=tf.int64)
        ner(ids, ids, ids, ids, training=False)
        ner.load_weights(os.path.join(args.output_dir, 'model.h5'))

        # load the data
        if args.eval_on == 'dev':
            eval_examples = process.get_dev_examples()
        elif args.eval_on == 'test':
            eval_examples = process.get_test_examples()
        else:
            raise ValueError("eval_on argument must be either 'dev' or 'test'")

        eval_features = convert_examples_to_features(eval_examples, label_list,
                                                     args.max_seq_length,
                                                     tokenizer)

        # print the eval info
        logger.info('*** Eval Examples ***')
        logger.info('  Num Examples = %d', len(eval_features))
        logger.info('  Batch Size = %d', args.eval_batch_size)

        all_input_ids = tf.data.Dataset.from_tensor_slices(
            [f.input_ids for f in eval_features])
        all_input_mask = tf.data.Dataset.from_tensor_slices(
            [f.input_mask for f in eval_features])
        all_segment_ids = tf.data.Dataset.from_tensor_slices(
            [f.segment_ids for f in eval_features])
        all_valid_ids = tf.data.Dataset.from_tensor_slices(
            [f.valid_ids for f in eval_features])
        all_label_ids = tf.data.Dataset.from_tensor_slices(
            [f.label_ids for f in eval_features])
        all_label_mask = tf.data.Dataset.from_tensor_slices(
            [f.label_mask for f in eval_features])

        eval_data = tf.data.Dataset.zip(
            (all_input_ids, all_input_mask, all_segment_ids, all_valid_ids,
             all_label_ids, all_label_mask)).batch(args.eval_batch_size)

        loss_metric = tf.metrics.Mean()
        epoch_bar = master_bar(range(1))
        processor_bar_length = math.ceil(
            len(eval_features) / args.eval_batch_size)
        y_true, y_predict = [], []
        for epoch in epoch_bar:
            for (input_ids, input_mask, segment_ids, valid_ids, label_ids,
                 label_mask) in progress_bar(eval_data,
                                             total=processor_bar_length,
                                             parent=epoch_bar):
                logits = ner(input_ids,
                             input_mask,
                             segment_ids,
                             valid_ids,
                             training=False)
                logits = tf.argmax(logits, axis=-1)
                label_predict = tf.boolean_mask(logits, label_mask)
                label_true = tf.boolean_mask(label_ids, label_mask)
                y_true.append(label_true)
                y_predict.append(label_predict)

        report = classification_report(y_true, y_predict, digits=4)
        output_eval_file = os.path.join(args.output_dir, 'eval_result.txt')
        with open(output_eval_file, 'w', encoding='utf-8') as f:
            logger.info('*** Eval Result ***')
            logger.info(report)
            f.write(report)
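
        # Note: y_true / y_predict above hold tensors of label ids, while
        # seqeval's classification_report expects lists of tag strings per
        # sentence. A hedged sketch (assuming ids were assigned with
        # enumerate(label_list, 1), as in label_map earlier) of the decoding
        # step that would normally sit before the report:
        #   id2label = {index: label for index, label in enumerate(label_list, 1)}
        #   y_true_tags = [[id2label[int(i)] for i in seq.numpy()] for seq in y_true]
        #   y_pred_tags = [[id2label[int(i)] for i in seq.numpy()] for seq in y_predict]
        #   report = classification_report(y_true_tags, y_pred_tags, digits=4)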
Exemple #25
0
def evaluate(opt):
    # set config
    config = load_config(opt)
    if opt.num_threads > 0: torch.set_num_threads(opt.num_threads)
    config['opt'] = opt
    logger.info("%s", config)

    # set path
    set_path(config)

    # prepare test dataset
    test_loader = prepare_datasets(config)

    # load pytorch model checkpoint
    checkpoint = load_checkpoint(config)

    # prepare model and load parameters
    model = load_model(config, checkpoint)
    model.eval()

    # convert to onnx format
    if opt.convert_onnx:
        (x, y) = next(iter(test_loader))
        x = to_device(x, opt.device)
        y = to_device(y, opt.device)
        convert_onnx(config, model, x)
        check_onnx(config)
        logger.info("[ONNX model saved at {}".format(opt.onnx_path))
        # quantize onnx
        if opt.quantize_onnx:
            quantize_onnx(opt.onnx_path, opt.quantized_onnx_path)
            logger.info("[Quantized ONNX model saved at {}".format(
                opt.quantized_onnx_path))
        return

    # load onnx model for using onnxruntime
    if opt.enable_ort:
        import onnxruntime as ort
        sess_options = ort.SessionOptions()
        sess_options.inter_op_num_threads = opt.num_threads
        sess_options.intra_op_num_threads = opt.num_threads
        ort_session = ort.InferenceSession(opt.onnx_path,
                                           sess_options=sess_options)

    # enable to use dynamic quantized model (pytorch>=1.3.0)
    if opt.enable_dqm and opt.device == 'cpu':
        model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear},
                                                    dtype=torch.qint8)
        print(model)

    # evaluation
    preds = None
    ys = None
    n_batches = len(test_loader)
    total_examples = 0
    whole_st_time = time.time()
    first_time = time.time()
    first_examples = 0
    total_duration_time = 0.0
    with torch.no_grad():
        for i, (x, y) in enumerate(tqdm(test_loader, total=n_batches)):
            start_time = time.time()
            x = to_device(x, opt.device)
            y = to_device(y, opt.device)

            if opt.enable_ort:
                x = to_numpy(x)
                if config['emb_class'] == 'glove':
                    ort_inputs = {
                        ort_session.get_inputs()[0].name: x[0],
                        ort_session.get_inputs()[1].name: x[1]
                    }
                    if opt.use_char_cnn:
                        ort_inputs[ort_session.get_inputs()[2].name] = x[2]
                if config['emb_class'] in [
                        'bert', 'distilbert', 'albert', 'roberta', 'bart',
                        'electra'
                ]:
                    if config['emb_class'] in ['distilbert', 'bart']:
                        ort_inputs = {
                            ort_session.get_inputs()[0].name: x[0],
                            ort_session.get_inputs()[1].name: x[1]
                        }
                    else:
                        ort_inputs = {
                            ort_session.get_inputs()[0].name: x[0],
                            ort_session.get_inputs()[1].name: x[1],
                            ort_session.get_inputs()[2].name: x[2]
                        }
                    if opt.bert_use_pos:
                        ort_inputs[ort_session.get_inputs()[3].name] = x[3]
                if opt.use_crf:
                    logits, prediction = ort_session.run(None, ort_inputs)
                    prediction = to_device(torch.tensor(prediction),
                                           opt.device)
                    logits = to_device(torch.tensor(logits), opt.device)
                else:
                    logits = ort_session.run(None, ort_inputs)[0]
                    logits = to_device(torch.tensor(logits), opt.device)
            else:
                if opt.use_crf: logits, prediction = model(x)
                else: logits = model(x)

            if preds is None:
                if opt.use_crf: preds = to_numpy(prediction)
                else: preds = to_numpy(logits)
                ys = to_numpy(y)
            else:
                if opt.use_crf:
                    preds = np.append(preds, to_numpy(prediction), axis=0)
                else:
                    preds = np.append(preds, to_numpy(logits), axis=0)
                ys = np.append(ys, to_numpy(y), axis=0)
            cur_examples = y.size(0)
            total_examples += cur_examples
            if i == 0:  # first one may take longer time, so ignore in computing duration.
                first_time = float((time.time() - first_time) * 1000)
                first_examples = cur_examples
            if opt.num_examples != 0 and total_examples >= opt.num_examples:
                logger.info("[Stop Evaluation] : up to the {} examples".format(
                    total_examples))
                break
            duration_time = float((time.time() - start_time) * 1000)
            if i != 0: total_duration_time += duration_time
            # logger.info("[Elapsed Time] : {}ms".format(duration_time))
    whole_time = float((time.time() - whole_st_time) * 1000)
    avg_time = (whole_time - first_time) / (total_examples - first_examples)
    if not opt.use_crf: preds = np.argmax(preds, axis=2)
    # compute measure using seqeval
    labels = model.labels
    ys_lbs = [[] for _ in range(ys.shape[0])]
    preds_lbs = [[] for _ in range(ys.shape[0])]
    pad_label_id = config['pad_label_id']
    for i in range(ys.shape[0]):  # foreach sentence
        for j in range(ys.shape[1]):  # foreach token
            if ys[i][j] != pad_label_id:
                ys_lbs[i].append(labels[ys[i][j]])
                preds_lbs[i].append(labels[preds[i][j]])
    ret = {
        "precision": precision_score(ys_lbs, preds_lbs),
        "recall": recall_score(ys_lbs, preds_lbs),
        "f1": f1_score(ys_lbs, preds_lbs),
        "report": classification_report(ys_lbs, preds_lbs, digits=4),
    }
    print(ret['report'])
    f1 = ret['f1']
    # write predicted labels to file
    default_label = config['default_label']
    write_prediction(opt, ys, preds, labels, pad_label_id, default_label)

    logger.info("[F1] : {}, {}".format(f1, total_examples))
    logger.info("[Elapsed Time] : {} examples, {}ms, {}ms on average".format(
        total_examples, whole_time, avg_time))
    logger.info(
        "[Elapsed Time(total_duration_time, average)] : {}ms, {}ms".format(
            total_duration_time, total_duration_time / (total_examples - 1)))
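
# Standalone sketch of the dynamic quantization enabled above via
# --enable_dqm: torch.nn.Linear modules are swapped for int8
# dynamically-quantized versions (the toy model here is illustrative only).
import torch

toy_model = torch.nn.Sequential(torch.nn.Linear(8, 16),
                                torch.nn.ReLU(),
                                torch.nn.Linear(16, 4))
quantized_model = torch.quantization.quantize_dynamic(toy_model,
                                                      {torch.nn.Linear},
                                                      dtype=torch.qint8)
print(quantized_model)
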
Exemple #26
0
def report(labels, preds):
    return classification_report(labels, preds)
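
# Possible usage of the helper above; seqeval expects one tag sequence per
# sentence (the tags here are made up for illustration):
example_labels = [['B-ORG', 'I-ORG', 'O']]
example_preds = [['B-ORG', 'O', 'O']]
print(report(example_labels, example_preds))
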
Exemple #27
0
            with torch.no_grad():
                tmp_eval_loss = model(b_input_ids,
                                      token_type_ids=None,
                                      attention_mask=b_input_mask,
                                      labels=b_labels)
                logits = model(b_input_ids,
                               token_type_ids=None,
                               attention_mask=b_input_mask)
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            # print(np.argmax(logits, axis=2).shape)
            predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
            true_labels.extend(label_ids)
            tmp_eval_accuracy = flat_accuracy(logits, label_ids)

            eval_loss += tmp_eval_loss.mean().item()
            eval_accuracy += tmp_eval_accuracy

            nb_eval_examples += b_input_ids.size(0)
            nb_eval_steps += 1
        eval_loss = eval_loss / nb_eval_steps
        print("Validation loss: {}".format(eval_loss))
        print("Validation Accuracy: {}".format(eval_accuracy / nb_eval_steps))
        pred_tags = [[id2label[p_i] for p_i in p] for p in predictions]
        valid_tags = [[id2label[l_i] for l_i in l] for l in true_labels]
        with open("logs/logs_epoch_{}.txt".format(epoch), "w") as f:
            for tokens, pred, valid in zip(tokenized_test_text, pred_tags,
                                           valid_tags):
                f.write(" ".join(tokens) + "\n")
                f.write(" ".join(pred) + "\n")
                f.write(" ".join(valid) + "\n\n")
        print("F1-Score: {}".format(f1_score(pred_tags, valid_tags)))
        print(classification_report(pred_tags, valid_tags))
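
# flat_accuracy is used above but not defined in this snippet; a common
# definition (an assumption, not necessarily the author's exact helper)
# flattens the batch and compares argmax predictions against the gold ids:
import numpy as np

def flat_accuracy(logits, label_ids):
    pred_flat = np.argmax(logits, axis=2).flatten()
    labels_flat = label_ids.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)
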
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument(
        "--bert_model",
        default=None,
        type=str,
        required=True,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )

    ## Other parameters
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument(
        "--percent",
        default=100,
        type=int,
        help="The percentage of examples used in the training data.\n")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--pretrain',
        action='store_true',
        help="Whether to load a pre-trained model for continuing training")
    parser.add_argument('--pretrained_model_file',
                        type=str,
                        help="The path of the pretrained_model_file")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    args = parser.parse_args()

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    processors = {"ner": NerProcessor}

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train:
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(
                args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    num_labels = len(label_list) + 1

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_optimization_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        train_examples = train_examples[:int(
            len(train_examples) * args.percent / 100)]
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size(
            )

    # Prepare model
    cache_dir = args.cache_dir if args.cache_dir else os.path.join(
        str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(
            args.local_rank))
    model = BertForTokenClassification.from_pretrained(args.bert_model,
                                                       cache_dir=cache_dir,
                                                       num_labels=num_labels)
    if args.pretrain:
        # Load a pre-trained model
        print('load a pre-trained model from ' + args.pretrained_model_file)
        pretrained_state_dict = torch.load(args.pretrained_model_file)
        model_state_dict = model.state_dict()
        print('pretrained_state_dict', pretrained_state_dict.keys())
        print('model_state_dict', model_state_dict.keys())
        pretrained_state = {
            k: v
            for k, v in pretrained_state_dict.items() if k in model_state_dict
            and v.size() == model_state_dict[k].size()
        }
        model_state_dict.update(pretrained_state)
        print('updated_state_dict', model_state_dict.keys())
        model.load_state_dict(model_state_dict)
        model.to(device)
    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )

        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    if args.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if args.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer,
                                       static_loss_scale=args.loss_scale)

    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_optimization_steps)

    global_step = 0
    if args.do_train:
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        model.train()
        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                loss = model(input_ids, segment_ids, input_mask, label_ids)
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        # modify learning rate with special warm up BERT uses
                        # if args.fp16 is False, BertAdam is used that handles this automatically
                        lr_this_step = args.learning_rate * warmup_linear(
                            global_step / num_train_optimization_steps,
                            args.warmup_proportion)
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr_this_step
                    optimizer.step()
                    optimizer.zero_grad()
                    global_step += 1
            print('train loss', tr_loss)

        # Save a trained model and the associated configuration
        model_to_save = model.module if hasattr(
            model, 'module') else model  # Only save the model itself
        output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
        torch.save(model_to_save.state_dict(), output_model_file)
        output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
        with open(output_config_file, 'w') as f:
            f.write(model_to_save.config.to_json_string())
        label_map = {i: label for i, label in enumerate(label_list, 0)}
        model_config = {
            "bert_model": args.bert_model,
            "do_lower": args.do_lower_case,
            "max_seq_length": args.max_seq_length,
            "num_labels": len(label_list) + 1,
            "label_map": label_map
        }
        json.dump(
            model_config,
            open(os.path.join(args.output_dir, "model_config.json"), "w"))
        # Load a trained model and config that you have fine-tuned
    else:
        output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
        model_state_dict = torch.load(output_model_file)
        model = BertForTokenClassification.from_pretrained(
            args.bert_model,
            state_dict=model_state_dict,
            num_labels=num_labels)
        model.load_state_dict(torch.load(output_model_file))

    model.to(device)

    if args.do_eval and (args.local_rank == -1
                         or torch.distributed.get_rank() == 0):
        eval_examples = processor.get_dev_examples(args.data_dir)
        eval_features = convert_examples_to_features(eval_examples, label_list,
                                                     args.max_seq_length,
                                                     tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)
        model.eval()
        y_true = []
        y_pred = []
        label_map = {i: label for i, label in enumerate(label_list, 0)}
        for input_ids, input_mask, segment_ids, label_ids in tqdm(
                eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            label_ids = label_ids.to(device)

            with torch.no_grad():
                logits = model(input_ids, segment_ids, input_mask)

            logits = torch.argmax(logits, dim=2)
            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            input_mask = input_mask.to('cpu').numpy()
            for i, mask in enumerate(input_mask):
                temp_1 = []
                temp_2 = []
                for j, m in enumerate(mask):
                    if j == 0:
                        continue
                    if m:
                        if label_map[label_ids[i][j]] != "X":
                            temp_1.append(label_map[label_ids[i][j]])
                            temp_2.append(label_map[logits[i][j]])
                    else:
                        temp_1.pop()
                        temp_2.pop()
                        break
                if temp_1[-1] == '[SEP]':
                    temp_1.pop()
                    temp_2.pop()
                y_true.append(temp_1)
                y_pred.append(temp_2)

        report = classification_report(y_true, y_pred, digits=4)
        prediction_file = os.path.join(args.output_dir, 'predictions.txt')
        write_predictions(eval_examples, y_true, y_pred, prediction_file)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            logger.info("\n%s", report)
            writer.write(report)
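
# warmup_linear is called in the fp16 branch above but is not defined in this
# snippet; the schedule from pytorch_pretrained_bert looked roughly like this
# (reproduced as an assumption, for readability only):
def warmup_linear(x, warmup=0.002):
    if x < warmup:
        return x / warmup
    return 1.0 - x
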
Exemple #29
0
def main(_):
    logging.set_verbosity(logging.INFO)
    args = flags.FLAGS.flag_values_dict()

    if (os.path.exists(args["output_dir"]) and os.listdir(args["output_dir"])
            and args["do_train"] and not args["overwrite_output_dir"]):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args["output_dir"]))

    if args["fp16"]:
        tf.config.optimizer.set_experimental_options(
            {"auto_mixed_precision": True})

    if args["tpu"]:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=args["tpu"])
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        args["n_device"] = args["num_tpu_cores"]
    elif len(args["gpus"].split(",")) > 1:
        args["n_device"] = len(
            [f"/gpu:{gpu}" for gpu in args["gpus"].split(",")])
        strategy = tf.distribute.MirroredStrategy(
            devices=[f"/gpu:{gpu}" for gpu in args["gpus"].split(",")])
    elif args["no_cuda"]:
        args["n_device"] = 1
        strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
    else:
        args["n_device"] = len(args["gpus"].split(","))
        strategy = tf.distribute.OneDeviceStrategy(device="/gpu:" +
                                                   args["gpus"].split(",")[0])

    logging.warning(
        "n_device: %s, distributed training: %s, 16-bits training: %s",
        args["n_device"],
        bool(args["n_device"] > 1),
        args["fp16"],
    )

    labels = get_labels(args["labels"])
    num_labels = len(labels)
    pad_token_label_id = -1
    # IOB tagging scheme
    print(args["config_name"]
          if args["config_name"] else args["model_name_or_path"])
    config = AutoConfig.from_pretrained(
        args["config_name"]
        if args["config_name"] else args["model_name_or_path"],
        num_labels=num_labels,
        cache_dir=args["cache_dir"],
    )

    logging.info("Training/evaluation parameters %s", args)
    args["model_type"] = config.model_type

    # Training
    if args["do_train"]:
        tokenizer = AutoTokenizer.from_pretrained(
            args["tokenizer_name"]
            if args["tokenizer_name"] else args["model_name_or_path"],
            do_lower_case=args["do_lower_case"],
            cache_dir=args["cache_dir"],
        )

        with strategy.scope():
            model = TFAutoModelForTokenClassification.from_pretrained(
                args["model_name_or_path"],
                from_pt=bool(".bin" in args["model_name_or_path"]),
                config=config,
                cache_dir=args["cache_dir"],
            )

        train_batch_size = args["per_device_train_batch_size"] * args[
            "n_device"]
        train_dataset, num_train_examples = load_and_cache_examples(
            args,
            tokenizer,
            labels,
            pad_token_label_id,
            train_batch_size,
            mode="train")
        train_dataset = strategy.experimental_distribute_dataset(train_dataset)
        train(
            args,
            strategy,
            train_dataset,
            tokenizer,
            model,
            num_train_examples,
            labels,
            train_batch_size,
            pad_token_label_id,
        )

        os.makedirs(args["output_dir"], exist_ok=True)

        logging.info("Saving model to %s", args["output_dir"])

        model.save_pretrained(args["output_dir"])
        tokenizer.save_pretrained(args["output_dir"])

    # Evaluation
    if args["do_eval"]:
        tokenizer = AutoTokenizer.from_pretrained(
            args["output_dir"], do_lower_case=args["do_lower_case"])
        checkpoints = []
        results = []

        if args["eval_all_checkpoints"]:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args["output_dir"] + "/**/" + TF2_WEIGHTS_NAME,
                              recursive=True),
                    key=lambda f: int("".join(filter(str.isdigit, f)) or -1),
                ))

        logging.info("Evaluate the following checkpoints: %s", checkpoints)

        if len(checkpoints) == 0:
            checkpoints.append(args["output_dir"])

        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1] if re.match(
                ".*checkpoint-[0-9]", checkpoint) else "final"

            with strategy.scope():
                model = TFAutoModelForTokenClassification.from_pretrained(
                    checkpoint)

            y_true, y_pred, eval_loss = evaluate(args,
                                                 strategy,
                                                 model,
                                                 tokenizer,
                                                 labels,
                                                 pad_token_label_id,
                                                 mode="dev")
            report = metrics.classification_report(y_true, y_pred, digits=4)

            if global_step:
                results.append({
                    global_step + "_report": report,
                    global_step + "_loss": eval_loss
                })

        output_eval_file = os.path.join(args["output_dir"], "eval_results.txt")

        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            for res in results:
                for key, val in res.items():
                    if "loss" in key:
                        logging.info(key + " = " + str(val))
                        writer.write(key + " = " + str(val))
                        writer.write("\n")
                    else:
                        logging.info(key)
                        logging.info("\n" + report)
                        writer.write(key + "\n")
                        writer.write(report)
                        writer.write("\n")

    if args["do_predict"]:
        tokenizer = AutoTokenizer.from_pretrained(
            args["output_dir"], do_lower_case=args["do_lower_case"])
        model = TFAutoModelForTokenClassification.from_pretrained(
            args["output_dir"])
        eval_batch_size = args["per_device_eval_batch_size"] * args["n_device"]
        predict_dataset, _ = load_and_cache_examples(args,
                                                     tokenizer,
                                                     labels,
                                                     pad_token_label_id,
                                                     eval_batch_size,
                                                     mode="test")
        y_true, y_pred, pred_loss = evaluate(args,
                                             strategy,
                                             model,
                                             tokenizer,
                                             labels,
                                             pad_token_label_id,
                                             mode="test")
        output_test_results_file = os.path.join(args["output_dir"],
                                                "test_results.txt")
        output_test_predictions_file = os.path.join(args["output_dir"],
                                                    "test_predictions.txt")
        report = metrics.classification_report(y_true, y_pred, digits=4)

        with tf.io.gfile.GFile(output_test_results_file, "w") as writer:

            logging.info("\n" + report)

            writer.write(report)
            writer.write("\n\nloss = " + str(pred_loss))

        with tf.io.gfile.GFile(output_test_predictions_file, "w") as writer:
            with tf.io.gfile.GFile(os.path.join(args["data_dir"], "test.txt"),
                                   "r") as f:
                example_id = 0

                for line in f:
                    if line.startswith(
                            "-DOCSTART-") or line == "" or line == "\n":
                        writer.write(line)

                        if not y_pred[example_id]:
                            example_id += 1
                    elif y_pred[example_id]:
                        output_line = line.split(
                        )[0] + " " + y_pred[example_id].pop(0) + "\n"
                        writer.write(output_line)
                    else:
                        logging.warning(
                            "Maximum sequence length exceeded: No prediction for '%s'.",
                            line.split()[0])
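
# get_labels is referenced above but not shown; a common implementation
# (an assumption, mirroring the transformers token-classification example)
# reads one tag per line from a labels file and falls back to CoNLL-2003 tags:
def get_labels(path):
    if path:
        with open(path, "r") as f:
            labels = f.read().splitlines()
        if "O" not in labels:
            labels = ["O"] + labels
        return labels
    return ["O", "B-MISC", "I-MISC", "B-PER", "I-PER",
            "B-ORG", "I-ORG", "B-LOC", "I-LOC"]
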
Exemple #30
0
def evaluate(args, model, tokenizer, labels, pad_token_label_id, mode, prefix=""):
    eval_dataset = FunsdDataset(args, tokenizer, labels, pad_token_label_id, mode=mode)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(
        eval_dataset,
        sampler=eval_sampler,
        batch_size=args.eval_batch_size,
        collate_fn=None,
    )

    # Eval!
    logger.info("***** Running evaluation %s *****", prefix)
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0].to(args.device),
                "attention_mask": batch[1].to(args.device),
                "labels": batch[3].to(args.device),
            }
            if args.model_type in ["layoutlm"]:
                inputs["bbox"] = batch[4].to(args.device)
            inputs["token_type_ids"] = (
                batch[2].to(args.device)
                if args.model_type in ["bert", "layoutlm"]
                else None
            )  # RoBERTa doesn't use segment_ids
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            if args.n_gpu > 1:
                tmp_eval_loss = (
                    tmp_eval_loss.mean()
                )  # mean() to average on multi-gpu parallel evaluating

            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(
                out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0
            )

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=2)

    label_map = {i: label for i, label in enumerate(labels)}

    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                out_label_list[i].append(label_map[out_label_ids[i][j]])
                preds_list[i].append(label_map[preds[i][j]])

    results = {
        "loss": eval_loss,
        "precision": precision_score(out_label_list, preds_list),
        "recall": recall_score(out_label_list, preds_list),
        "f1": f1_score(out_label_list, preds_list),
    }

    report = classification_report(out_label_list, preds_list)
    logger.info("\n" + report)

    logger.info("***** Eval results %s *****", prefix)
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results, preds_list
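
# Toy, self-contained illustration of the pad filtering performed above:
# positions whose gold id equals pad_token_label_id are dropped before the
# seqeval metrics see the sequences (ids and tags below are made up).
import numpy as np

labels = ["O", "B-PER", "I-PER"]
label_map = {i: label for i, label in enumerate(labels)}
pad_token_label_id = -100
out_label_ids = np.array([[1, 2, pad_token_label_id],
                          [0, 1, pad_token_label_id]])
preds = np.array([[1, 2, 0],
                  [0, 0, 0]])

out_label_list = [[] for _ in range(out_label_ids.shape[0])]
preds_list = [[] for _ in range(out_label_ids.shape[0])]
for i in range(out_label_ids.shape[0]):
    for j in range(out_label_ids.shape[1]):
        if out_label_ids[i, j] != pad_token_label_id:
            out_label_list[i].append(label_map[out_label_ids[i, j]])
            preds_list[i].append(label_map[preds[i, j]])

print(out_label_list)  # [['B-PER', 'I-PER'], ['O', 'B-PER']]
print(preds_list)      # [['B-PER', 'I-PER'], ['O', 'O']]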