def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="", seq_length=384):
    """Run SQuAD evaluation with a finetuned checkpoint and write predictions to ./predictions.json."""
    if load_checkpoint_path == "":
        raise ValueError("Finetune model is missing: the evaluation task must load a finetuned checkpoint!")
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)
    eval_examples = read_squad_examples(eval_json, False)
    eval_features = convert_examples_to_features(examples=eval_examples,
                                                 tokenizer=tokenizer,
                                                 max_seq_length=seq_length,
                                                 doc_stride=128,
                                                 max_query_length=64,
                                                 is_training=False,
                                                 output_fn=None,
                                                 verbose_logging=False)
    # Build the BertSquad network in inference mode and load the finetuned weights.
    net = BertSquad(bert_net_cfg, False, 2)
    net.set_train(False)
    param_dict = load_checkpoint(load_checkpoint_path)
    load_param_into_net(net, param_dict)
    model = Model(net)
    output = []
    RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
    columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"]
    for data in dataset.create_dict_iterator():
        input_data = []
        for i in columns_list:
            input_data.append(Tensor(data[i]))
        input_ids, input_mask, segment_ids, unique_ids = input_data
        # Label tensors are placeholders; they are not used when the network runs in eval mode.
        start_positions = Tensor([1], mstype.float32)
        end_positions = Tensor([1], mstype.float32)
        is_impossible = Tensor([1], mstype.float32)
        logits = model.predict(input_ids, input_mask, segment_ids, start_positions,
                               end_positions, unique_ids, is_impossible)
        ids = logits[0].asnumpy()
        start = logits[1].asnumpy()
        end = logits[2].asnumpy()
        # Collect per-example start/end logits for answer-span post-processing.
        for i in range(bert_net_cfg.batch_size):
            unique_id = int(ids[i])
            start_logits = [float(x) for x in start[i].flat]
            end_logits = [float(x) for x in end[i].flat]
            output.append(RawResult(unique_id=unique_id,
                                    start_logits=start_logits,
                                    end_logits=end_logits))
    write_predictions(eval_examples, eval_features, output, 20, 30, True,
                      "./predictions.json", None, None)
def test_eval():
    """Evaluation function for the SQuAD task."""
    tokenizer = tokenization.FullTokenizer(vocab_file="./vocab.txt", do_lower_case=True)
    input_file = "dataset/v1.1/dev-v1.1.json"
    eval_examples = read_squad_examples(input_file, False)
    eval_features = convert_examples_to_features(examples=eval_examples,
                                                 tokenizer=tokenizer,
                                                 max_seq_length=384,
                                                 doc_stride=128,
                                                 max_query_length=64,
                                                 is_training=False,
                                                 output_fn=None,
                                                 verbose_logging=False)
    # Bind execution to the Ascend device given by the DEVICE_ID environment variable.
    device_id = int(os.getenv('DEVICE_ID'))
    context.set_context(mode=context.GRAPH_MODE, device_target='Ascend', device_id=device_id)
    dataset = get_squad_dataset(bert_net_cfg.batch_size, 1)
    net = BertSquad(bert_net_cfg, False, 2)
    net.set_train(False)
    param_dict = load_checkpoint(cfg.finetune_ckpt)
    load_param_into_net(net, param_dict)
    model = Model(net)
    output = []
    RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
    columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"]
    for data in dataset.create_dict_iterator():
        input_data = []
        for i in columns_list:
            input_data.append(Tensor(data[i]))
        input_ids, input_mask, segment_ids, unique_ids = input_data
        # Placeholder label tensors; the network ignores them at inference time.
        start_positions = Tensor([1], mstype.float32)
        end_positions = Tensor([1], mstype.float32)
        is_impossible = Tensor([1], mstype.float32)
        logits = model.predict(input_ids, input_mask, segment_ids, start_positions,
                               end_positions, unique_ids, is_impossible)
        ids = logits[0].asnumpy()
        start = logits[1].asnumpy()
        end = logits[2].asnumpy()
        # Collect per-example start/end logits for answer-span post-processing.
        for i in range(bert_net_cfg.batch_size):
            unique_id = int(ids[i])
            start_logits = [float(x) for x in start[i].flat]
            end_logits = [float(x) for x in end[i].flat]
            output.append(RawResult(unique_id=unique_id,
                                    start_logits=start_logits,
                                    end_logits=end_logits))
    write_predictions(eval_examples, eval_features, output, 20, 30, True,
                      "./predictions.json", None, None, False, False)
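
# A minimal entry-point sketch (not part of the original functions above) showing how
# do_eval() might be wired up from this file. It reuses only identifiers already
# referenced above (get_squad_dataset, bert_net_cfg, cfg.finetune_ckpt) and the literal
# paths from test_eval(); treat the exact call sequence as an assumption, not a
# confirmed entry point of this repository.
if __name__ == "__main__":
    # Assumes the Ascend context has already been configured, as in test_eval().
    squad_dataset = get_squad_dataset(bert_net_cfg.batch_size, 1)
    do_eval(dataset=squad_dataset,
            vocab_file="./vocab.txt",
            eval_json="dataset/v1.1/dev-v1.1.json",
            load_checkpoint_path=cfg.finetune_ckpt,
            seq_length=384)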