Example 1
 def create_and_check_bert_for_question_answering(
     self,
     config,
     input_ids,
     token_type_ids,
     input_mask,
     sequence_labels,
     token_labels,
     choice_labels,
 ):
     model = BertForQuestionAnswering(config=config)
     model.eval()
     # Positional call matching the older pytorch-pretrained-bert signature:
     # (input_ids, token_type_ids, attention_mask, start_positions, end_positions).
     # sequence_labels is reused as both start and end positions; the unused
     # token_labels and choice_labels keep the shared tester signature.
     loss, start_logits, end_logits = model(input_ids, token_type_ids,
                                            input_mask, sequence_labels,
                                            sequence_labels)
     result = {
         "loss": loss,
         "start_logits": start_logits,
         "end_logits": end_logits
     }
     self.parent.assertListEqual(list(result["start_logits"].size()),
                                 [self.batch_size, self.seq_length])
     self.parent.assertListEqual(list(result["end_logits"].size()),
                                 [self.batch_size, self.seq_length])
     self.check_loss_output(result)
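
The tuple unpacking above matches the old pytorch-pretrained-bert return style. For comparison, here is a minimal sketch of the same check against a recent transformers release, where the model returns a QuestionAnsweringModelOutput; the toy config and random inputs are illustrative assumptions, not part of the original test:

import torch
from transformers import BertConfig, BertForQuestionAnswering

config = BertConfig()  # randomly initialised toy config, for illustration only
model = BertForQuestionAnswering(config)
model.eval()

batch_size, seq_length = 2, 8
input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_length))
positions = torch.zeros(batch_size, dtype=torch.long)
outputs = model(input_ids=input_ids,
                start_positions=positions,
                end_positions=positions)
# outputs.loss is a scalar; both logit tensors are (batch_size, seq_length).
print(outputs.loss, outputs.start_logits.shape, outputs.end_logits.shape)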
Example 2
 def __init__(self, model_state_dict) -> None:
     no_cuda = True  # hard-coded, so the device below is always CPU
     self.device = torch.device(
         "cuda" if torch.cuda.is_available() and not no_cuda else "cpu")
     self.tokenizer = BertTokenizer.from_pretrained('bert-base-chinese',
                                                    do_lower_case=False)
     config = BertConfig.from_pretrained('bert-base-chinese')
     self.model = BertForQuestionAnswering(config)
     self.model.load_state_dict(
         torch.load(model_state_dict, map_location='cpu'))
     self.model.to(self.device)
     self.model.eval()  # TODO

import argparse
import json

import torch
from transformers import BertConfig, BertForQuestionAnswering, BertTokenizer

# The excerpt is cut between the class above and the script below; only the
# tail of one add_argument call survives. "--state_dict" is inferred from
# args.state_dict further down, and the other flags referenced later are
# reconstructed here with assumed defaults so the script parses.
parser = argparse.ArgumentParser()
parser.add_argument("--state_dict",
                    type=str,
                    required=True,
                    help="model parameters after pretraining")
parser.add_argument("--no_cuda", action="store_true")
parser.add_argument("--per_gpu_eval_batch_size", type=int, default=8)
parser.add_argument("--tokenizer_name", type=str, default="")
parser.add_argument("--model_name_or_path", type=str,
                    default="bert-base-chinese")  # assumed, matching the class above
parser.add_argument("--predict_file", type=str, required=True)

args = parser.parse_args()
args.n_gpu = torch.cuda.device_count()
args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
device = torch.device(
    "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
args.device = device
tokenizer = BertTokenizer.from_pretrained(
    args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
    do_lower_case=False)
config = BertConfig.from_pretrained(
    args.tokenizer_name if args.tokenizer_name else args.model_name_or_path)
model = BertForQuestionAnswering(config)
model_state_dict = args.state_dict
model.load_state_dict(torch.load(model_state_dict))
model.to(args.device)
model.eval()
input_file = args.predict_file
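
With the model, tokenizer, and device set up, a single question/context pair can be scored directly. The following is a minimal sketch, not part of the original script: it assumes a transformers version with a callable tokenizer, and the Chinese strings are made-up placeholders:

question, context = "谁写了这本书？", "这本书是鲁迅写的。"  # hypothetical pair
inputs = tokenizer(question, context, return_tensors="pt").to(args.device)
with torch.no_grad():
    outputs = model(**inputs)
start = int(outputs.start_logits.argmax())
end = int(outputs.end_logits.argmax())
print(tokenizer.decode(inputs["input_ids"][0, start:end + 1]))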


def handle_file(input_file, context, question):
    with open(input_file, "r") as reader:
        orig_data = json.load(reader)
        orig_data["data"][0]['paragraphs'][0]['context'] = context
        for i, q in enumerate(question):
            orig_data["data"][0]['paragraphs'][0]['qas'][i]['question'] = q
    with open(input_file, "w") as writer:
        # The original snippet is truncated at the open() call; writing the
        # modified data back is an assumed completion.
        json.dump(orig_data, writer, ensure_ascii=False)
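
Note that handle_file only overwrites existing entries, so the JSON file must already contain at least as many qas items as there are questions. A hypothetical call, rewriting the SQuAD-style predict file in place before prediction (the strings are placeholders):

handle_file(args.predict_file,
            "新的上下文段落。",
            ["问题一？", "问题二？"])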