from djl_python import Input, Output


def preprocess(self, data: Input):
    # Read the raw request payload as a plain string.
    input_text = data.get_as_string()
    # Tokenize; truncation caps the sequence at max_length. Note that
    # padding=True pads to the longest sequence in the batch, so it is
    # effectively a no-op for this single input.
    tokens = self.tokenizer.encode_plus(input_text,
                                        max_length=self.max_length,
                                        truncation=True,
                                        padding=True,
                                        add_special_tokens=True,
                                        return_tensors='np')
    input_ids = tokens["input_ids"]
    attention_mask = tokens["attention_mask"]
    # Pack both arrays into a DJL Output for the downstream inference step.
    outputs = Output()
    outputs.add_as_numpy([input_ids, attention_mask])
    return outputs
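
This method assumes a handler class that already owns a Hugging Face tokenizer: self.tokenizer and self.max_length are initialized elsewhere in the original project. A minimal sketch of such a surrounding class, assuming the djl_python handler convention (the class name and model checkpoint below are illustrative, not from the original):

from djl_python import Input, Output
from transformers import AutoTokenizer


class TextHandler:
    """Illustrative owner of the preprocess method shown above."""

    def __init__(self, model_id="bert-base-uncased", max_length=128):
        # Hypothetical defaults; the original service would configure its own.
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.max_length = max_length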
Example #2
def preprocess(self, data: Input):
    # Parse the request body as JSON with "question" and "paragraph" keys.
    input_json = data.get_as_json()
    question = input_json["question"]
    context = input_json["paragraph"]
    # Encode the question/context pair; encode_plus inserts the separator
    # token between the two segments for sequence-pair models.
    tokens = self.tokenizer.encode_plus(question,
                                        context,
                                        max_length=self.max_length,
                                        truncation=True,
                                        padding=True,
                                        add_special_tokens=True,
                                        return_tensors="np")
    input_ids = tokens["input_ids"]
    attention_mask = tokens["attention_mask"]
    # Return the encoded pair as numpy arrays wrapped in a DJL Output.
    outputs = Output()
    outputs.add_as_numpy([input_ids, attention_mask])
    return outputs
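
For this question-answering variant, the client is expected to send a JSON body with "question" and "paragraph" keys. A hedged client-side sketch, assuming the model is served by DJL Serving at its default predictions endpoint (the host, port, and model name are assumptions, not part of the original):

import requests

# DJL Serving exposes models at /predictions/<model_name> by default;
# the model name "qa_model" here is hypothetical.
response = requests.post(
    "http://localhost:8080/predictions/qa_model",
    json={
        "question": "Who wrote the report?",
        "paragraph": "The report was written by the survey team in 2021.",
    },
)
print(response.content)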