def preprocess(self, data: Input):
    # Read the raw request payload as a plain string.
    input_text = data.get_as_string()
    # Tokenize the text, truncating to max_length, and return NumPy arrays.
    tokens = self.tokenizer.encode_plus(
        input_text,
        max_length=self.max_length,
        truncation=True,
        padding=True,
        add_special_tokens=True,
        return_tensors="np",
    )
    input_ids = tokens["input_ids"]
    attention_mask = tokens["attention_mask"]
    # Package both arrays into a DJL Output for the inference stage.
    outputs = Output()
    outputs.add_as_numpy([input_ids, attention_mask])
    return outputs
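# A minimal standalone sketch of what the tokenization step above produces.
# The model name below is an assumption for illustration; the handler uses
# whatever tokenizer self.tokenizer was initialized with.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed model
tokens = tokenizer.encode_plus(
    "DJL Serving makes model deployment straightforward.",
    max_length=128,
    truncation=True,
    padding=True,
    add_special_tokens=True,
    return_tensors="np",
)
print(tokens["input_ids"].shape)       # (1, sequence_length)
print(tokens["attention_mask"].shape)  # (1, sequence_length)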
def preprocess(self, data: Input):
    # Parse the request payload as JSON with "question" and "paragraph" keys.
    input_json = data.get_as_json()
    question = input_json["question"]
    context = input_json["paragraph"]
    # Encode the question/context pair together, truncating to max_length,
    # and return NumPy arrays for the downstream model.
    tokens = self.tokenizer.encode_plus(
        question,
        context,
        max_length=self.max_length,
        truncation=True,
        padding=True,
        add_special_tokens=True,
        return_tensors="np",
    )
    input_ids = tokens["input_ids"]
    attention_mask = tokens["attention_mask"]
    # Package both arrays into a DJL Output for the inference stage.
    outputs = Output()
    outputs.add_as_numpy([input_ids, attention_mask])
    return outputs
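# A minimal sketch of the request payload this QA variant expects, and how the
# question/context pair is laid out after encoding. The model name is an
# assumption; any BERT-style tokenizer behaves the same way here.
from transformers import AutoTokenizer

payload = {
    "question": "What does the handler encode?",
    "paragraph": "The preprocess step tokenizes a question together with its context paragraph.",
}
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed model
tokens = tokenizer.encode_plus(
    payload["question"],
    payload["paragraph"],
    max_length=128,
    truncation=True,
    padding=True,
    add_special_tokens=True,
    return_tensors="np",
)
# BERT-style tokenizers encode the pair as [CLS] question [SEP] paragraph [SEP];
# token_type_ids marks which segment each token belongs to.
print(tokenizer.decode(tokens["input_ids"][0]))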