Example #1
0
    def predict(self, passage: str, question: str):
        """Answer `question` against `passage` with the wrapped QA model.

        Converts the pair into SQuAD-style features, scores each feature
        with the model one at a time, and decodes the best answer span.
        """
        example = input_to_squad_example(passage, question)
        features = squad_examples_to_features(
            example, self.tokenizer, self.max_seq_length,
            self.doc_stride, self.max_query_length)

        # Pack every feature into tensors; the arange tensor lets each
        # batch row be mapped back to its originating feature.
        input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
        input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
        segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
        feature_index = torch.arange(input_ids.size(0), dtype=torch.long)

        dataset = TensorDataset(input_ids, input_mask, segment_ids, feature_index)
        loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=1)

        all_results = []
        for ids, mask, segs, idxs in loader:
            ids = ids.to(self.device)
            mask = mask.to(self.device)
            segs = segs.to(self.device)
            idxs = idxs.to(self.device)
            # Inference only: no gradients needed.
            with torch.no_grad():
                outputs = self.model(input_ids=ids,
                                     attention_mask=mask,
                                     token_type_ids=segs)
            for i, feature_pos in enumerate(idxs):
                feature = features[feature_pos.item()]
                all_results.append(
                    RawResult(unique_id=int(feature.unique_id),
                              start_logits=to_list(outputs[0][i]),
                              end_logits=to_list(outputs[1][i])))

        return get_answer(example, features, all_results, self.n_best_size,
                          self.max_answer_length, self.do_lower_case)
Example #2
0
    def predict(self, passages: list, question: str):
        """Answer `question` against `passages` using the configured backend.

        Supports four inference backends, selected by instance flags:
        TensorFlow graph (`use_tf`, optionally an ONNX-converted graph via
        `tf_onnx`), ONNX Runtime (`use_onnx_runtime`), TorchScript
        (`use_jit`), or eager PyTorch (the default).

        Args:
            passages: list of candidate passages to search for the answer.
            question: the question string.

        Returns:
            The decoded answer(s) produced by `get_answer`.
        """
        examples = input_to_squad_example(passages, question)
        features = squad_examples_to_features(examples,
                                              self.tokenizer,
                                              self.max_seq_length,
                                              self.doc_stride,
                                              self.max_query_length,
                                              vsl=self.vsl)
        # Torch tensors are only needed for the PyTorch backends.
        if not self.use_tf and not self.use_onnx_runtime:
            torch_input_ids = torch.tensor([f.input_ids for f in features],
                                           dtype=torch.long).to(self.device)
            torch_input_mask = torch.tensor([f.input_mask for f in features],
                                            dtype=torch.long).to(self.device)
            torch_segment_ids = torch.tensor([f.segment_ids for f in features],
                                             dtype=torch.long).to(self.device)
            torch_example_index = torch.arange(torch_input_ids.size(0),
                                               dtype=torch.long).to(
                                                   self.device)
        all_results = []
        if self.use_tf:
            if self.tf_onnx:
                # This is the TF graph converted from ONNX; tensor names
                # follow the ONNX export ('attention_mask', 'Squeeze_*').
                inputs = {
                    'input_ids:0': [f.input_ids for f in features],
                    'attention_mask:0': [f.input_mask for f in features],
                    'token_type_ids:0': [f.segment_ids for f in features]
                }
                start_logits, end_logits = self.model.run(
                    ['Squeeze_49:0', 'Squeeze_50:0'], feed_dict=inputs)
            else:
                # Original TF graph with its native tensor names.
                inputs = {
                    'input_ids:0': [f.input_ids for f in features],
                    'input_mask:0': [f.input_mask for f in features],
                    'segment_ids:0': [f.segment_ids for f in features]
                }
                start_logits, end_logits = self.model.run(
                    ['start_logits:0', 'end_logits:0'], feed_dict=inputs)
            example_indices = np.arange(len(features))
            outputs = [start_logits, end_logits]
        elif self.use_onnx_runtime:
            # ONNX Runtime session: look up I/O names from the model itself.
            inputs = {
                self.model.get_inputs()[0].name:
                np.array([f.input_ids for f in features]),
                self.model.get_inputs()[1].name:
                np.array([f.input_mask for f in features]),
                self.model.get_inputs()[2].name:
                np.array([f.segment_ids for f in features])
            }
            output_names = [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name
            ]
            example_indices = np.arange(len(features))
            outputs = self.model.run(output_names, inputs)
        else:
            example_indices = torch_example_index
            if self.use_jit:
                # Fix: run the TorchScript model under no_grad too —
                # inference does not need an autograd graph, and building
                # one wastes memory (the eager branch already does this).
                with torch.no_grad():
                    outputs = self.model(torch_input_ids, torch_input_mask,
                                         torch_segment_ids)
            else:
                with torch.no_grad():
                    inputs = {
                        'input_ids': torch_input_ids,
                        'attention_mask': torch_input_mask,
                        'token_type_ids': torch_segment_ids
                    }
                    outputs = self.model(**inputs)

        # Map each row of the model output back to its feature and collect
        # the raw start/end logits for answer decoding.
        for i, example_index in enumerate(example_indices):
            eval_feature = features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            result = RawResult(unique_id=unique_id,
                               start_logits=to_list(outputs[0][i]),
                               end_logits=to_list(outputs[1][i]))
            all_results.append(result)
        answers = get_answer(examples, features, all_results, self.n_best_size,
                             self.max_answer_length, self.do_lower_case)
        return answers