Example #1
0
    def predict_naive(self, use_context):
        """Decode answers for every relation with the bare LM and report EM/F1.

        Loads the relations file, reads each relation's ``.jsonl`` data file,
        de-duplicates samples, batches them through the model's LM decoder,
        and aggregates exact-match / F1 across relations.

        Args:
            use_context: if truthy, each Sample carries the row's context;
                otherwise the context slot is None.

        Returns:
            (aggregate_em, aggregate_f1, per_relation_metrics) where the
            aggregates are means over all relations.
        """
        relations = load_file(self.relations_filepath)
        per_relation_metrics = {}
        total_em = 0
        total_f1 = 0
        for relation in relations:
            template = relation['template']
            data_file = os.path.join(self.data_directory, relation['relation']) + '.jsonl'
            data = load_file(data_file)
            # Build through a set so accidental duplicate rows collapse.
            unique_samples = {
                Sample(d['subject'],
                       d['context'] if use_context else None,
                       d['object'], None, template)
                for d in data
            }
            samples = list(unique_samples)
            print(f'Loaded relation {relation["relation"]}. There are {len(samples)} test samples')
            print('Batching samples')
            batches, samples = self.model.batch(samples, self.batch_size)
            predictions = []
            for batch in tqdm(batches):
                predictions.extend(self.model.decode_lm(batch, 20))
            relation_em, relation_f1, per_relation_metrics = calculate_relation_metrics(samples, predictions, per_relation_metrics, relation)
            total_em += relation_em
            total_f1 += relation_f1
        return total_em / len(relations), total_f1 / len(relations), per_relation_metrics
Example #2
0
    def predict(self):
        """Run filtered inference over every relation and report EM/F1.

        Loads the relations file, reads each relation's ``.jsonl`` data file,
        de-duplicates samples, optionally filters them with the context
        filter, runs model inference on the survivors, predicts the empty
        string for every filtered-out sample, and aggregates exact-match /
        F1 across relations.

        Returns:
            (aggregate_em, aggregate_f1, per_relation_metrics) where the
            aggregates are means over all relations.
        """
        # Load relations file
        relations = load_file(self.relations_filepath)
        # Iterate through relations file and predict for each relation
        aggregate_em = aggregate_f1 = 0
        per_relation_metrics = {}
        for relation in relations:
            data_file = os.path.join(self.data_directory,
                                     relation['relation']) + '.jsonl'
            data = load_file(data_file)
            # Adding to set filters any accidental duplicates
            samples_set = set()
            for d in data:
                samples_set.add(
                    Sample(d['subject'], d['context'], d['object'], None,
                           relation['template']))

            samples = list(samples_set)
            init_len = len(samples)
            # BUG FIX: final_len was previously assigned only in the else
            # branch below, so must_choose_answer=True crashed with
            # UnboundLocalError at the `if final_len != 0` check. Initialize
            # it here for both paths.
            final_len = init_len
            if self.must_choose_answer:
                print('Must choose answer is True. Skipping filtering step')
            else:
                print('Starting filtering')
                samples = self.context_filter.filter(samples)
                final_len = len(samples)
                print(f'Filtering finished. Filtered {init_len - final_len}.')

            all_results = []
            if final_len != 0:
                print(
                    f'Loaded relation {relation["relation"]}. There are {len(samples)} test samples'
                )
                print('Batching samples')
                batches, samples = self.model.batch(samples, self.batch_size)
                print('Starting inference')
                for batch in tqdm(batches):
                    results = self.model.predict(batch)
                    all_results.extend(results)
            else:
                print('All samples were filtered. Skipping inference.')
            # Now we need to re-add all the filtered samples. Use a set for
            # O(1) membership tests instead of the original O(n^2) list scan
            # (Sample instances are hashable — they came from samples_set).
            kept = set(samples)
            filtered_samples = [s for s in samples_set if s not in kept]
            samples = list(samples)
            samples.extend(filtered_samples)
            # Predict empty string for every filtered sample
            filtered_predictions = [''] * len(filtered_samples)
            all_results.extend(filtered_predictions)
            relation_em, relation_f1, per_relation_metrics = calculate_relation_metrics(
                samples, all_results, per_relation_metrics, relation)
            aggregate_em += relation_em
            aggregate_f1 += relation_f1
        aggregate_em /= len(relations)
        aggregate_f1 /= len(relations)
        return aggregate_em, aggregate_f1, per_relation_metrics
Example #3
0
    def predict(self):
        """Run extractive-QA inference for every relation and report EM/F1.

        For each relation: builds a question from the relation's template,
        converts the de-duplicated samples to SQuAD-style features, runs the
        QA model batch-by-batch, decodes n-best predictions, and aggregates
        exact-match / F1 across relations.

        Returns:
            (aggregate_em, aggregate_f1, per_relation_metrics) where the
            aggregates are means over all relations.
        """
        # Load relations file
        relations = load_file(self.relations_filepath)
        # Iterate through relations file and predict for each relation
        aggregate_em = aggregate_f1 = 0
        per_relation_metrics = {}
        for relation in relations:
            data_file = os.path.join(self.data_directory,
                                     relation['relation']) + '.jsonl'
            data = load_file(data_file)
            # Adding to set filters any accidental duplicates
            samples = set()
            for d in data:
                # Instantiate the relation's question template with this
                # row's subject.
                question = relation['question'].replace('[X]', d['subject'])
                samples.add(
                    Sample(d['subject'], d['context'], d['object'], question))
            samples = list(samples)
            print(
                f'Loaded relation {relation["relation"]}. There are {len(samples)} test samples'
            )
            # Most of below is taken directly from HuggingFace, which is what Lewis et al use to train their QA head
            # Defaults from huggingface
            do_lower_case = True
            max_answer_length = 30
            verbose_logging = False
            null_score_diff_threshold = 0.0
            n_best = 20
            max_query_length = 64
            doc_stride = 128
            max_seq_length = 384

            # Load the samples into squad format
            examples = read_input_examples(samples)
            # BERT-style settings: CLS at the start, question as segment A
            # (assumes a BERT-family tokenizer/model — TODO confirm).
            features = convert_examples_to_features(
                examples=examples,
                tokenizer=self.tokenizer,
                max_seq_length=max_seq_length,
                doc_stride=doc_stride,
                max_query_length=max_query_length,
                is_training=False,
                cls_token_segment_id=0,
                pad_token_segment_id=0,
                cls_token_at_end=False,
                sequence_a_is_doc=False)

            # Convert to Tensors and build dataset
            all_input_ids = torch.tensor([f.input_ids for f in features],
                                         dtype=torch.long)
            all_input_mask = torch.tensor([f.input_mask for f in features],
                                          dtype=torch.long)
            all_segment_ids = torch.tensor([f.segment_ids for f in features],
                                           dtype=torch.long)
            all_cls_index = torch.tensor([f.cls_index for f in features],
                                         dtype=torch.long)
            all_p_mask = torch.tensor([f.p_mask for f in features],
                                      dtype=torch.float)
            # Index back into `features` so per-batch results can be matched
            # to their source feature after shuffling-free sequential eval.
            all_example_index = torch.arange(all_input_ids.size(0),
                                             dtype=torch.long)
            dataset = TensorDataset(all_input_ids, all_input_mask,
                                    all_segment_ids, all_example_index,
                                    all_cls_index, all_p_mask)
            # Sequential order keeps batch positions aligned with
            # all_example_index.
            eval_sampler = SequentialSampler(dataset)
            eval_dataloader = DataLoader(dataset,
                                         sampler=eval_sampler,
                                         batch_size=self.batch_size)
            all_results = []
            for batch in tqdm(eval_dataloader, desc="Evaluating"):
                batch = tuple(t.to(device=self.device) for t in batch)
                with torch.no_grad():
                    inputs = {
                        'input_ids': batch[0],
                        'attention_mask': batch[1]
                    }
                    inputs['token_type_ids'] = batch[2]
                    example_indices = batch[3]
                    # outputs[0] = start logits, outputs[1] = end logits
                    # (standard HF QA head output order).
                    outputs = self.model(**inputs)

                for i, example_index in enumerate(example_indices):
                    # Map each in-batch row back to its original feature.
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    result = RawResult(unique_id=unique_id,
                                       start_logits=to_list(outputs[0][i]),
                                       end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

            predictions = get_predictions(
                examples,
                features,
                all_results,
                n_best,
                max_answer_length,
                do_lower_case,
                verbose_logging,
                self.trained_to_reject,
                null_score_diff_threshold,
                must_choose_answer=self.must_choose_answer)
            # get_predictions returns a dict keyed by example id; flatten to
            # a list in the dict's (insertion) order.
            predictions = [predictions[p] for p in predictions]
            self.total_samples += len(predictions)
            relation_em, relation_f1, per_relation_metrics, self.se_list, _ = calculate_relation_metrics(
                samples, predictions, per_relation_metrics, relation,
                self.se_list, False)
            aggregate_em += relation_em
            aggregate_f1 += relation_f1
        aggregate_em /= len(relations)
        aggregate_f1 /= len(relations)
        return aggregate_em, aggregate_f1, per_relation_metrics