def predict_naive(self, use_context):
    # Load relations file
    relations = load_file(self.relations_filepath)
    # Iterate through relations file and predict for each relation
    aggregate_em = aggregate_f1 = 0
    per_relation_metrics = {}
    for relation in relations:
        data_file = os.path.join(self.data_directory, relation['relation']) + '.jsonl'
        data = load_file(data_file)
        # Adding to set filters any accidental duplicates
        samples = set()
        for d in data:
            if use_context:
                samples.add(Sample(d['subject'], d['context'], d['object'], None, relation['template']))
            else:
                samples.add(Sample(d['subject'], None, d['object'], None, relation['template']))
        samples = list(samples)
        print(f'Loaded relation {relation["relation"]}. There are {len(samples)} test samples')
        print('Batching samples')
        batches, samples = self.model.batch(samples, self.batch_size)
        all_results = []
        for batch in tqdm(batches):
            results = self.model.decode_lm(batch, 20)
            all_results.extend(results)
        relation_em, relation_f1, per_relation_metrics = calculate_relation_metrics(
            samples, all_results, per_relation_metrics, relation)
        aggregate_em += relation_em
        aggregate_f1 += relation_f1
    aggregate_em /= len(relations)
    aggregate_f1 /= len(relations)
    return aggregate_em, aggregate_f1, per_relation_metrics
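
# --- Illustrative sketch, not part of the original module ---------------------
# The set-based de-duplication in predict_naive relies on Sample being hashable
# with value equality. A namedtuple satisfies that contract; the field names
# below are inferred from the call sites and are an assumption -- the real
# project may define Sample differently (note that the QA variant further down
# constructs Sample with four arguments, so its definition likely differs).
from collections import namedtuple

SampleSketch = namedtuple('SampleSketch',
                          ['subject', 'context', 'obj', 'prediction', 'template'])
# e.g. SampleSketch('Paris', None, 'France', None, '[X] is the capital of [Y].')
# -------------------------------------------------------------------------------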
def predict(self):
    # Load relations file
    relations = load_file(self.relations_filepath)
    # Iterate through relations file and predict for each relation
    aggregate_em = aggregate_f1 = 0
    per_relation_metrics = {}
    for relation in relations:
        data_file = os.path.join(self.data_directory, relation['relation']) + '.jsonl'
        data = load_file(data_file)
        # Adding to set filters any accidental duplicates
        samples_set = set()
        for d in data:
            samples_set.add(Sample(d['subject'], d['context'], d['object'], None, relation['template']))
        samples = list(samples_set)
        init_len = len(samples)
        if self.must_choose_answer:
            print('Must choose answer is True. Skipping filtering step')
        else:
            print('Starting filtering')
            samples = self.context_filter.filter(samples)
        final_len = len(samples)
        print(f'Filtering finished. Filtered {init_len - final_len} samples.')
        all_results = []
        if final_len != 0:
            print(f'Loaded relation {relation["relation"]}. There are {len(samples)} test samples')
            print('Batching samples')
            batches, samples = self.model.batch(samples, self.batch_size)
            print('Starting inference')
            for batch in tqdm(batches):
                results = self.model.predict(batch)
                all_results.extend(results)
        else:
            print('All samples were filtered. Skipping inference.')
        # Now we need to re-add all the filtered samples
        filtered_samples = [s for s in samples_set if s not in samples]
        samples = list(samples)
        samples.extend(filtered_samples)
        # Predict empty string for every filtered sample
        filtered_predictions = [''] * len(filtered_samples)
        all_results.extend(filtered_predictions)
        relation_em, relation_f1, per_relation_metrics = calculate_relation_metrics(
            samples, all_results, per_relation_metrics, relation)
        aggregate_em += relation_em
        aggregate_f1 += relation_f1
    aggregate_em /= len(relations)
    aggregate_f1 /= len(relations)
    return aggregate_em, aggregate_f1, per_relation_metrics
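
# --- Illustrative sketch, not part of the original module ---------------------
# predict() assumes self.context_filter exposes a filter(samples) method that
# returns the subset of samples whose contexts it judges usable; everything it
# drops is later re-added with an empty-string prediction. A minimal stand-in
# obeying that contract (hypothetical, for illustration only):
class KeepAllContextFilter:
    """Stand-in filter that keeps every sample."""

    def filter(self, samples):
        # A real filter would score each sample's context and drop low scorers.
        return list(samples)
# -------------------------------------------------------------------------------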
def predict(self):
    # Load relations file
    relations = load_file(self.relations_filepath)
    # Iterate through relations file and predict for each relation
    aggregate_em = aggregate_f1 = 0
    per_relation_metrics = {}
    for relation in relations:
        data_file = os.path.join(self.data_directory, relation['relation']) + '.jsonl'
        data = load_file(data_file)
        # Adding to set filters any accidental duplicates
        samples = set()
        for d in data:
            question = relation['question'].replace('[X]', d['subject'])
            samples.add(Sample(d['subject'], d['context'], d['object'], question))
        samples = list(samples)
        print(f'Loaded relation {relation["relation"]}. There are {len(samples)} test samples')
        # Most of the below is taken directly from HuggingFace, which is what
        # Lewis et al. use to train their QA head
        # Defaults from HuggingFace
        do_lower_case = True
        max_answer_length = 30
        verbose_logging = False
        null_score_diff_threshold = 0.0
        n_best = 20
        max_query_length = 64
        doc_stride = 128
        max_seq_length = 384
        # Load the samples into SQuAD format
        examples = read_input_examples(samples)
        features = convert_examples_to_features(examples=examples,
                                                tokenizer=self.tokenizer,
                                                max_seq_length=max_seq_length,
                                                doc_stride=doc_stride,
                                                max_query_length=max_query_length,
                                                is_training=False,
                                                cls_token_segment_id=0,
                                                pad_token_segment_id=0,
                                                cls_token_at_end=False,
                                                sequence_a_is_doc=False)
        # Convert to Tensors and build dataset
        all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
        all_cls_index = torch.tensor([f.cls_index for f in features], dtype=torch.long)
        all_p_mask = torch.tensor([f.p_mask for f in features], dtype=torch.float)
        all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
        dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                                all_example_index, all_cls_index, all_p_mask)
        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.batch_size)
        all_results = []
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(device=self.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'token_type_ids': batch[2]}
                example_indices = batch[3]
                outputs = self.model(**inputs)
            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                result = RawResult(unique_id=unique_id,
                                   start_logits=to_list(outputs[0][i]),
                                   end_logits=to_list(outputs[1][i]))
                all_results.append(result)
        predictions = get_predictions(examples, features, all_results, n_best,
                                      max_answer_length, do_lower_case,
                                      verbose_logging, self.trained_to_reject,
                                      null_score_diff_threshold,
                                      must_choose_answer=self.must_choose_answer)
        predictions = list(predictions.values())
        self.total_samples += len(predictions)
        relation_em, relation_f1, per_relation_metrics, self.se_list, _ = calculate_relation_metrics(
            samples, predictions, per_relation_metrics, relation, self.se_list, False)
        aggregate_em += relation_em
        aggregate_f1 += relation_f1
    aggregate_em /= len(relations)
    aggregate_f1 /= len(relations)
    return aggregate_em, aggregate_f1, per_relation_metrics
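
# --- Illustrative sketch, not part of the original module ---------------------
# The SQuAD-style plumbing above follows the older HuggingFace run_squad.py
# example utilities. Compatible definitions of two of the helpers it relies on
# (assumptions -- the project presumably imports its own versions):
import collections

RawResultSketch = collections.namedtuple(
    'RawResultSketch', ['unique_id', 'start_logits', 'end_logits'])

def to_list_sketch(tensor):
    # Detach from the graph, move to CPU, and convert to a plain Python list,
    # as expected by the n-best answer extraction step.
    return tensor.detach().cpu().tolist()
# -------------------------------------------------------------------------------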