def test_more_than_two_overlapping_predicates(self):
    """
    Test whether the predictor can correctly consolidate multiword
    predicates when three predications overlap on the same sentence.
    """
    tokenizer = WordTokenizer(word_splitter=SpacyWordSplitter(pos_tags=True))
    sent_tokens = tokenizer.tokenize("John refused to consider joining the club.")

    # Emulate predictions - for "refused", "consider" and "joining".
    predictions = [
        ["B-ARG0", "B-V", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O"],
        ["B-ARG0", "B-BV", "I-BV", "B-V", "B-ARG1", "I-ARG1", "I-ARG1", "O"],
        ["B-ARG0", "B-BV", "I-BV", "I-BV", "B-V", "B-ARG1", "I-ARG1", "O"],
    ]

    # Consolidate
    pred_dict = consolidate_predictions(predictions, sent_tokens)

    # Check that only the merged "refused to consider joining" predicate is left.
    assert len(pred_dict) == 1
    tags = list(pred_dict.values())[0]
    assert get_predicate_text(sent_tokens, tags) == "refused to consider joining"
def test_predicate_consolidation(self):
    """
    Test whether the predictor can correctly consolidate multiword
    predicates.
    """
    tokenizer = WordTokenizer(word_splitter=SpacyWordSplitter(pos_tags=True))
    sent_tokens = tokenizer.tokenize("In December, John decided to join the party.")

    # Emulated model outputs for the verbs "decided" and "join".
    predictions = [
        ["B-ARG2", "I-ARG2", "O", "B-ARG0", "B-V",
         "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O"],
        ["O", "O", "O", "B-ARG0", "B-BV",
         "I-BV", "B-V", "B-ARG1", "I-ARG1", "O"],
    ]

    consolidated = consolidate_predictions(predictions, sent_tokens)

    # Only the merged "decided to join" predication should remain.
    assert len(consolidated) == 1
    (tags,) = consolidated.values()
    assert get_predicate_text(sent_tokens, tags) == "decided to join"
def format_extractions(sent_tokens, sent_predictions):
    """
    Convert token-level raw predictions to clean extractions.

    Returns a tuple of:
      - results: tab-separated "sentence<TAB>frame" strings, one per predicate
      - all_tags: the (multiword-joined) tag sequence for each predicate
      - results_dict: AllenNLP-style dict with "verbs" and "words" keys
    """
    # Validate inputs explicitly (with messages) instead of a bare `assert`,
    # which is silently stripped when Python runs with -O.
    if len(set(map(len, sent_predictions))) != 1:
        raise AssertionError("All predictions must have the same length")
    if len(sent_tokens) != len(sent_predictions[0]):
        raise AssertionError("Predictions must align with the sentence tokens")

    sent_str = " ".join(map(str, sent_tokens))

    # Consolidate overlapping per-verb predictions into distinct predicates.
    pred_dict = consolidate_predictions(sent_predictions, sent_tokens)

    # Build and return output dictionary
    results = []
    all_tags = []
    results_dict = {'verbs': [], 'words': [str(token) for token in sent_tokens]}
    for tags in pred_dict.values():
        # Join multi-word predicates
        tags = join_mwp(tags)
        all_tags.append(tags)

        # Create description text
        oie_frame = get_oie_frame(sent_tokens, tags)

        # Add a predicate prediction to outputs.
        results.append("\t".join([sent_str, get_frame_str(oie_frame)]))
        results_dict['verbs'].append({'tags': tags})

    return results, all_tags, results_dict
def consolidate(ie_res):
    """
    Consolidate OIE results: for each sentence, merge overlapping per-verb
    predictions into distinct predicates and keep only the tag sequences.

    Each element of ``ie_res`` is expected to carry "verbs" (a list of
    dicts with a "tags" entry) and "words" (the sentence tokens as strings).
    Returns a list of {'words': [Token, ...], 'tags': [tag_seq, ...]} dicts.
    """
    ie_slim = []
    for ie in ie_res:
        outputs = [v['tags'] for v in ie['verbs']]
        sent_tokens = [Token(w) for w in ie['words']]
        consolidated = oie.consolidate_predictions(outputs, sent_tokens)
        # Only the consolidated tag sequences are needed downstream;
        # the predicate keys are discarded.
        tags = list(consolidated.values())
        ie_slim.append({'words': sent_tokens, 'tags': tags})
    return ie_slim
def test_more_than_two_overlapping_predicates(self):
    """
    Test whether the predictor can correctly consolidate multiword
    predicates when three predications overlap on the same sentence.
    """
    tokenizer = WordTokenizer(word_splitter=SpacyWordSplitter(pos_tags=True))
    sent_tokens = tokenizer.tokenize("John refused to consider joining the club.")

    # Emulate predictions - for "refused", "consider" and "joining".
    # (No backslash continuations needed inside a bracketed literal.)
    predictions = [
        ['B-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O'],
        ['B-ARG0', 'B-BV', 'I-BV', 'B-V', 'B-ARG1', 'I-ARG1', 'I-ARG1', 'O'],
        ['B-ARG0', 'B-BV', 'I-BV', 'I-BV', 'B-V', 'B-ARG1', 'I-ARG1', 'O'],
    ]

    # Consolidate
    pred_dict = consolidate_predictions(predictions, sent_tokens)

    # Check that only the merged "refused to consider joining" predicate is left.
    assert len(pred_dict) == 1
    tags = list(pred_dict.values())[0]
    assert get_predicate_text(sent_tokens, tags) == "refused to consider joining"
def test_predicate_consolidation(self):
    """
    Test whether the predictor can correctly consolidate multiword
    predicates.
    """
    tokenizer = WordTokenizer(word_splitter=SpacyWordSplitter(pos_tags=True))
    sentence = "In December, John decided to join the party."
    sent_tokens = tokenizer.tokenize(sentence)

    # Emulated model outputs for the two verbs, "decided" and "join".
    decided_tags = ['B-ARG2', 'I-ARG2', 'O', 'B-ARG0', 'B-V',
                    'B-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']
    join_tags = ['O', 'O', 'O', 'B-ARG0', 'B-BV',
                 'I-BV', 'B-V', 'B-ARG1', 'I-ARG1', 'O']

    pred_dict = consolidate_predictions([decided_tags, join_tags], sent_tokens)

    # After consolidation only the merged "decided to join" predicate remains.
    assert len(pred_dict) == 1
    tags = next(iter(pred_dict.values()))
    assert get_predicate_text(sent_tokens, tags) == "decided to join"
def format_extractions(sent_tokens, sent_predictions):
    """
    Convert token-level raw predictions to clean extraction frames.

    Returns a list with one OIE frame per consolidated predicate.
    """
    # Validate inputs explicitly (with messages) instead of a bare `assert`,
    # which is silently stripped when Python runs with -O.
    if len(set(map(len, sent_predictions))) != 1:
        raise AssertionError("All predictions must have the same length")
    if len(sent_tokens) != len(sent_predictions[0]):
        raise AssertionError("Predictions must align with the sentence tokens")

    # Consolidate overlapping per-verb predictions into distinct predicates.
    pred_dict = consolidate_predictions(sent_predictions, sent_tokens)

    # Build and return the output frames.
    frames = []
    for tags in pred_dict.values():
        # Join multi-word predicates
        tags = join_mwp(tags)

        # Create description text
        frames.append(get_oie_frame(sent_tokens, tags))

    return frames