def test_more_than_two_overlapping_predicates(self):
    """
    Check that three overlapping predicate predictions collapse into a
    single consolidated multi-word predicate.
    """
    tokenizer = WordTokenizer(
        word_splitter=SpacyWordSplitter(pos_tags=True))

    sent_tokens = tokenizer.tokenize(
        "John refused to consider joining the club.")

    # Simulated per-predicate BIO predictions for "refused",
    # "consider" and "joining".
    predictions = [
        ["B-ARG0", "B-V", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1",
         "I-ARG1", "O"],
        ["B-ARG0", "B-BV", "I-BV", "B-V", "B-ARG1", "I-ARG1",
         "I-ARG1", "O"],
        ["B-ARG0", "B-BV", "I-BV", "I-BV", "B-V", "B-ARG1",
         "I-ARG1", "O"],
    ]

    consolidated = consolidate_predictions(predictions, sent_tokens)

    # Only the merged predicate "refused to consider joining" should
    # survive consolidation.
    assert len(consolidated) == 1
    merged_tags = next(iter(consolidated.values()))
    assert get_predicate_text(sent_tokens, merged_tags) == \
        "refused to consider joining"
    def test_predicate_consolidation(self):
        """
        Check that two overlapping predicate predictions are merged into
        one consolidated multi-word predicate.
        """
        tokenizer = WordTokenizer(
            word_splitter=SpacyWordSplitter(pos_tags=True))

        sent_tokens = tokenizer.tokenize(
            "In December, John decided to join the party.")

        # Simulated per-predicate BIO predictions for "decided" and
        # "join".
        predictions = [
            ["B-ARG2", "I-ARG2", "O", "B-ARG0", "B-V", "B-ARG1",
             "I-ARG1", "I-ARG1", "I-ARG1", "O"],
            ["O", "O", "O", "B-ARG0", "B-BV", "I-BV", "B-V",
             "B-ARG1", "I-ARG1", "O"],
        ]

        consolidated = consolidate_predictions(predictions, sent_tokens)

        # Only the merged predicate "decided to join" should remain.
        assert len(consolidated) == 1
        merged_tags = next(iter(consolidated.values()))
        assert get_predicate_text(sent_tokens, merged_tags) == \
            "decided to join"
コード例 #3
0
ファイル: format_oie.py プロジェクト: oriern/SuperPAL
def format_extractions(sent_tokens, sent_predictions):
    """
    Convert token-level raw predictions to clean extractions.

    Parameters
    ----------
    sent_tokens : sequence of tokens for the sentence (str() is applied
        to each, so spaCy/AllenNLP tokens or plain strings both work).
    sent_predictions : list of per-predicate BIO tag sequences, each the
        same length as ``sent_tokens``.

    Returns
    -------
    tuple ``(results, all_tags, results_dict)`` where ``results`` is a
    list of tab-separated "sentence<TAB>frame" strings, ``all_tags`` the
    per-predicate tag sequences after multi-word-predicate joining, and
    ``results_dict`` an AllenNLP-style dict with 'verbs' and 'words'.

    Raises
    ------
    AssertionError
        If the prediction sequences are ragged or do not align with the
        token count.
    """
    # Validate explicitly (a bare `assert` would be stripped under -O),
    # with messages so failures are diagnosable.
    if len(set(map(len, sent_predictions))) != 1:
        raise AssertionError(
            "prediction sequences have inconsistent lengths")
    if len(sent_tokens) != len(sent_predictions[0]):
        raise AssertionError(
            "prediction length does not match number of tokens")

    sent_str = " ".join(map(str, sent_tokens))

    # Merge overlapping per-predicate predictions.
    pred_dict = consolidate_predictions(sent_predictions, sent_tokens)

    # Build and return output dictionary
    results = []
    all_tags = []
    results_dict = {'verbs': [],
                    'words': [str(token) for token in sent_tokens]}

    for tags in pred_dict.values():
        # Join multi-word predicates into a single predicate span.
        tags = join_mwp(tags)
        all_tags.append(tags)

        # Create description text for this predicate's frame.
        oie_frame = get_oie_frame(sent_tokens, tags)

        # Add a predicate prediction to outputs.
        results.append("\t".join([sent_str, get_frame_str(oie_frame)]))
        results_dict['verbs'].append({'tags': tags})

    return results, all_tags, results_dict
コード例 #4
0
def consolidate(ie_res):
    """
    Consolidate open-IE results: merge overlapping predicate predictions
    for each sentence.

    Parameters
    ----------
    ie_res : iterable of dicts, each with 'verbs' (a list of dicts
        holding a 'tags' sequence) and 'words' (token strings), as
        produced by the AllenNLP open-IE predictor.

    Returns
    -------
    list of dicts with 'words' (Token objects) and 'tags' (the
    consolidated per-predicate tag sequences).
    """
    ie_slim = []
    for ie in ie_res:
        outputs = [v['tags'] for v in ie['verbs']]
        sent_tokens = [Token(w) for w in ie['words']]

        consolidated = oie.consolidate_predictions(outputs, sent_tokens)
        # Keep only the tag sequences; the predicate keys are unused.
        tags = list(consolidated.values())

        ie_slim.append({'words': sent_tokens, 'tags': tags})
    return ie_slim
コード例 #5
0
    def test_more_than_two_overlapping_predicates(self):
        """
        Check that three overlapping predicate predictions are merged
        into one consolidated multi-word predicate.
        """
        tokenizer = WordTokenizer(
            word_splitter=SpacyWordSplitter(pos_tags=True))

        sent_tokens = tokenizer.tokenize(
            "John refused to consider joining the club.")

        # Simulated predictions for "refused", "consider" and "joining".
        predictions = [
            [
                "B-ARG0", "B-V", "B-ARG1", "I-ARG1", "I-ARG1",
                "I-ARG1", "I-ARG1", "O",
            ],
            [
                "B-ARG0", "B-BV", "I-BV", "B-V", "B-ARG1", "I-ARG1",
                "I-ARG1", "O",
            ],
            [
                "B-ARG0", "B-BV", "I-BV", "I-BV", "B-V", "B-ARG1",
                "I-ARG1", "O",
            ],
        ]

        pred_dict = consolidate_predictions(predictions, sent_tokens)

        # Only "refused to consider joining" should remain.
        assert len(pred_dict) == 1
        merged = next(iter(pred_dict.values()))
        assert get_predicate_text(sent_tokens, merged) == \
            "refused to consider joining"
コード例 #6
0
    def test_predicate_consolidation(self):
        """
        Check that two overlapping predicate predictions are merged
        into one consolidated multi-word predicate.
        """
        tokenizer = WordTokenizer(
            word_splitter=SpacyWordSplitter(pos_tags=True))

        sent_tokens = tokenizer.tokenize(
            "In December, John decided to join the party.")

        # Simulated predictions for "decided" and "join".
        predictions = [
            [
                "B-ARG2", "I-ARG2", "O", "B-ARG0", "B-V", "B-ARG1",
                "I-ARG1", "I-ARG1", "I-ARG1", "O",
            ],
            [
                "O", "O", "O", "B-ARG0", "B-BV", "I-BV", "B-V",
                "B-ARG1", "I-ARG1", "O",
            ],
        ]

        pred_dict = consolidate_predictions(predictions, sent_tokens)

        # Only "decided to join" should remain.
        assert len(pred_dict) == 1
        merged = next(iter(pred_dict.values()))
        assert get_predicate_text(sent_tokens, merged) == "decided to join"
コード例 #7
0
def format_extractions(sent_tokens, sent_predictions):
    """
    Convert token-level raw predictions to clean extraction frames.

    Parameters
    ----------
    sent_tokens : sequence of tokens for the sentence.
    sent_predictions : list of per-predicate BIO tag sequences, each the
        same length as ``sent_tokens``.

    Returns
    -------
    list of open-IE frames, one per consolidated predicate.

    Raises
    ------
    AssertionError
        If the prediction sequences are ragged or do not align with the
        token count.
    """
    # Validate explicitly (a bare `assert` would be stripped under -O),
    # with messages so failures are diagnosable.
    if len(set(map(len, sent_predictions))) != 1:
        raise AssertionError(
            "prediction sequences have inconsistent lengths")
    if len(sent_tokens) != len(sent_predictions[0]):
        raise AssertionError(
            "prediction length does not match number of tokens")

    # Merge overlapping per-predicate predictions.
    pred_dict = consolidate_predictions(sent_predictions, sent_tokens)

    # Build and return output frames
    frames = []
    for tags in pred_dict.values():
        # Join multi-word predicates into a single predicate span.
        tags = join_mwp(tags)

        # Create description text for this predicate's frame.
        frames.append(get_oie_frame(sent_tokens, tags))

    return frames