def test_transitive(self):
        """Passivizing a transitive frame gives a short passive and a "by" passive."""

        # Factories so that every frame receives its own fresh dicts.
        def agent():
            return {'elem': 'NP', 'role': 'Agent', 'restr': 'a'}

        def theme():
            return {'elem': 'NP', 'role': 'Theme', 'restr': 'b'}

        def verb():
            return {'elem': 'V'}

        active = VerbnetOfficialFrame('XX', [agent(), verb(), theme()])
        passives = active.passivize()

        # Expected order: agent dropped first, then agent moved after "by".
        without_agent = VerbnetOfficialFrame('XX', [theme(), verb()])
        with_by_agent = VerbnetOfficialFrame(
            'XX', [theme(), verb(), {'elem': 'by'}, agent()])

        self.assertEqual(passives, [without_agent, with_by_agent])
    def test_baseline_alg(self):
        """Check the roles left on the occurrence by the 'baseline' algorithm."""
        occurrence_elems = ('NP', 'V', 'NP', 'NP', 'for', 'NP')
        frame_occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in occurrence_elems], 4, 'a predicate')

        def noun_phrase(role):
            return {'elem': 'NP', 'role': role}

        def verb():
            return {'elem': 'V'}

        # First candidate uses a "by" phrase, second one a "for"/"as" phrase.
        by_frame = VerbnetOfficialFrame('XX', [
            noun_phrase('R1'),
            verb(),
            noun_phrase('R2'),
            {'elem': 'by'},
            noun_phrase('R3'),
        ])
        for_frame = VerbnetOfficialFrame('XX', [
            noun_phrase('R1'),
            verb(),
            noun_phrase('R4'),
            {'elem': {'for', 'as'}},
            noun_phrase('R5'),
        ])

        matcher = FrameMatcher(frame_occurrence, 'baseline')
        matcher.perform_frame_matching([by_frame, for_frame])

        expected_roles = [{'R1'}, {'R4'}, set(), {'R5'}]
        self.assertEqual(frame_occurrence.roles, expected_roles)
    def test_3(self):
        """Score of a frame whose direct object is absent from the occurrence."""
        occurrence_elems = ('NP', 'V', 'with', 'NP')
        frame_occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in occurrence_elems], 2, 'a predicate')

        frame_structure = [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'Patient'},
            {'elem': 'with'},
            {'elem': 'NP', 'role': 'Role3'},
        ]
        frame = VerbnetOfficialFrame('c', frame_structure)

        matcher = FrameMatcher(frame_occurrence, 'sync_predicates')
        best_score = matcher.perform_frame_matching([frame])

        expected_score = int(100 / 2 + 100 / 3)
        self.assertEqual(best_score, expected_score)
    def test_present_that(self):
        """A "that S" occurrence matches the corresponding frame with score 200."""
        occurrence_elems = ('NP', 'V', 'that', 'S')
        frame_occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in occurrence_elems], 2, 'consider')
        matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

        consider_frame = VerbnetOfficialFrame('consider-29.9-1', [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'that'},
            {'elem': 'S', 'role': 'Patient'},
        ])
        best_score = matcher.perform_frame_matching([consider_frame])

        self.assertEqual(best_score, 200)
        expected_roles = [{'Agent'}, {'Patient'}]
        self.assertEqual(frame_occurrence.roles, expected_roles)
    def _build_frame(self, xml_frame, vnclass, role_list, restrictions):
        """Build a VerbnetOfficialFrame from one XML frame node.

        :param xml_frame: XML representation of the frame.
        :type xml_frame: xml.etree.ElementTree.Element.
        :param vnclass: The VerbNet class to which the frame belongs.
        :type vnclass: str.
        :param role_list: Role names; a role's position here is used to look
            up its entry in ``restrictions``.
        :param restrictions: Restrictions indexed like ``role_list``; it may
            be shorter, in which case roles past its end get no restriction.
        :returns: VerbnetOfficialFrame -- built from the merged syntax.

        """
        # The "primary" description is a space-separated list of elements.
        description = xml_frame.find("DESCRIPTION")
        tokens = description.attrib["primary"].split(" ")

        # A lexeme opening the structure is capitalized; lowercase it so it
        # can be matched against syntax items.  Elements whose part before
        # the first "." is already all-uppercase are left untouched.
        head = tokens[0]
        head_stem = head.split(".")[0]
        if head[0].isupper() and head_stem.upper() != head_stem:
            tokens[0] = head.lower()

        roles, structure = self._build_structure(
            tokens, xml_frame.find("SYNTAX"), vnclass, role_list)

        # Keep the restriction of every role that is known and has one.
        role_restr = []
        for role in roles:
            if role not in role_list:
                continue
            position = role_list.index(role)
            if position < len(restrictions):
                role_restr.append(restrictions[position])

        merged_syntax = self._merge_syntax(structure, roles, role_restr)
        return VerbnetOfficialFrame(vnclass, merged_syntax)
    def test_2(self):
        """An occurrence declared with zero slots reports num_slots == 0."""
        occurrence_elems = ('to', 'be')
        frame_occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in occurrence_elems], 0, 'a predicate')

        # This frame is only constructed; the assertion below does not use it.
        frame_structure = [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'Patient'},
            {'elem': 'with'},
            {'elem': 'NP', 'role': 'Role3'},
        ]
        frame = VerbnetOfficialFrame('c', frame_structure)

        self.assertEqual(frame_occurrence.num_slots, 0)
    def test_1(self):
        """Scores and merged roles when matching against several frames."""
        occurrence_elems = ('NP', 'V', 'NP', 'with', 'NP')
        frame_occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in occurrence_elems], 3, 'a predicate')

        def make_frame(vnclass, preposition, last_role):
            # Agent V Patient <preposition> <last_role>, with fresh dicts.
            return VerbnetOfficialFrame(vnclass, [
                {'elem': 'NP', 'role': 'Agent'},
                {'elem': 'V'},
                {'elem': 'NP', 'role': 'Patient'},
                {'elem': preposition},
                {'elem': 'NP', 'role': last_role},
            ])

        frame2 = make_frame('Class 1', 'for', 'Role1')
        frame3 = make_frame('Class 1', 'with', 'Role2')
        frame4 = make_frame('Class 2', 'with', 'Role3')

        matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

        # The "for" frame differs from the occurrence on the preposition.
        best_score = matcher.perform_frame_matching([frame2])
        self.assertEqual(best_score, int(100 * 4 / 3))

        # Both "with" frames are expected to reach the score of 200.
        best_score = matcher.perform_frame_matching([frame3, frame4])
        self.assertEqual(best_score, 200)

        expected_roles = [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}]
        self.assertEqual(frame_occurrence.possible_roles(), expected_roles)
        self.assertEqual(frame_occurrence.roles,
                         [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}])
    def test_4(self):
        """Roles collected when several frames fit an NP V NP occurrence."""
        occurrence_elems = ('NP', 'V', 'NP')
        frame_occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in occurrence_elems], 2, 'a predicate')
        matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

        # Factories so that every frame receives its own fresh dicts.
        def noun_phrase(role):
            return {'elem': 'NP', 'role': role}

        def word(elem):
            return {'elem': elem}

        verbnet_frames = [
            VerbnetOfficialFrame(
                'XX', [noun_phrase('Agent'), word('V'), noun_phrase('Theme')]),
            VerbnetOfficialFrame(
                'XX', [noun_phrase('Agent'), word('V'), noun_phrase('Theme')]),
            VerbnetOfficialFrame(
                'XX', [noun_phrase('Theme'), word('V')]),
            VerbnetOfficialFrame(
                'XX', [noun_phrase('Agent'), word('V'), noun_phrase('Theme')]),
            VerbnetOfficialFrame(
                'XX', [noun_phrase('Theme'), word('V'),
                       word('with'), noun_phrase('Instrument')]),
            VerbnetOfficialFrame(
                'XX', [noun_phrase('Agent'), word('V'), noun_phrase('Theme'),
                       word('with'), noun_phrase('Instrument')]),
            VerbnetOfficialFrame(
                'XX', [noun_phrase('Instrument'), word('V'),
                       noun_phrase('Theme')]),
        ]
        matcher.perform_frame_matching(verbnet_frames)

        expected_roles = [{'Agent', 'Instrument'}, {'Theme'}]
        self.assertEqual(frame_occurrence.roles, expected_roles)
    def _build_structure(self, base_structure, syntax_data, vnclass, role_list):
        """ Build the final structure and the slot roles from base_structure

        :param base_structure: The base structure
        :type base_structure: str List
        :param syntax_data: The XML "SYNTAX" node
        :type syntax_data: xml.etree.ElementTree.Element
        :param vnclass: The VerbNet class of the frame (not used by this method)
        :type vnclass: str
        :param role_list: Role names accepted when an element of
            base_structure carries a ".role" suffix
        :type role_list: str List
        :returns: (roles, structure) -- ``roles`` holds one entry per slot
            (``None`` when unknown, trailing ``None`` entries removed) and
            ``structure`` is the final list of elements

        Anomalies are not raised; they are appended to ``self.unhandled``.

        """
        structure = []
        roles = []

        # Position of the last SYNTAX child matched so far.
        index_xml = -1

        for full_element in base_structure:
            # "NP.agent" -> ["NP", "agent"]; the part after the dot, if any,
            # is a potential role name.
            full_element = full_element.split(".")
            element = full_element[0]

            # Strip construction markers such as "NP-Dative".
            for end in ['-Middle', '-Dative', '-dative', '-Result',
                        '-Conative', '-Fulfilling', '-Quote']:
                if element.endswith(end):
                    element = element[:-len(end)]

            # TODO handle adverbial phrases and adverbs?
            if element in ['ADV', 'ADVP']:
                pass
            # S_INF -> to S
            elif element == 'S_INF':
                structure = structure + ['to', 'S']
            # Handle the "whether/if" syntax (which means "whether" or "if")
            elif "/" in element:
                structure.append(set(element.split("/")))
            # Replace PP by "{preposition set} + NP"
            elif element == "PP":
                new_index, prep = self._read_syntax_data(
                    index_xml, syntax_data, "keyword", base_structure)
                if new_index == -1:
                    self.unhandled.append({
                        "file": self.filename,
                        "elem": "PP",
                        "data": "No syntax data found"
                    })
                    # No preposition found in SYNTAX: fall back on a generic
                    # preposition class.
                    if len(full_element) > 1 and full_element[1] == "location":
                        structure += [verbnetprepclasses.prep["loc"], "NP"]
                    else:
                        structure += [verbnetprepclasses.all_preps, "NP"]
                else:
                    index_xml = new_index
                    structure += [prep, "NP"]
            # Everything else (NP, V, ...) is unmodified
            else:
                structure.append(element)

            # Lowercase elements are looked up as keywords in SYNTAX.
            search = element
            if len(search) > 0 and search[0].islower():
                search = "keyword"

            # Look for a matching element in SYNTAX
            # and check whether we can find an unexpected keyword to add,
            # between our current position and the matching element
            new_index, keyword = self._read_syntax_data(
                index_xml, syntax_data, search, base_structure)
            if keyword != "" and search != "keyword":
                # The keyword goes just before the element appended above.
                structure.insert(-1, keyword)
            if new_index != -1:
                index_xml = new_index

            if VerbnetOfficialFrame._is_a_slot({'elem': element}):
                roles.append(None)

            if len(full_element) > 1:
                potential_role = "-".join(
                    [x.title() for x in full_element[1].split('-')])
                if potential_role in role_list:
                    # The role applies to the most recent slot.  This used to
                    # be written roles[num_slot - 1] with a counter that was
                    # never incremented, i.e. always roles[-1], which raised
                    # IndexError when no slot had been seen yet.
                    if roles:
                        roles[-1] = potential_role
                    else:
                        self.unhandled.append({
                            "file": self.filename,
                            "elem": element,
                            "data": "Role found before any slot"
                        })

        # Fill the role list
        i = 0
        for element in syntax_data:
            # Only SYNTAX children that carry a "value" attribute and are not
            # VERB, PREP or LEX provide a role.
            if ((element.tag not in ["VERB", "PREP", "LEX"]) and
                    "value" in element.attrib):

                if i >= len(roles):
                    roles.append(None)
                    self.unhandled.append({
                        "file": self.filename,
                        "elem": "\\",
                        "data": "Too many roles in the syntax"
                    })
                else:
                    if roles[i] is not None and roles[i] != element.attrib["value"]:
                        self.unhandled.append({
                            "file": self.filename,
                            "elem": "\\",
                            "data": "Conflict between roles indicated in syntax and structure"
                        })
                    else:
                        roles[i] = element.attrib["value"]
                i += 1

        # Drop the slots at the end for which no role was found.
        while len(roles) > 0 and roles[-1] is None:
            del roles[-1]

        return roles, structure
    # Example #10
    # 0
    def test_global(self):
        """Read VerbNet and compare a sample of frames with expected ones.

        The first part loads every class found under ``paths.VERBNET_PATH``
        and checks that a few known frames are present.  The second part
        reloads only ``separate-23.1.xml`` and compares the complete
        verb -> frames mapping with the expected one.
        """
        reader = VerbnetReader(paths.VERBNET_PATH)
        self.assertEqual(len(reader.frames_for_verb), 4402)
        empty_restr = VNRestriction.build_empty()

        def np_slot(role, restr):
            # Fresh dict for every frame.
            return {'elem': 'NP', 'role': role, 'restr': restr}

        def word(elem):
            return {'elem': elem}

        test_frames = {
            'sparkle':
            VerbnetOfficialFrame('light_emission-43.1', [
                word('there'),
                word('V'),
                np_slot('Theme',
                        VNRestriction.build_not(
                            VNRestriction.build('animate'))),
                word(verbnetprepclasses.prep['loc']),
                np_slot('Location', empty_restr),
            ]),
            'employ':
            VerbnetOfficialFrame('use-105', [
                np_slot('Agent',
                        VNRestriction.build_or(
                            VNRestriction.build('animate'),
                            VNRestriction.build('organization'))),
                word('V'),
                np_slot('Theme', empty_restr),
            ]),
            'break':
            VerbnetOfficialFrame('break-45.1', [
                np_slot('Patient', VNRestriction.build('solid')),
                word('V'),
            ]),
            'suggest':
            VerbnetOfficialFrame('say-37.7', [
                np_slot('Agent',
                        VNRestriction.build_or(
                            VNRestriction.build('animate'),
                            VNRestriction.build('organization'))),
                word('V'),
                word('how'),
                word('to'),
                {
                    'elem': 'S',
                    'role': 'Topic',
                    'restr': VNRestriction.build('communication')
                },
            ]),
            'snooze':
            VerbnetOfficialFrame('snooze-40.4', [
                np_slot('Agent', VNRestriction.build('animate')),
                word('V'),
            ]),
        }

        for verb, frame in test_frames.items():
            self.assertIn(verb, reader.frames_for_verb)
            self.assertIn(frame, reader.frames_for_verb[verb])

        # Second part: start from an empty mapping and load a single class.
        reader.frames_for_verb = {}
        root = ET.ElementTree(file=str(paths.VERBNET_PATH /
                                       'separate-23.1.xml'))
        reader._handle_class(root.getroot(), [], [], [])

        animate = VNRestriction.build('animate')

        # Frames of the top-level class.
        list1 = [
            VerbnetOfficialFrame('separate-23.1', [
                np_slot('Agent', animate),
                word('V'),
                np_slot('Patient', empty_restr),
                word({'from'}),
                np_slot('Co-Patient', empty_restr),
            ]),
            VerbnetOfficialFrame('separate-23.1', [
                np_slot('Agent', animate),
                word('V'),
                np_slot('Patient', empty_restr),
            ]),
            VerbnetOfficialFrame('separate-23.1', [
                np_slot('Patient', empty_restr),
                word('V'),
            ]),
            VerbnetOfficialFrame('separate-23.1', [
                np_slot('Patient', empty_restr),
                word('V'),
                word({'from'}),
                np_slot('Co-Patient', empty_restr),
            ]),
            VerbnetOfficialFrame('separate-23.1', [
                np_slot('Patient', empty_restr),
                word('V'),
            ]),
        ]

        # Frame added by subclass separate-23.1-1.
        list2 = [
            VerbnetOfficialFrame('separate-23.1-1', [
                np_slot('Patient', empty_restr),
                word('V'),
                word({'from'}),
                np_slot('Co-Patient', empty_restr),
            ])
        ]

        # Frame added by subclass separate-23.1-2.
        list3 = [
            VerbnetOfficialFrame('separate-23.1-2', [
                np_slot('Patient', empty_restr),
                word('V'),
                word({'with'}),
                np_slot('Co-Patient', empty_restr),
            ])
        ]

        expected_result = {
            'dissociate': list1 + list3,
            'disconnect': list1 + list3,
            'divide': list1 + list2,
            'disassociate': list1,
            'disentangle': list1 + list2,
            'divorce': list1 + list2,
            'separate': list1 + list3,
            'segregate': list1 + list2,
            'part': list1 + list3,
            'differentiate': list1 + list2,
            'uncoil': list1,
            'decouple': list1 + list2,
            'sever': list1,
            'dissimilate': list1 + list2
        }

        # Print details about mismatches before the final assertion.  A verb
        # missing from the reader is treated as having no frame, so that the
        # assertion below reports the failure instead of a KeyError here.
        for verb, expected_frames in expected_result.items():
            got_frames = reader.frames_for_verb.get(verb, [])
            if expected_frames != got_frames:
                print('Error with {}'.format(verb))
                if len(expected_frames) != len(got_frames):
                    print('expected {} frames, got {}'.format(
                        len(expected_frames), len(got_frames)))
                for expected, got in zip(expected_frames, got_frames):
                    if expected != got:
                        print('{} != {}'.format(expected, got))

        self.assertEqual(reader.frames_for_verb, expected_result)