def test_transitive(self):
    """Check passivize() on a transitive Agent-V-Theme frame.

    The expected result is a list of two frames: one where the Theme
    precedes the verb and the Agent is dropped, and one where the Agent
    is reintroduced after a 'by' keyword.
    """
    # Each helper returns a fresh dict so that no element is shared
    # between the frames being compared.
    def agent():
        return {'elem': 'NP', 'role': 'Agent', 'restr': 'a'}

    def theme():
        return {'elem': 'NP', 'role': 'Theme', 'restr': 'b'}

    def verb():
        return {'elem': 'V'}

    vn_frame_transitive = VerbnetOfficialFrame(
        'XX', [agent(), verb(), theme()])

    passive_forms = vn_frame_transitive.passivize()

    self.assertEqual(passive_forms, [
        VerbnetOfficialFrame('XX', [theme(), verb()]),
        VerbnetOfficialFrame(
            'XX', [theme(), verb(), {'elem': 'by'}, agent()]),
    ])
def test_baseline_alg(self):
    """Check the roles assigned by the 'baseline' matching algorithm.

    The occurrence is "NP V NP NP for NP"; two candidate frames are
    offered and the resulting role sets are compared slot by slot.
    """
    occurrence_syntax = [
        {'elem': 'NP'},
        {'elem': 'V'},
        {'elem': 'NP'},
        {'elem': 'NP'},
        {'elem': 'for'},
        {'elem': 'NP'},
    ]
    frame_occurrence = VerbnetFrameOccurrence(
        occurrence_syntax, 4, 'a predicate')

    by_frame = VerbnetOfficialFrame('XX', [
        {'elem': 'NP', 'role': 'R1'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'R2'},
        {'elem': 'by'},
        {'elem': 'NP', 'role': 'R3'},
    ])
    for_or_as_frame = VerbnetOfficialFrame('XX', [
        {'elem': 'NP', 'role': 'R1'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'R4'},
        {'elem': {'for', 'as'}},
        {'elem': 'NP', 'role': 'R5'},
    ])
    verbnet_frames = [by_frame, for_or_as_frame]

    matcher = FrameMatcher(frame_occurrence, 'baseline')
    matcher.perform_frame_matching(verbnet_frames)

    expected_roles = [{'R1'}, {'R4'}, set(), {'R5'}]
    self.assertEqual(frame_occurrence.roles, expected_roles)
def test_3(self):
    """Check the 'sync_predicates' score for a partially matching frame.

    The occurrence is "NP V with NP" while the candidate frame is
    "NP V NP with NP"; the expected best score is int(100/2 + 100/3).
    """
    occurrence_syntax = [
        {'elem': 'NP'},
        {'elem': 'V'},
        {'elem': 'with'},
        {'elem': 'NP'},
    ]
    frame_occurrence = VerbnetFrameOccurrence(
        occurrence_syntax, 2, 'a predicate')

    frame = VerbnetOfficialFrame('c', [
        {'elem': 'NP', 'role': 'Agent'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'Patient'},
        {'elem': 'with'},
        {'elem': 'NP', 'role': 'Role3'},
    ])

    matcher = FrameMatcher(frame_occurrence, 'sync_predicates')
    best_score = matcher.perform_frame_matching([frame])

    expected_score = int(100 / 2 + 100 / 3)
    self.assertEqual(best_score, expected_score)
def test_present_that(self):
    """Check matching of an occurrence containing an explicit 'that'.

    The occurrence "NP V that S" is matched against an identical
    'consider-29.9-1' frame; a score of 200 and the Agent/Patient roles
    are expected.
    """
    frame_occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'that'}, {'elem': 'S'}],
        2,
        'consider')
    matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

    consider_frame = VerbnetOfficialFrame('consider-29.9-1', [
        {'elem': 'NP', 'role': 'Agent'},
        {'elem': 'V'},
        {'elem': 'that'},
        {'elem': 'S', 'role': 'Patient'},
    ])
    best_score = matcher.perform_frame_matching([consider_frame])

    self.assertEqual(best_score, 200)
    self.assertEqual(frame_occurrence.roles, [{'Agent'}, {'Patient'}])
def _build_frame(self, xml_frame, vnclass, role_list, restrictions):
    """Build a VerbnetOfficialFrame from one XML frame node.

    :param xml_frame: XML representation of the frame.
    :type xml_frame: xml.etree.ElementTree.Element.
    :param vnclass: The VerbNet class to which the frame belongs.
    :type vnclass: str.
    :param role_list: Role names; the position of a role in this list is
        used to look up its entry in ``restrictions``.
    :param restrictions: Restrictions, indexed like ``role_list``.
    :returns: VerbnetOfficialFrame -- the frame built from the merged
        structure, roles and restrictions.
    """
    # The "primary" description holds the space-separated base structure.
    description = xml_frame.find("DESCRIPTION")
    base_structure = description.attrib["primary"].split(" ")

    # A lexeme at the beginning of a structure is capitalized. It has to
    # be completely lowercase to be matched against syntax items. Tokens
    # whose part before the first "." is already fully uppercase (e.g.
    # "NP") are left untouched.
    first_token = base_structure[0]
    first_head = first_token.split(".")[0]
    if first_token[0].isupper() and first_head != first_head.upper():
        base_structure[0] = first_token.lower()

    roles, structure = self._build_structure(
        base_structure, xml_frame.find("SYNTAX"), vnclass, role_list)

    # Collect the restriction of every role that is known in role_list
    # and has an entry in restrictions. Other roles contribute nothing,
    # so role_restr may end up shorter than roles.
    role_restr = []
    for role in roles:
        if role not in role_list:
            continue
        position = role_list.index(role)
        if position < len(restrictions):
            role_restr.append(restrictions[position])

    merged_syntax = self._merge_syntax(structure, roles, role_restr)
    return VerbnetOfficialFrame(vnclass, merged_syntax)
def test_2(self):
    """Check that an occurrence made of 'to' and 'be' reports zero slots."""
    occurrence_syntax = [{'elem': 'to'}, {'elem': 'be'}]
    frame_occurrence = VerbnetFrameOccurrence(
        occurrence_syntax, 0, 'a predicate')

    # The frame is built but not matched: only its construction is
    # exercised here.
    frame = VerbnetOfficialFrame('c', [
        {'elem': 'NP', 'role': 'Agent'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'Patient'},
        {'elem': 'with'},
        {'elem': 'NP', 'role': 'Role3'},
    ])

    self.assertEqual(frame_occurrence.num_slots, 0)
def test_1(self):
    """Check scores and role sets over two successive matching rounds.

    The occurrence is "NP V NP with NP". A first round uses a frame
    with 'for' instead of 'with'; a second round uses two frames from
    different classes that both contain 'with'.
    """
    def transitive_frame(vnclass, preposition, last_role):
        # "NP(Agent) V NP(Patient) <preposition> NP(<last_role>)",
        # built from fresh dicts on every call.
        return VerbnetOfficialFrame(vnclass, [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'Patient'},
            {'elem': preposition},
            {'elem': 'NP', 'role': last_role},
        ])

    frame_occurrence = VerbnetFrameOccurrence(
        [
            {'elem': 'NP'},
            {'elem': 'V'},
            {'elem': 'NP'},
            {'elem': 'with'},
            {'elem': 'NP'},
        ],
        3,
        'a predicate')

    frame2 = transitive_frame('Class 1', 'for', 'Role1')
    frame3 = transitive_frame('Class 1', 'with', 'Role2')
    frame4 = transitive_frame('Class 2', 'with', 'Role3')

    matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

    # First round: the preposition does not match.
    best_score = matcher.perform_frame_matching([frame2])
    self.assertEqual(best_score, int(100 * 4 / 3))

    # Second round on the same matcher: both frames match completely.
    best_score = matcher.perform_frame_matching([frame3, frame4])
    self.assertEqual(best_score, 200)

    expected_roles = [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}]
    self.assertEqual(frame_occurrence.possible_roles(), expected_roles)
    self.assertEqual(
        frame_occurrence.roles,
        [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}])
def test_4(self):
    """Check role sets when several frames compete for "NP V NP".

    Seven candidate frames (some of them identical) are matched with
    'sync_predicates'; the first slot is expected to receive both
    'Agent' and 'Instrument', the second only 'Theme'.
    """
    def noun_phrase(role):
        return {'elem': 'NP', 'role': role}

    def word(text):
        return {'elem': text}

    frame_occurrence = VerbnetFrameOccurrence(
        [word('NP'), word('V'), word('NP')], 2, 'a predicate')
    matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

    verbnet_frames = [
        VerbnetOfficialFrame('XX', [
            noun_phrase('Agent'), word('V'), noun_phrase('Theme')]),
        VerbnetOfficialFrame('XX', [
            noun_phrase('Agent'), word('V'), noun_phrase('Theme')]),
        VerbnetOfficialFrame('XX', [
            noun_phrase('Theme'), word('V')]),
        VerbnetOfficialFrame('XX', [
            noun_phrase('Agent'), word('V'), noun_phrase('Theme')]),
        VerbnetOfficialFrame('XX', [
            noun_phrase('Theme'), word('V'),
            word('with'), noun_phrase('Instrument')]),
        VerbnetOfficialFrame('XX', [
            noun_phrase('Agent'), word('V'), noun_phrase('Theme'),
            word('with'), noun_phrase('Instrument')]),
        VerbnetOfficialFrame('XX', [
            noun_phrase('Instrument'), word('V'), noun_phrase('Theme')]),
    ]

    matcher.perform_frame_matching(verbnet_frames)

    self.assertEqual(
        frame_occurrence.roles,
        [{'Agent', 'Instrument'}, {'Theme'}])
def _build_structure(self, base_structure, syntax_data, vnclass, role_list):
    """Build the final structure and the role list from base_structure.

    :param base_structure: The base structure
    :type base_structure: str List
    :param syntax_data: The XML "SYNTAX" node
    :type syntax_data: xml.etree.ElementTree.Element
    :param vnclass: The VerbNet class of the frame (not used by this
        method; kept for interface compatibility)
    :type vnclass: str
    :param role_list: Role names accepted when a role is given as a
        ".suffix" of a base structure element
    :type role_list: str List
    :returns: (roles, structure) -- ``roles`` is the list of role names
        (or None) with trailing None entries removed; ``structure`` is
        the final structure, whose items are the values appended below
        (element strings, keyword sets, preposition data).
    """
    # Construction suffixes that are stripped from an element name.
    stripped_suffixes = (
        '-Middle', '-Dative', '-dative', '-Result',
        '-Conative', '-Fulfilling', '-Quote',
    )

    structure = []
    roles = []
    index_xml = -1

    for full_element in base_structure:
        parts = full_element.split(".")
        element = parts[0]

        for suffix in stripped_suffixes:
            if element.endswith(suffix):
                element = element[:-len(suffix)]

        # TODO handle adverbial phrases and adverbs?
        if element in ('ADV', 'ADVP'):
            pass
        # S_INF -> to S
        elif element == 'S_INF':
            structure.extend(['to', 'S'])
        # Handle the "whether/if" syntax (which means "whether" or "if")
        elif "/" in element:
            structure.append(set(element.split("/")))
        # Replace PP by "{preposition set} + NP"
        elif element == "PP":
            new_index, prep = self._read_syntax_data(
                index_xml, syntax_data, "keyword", base_structure)
            if new_index == -1:
                self.unhandled.append({
                    "file": self.filename,
                    "elem": "PP",
                    "data": "No syntax data found"
                })
                # No preposition found in SYNTAX: fall back on a
                # predefined preposition class.
                if len(parts) > 1 and parts[1] == "location":
                    structure += [verbnetprepclasses.prep["loc"], "NP"]
                else:
                    structure += [verbnetprepclasses.all_preps, "NP"]
            else:
                index_xml = new_index
                structure += [prep, "NP"]
        # Everything else (NP, V, ...) is unmodified
        else:
            structure.append(element)

            search = element
            if len(search) > 0 and search[0].islower():
                search = "keyword"

            # Look for a matching element in SYNTAX
            # and check whether we can find an unexpected keyword to add,
            # between our current position and the matching element
            new_index, keyword = self._read_syntax_data(
                index_xml, syntax_data, search, base_structure)
            if keyword != "" and search != "keyword":
                # Insert the keyword just before the element appended above.
                structure.insert(-1, keyword)
            if new_index != -1:
                index_xml = new_index

        if VerbnetOfficialFrame._is_a_slot({'elem': element}):
            # The original code wrote to roles[num_slot - 1] with a
            # num_slot counter that was never incremented, i.e. always
            # to the entry just appended. This is made explicit here.
            slot_role = None
            if len(parts) > 1:
                potential_role = "-".join(
                    x.title() for x in parts[1].split('-'))
                if potential_role in role_list:
                    slot_role = potential_role
            roles.append(slot_role)

    # Fill the role list from the role-bearing SYNTAX children
    role_position = 0
    for node in syntax_data:
        if node.tag in ("VERB", "PREP", "LEX") or "value" not in node.attrib:
            continue

        if role_position >= len(roles):
            roles.append(None)
            self.unhandled.append({
                "file": self.filename,
                "elem": "\\",
                "data": "Too many roles in the syntax"
            })
        elif (roles[role_position] is not None
                and roles[role_position] != node.attrib["value"]):
            self.unhandled.append({
                "file": self.filename,
                "elem": "\\",
                "data": "Conflict between roles indicated in syntax and structure"
            })
        else:
            roles[role_position] = node.attrib["value"]

        role_position += 1

    # Drop the slots at the end that never received a role.
    while roles and roles[-1] is None:
        del roles[-1]

    return roles, structure
def test_global(self):
    """Check the reader against the VerbNet data found at paths.VERBNET_PATH.

    First part: the total number of verbs and the presence of a few
    hand-written frames. Second part: the 'separate-23.1' class file is
    handled alone and the complete verb -> frames mapping is compared.
    """
    reader = VerbnetReader(paths.VERBNET_PATH)
    self.assertEqual(len(reader.frames_for_verb), 4402)

    empty_restr = VNRestriction.build_empty()

    def animate_or_organization():
        return VNRestriction.build_or(
            VNRestriction.build('animate'),
            VNRestriction.build('organization'))

    sparkle_frame = VerbnetOfficialFrame('light_emission-43.1', [
        {'elem': 'there'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'Theme',
         'restr': VNRestriction.build_not(VNRestriction.build('animate'))},
        {'elem': verbnetprepclasses.prep['loc']},
        {'elem': 'NP', 'role': 'Location', 'restr': empty_restr},
    ])
    employ_frame = VerbnetOfficialFrame('use-105', [
        {'elem': 'NP', 'role': 'Agent',
         'restr': animate_or_organization()},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'Theme', 'restr': empty_restr},
    ])
    break_frame = VerbnetOfficialFrame('break-45.1', [
        {'elem': 'NP', 'role': 'Patient',
         'restr': VNRestriction.build('solid')},
        {'elem': 'V'},
    ])
    suggest_frame = VerbnetOfficialFrame('say-37.7', [
        {'elem': 'NP', 'role': 'Agent',
         'restr': animate_or_organization()},
        {'elem': 'V'},
        {'elem': 'how'},
        {'elem': 'to'},
        {'elem': 'S', 'role': 'Topic',
         'restr': VNRestriction.build('communication')},
    ])
    snooze_frame = VerbnetOfficialFrame('snooze-40.4', [
        {'elem': 'NP', 'role': 'Agent',
         'restr': VNRestriction.build('animate')},
        {'elem': 'V'},
    ])

    test_frames = {
        'sparkle': sparkle_frame,
        'employ': employ_frame,
        'break': break_frame,
        'suggest': suggest_frame,
        'snooze': snooze_frame,
    }
    for verb, frame in test_frames.items():
        self.assertIn(verb, reader.frames_for_verb)
        self.assertIn(frame, reader.frames_for_verb[verb])

    # Second part: start from an empty mapping and handle one class file.
    reader.frames_for_verb = {}
    tree = ET.ElementTree(
        file=str(paths.VERBNET_PATH / 'separate-23.1.xml'))
    reader._handle_class(tree.getroot(), [], [], [])

    # Not referenced below; kept so the same calls are made as before.
    animate = VNRestriction.build('animate')

    # Helpers returning fresh element dicts for the separate-23.1 frames.
    def animate_agent():
        return {'elem': 'NP', 'role': 'Agent',
                'restr': VNRestriction.build('animate')}

    def patient():
        return {'elem': 'NP', 'role': 'Patient', 'restr': empty_restr}

    def co_patient():
        return {'elem': 'NP', 'role': 'Co-Patient', 'restr': empty_restr}

    def verb_elem():
        return {'elem': 'V'}

    def preposition(word):
        return {'elem': {word}}

    list1 = [
        VerbnetOfficialFrame('separate-23.1', [
            animate_agent(), verb_elem(), patient(),
            preposition('from'), co_patient()]),
        VerbnetOfficialFrame('separate-23.1', [
            animate_agent(), verb_elem(), patient()]),
        VerbnetOfficialFrame('separate-23.1', [
            patient(), verb_elem()]),
        VerbnetOfficialFrame('separate-23.1', [
            patient(), verb_elem(), preposition('from'), co_patient()]),
        VerbnetOfficialFrame('separate-23.1', [
            patient(), verb_elem()]),
    ]
    list2 = [
        VerbnetOfficialFrame('separate-23.1-1', [
            patient(), verb_elem(), preposition('from'), co_patient()]),
    ]
    list3 = [
        VerbnetOfficialFrame('separate-23.1-2', [
            patient(), verb_elem(), preposition('with'), co_patient()]),
    ]

    expected_result = {
        'dissociate': list1 + list3,
        'disconnect': list1 + list3,
        'divide': list1 + list2,
        'disassociate': list1,
        'disentangle': list1 + list2,
        'divorce': list1 + list2,
        'separate': list1 + list3,
        'segregate': list1 + list2,
        'part': list1 + list3,
        'differentiate': list1 + list2,
        'uncoil': list1,
        'decouple': list1 + list2,
        'sever': list1,
        'dissimilate': list1 + list2,
    }

    # Print the differing frames before the global assertion so that a
    # failure is easier to diagnose.
    for verb, expected_frames in expected_result.items():
        actual_frames = reader.frames_for_verb[verb]
        if expected_frames != actual_frames:
            print('Error with {}'.format(verb))
            for expected, got in zip(expected_frames, actual_frames):
                if expected != got:
                    print('{} != {}'.format(expected, got))

    self.assertEqual(reader.frames_for_verb, expected_result)