def test_Pred_cmp_self(self):
    """
    Two separately constructed Pred() instances compare as equal:
    ==, <= and >= hold, while <, > and != do not.
    """
    first, second = Pred(), Pred()

    # Non-strict comparisons hold
    self.assertEqual(first, second)
    self.assertLessEqual(first, second)
    self.assertGreaterEqual(first, second)

    # Strict comparisons and inequality do not
    self.assertFalse(first < second)
    self.assertFalse(first > second)
    self.assertFalse(first != second)
def test_Pred_from_normalised_string(self):
    """
    Pred.from_normalised_string should build a RealPred for a string with
    a leading underscore and a GPred otherwise, and the result should equal
    the one obtained from the subclass constructor directly.
    """
    # (normalised string, subclass expected to be instantiated)
    samples = (('_cat_n_1', RealPred), ('the_q', GPred))

    # Check the preds are of the right type
    generic = [Pred.from_normalised_string(text) for text, _ in samples]
    for pred, (_, expected_cls) in zip(generic, samples):
        self.assertIsInstance(pred, expected_cls)

    # Check the preds are equivalent to initialising directly
    direct = [cls.from_normalised_string(text) for text, cls in samples]
    for pred, reference in zip(generic, direct):
        self.assertEqual(pred, reference)
def test_Pred_new(self):
    """
    Pred() instances denote underspecified preds,
    and should not take any arguments
    """
    # Callable form of assertRaises: invokes Pred('name') and expects TypeError
    self.assertRaises(TypeError, Pred, 'name')
def test_Pred_cmp_subclasses(self):
    """
    A bare Pred sorts before a GPred, which sorts before a RealPred;
    every instance is equal to itself and unequal to the others.
    """
    underspecified = Pred()
    real = RealPred('cat', 'n', '1')
    grammar = GPred('pron')

    # Reflexive equality
    for item in (underspecified, grammar, real):
        self.assertEqual(item, item)

    # (smaller, larger) for every pair in the expected ordering
    ordered_pairs = (
        (underspecified, grammar),
        (underspecified, real),
        (grammar, real),
    )
    for smaller, larger in ordered_pairs:
        self.assertNotEqual(smaller, larger)
    for smaller, larger in ordered_pairs:
        self.assertLess(smaller, larger)
    for smaller, larger in ordered_pairs:
        self.assertLessEqual(smaller, larger)
    for smaller, larger in ordered_pairs:
        self.assertGreater(larger, smaller)
    for smaller, larger in ordered_pairs:
        self.assertGreaterEqual(larger, smaller)
def _parse_pred(string, nodeid, queries, equalities):
    """
    Parse a predicate specification into a pred object and a reference name.

    Three forms are distinguished:
    - 'pred' / 'predsort' (optionally followed by a special value):
      an underspecified Pred()
    - no leading underscore: a GPred
    - leading underscore: a RealPred, split from the right into at most
      lemma, pos and sense

    :param string: predicate text; must be lower-case, without spaces, and
        may be wrapped in one pair of double quotes
    :param nodeid: id of the node the predicate belongs to; used by the
        accessor callbacks handed to _parse_value
    :param queries: passed through to _parse_value
    :param equalities: passed through to _parse_value
    :return: tuple (pred, ref_name)
    :raises AssertionError: if the string violates the format constraints
    """
    assert string.islower(), 'Predicates must be lower-case.'
    assert ' ' not in string, 'Predicates must not contain spaces.'
    # Drop one pair of enclosing double quotes
    if string[0] == '"' and string[-1] == '"':
        string = string[1:-1]
    assert '"' not in string, 'Predicates must not contain quotes.'
    assert string[
        0] != '\'', 'Predicates with opening single-quote have been deprecated.'

    def matched_pred(matching, dmrs):
        # Pred of the DMRS node that this query node was matched to
        return dmrs[matching[nodeid]].pred

    def is_keyword(keyword):
        # The keyword must end the string or be followed by a special value
        size = len(keyword)
        return string.startswith(keyword) and (
            len(string) == size or string[size] in special_values)

    # Fully underspecified predicate
    if is_keyword('pred') or is_keyword('predsort'):
        cut = 8 if string.startswith('predsort') else 4
        remainder = _parse_value(string[cut:], None, queries, equalities,
                                 matched_pred)
        assert not remainder
        return Pred(), string[:cut]

    # Remember and strip a trailing '_rel' so the reference name keeps it
    rel_suffix = '_rel' if string.endswith('_rel') else ''
    if rel_suffix:
        string = string[:-4]

    # Grammar predicate: no leading underscore
    if string[0] != '_':
        name = _parse_value(
            string, '?', queries, equalities,
            (lambda matching, dmrs: matched_pred(matching, dmrs).name))
        return GPred(name), name + rel_suffix

    # Real predicate: up to three components, split from the right
    parts = string[1:].rsplit('_', 2)
    count = len(parts)
    assert count > 0, 'Invalid number of arguments for RealPred.'
    if count == 1:
        # Only a pos was given: lemma is a wildcard, sense is 'unknown'
        parts = ['?', parts[0], 'unknown']
    elif count == 2:
        # No sense component
        parts = parts + [None]
    raw_lemma, raw_pos, raw_sense = parts

    lemma = _parse_value(
        raw_lemma, '?', queries, equalities,
        (lambda matching, dmrs: matched_pred(matching, dmrs).lemma))
    pos = _parse_value(
        raw_pos, 'u', queries, equalities,
        (lambda matching, dmrs: matched_pred(matching, dmrs).pos))
    sense = _parse_value(
        raw_sense, 'unknown', queries, equalities,
        (lambda matching, dmrs: matched_pred(matching, dmrs).sense))

    # The reference name only contains the components that were given
    if count == 1:
        ref_name = '_{}{}'.format(pos, rel_suffix)
    elif count == 2:
        ref_name = '_{}_{}{}'.format(lemma, pos, rel_suffix)
    else:
        ref_name = '_{}_{}_{}{}'.format(lemma, pos, sense, rel_suffix)
    return RealPred(lemma, pos, sense), ref_name
def test_Pred_from_string(self):
    """
    Pred.from_string should normalise the string as necessary:
    a trailing _rel, enclosing double quotes and upper case are all accepted.
    """
    # (subclass, normalised string, raw spellings that should give the same pred)
    expectations = (
        (RealPred, '_cat_n_1',
         ('_cat_n_1_rel', '"_cat_n_1_rel"', '_CAT_N_1_REL')),
        (GPred, 'the',
         ('the_rel', '"the_rel"', 'THE_REL')),
    )
    for pred_cls, normalised, spellings in expectations:
        expected = pred_cls.from_normalised_string(normalised)
        for raw in spellings:
            self.assertEqual(Pred.from_string(raw), expected)
def test_Pred_normalise_string(self):
    """
    Pred strings should be normalised - see comments below
    """
    # Remove quotes from quoted preds
    self.assertEqual(Pred.normalise_string('"pron"'), 'pron')

    # An opening single quote is deprecated.
    # catch_warnings() restores the previous filters on exit, even if an
    # assertion fails, so the 'error' filter cannot leak into other tests
    # and the test runner's own filters are not wiped.
    with warnings.catch_warnings():
        warnings.simplefilter('error')
        with self.assertRaises(DeprecationWarning):
            Pred.normalise_string("'pron")
    # The value is checked separately: inside assertRaises the comparison
    # would never be reached once the warning is raised.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assertEqual(Pred.normalise_string("'pron"), "pron")

    # No internal spaces or quotes
    with self.assertRaises(ValueError):
        Pred.normalise_string('pred name')
    with self.assertRaises(ValueError):
        Pred.normalise_string('pred"name')

    # Force lower case
    self.assertEqual(Pred.normalise_string('PRON'), 'pron')

    # Strip trailing _rel
    self.assertEqual(Pred.normalise_string('pron_rel'), 'pron')
def test_Pred_normalise_string(self):
    """
    Pred strings should be normalised - see comments below
    """
    # Remove quotes from quoted preds
    self.assertEqual(Pred.normalise_string('"pron"'), 'pron')

    # An opening single quote is deprecated.
    # catch_warnings() restores the previous filters on exit, even if an
    # assertion fails, so the 'error' filter cannot leak into other tests
    # and the test runner's own filters are not wiped.
    with warnings.catch_warnings():
        warnings.simplefilter('error')
        with self.assertRaises(DeprecationWarning):
            Pred.normalise_string("'pron")
    # The value is checked separately: inside assertRaises the comparison
    # would never be reached once the warning is raised.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assertEqual(Pred.normalise_string("'pron"), "pron")

    # No internal spaces or quotes
    with self.assertRaises(ValueError):
        Pred.normalise_string('pred name')
    with self.assertRaises(ValueError):
        Pred.normalise_string('pred"name')

    # Force lower case: a warning is issued ...
    with warnings.catch_warnings():
        warnings.simplefilter('error')
        with self.assertRaises(Warning):
            Pred.normalise_string('PRON')
    # ... and the lower-cased string is returned
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assertEqual(Pred.normalise_string('PRON'), 'pron')

    # Strip trailing _rel
    self.assertEqual(Pred.normalise_string('pron_rel'), 'pron')
def test_Pred_from_string(self):
    """
    Pred.from_string should normalise the string as necessary.
    Upper-case input is accepted but triggers a warning.
    """
    cat_pred = RealPred.from_normalised_string('_cat_n_1')
    self.assertEqual(Pred.from_string('_cat_n_1_rel'), cat_pred)
    self.assertEqual(Pred.from_string('"_cat_n_1_rel"'), cat_pred)
    # catch_warnings() restores the previous filters on exit, even if an
    # assertion fails, so the 'error' filter cannot leak into other tests
    # and the test runner's own filters are not wiped.
    with warnings.catch_warnings():
        warnings.simplefilter('error')
        with self.assertRaises(Warning):
            Pred.from_string('_CAT_N_1_REL')
    # The value is checked separately: inside assertRaises the comparison
    # would never be reached once the warning is raised.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assertEqual(Pred.from_string('_CAT_N_1_REL'), cat_pred)

    the_pred = GPred.from_normalised_string('the')
    self.assertEqual(Pred.from_string('the_rel'), the_pred)
    self.assertEqual(Pred.from_string('"the_rel"'), the_pred)
    with warnings.catch_warnings():
        warnings.simplefilter('error')
        with self.assertRaises(Warning):
            Pred.from_string('THE_REL')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.assertEqual(Pred.from_string('THE_REL'), the_pred)
def _parse_node(string, nodeid, queries, equalities, anchors,
                sortinfo_classes, sortinfo_shortforms):
    """
    Parse a node specification of the form
    [ref_ids ':'] pred ['(' carg ')'] [' ' sortinfo]
    into a node object.

    The keyword 'node' in place of the pred yields a node with an
    underspecified Pred(), carg '?' and an underspecified Sortinfo().
    The reference ids may be wrapped in [...], (...) or {...}, which selects
    AnchorNode, OptionalNode or SubgraphNode respectively; unwrapped ids
    give a plain Node.

    :param string: the node specification
    :param nodeid: id assigned to the created node
    :param queries: passed through to the value/pred/sortinfo parsers
    :param equalities: passed through to the value/pred/sortinfo parsers
    :param anchors: dict from reference id to node; updated in place
    :param sortinfo_classes: passed through to _parse_sortinfo
    :param sortinfo_shortforms: passed through to _parse_sortinfo
    :return: tuple (node, ref_ids, ref_name); ref_ids is a list of ids,
        or None if no reference ids were given
    :raises AssertionError: if a reference id is already in anchors
    """
    # The pred ends at the first '(' (carg) or, failing that, the first space
    pred_end = string.find('(')
    if pred_end < 0:
        pred_end = string.find(' ')

    # A colon before the end of the pred separates off the reference ids
    if pred_end < 0:
        colon = string.find(':')
    else:
        colon = string.find(':', 0, pred_end)
    if colon < 0:
        ref_ids = None
        start = 0
    else:
        ref_ids = string[:colon]
        start = colon + 1
        while string[start] == ' ':
            start += 1

    if string[start:start + 4] == 'node' and (
            len(string) - start == 4
            or string[start + 4] in special_values):
        # Fully underspecified node
        value = _parse_value(string[start + 4:], None, queries, equalities,
                             (lambda matching, dmrs: dmrs[matching[nodeid]]))
        assert not value
        pred, carg, sortinfo, ref_name = Pred(), '?', Sortinfo(), 'node'
    elif pred_end < 0:
        # Nothing but a pred
        pred, ref_name = _parse_pred(string[start:], nodeid, queries,
                                     equalities)
        carg = sortinfo = None
    else:
        pred, ref_name = _parse_pred(string[start:pred_end], nodeid, queries,
                                     equalities)
        cursor = pred_end
        carg = None
        if string[cursor] == '(':
            close = string.index(')', cursor)
            # The carg may be wrapped in double quotes
            if string[cursor + 1] == '"' and string[close - 1] == '"':
                raw_carg = string[cursor + 2:close - 1]
            else:
                raw_carg = string[cursor + 1:close]
            assert '"' not in raw_carg
            carg = _parse_value(
                raw_carg, '?', queries, equalities,
                (lambda matching, dmrs: dmrs[matching[nodeid]].carg))
            cursor = close + 1
        sortinfo = None
        if cursor < len(string) and string[cursor] == ' ':
            while string[cursor] == ' ':
                cursor += 1
            sortinfo = _parse_sortinfo(string[cursor:], nodeid, queries,
                                       equalities, sortinfo_classes,
                                       sortinfo_shortforms)

    # Without reference ids there is nothing to register
    if not ref_ids:
        node = Node(nodeid, pred, sortinfo=sortinfo, carg=carg)
        return node, None, ref_name

    # The kind of brackets around the reference ids selects the node class
    bracketed = {
        ('[', ']'): AnchorNode,
        ('(', ')'): OptionalNode,
        ('{', '}'): SubgraphNode,
    }
    node_cls = bracketed.get((ref_ids[0], ref_ids[-1]))
    if node_cls is None:
        ref_ids = ref_ids.split(',')
        node = Node(nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
    else:
        ref_ids = ref_ids[1:-1].split(',')
        node = node_cls(anchors=ref_ids, nodeid=nodeid, pred=pred,
                        sortinfo=sortinfo, carg=carg)

    for ref_id in ref_ids:
        assert ref_id not in anchors, 'Reference ids have to be unique.'
        anchors[ref_id] = node
    return node, ref_ids, ref_name
def _read_elempred(string, vs, rs):
    """
    Parse one bracketed elementary predication string into an ElemPred.

    The string must start with '[' and end with ']'. It consists of a
    predicate, an optional '<cfrom:cto>' span (or an empty '<>'), and a list
    of attributes which is handed to _read_attributes. Which attributes are
    consumed as arguments depends on the kind of predicate; any attribute
    left over afterwards is an error.

    :param string: the elementary predication, including the brackets
    :param vs: passed through to _read_reference / _read_variable
    :param rs: passed through to _read_reference / _read_variable
    :return: tuple (ElemPred, var), where var is the second value returned
        by _read_variable for ARG0, or None for quantifiers
    :raises AssertionError: if the string does not have the expected form
    """
    assert string[0] == '[' and string[-1] == ']'
    # l: start of the predicate (skipping spaces after the opening bracket)
    l = 1
    while string[l] == ' ':
        l += 1
    # m: end of the predicate; r: end of the '<...>' span, if there is one
    m = string.find('<', l)
    if m >= 0:
        r = string.index('>', m)
        c = string.find(':', m, r)
        if c >= 0:
            cfrom = int(string[m + 1:c])
            cto = int(string[c + 1:r])
        else:
            # No colon is only allowed for an empty span '<>'
            assert m + 1 == r
            cfrom = None
            cto = None
    else:
        # No span: the predicate ends at the first space
        m = r = string.index(' ', l)
        cfrom = None
        cto = None
    if string[m - 15:m] == '_u_unknown_rel"':
        # Unknown word of the form "_lemma/TAG_u_unknown_rel":
        # the tag is mapped to a pos which replaces the 'u'
        assert string[l:l + 2] == '"_'
        slash = string.index('/', l + 2, m - 15)
        pos = {
            'FW': 'u',
            'JJ': 'a',
            'NN': 'n',
            'NNS': 'n',
            'RB': 'a',
            'VB': 'v',
            'VBP': 'v',
            'VBG': 'v',
            'VBN': 'v'
        }  # NOTE(review): the original author marked this mapping with question marks - confirm it is complete
        assert string[slash + 1:m -
                      15] in pos, 'Invalid unknown word POS: {}'.format(
                          string[slash + 1:m - 15])
        # '_lemma' + '_' + pos + '_unknown_rel'
        pred = Pred.from_string(string[l + 1:slash] + '_' +
                                pos[string[slash + 1:m - 15]] +
                                string[m - 13:m - 1])
    else:
        assert string[l:m].islower()
        if string[l] == '"':
            # Quoted predicate: strip the quotes
            assert string[m - 1] == '"'
            pred = Pred.from_string(string[l + 1:m - 1])
        else:
            pred = Pred.from_string(string[l:m])
    # From here on, l and r delimit the attribute list
    l = find_next(string, start=r + 1)
    r = find_previous(string, start=l, end=len(string) - 1) + 1
    attributes = _read_attributes(string[l:r], not_lower=('carg', ))
    label = _read_reference(attributes.pop('lbl'), vs, rs)
    carg = attributes.pop('carg', None)
    # A CARG must be present exactly for the gpreds listed here
    assert (carg is not None) == (
        isinstance(pred, GPred) and pred.name in
        ('basic_card', 'basic_numbered_hour', 'card', 'dofm', 'dofw', 'mofy',
         'named', 'named_n', 'numbered_hour', 'ord', 'season', 'year_range',
         'yofc')), (carg, pred)  # gpreds with CARG
    args = {}
    quantifier = False
    if isinstance(pred, GPred):
        if pred.name in (
                'def_explicit_q', 'def_implicit_q', 'def_poss_q', 'every_q',
                'free_relative_q', 'free_relative_ever_q', 'idiom_q_i',
                'number_q', 'pronoun_q', 'proper_q', 'some_q', 'udef_q',
                'which_q'):  # all quantifier gpreds according to core.smi
            quantifier = True
            # RSTR is required, BODY is optional
            handle = _read_reference(attributes.pop('rstr'), vs, rs)
            args['rstr'] = handle
            if 'body' in attributes:
                handle = _read_reference(attributes.pop('body'), vs, rs)
                args['body'] = handle
        elif pred.name in (
                'discourse', 'implicit_conj'
        ):  # all conjunction gpreds with potential L/R-HNDL according to core.smi
            # Either ARG1 and ARG2, or L-INDEX and R-INDEX
            if 'arg1' in attributes:
                ref = _read_reference(attributes.pop('arg1'), vs, rs)
                args['arg1'] = ref
                ref = _read_reference(attributes.pop('arg2'), vs, rs)
                args['arg2'] = ref
            else:
                ref = _read_reference(attributes.pop('l-index'), vs, rs)
                args['l-index'] = ref
                ref = _read_reference(attributes.pop('r-index'), vs, rs)
                args['r-index'] = ref
            if 'l-hndl' in attributes:
                # Handles are only accepted for implicit_conj
                assert pred.name == 'implicit_conj'
                handle = _read_reference(attributes.pop('l-hndl'), vs, rs)
                args['l-hndl'] = handle
                handle = _read_reference(attributes.pop('r-hndl'), vs, rs)
                args['r-hndl'] = handle
        elif pred.name in (
                'fw_seq', 'num_seq'
        ):  # all conjunction gpreds without potential L/R-HNDL according to core.smi
            ref = _read_reference(attributes.pop('l-index'), vs, rs)
            args['l-index'] = ref
            ref = _read_reference(attributes.pop('r-index'), vs, rs)
            args['r-index'] = ref
        elif pred.name == 'unknown':  # only predicate with ARG
            ref = _read_reference(attributes.pop('arg'), vs, rs)
            args['arg'] = ref
        else:  # regular gpreds
            # ARG1 to ARG3, stopping at the first one that is missing
            for n in range(1, 4):
                role = 'arg' + str(n)
                if role not in attributes:
                    break
                ref = _read_reference(attributes.pop(role), vs, rs)
                args[role] = ref
    elif isinstance(pred, RealPred):
        # NOTE(review): substring test - the empty string and multi-character
        # substrings such as 'ac' would also pass
        assert pred.pos in 'acnpquvx'
        if pred.pos == 'q':  # quantifier
            quantifier = True
            # RSTR is required, BODY is optional
            handle = _read_reference(attributes.pop('rstr'), vs, rs)
            args['rstr'] = handle
            if 'body' in attributes:
                handle = _read_reference(attributes.pop('body'), vs, rs)
                args['body'] = handle
        elif pred.pos == 'c' and pred.lemma not in (
                'vice+versa', ):  # conjunction
            # Either ARG1 (with optional ARG2), or L-INDEX and R-INDEX
            if 'arg1' in attributes:
                ref = _read_reference(attributes.pop('arg1'), vs, rs)
                args['arg1'] = ref
                if 'arg2' in attributes:
                    ref = _read_reference(attributes.pop('arg2'), vs, rs)
                    args['arg2'] = ref
            else:
                ref = _read_reference(attributes.pop('l-index'), vs, rs)
                args['l-index'] = ref
                ref = _read_reference(attributes.pop('r-index'), vs, rs)
                args['r-index'] = ref
            if 'l-hndl' in attributes:  # optional L/R-HNDL
                handle = _read_reference(attributes.pop('l-hndl'), vs, rs)
                args['l-hndl'] = handle
                handle = _read_reference(attributes.pop('r-hndl'), vs, rs)
                args['r-hndl'] = handle
        else:  # regular realpreds
            # ARG1 to ARG4, stopping at the first one that is missing
            for n in range(1, 5):
                role = 'arg' + str(n)
                if role not in attributes:
                    break
                ref = _read_reference(attributes.pop(role), vs, rs)
                args[role] = ref
    else:
        # Neither a GPred nor a RealPred
        assert False
    if quantifier:
        # ARG0 of a quantifier is read as a plain reference
        intrinsic = _read_reference(attributes.pop('arg0'), vs, rs)
        var = None
    else:
        intrinsic, var = _read_variable(attributes.pop('arg0'), vs, rs)
    # Every attribute must have been consumed by now
    assert not attributes, 'Invalid attributes for predicate {}: {}'.format(
        pred, attributes)
    return ElemPred(label=label,
                    pred=pred,
                    intrinsic=intrinsic,
                    carg=carg,
                    args=args,
                    cfrom=cfrom,
                    cto=cto), var
def test_Pred_repr(self):
    """
    Evaluating the 'official' string representation of Pred()
    should give back something equal to Pred()
    """
    rebuilt = eval(repr(Pred()))
    self.assertEqual(rebuilt, Pred())
def test_Pred_str(self):
    """
    The 'informal' string representation of a Pred should be 'predsort',
    the type name for predicates in Delph-in grammars
    """
    rendered = str(Pred())
    self.assertEqual(rendered, 'predsort')
def predsort():
    """
    Build a DictDmrs containing a single node whose predicate and sortinfo
    are both underspecified.

    :return: the new DictDmrs
    """
    graph = DictDmrs()
    # Pred() and Sortinfo() are the underspecified predicate and sortinfo
    wildcard = Node(pred=Pred(), sortinfo=Sortinfo())
    graph.add_node(wildcard)
    return graph
# Preds to filter out: the defaults, and also pronouns
extended_filter = DEFAULT_FILTER | {GPred('pron')}

# Pairs of preds: replace the first pred with the second
rename = [
    (RealPred('forwards', 'p'), RealPred('forward', 'p', 'dir')),
]

# Replace a pair of nodes with a single node.
# Each rule is (first pred, link label, second pred, replacement pred):
# the first pred linked to the second pred is replaced by the last pred
shrink = [
    ('_left_a_1', 'ARG1/EQ', 'place_n', '_left_n_1'),
    ('_right_a_1', 'ARG1/EQ', 'place_n', '_right_n_1'),
    ('loc_nonsp', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
    ('loc_nonsp', 'ARG2/NEQ', '_right_n_1', '_right_p_dir'),
    ('_to_p', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
    ('_to_p', 'ARG2/NEQ', '_right_n_1', '_right_p_dir'),
]

# Convert the strings above into Pred and LinkLabel objects
shrink = [
    (
        Pred.from_string(first),
        LinkLabel.from_string(link),
        Pred.from_string(second),
        Pred.from_string(replacement),
    )
    for first, link, second, replacement in shrink
]


def simplify(dmrs):
    """
    Simplify an input DMRS to a form that can be converted to robot commands.
    The DMRS is modified in place.
    """
    # Remove unnecessary GPreds (defaults, plus pronouns)
    gpred_filtering(dmrs, extended_filter)

    # Remove quantifiers; walk over a copy, since nodes are removed on the way
    snapshot = copy(dmrs.nodes)
    for candidate in snapshot:
        ident = candidate.nodeid
        if not dmrs.is_quantifier(ident):
            continue
        dmrs.remove_node(ident)
def test_Node_underspecification(self):
    """
    Check Node.is_more_specific and Node.is_less_specific for nodes whose
    pred, carg or sortinfo is missing, underspecified or fully specified.
    Comparing against something that is not a node raises a TypeError.
    """
    with self.assertRaises(TypeError):
        Node(pred='_the_q').is_more_specific(4)

    # Factories, so that every single check works on freshly built nodes
    def bare():
        return Node()

    def any_pred():
        return Node(pred=Pred())

    def abc_gpred():
        return Node(pred=GPred(name='abc'))

    def any_carg():
        return Node(carg='?')

    def abc_carg():
        return Node(carg='abc')

    def any_sortinfo():
        return Node(sortinfo=Sortinfo())

    def abc_sortinfo():
        return Node(sortinfo=EventSortinfo(sf='abc'))

    # (left, right, left.is_more_specific(right), left.is_less_specific(right))
    expectations = [
        # complete underspecification
        (bare, bare, False, False),
        # pred underspecification
        (any_pred, bare, False, True),
        (bare, any_pred, True, False),
        (any_pred, any_pred, False, False),
        (any_pred, abc_gpred, False, True),
        (abc_gpred, any_pred, True, False),
        # carg underspecification
        (any_carg, bare, False, True),
        (bare, any_carg, True, False),
        (any_carg, any_carg, False, False),
        (any_carg, abc_carg, False, True),
        (abc_carg, any_carg, True, False),
        # sortinfo underspecification
        (any_sortinfo, bare, False, True),
        (bare, any_sortinfo, True, False),
        (any_sortinfo, any_sortinfo, False, False),
        (any_sortinfo, abc_sortinfo, False, True),
        (abc_sortinfo, any_sortinfo, True, False),
        # mixed specification
        (any_pred, any_carg, False, False),
        (any_pred, any_sortinfo, False, False),
        (any_carg, any_sortinfo, False, False),
    ]
    for left, right, more, less in expectations:
        check = self.assertTrue if more else self.assertFalse
        check(left().is_more_specific(right()))
        check = self.assertTrue if less else self.assertFalse
        check(left().is_less_specific(right()))
# Also remove pronouns extended_filter = DEFAULT_FILTER | {GPred('pron')} # Replace the first pred with the second: rename = [(RealPred('forwards','p'), RealPred('forward','p','dir'))] # Replace a pair of nodes with a single node # (the first pred linked to the second pred, is replaced by the third pred) shrink = [('_left_a_1', 'ARG1/EQ', 'place_n', '_left_n_1'), ('_right_a_1', 'ARG1/EQ', 'place_n', '_right_n_1'), ('loc_nonsp', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'), ('loc_nonsp', 'ARG2/NEQ', '_right_n_1', '_right_p_dir'), ('_to_p', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'), ('_to_p', 'ARG2/NEQ', '_right_n_1', '_right_p_dir')] shrink = [(Pred.from_string(a), LinkLabel.from_string(b), Pred.from_string(c), Pred.from_string(d)) for a,b,c,d in shrink] def simplify(dmrs): """ Simplify an input DMRS to a form that can be converted to robot commands """ # Remove unnecessary GPreds (defaults, plus pronouns) gpred_filtering(dmrs, extended_filter) # Remove quantifiers for node in copy(dmrs.nodes): if dmrs.is_quantifier(node.nodeid): dmrs.remove_node(node.nodeid)