def testRaiseError(self):
    # Building a RecognitionRuleBlock with 'invalid' as its third argument
    # is expected to raise RuleLoadError.
    thursday_rule = RecognitionRule(r'<Thursday~.+>', 'date', 'test')
    friday_rule = RecognitionRule(r'<Friday~.+>', 'date', 'test2')
    self.assertRaises(
        RuleLoadError,
        RecognitionRuleBlock,
        None,
        [],
        'invalid',
        [thursday_rule, friday_rule],
    )
def testDeliminateNumbers5(self):
    # With deliminate_numbers enabled, a pattern wrapping the whole
    # five-word number in NUM_START/NUM_END should apply successfully.
    pattern = r'NUM_START<two~.+><hundred~.+><and~.+><sixty~.+><eight~.+>NUM_END'
    number_rule = RecognitionRule(pattern, 'date', 'test', deliminate_numbers=True)
    words = ['these', 'are', 'the', 'first', 'two', 'hundred', 'and',
             'sixty', 'eight', 'balloons']
    # Every token gets its own fresh, empty set as third element
    tokens = [(word, 'POS', set()) for word in words]
    _, matched = number_rule.apply(tokens)
    self.assertTrue(matched)
def testMatch(self):
    # A single-token rule should mark only the matching token: the third
    # element of the 'Friday' tuple gains one entry, the others stay empty.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test')
    (sent, success) = rule.apply([
        ('the', 'POS', set()),
        ('plane', 'POS', set()),
        ('leaves', 'POS', set()),
        ('on', 'POS', set()),
        ('Friday', 'POS', set()),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1], 'actual result was ' + str(sent))
    self.assertTrue(success)
def testMatchCaseSensitive1(self):
    # With case_sensitive=True a lower-case 'wednesday' pattern must not
    # match the capitalised token, so nothing is marked and apply reports
    # failure.
    rule = RecognitionRule(r'<wednesday~.+>', 'date', 'test', case_sensitive=True)
    (sent, success) = rule.apply([
        ('the', 'POS', set()),
        ('plane', 'POS', set()),
        ('leaves', 'POS', set()),
        ('on', 'POS', set()),
        ('Wednesday', 'POS', set()),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 0], 'actual result was ' + str(sent))
    self.assertFalse(success)
def testMatch(self):
    # Applying a rule for 'Friday' should add exactly one entry to the
    # third element of the final token and leave the rest untouched.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test')
    words = ['the', 'plane', 'leaves', 'on', 'Friday']
    # Each token carries its own fresh, empty set
    (sent, success) = rule.apply([(word, 'POS', set()) for word in words])
    # assertEqual is used because the assertEquals alias is deprecated and
    # no longer exists in unittest from Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1], 'actual result was ' + str(sent))
    self.assertTrue(success)
def testNegGuard1(self):
    # The guard '!<plane~.+>' is expected to block the rule because the
    # sentence contains 'plane': no token is marked and apply reports
    # failure.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test', guards=[r'!<plane~.+>'])
    (sent, success) = rule.apply([
        ('the', 'POS', set()),
        ('plane', 'POS', set()),
        ('leaves', 'POS', set()),
        ('on', 'POS', set()),
        ('Friday', 'POS', set()),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 0], 'actual result was ' + str(sent))
    self.assertFalse(success)
def testDeliminateNumbers2(self):
    # With deliminate_numbers enabled, the hyphenated number 'twenty-one'
    # should be matched by a NUM_START ... NUM_END pattern.
    number_rule = RecognitionRule(
        r'NUM_START<twenty-one~.+>NUM_END',
        'date',
        'test',
        deliminate_numbers=True,
    )
    tokens = [(word, 'POS', set()) for word in ('there', 'are', 'twenty-one', 'balloons')]
    _, matched = number_rule.apply(tokens)
    self.assertTrue(matched)
def testPosBefore1(self):
    # The before-guard '<last~.+>$' is expected to be satisfied because
    # 'last' immediately precedes 'Friday', so the rule marks 'Friday'.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test', before_guards=[r'<last~.+>$'])
    (sent, success) = rule.apply([
        ('the', 'POS', set()),
        ('plane', 'POS', set()),
        ('left', 'POS', set()),
        ('last', 'POS', set()),
        ('Friday', 'POS', set()),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1], 'actual result was ' + str(sent))
    self.assertTrue(success)
def testPosAfter1(self):
    # The after-guard '^<for~.+>' is expected to be satisfied because
    # 'for' immediately follows 'Friday', so only 'Friday' is marked.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test', after_guards=[r'^<for~.+>'])
    (sent, success) = rule.apply([
        ('the', 'POS', set()),
        ('plane', 'POS', set()),
        ('leaves', 'POS', set()),
        ('on', 'POS', set()),
        ('Friday', 'POS', set()),
        ('for', 'POS', set()),
        ('Atlanta', 'POS', set()),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1, 0, 0], 'actual result was ' + str(sent))
    self.assertTrue(success)
def testMatchCaseSensitive1(self):
    # A case-sensitive 'wednesday' rule must leave the capitalised
    # 'Wednesday' token unmarked and report failure.
    rule = RecognitionRule(r'<wednesday~.+>', 'date', 'test', case_sensitive=True)
    words = ['the', 'plane', 'leaves', 'on', 'Wednesday']
    # Each token carries its own fresh, empty set
    (sent, success) = rule.apply([(word, 'POS', set()) for word in words])
    # assertEqual is used because the assertEquals alias is deprecated and
    # no longer exists in unittest from Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 0], 'actual result was ' + str(sent))
    self.assertFalse(success)
def testMatchMultiMiddle(self):
    # A two-token pattern in the middle of the sentence should mark both
    # 'Friday' and 'afternoon', and the third elements of those two tokens
    # should compare equal.
    rule = RecognitionRule(r'<Friday~.+><afternoon~.+>', 'time', 'test')
    (sent, success) = rule.apply([
        ('the', 'POS', set()),
        ('plane', 'POS', set()),
        ('leaves', 'POS', set()),
        ('on', 'POS', set()),
        ('Friday', 'POS', set()),
        ('afternoon', 'POS', set()),
        ('for', 'POS', set()),
        ('Atlanta', 'POS', set()),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1, 1, 0, 0], 'actual result was ' + str(sent))
    self.assertEqual(sent[4][2], sent[5][2])
    self.assertTrue(success)
def testNegGuard1(self):
    # With the guard '!<plane~.+>' and 'plane' present in the sentence the
    # rule is expected not to fire: all third elements stay empty.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test', guards=[r'!<plane~.+>'])
    words = ['the', 'plane', 'leaves', 'on', 'Friday']
    # Each token carries its own fresh, empty set
    (sent, success) = rule.apply([(word, 'POS', set()) for word in words])
    # assertEqual is used because the assertEquals alias is deprecated and
    # no longer exists in unittest from Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 0], 'actual result was ' + str(sent))
    self.assertFalse(success)
def testDeliminateNumbers4(self):
    # With deliminate_numbers enabled, an ordinal ('first') directly
    # followed by a cardinal ('two') should be matched by adjacent
    # NUM_ORD_START/NUM_ORD_END and NUM_START/NUM_END groups.
    pattern = r'NUM_ORD_START<first~.+>NUM_ORD_ENDNUM_START<two~.+>NUM_END'
    number_rule = RecognitionRule(pattern, 'date', 'test', deliminate_numbers=True)
    words = ('these', 'are', 'the', 'first', 'two', 'balloons')
    # Every token gets its own fresh, empty set as third element
    tokens = [(word, 'POS', set()) for word in words]
    _, matched = number_rule.apply(tokens)
    self.assertTrue(matched)
def testPosBefore1(self):
    # 'last' directly before 'Friday' is expected to satisfy the
    # before-guard '<last~.+>$', so only 'Friday' is marked.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test', before_guards=[r'<last~.+>$'])
    words = ['the', 'plane', 'left', 'last', 'Friday']
    # Each token carries its own fresh, empty set
    (sent, success) = rule.apply([(word, 'POS', set()) for word in words])
    # assertEqual is used because the assertEquals alias is deprecated and
    # no longer exists in unittest from Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1], 'actual result was ' + str(sent))
    self.assertTrue(success)
def testMatchMultiMiddle(self):
    # The two-token pattern should mark 'Friday' and 'afternoon' (indices
    # 4 and 5), whose third elements should compare equal, and nothing
    # else.
    rule = RecognitionRule(r'<Friday~.+><afternoon~.+>', 'time', 'test')
    words = ['the', 'plane', 'leaves', 'on', 'Friday', 'afternoon', 'for', 'Atlanta']
    # Each token carries its own fresh, empty set
    (sent, success) = rule.apply([(word, 'POS', set()) for word in words])
    # assertEqual is used because the assertEquals alias is deprecated and
    # no longer exists in unittest from Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1, 1, 0, 0], 'actual result was ' + str(sent))
    self.assertEqual(sent[4][2], sent[5][2])
    self.assertTrue(success)
def testPosAfter1(self):
    # 'for' directly after 'Friday' is expected to satisfy the after-guard
    # '^<for~.+>', so only 'Friday' (index 4) is marked.
    rule = RecognitionRule(r'<Friday~.+>', 'date', 'test', after_guards=[r'^<for~.+>'])
    words = ['the', 'plane', 'leaves', 'on', 'Friday', 'for', 'Atlanta']
    # Each token carries its own fresh, empty set
    (sent, success) = rule.apply([(word, 'POS', set()) for word in words])
    # assertEqual is used because the assertEquals alias is deprecated and
    # no longer exists in unittest from Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1, 0, 0], 'actual result was ' + str(sent))
    self.assertTrue(success)
def testApplyAll(self):
    # A block built with 'all' as its third argument is expected to apply
    # every rule it holds: both 'Thursday' and 'Friday' end up marked.
    rules = [
        RecognitionRule(r'<Thursday~.+>', 'date', 'test'),
        RecognitionRule(r'<Friday~.+>', 'date', 'test2'),
    ]
    b = RecognitionRuleBlock(None, [], 'all', rules)
    (sent, success) = b.apply([
        ('the', 'POS', set()),
        ('plane', 'POS', set()),
        ('leaves', 'POS', set()),
        ('on', 'POS', set()),
        ('Thursday', 'POS', set()),
        ('and', 'POS', set()),
        ('Friday', 'POS', set()),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual([len(s[2]) for s in sent], [0, 0, 0, 0, 1, 0, 1], 'actual result was ' + str(sent))
    self.assertTrue(success)
def testDeliminateNumbers5(self):
    # The five-word number 'two hundred and sixty eight' should be matched
    # as one NUM_START ... NUM_END group when deliminate_numbers is on.
    number_rule = RecognitionRule(
        r'NUM_START<two~.+><hundred~.+><and~.+><sixty~.+><eight~.+>NUM_END',
        'date',
        'test',
        deliminate_numbers=True,
    )
    sentence = 'these are the first two hundred and sixty eight balloons'
    # Splitting on whitespace yields the same ten words the test is about;
    # each token gets its own fresh, empty set
    tokens = [(word, 'POS', set()) for word in sentence.split()]
    _, matched = number_rule.apply(tokens)
    self.assertTrue(matched)
def _load_rule(self, filename, rulelines):
    """
    Load a 'simple' recognition rule

    The rule lines are parsed by self._parse_rule into a dictionary that maps
    each field name to the values given for it. The fields are validated and
    then used to construct a RecognitionRule.

    filename  -- used in error reports and as the default rule ID when no
                 'id' field is present
    rulelines -- the lines of the rule, passed unchanged to self._parse_rule

    Returns the constructed RecognitionRule.

    Raises RuleLoadError if a field is unknown, a compulsory field ('type',
    'match') is missing, a field occurs more often than allowed, a boolean
    field holds something other than true/false, or constructing the
    RecognitionRule raises re.error, SyntaxError or ValueError.
    """

    def exactly_one(values, label):
        # Compulsory fields must be given exactly once
        if len(values) != 1:
            raise RuleLoadError(filename, "There must be exactly 1 '" + label + "' field")
        return values[0]

    def optional_boolean(values, label, default):
        # Optional field accepting true or false (case-insensitive); an
        # empty value list leaves the default in place
        if len(values) > 1:
            raise RuleLoadError(filename, "Too many '" + label + "' fields")
        if len(values) == 0:
            return default
        text = values[0].lower()
        if text == 'true':
            return True
        if text == 'false':
            return False
        raise RuleLoadError(filename, label + " must be either 'True' or 'False'")

    # get key/value dictionaries
    fields = self._parse_rule(filename, rulelines)

    # Set defaults (named so as not to shadow the builtins type() and id())
    rule_type = None
    match = None
    rule_id = filename
    squelch = False
    guards = []
    before_guards = []
    after_guards = []
    after = []
    case_sensitive = False
    deliminate_numbers = False

    for key in fields:
        values = fields[key]

        # Only one 'Type' field allowed
        if key == 'type':
            rule_type = exactly_one(values, 'Type')

        # Only one 'Match' field allowed
        elif key == 'match':
            match = exactly_one(values, 'Match')

        # No more than one ID key allowed
        elif key == 'id':
            if len(values) == 1:
                rule_id = values[0]
            elif len(values) > 1:
                raise RuleLoadError(filename, "Too many 'ID' fields")

        # Optional boolean fields, all defaulting to False
        elif key == 'squelch':
            squelch = optional_boolean(values, 'Squelch', squelch)
        elif key == 'case-sensitive':
            case_sensitive = optional_boolean(values, 'Case-Sensitive', case_sensitive)
        elif key == 'deliminate-numbers':
            deliminate_numbers = optional_boolean(values, 'Deliminate-Numbers', deliminate_numbers)

        # set optional fields
        elif key == 'guard':
            guards = values
        elif key == 'after':
            after = values
        elif key == 'before-guard':
            before_guards = values
        elif key == 'after-guard':
            after_guards = values

        # error on unknown fields
        else:
            raise RuleLoadError(filename, "Unknown field '" + key + "'")

    if rule_type is None:
        raise RuleLoadError(filename, "'Type' is a compulsory field")

    if match is None:
        raise RuleLoadError(filename, "'Match' is a compulsory field")

    # Guard against any RE errors
    try:
        # Arguments are positional, so their order must stay as it is here
        return RecognitionRule(match, rule_type, rule_id, guards, after_guards, before_guards, after,
                               squelch, case_sensitive, deliminate_numbers)
    except re.error as e:
        raise RuleLoadError(filename, "Malformed regular expression: " + str(e))
    except (SyntaxError, ValueError) as e:
        raise RuleLoadError(filename, "Malformed Python expression: " + str(e))
def testDeliminateNumbers4(self):
    # An ordinal group followed directly by a cardinal group ('first two')
    # should apply successfully when deliminate_numbers is enabled.
    number_rule = RecognitionRule(
        r'NUM_ORD_START<first~.+>NUM_ORD_ENDNUM_START<two~.+>NUM_END',
        'date',
        'test',
        deliminate_numbers=True,
    )
    tokens = []
    for word in ['these', 'are', 'the', 'first', 'two', 'balloons']:
        # Each token gets its own fresh, empty set as third element
        tokens.append((word, 'POS', set()))
    _, matched = number_rule.apply(tokens)
    self.assertTrue(matched)