def PrepareJSandJM(nodes):
    """Mark both ends of the token list ``nodes`` with the JS/JM features.

    The list is modified in place:

    * An empty-text node carrying ``FeatureID_JS`` and ``FeatureID_JS2`` is
      inserted at index 0.
    * ``FeatureID_JS2`` is applied to the original head and then, walking
      forward, to every node up to and including the first one that lacks
      ``FeatureID_SYM``. A node without a successor is never visited by
      this walk.
    * If the tail has no ``FeatureID_SYM`` and its text is not one of the
      closing marks listed below, an empty-text node with
      ``FeatureID_punc`` is appended; both of its offsets equal the old
      tail's ``EndOffset``.
    * The (possibly new) tail receives ``FeatureID_JM`` and
      ``FeatureID_JM2``; ``FeatureID_JM2`` is then applied walking backward
      up to and including the first node that lacks ``FeatureID_SYM``.
      A node without a predecessor is never visited by this walk.

    Args:
        nodes: linked list exposing ``head``, ``tail``, ``insert`` and
            ``append``. ``head`` is dereferenced unconditionally, so the
            list must already hold at least one node.
    """
    # The original first token always gets JS2, even when it is the only
    # token (the forward walk below skips a node that has no successor).
    nodes.head.ApplyFeature(utils.FeatureID_JS2)

    start_marker = Tokenization.SentenceNode('')
    for feature_id in (utils.FeatureID_JS, utils.FeatureID_JS2):
        start_marker.ApplyFeature(feature_id)
    nodes.insert(start_marker, 0)

    # Forward walk, starting at the node right after the inserted marker.
    cursor = nodes.head.next
    while cursor.next:
        # Decide before tagging, so the test sees the untouched feature set.
        is_symbol = utils.FeatureID_SYM in cursor.features
        cursor.ApplyFeature(utils.FeatureID_JS2)
        if not is_symbol:
            break
        cursor = cursor.next

    closing_marks = {".", "?", "!", ";", "...", ":", "。"}
    last = nodes.tail
    if not (utils.FeatureID_SYM in last.features or last.text in closing_marks):
        # No closing punctuation present: append a zero-width one.
        end_marker = Tokenization.SentenceNode('')
        end_marker.StartOffset = end_marker.EndOffset = last.EndOffset
        end_marker.ApplyFeature(utils.FeatureID_punc)
        nodes.append(end_marker)

    # Re-read the tail: it may be the node appended just above.
    last = nodes.tail
    last.ApplyFeature(utils.FeatureID_JM)
    last.ApplyFeature(utils.FeatureID_JM2)

    # Backward walk; the first node without SYM is the last one tagged JM2.
    cursor = last.prev
    while cursor.prev:
        is_symbol = utils.FeatureID_SYM in cursor.features
        cursor.ApplyFeature(utils.FeatureID_JM2)
        if not is_symbol:
            break
        cursor = cursor.prev
def test_LogicNotOr(self):
    """Logic Not/Or

    Checks rules that mix ``!`` with ``|`` (and with space-separated
    terms) against a single token whose text is changed between checks.
    Every check reuses the same token list and the same rule-token list.
    """
    node = Tokenization.SentenceNode('d')
    strtokenlist = Tokenization.SentenceLinkedList()
    strtokenlist.append(node)
    RuleTokenList = [Rules.RuleToken()]

    def check(rule, expected):
        # Report the failing text/rule pair instead of a bare
        # "False is not true".
        matched = LogicMatchFeatures(strtokenlist, 0, rule, RuleTokenList, 0)
        message = "text=%r rule=%r" % (node.text, rule)
        if expected:
            self.assertTrue(matched, message)
        else:
            self.assertFalse(matched, message)

    # First check uses the text given to the constructor ('d').
    check("!c|d|e", False)

    # (token text, rule, expected match), in the original order.
    cases = [
        ("f", "!c|d|e", True),
        ("e", "!c d|e", True),
        ("f", "!c d|e", False),
        ("c", "c|d !d|e", True),
        ("d", "c|d !d|e", False),
        ("e", "c|e !d|f|g|e", False),
        ("e", "c|d !d|c", False),
        ("f", "c|d !d|e", False),
    ]
    for text, rule, expected in cases:
        node.text = text
        check(rule, expected)
def test_LogicOr(self):
    """Logic Or

    A token with text 'being' is expected to match the rule
    "being|getting".
    """
    token = Tokenization.SentenceNode('being')
    sentence = Tokenization.SentenceLinkedList()
    sentence.append(token)

    rule_tokens = [Rules.RuleToken()]
    matched = LogicMatchFeatures(sentence, 0, "being|getting", rule_tokens, 0)
    self.assertTrue(matched)
def test_simple(self):
    """exact match

    An empty-text token that carries the 'NN' feature is expected to
    match the rule "NN"; no rule-token list is supplied (``None``).
    """
    token = Tokenization.SentenceNode('')
    nn_feature = FeatureOntology.GetFeatureID('NN')
    token.features.add(nn_feature)

    sentence = Tokenization.SentenceLinkedList()
    sentence.append(token)

    outcome = LogicMatchFeatures(sentence, 0, "NN", None, 0)
    self.assertTrue(outcome)
def test_ApplyLexicon(self):
    """ApplyLexicon tags '0' as CD and, after loading the punctuation
    lexicon, tags ':' as SYM without marking it OOV.
    """
    node = Tokenization.SentenceNode('0')
    ApplyLexicon(node)
    # assertIn/assertNotIn print the feature set on failure, unlike
    # assertTrue(x in y).
    self.assertIn(GetFeatureID('CD'), node.features)

    LoadLexicon(dir_path + '/../../../fsa/X/LexX-ChinesePunctuate.txt')

    # NOTE(review): both entries are the same ':' character here, so the
    # check runs twice on identical input; one of them was presumably
    # meant to be a different (e.g. full-width) colon -- confirm.
    for symbol in (':', ':'):
        node = Tokenization.SentenceNode(symbol)
        ApplyLexicon(node)
        self.assertIn(utils.FeatureID_SYM, node.features)
        self.assertNotIn(utils.FeatureID_OOV, node.features)
def test_LogicAndOr(self):
    """Logic And/Or

    The rule "c|d c" is expected to reject a token with text 'd' and to
    accept the same token once its text is changed to 'c'.
    """
    token = Tokenization.SentenceNode('d')
    sentence = Tokenization.SentenceLinkedList()
    sentence.append(token)
    rule_tokens = [Rules.RuleToken()]
    rule = "c|d c"

    before = LogicMatchFeatures(sentence, 0, rule, rule_tokens, 0)
    self.assertFalse(before)

    token.text = "c"
    after = LogicMatchFeatures(sentence, 0, rule, rule_tokens, 0)
    self.assertTrue(after)
def test_LogicAnd(self):
    """Logic And

    For a token with text 'c', the rule "c d" is expected to fail and the
    rule "c c" is expected to succeed.
    """
    token = Tokenization.SentenceNode("c")
    sentence = Tokenization.SentenceLinkedList()
    sentence.append(token)
    rule_tokens = [Rules.RuleToken()]

    # (rule, expected match), checked in the original order.
    for rule, expected in (("c d", False), ("c c", True)):
        matched = LogicMatchFeatures(sentence, 0, rule, rule_tokens, 0)
        if expected:
            self.assertTrue(matched)
        else:
            self.assertFalse(matched)
def test_And(self):
    """The rule "NN percent" is expected to match only once the token
    carries both the 'NN' and the 'percent' features.
    """
    rule = "NN percent"
    token = Tokenization.SentenceNode("abc")
    token.features.add(FeatureOntology.GetFeatureID('NN'))

    sentence = Tokenization.SentenceLinkedList()
    sentence.append(token)

    # Only 'NN' is present so far.
    with_nn_only = LogicMatchFeatures(sentence, 0, rule, [Rules.RuleToken()], 0)
    self.assertFalse(with_nn_only)

    # A fresh rule-token list is built for each call, as before.
    token.features.add(FeatureOntology.GetFeatureID('percent'))
    with_both = LogicMatchFeatures(sentence, 0, rule, [Rules.RuleToken()], 0)
    self.assertTrue(with_both)