Example #1
def PrepareJSandJM(nodes):
    # Mark the sentence start: apply JS2 to the original head, then insert a
    # virtual start node carrying JS/JS2 at position 0.
    nodes.head.ApplyFeature(utils.FeatureID_JS2)
    JSnode = Tokenization.SentenceNode('')
    JSnode.ApplyFeature(utils.FeatureID_JS)
    JSnode.ApplyFeature(utils.FeatureID_JS2)
    nodes.insert(JSnode, 0)

    # Propagate JS2 forward past leading symbols; the first non-symbol token
    # is the real JS2, and the scan stops there.
    p = nodes.head.next
    while p.next:
        if utils.FeatureID_SYM not in p.features:
            p.ApplyFeature(utils.FeatureID_JS2)
            break
        p.ApplyFeature(utils.FeatureID_JS2)
        p = p.next

    # If the sentence does not already end with a symbol or sentence-final
    # punctuation, append an empty punctuation node to serve as the end marker.
    PUNCSet = {".", "?", "!", ";", "...", ":", "。"}
    if utils.FeatureID_SYM not in nodes.tail.features and \
            nodes.tail.text not in PUNCSet:
        JMnode = Tokenization.SentenceNode('')
        JMnode.StartOffset = nodes.tail.EndOffset
        JMnode.EndOffset = nodes.tail.EndOffset
        JMnode.ApplyFeature(utils.FeatureID_punc)
        nodes.append(JMnode)
    nodes.tail.ApplyFeature(utils.FeatureID_JM)
    nodes.tail.ApplyFeature(utils.FeatureID_JM2)
    p = nodes.tail.prev
    while p.prev:
        if utils.FeatureID_SYM not in p.features:
            # the first token that is not a symbol: the real JM2
            p.ApplyFeature(utils.FeatureID_JM2)
            break
        p.ApplyFeature(utils.FeatureID_JM2)
        p = p.prev
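
A minimal driver sketch for PrepareJSandJM (not part of the original example; it only reuses the SentenceNode and SentenceLinkedList calls that appear in the tests below, and assumes ApplyFeature records feature IDs in node.features):

# Hypothetical usage, under the assumptions stated above.
tokens = Tokenization.SentenceLinkedList()
for text in ["hello", "world"]:
    tokens.append(Tokenization.SentenceNode(text))

PrepareJSandJM(tokens)

# The head should now be the inserted virtual start node and the tail the
# appended punctuation end marker.
assert utils.FeatureID_JS in tokens.head.features
assert utils.FeatureID_JM in tokens.tail.features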
Example #2
    def test_LogicNotOr(self):
        """Logic And/Or"""
        node =  Tokenization.SentenceNode('d')
        strtokenlist = Tokenization.SentenceLinkedList()
        strtokenlist.append(node)

        RuleTokenList = [Rules.RuleToken()]

        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "!c|d|e", RuleTokenList, 0))
        node.text = "f"
        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "!c|d|e", RuleTokenList, 0))
        node.text = "e"
        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "!c d|e", RuleTokenList, 0))
        node.text = "f"
        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "!c d|e", RuleTokenList, 0))
        node.text = "c"
        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "c|d !d|e", RuleTokenList, 0))
        node.text = "d"
        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "c|d !d|e", RuleTokenList, 0))
        node.text = "e"
        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "c|e !d|f|g|e", RuleTokenList, 0))
        node.text = "e"
        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "c|d !d|c", RuleTokenList, 0))
        node.text = "f"
        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "c|d !d|e", RuleTokenList, 0))
Example #3
    def test_LogicOr(self):
        """Logic Or"""
        node = Tokenization.SentenceNode('being')
        strtokenlist = Tokenization.SentenceLinkedList()
        strtokenlist.append(node)

        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "being|getting", [Rules.RuleToken()], 0))
Example #4
    def test_simple(self):
        """exact match"""
        node = Tokenization.SentenceNode('')
        node.features.add(FeatureOntology.GetFeatureID('NN'))
        strtokenlist = Tokenization.SentenceLinkedList()
        strtokenlist.append(node)

        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "NN", None, 0))
Example #5
    def test_ApplyLexicon(self):
        node = Tokenization.SentenceNode('0')
        ApplyLexicon(node)
        CDFeatureID = GetFeatureID('CD')
        self.assertTrue(CDFeatureID in node.features)

        LoadLexicon(dir_path + '/../../../fsa/X/LexX-ChinesePunctuate.txt')

        node = Tokenization.SentenceNode(':')
        ApplyLexicon(node)
        self.assertTrue(utils.FeatureID_SYM in node.features)
        self.assertFalse(utils.FeatureID_OOV in node.features)

        node = Tokenization.SentenceNode('：')  # full-width colon covered by the Chinese punctuation lexicon
        ApplyLexicon(node)
        self.assertTrue(utils.FeatureID_SYM in node.features)
        self.assertFalse(utils.FeatureID_OOV in node.features)
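
The test above suggests ApplyLexicon tags tokens covered by the loaded lexicons (CD for the digit, SYM for the punctuation) and that FeatureID_OOV is reserved for tokens the lexicons do not cover. A minimal sketch of that reading; the token and the expected OOV tagging are assumptions, not part of the original test:

node = Tokenization.SentenceNode('zzzunknownzzz')  # hypothetical out-of-vocabulary token
ApplyLexicon(node)
# Assumed behaviour: a token absent from every loaded lexicon is tagged OOV.
assert utils.FeatureID_OOV in node.features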
Example #6
    def test_LogicAndOr(self):
        """Logic And/Or"""
        node = Tokenization.SentenceNode('d')
        strtokenlist = Tokenization.SentenceLinkedList()
        strtokenlist.append(node)

        ruletokenlist = [Rules.RuleToken()]
        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "c|d c", ruletokenlist, 0))
        node.text = "c"
        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "c|d c", ruletokenlist, 0))
Example #7
    def test_LogicAnd(self):
        """Logic And"""
        node =  Tokenization.SentenceNode("c")
        strtokenlist = Tokenization.SentenceLinkedList()
        strtokenlist.append(node)

        ruletokenlist = [Rules.RuleToken()]

        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "c d", ruletokenlist, 0))
        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "c c", ruletokenlist, 0))
Example #8
    def test_And(self):
        node =  Tokenization.SentenceNode("abc")
        node.features.add(FeatureOntology.GetFeatureID('NN'))
        strtokenlist = Tokenization.SentenceLinkedList()
        strtokenlist.append(node)

        self.assertFalse(LogicMatchFeatures(strtokenlist, 0, "NN percent", [Rules.RuleToken()], 0))

        node.features.add(FeatureOntology.GetFeatureID('percent'))
        self.assertTrue(LogicMatchFeatures(strtokenlist, 0, "NN percent", [Rules.RuleToken()], 0))