def tagRawSentenceHash(self, rawLine, DICT, word_dict):
        """Tag a raw sentence and return one hashed triple per token.

        The sentence is first given initial tags by ``initializeSentence``;
        each token is then passed through ``self.findFiredNode`` to decide
        which tag is reported.

        Args:
            rawLine: Raw sentence text, forwarded to ``initializeSentence``.
            DICT: Forwarded unchanged to ``initializeSentence``.
            word_dict: Container queried with ``ct.get`` using the
                murmurhash3 hash of each word.

        Returns:
            A list of ``(word_hash, tag_hash, word_cat)`` tuples, one per
            token. When the word lookup fails, ``word_hash`` and ``word_cat``
            are both 0.
        """
        line = initializeSentence(DICT, rawLine)

        sen = []
        wordTags = line.split()

        for i, wordTag in enumerate(wordTags):
            fwObject = FWObject.getFWObject(wordTags, i)
            word, tag = getWordTag(wordTag)
            node = self.findFiredNode(fwObject)

            # Only hash the word once and block out-of-lexicon words.
            word_hash = murmurhash3_32(word, seed=0)
            try:
                word_cat = ct.get(word_hash, word_dict)
            except Exception:
                # Best-effort lookup: any ordinary failure means "unknown
                # word". Unlike a bare ``except``, this lets
                # KeyboardInterrupt / SystemExit propagate.
                word_cat = 0
                word_hash = 0

            # A rule below the root fired: report its conclusion. Otherwise
            # (fired at root) keep the initialized tag.
            if node.depth > 0:
                tag = node.conclusion

            sen.append((word_hash, murmurhash3_32(tag, seed=0), word_cat))

        return sen
 def tagRawSentence(self, rawLine, DICT, word_dict, pos_dict):
     """Tag a raw sentence and return one index triple per token.

     Args:
         rawLine: Raw sentence text, forwarded to ``initializeSentence``.
         DICT: Forwarded unchanged to ``initializeSentence``.
         word_dict: Queried with the lower-cased word; a hit is itself a
             container read for its ``"index"`` and ``"domain"`` entries.
         pos_dict: Queried with the lower-cased tag.

     Returns:
         A list of ``(word_index, pos_value, domain)`` tuples, one per
         token. A word missing from ``word_dict`` yields 0 for the first
         and last element; a tag missing from ``pos_dict`` yields 0 for
         the middle element.
     """
     line = initializeSentence(DICT, rawLine)
     sen = []
     wordTags = line.split()
     for i, wordTag in enumerate(wordTags):
         fwObject = FWObject.getFWObject(wordTags, i)
         word, tag = getWordTag(wordTag)
         node = self.findFiredNode(fwObject)

         # A rule below the root fired: use its conclusion. Otherwise
         # (fired at root) keep the initialized tag.
         if node.depth > 0:
             tag = node.conclusion

         current_dict = ct.get(word.lower(), word_dict, default=0)
         # Always fall back to 0 for an unknown tag, so every path treats a
         # missing tag the same way instead of raising on just one of them.
         pos_value = ct.get(tag.lower(), pos_dict, default=0)

         if current_dict == 0:
             sen.append((0, pos_value, 0))
         else:
             sen.append((ct.get("index", current_dict),
                         pos_value,
                         ct.get("domain", current_dict)))
     return sen
Beispiel #3
0
	def tagRawSentenceHash(self, rawLine, DICT, word_dict):
		"""Tag a raw sentence and return one hashed triple per token.

		Each token's final tag is the conclusion of the node returned by
		``self.findFiredNode`` when that node is below the root, otherwise
		the tag assigned by ``initializeSentence``. The word is hashed
		together with its tag as ``"word/tag"``.

		Args:
			rawLine: Raw sentence text, forwarded to ``initializeSentence``.
			DICT: Forwarded unchanged to ``initializeSentence``.
			word_dict: Container indexed by the murmurhash3 hash of
				``"word/tag"``.

		Returns:
			A list of ``(word_hash, tag_hash, word_cat)`` tuples, one per
			token. When the lookup in ``word_dict`` fails, ``word_hash``
			and ``word_cat`` are both 0; ``tag_hash`` is always kept.
		"""
		line = initializeSentence(DICT, rawLine)

		sen = []
		wordTags = line.split()

		for i, wordTag in enumerate(wordTags):
			fwObject = FWObject.getFWObject(wordTags, i)
			word, tag = getWordTag(wordTag)
			node = self.findFiredNode(fwObject)

			# A rule below the root fired: its conclusion replaces the
			# initialized tag.
			if node.depth > 0:
				tag = node.conclusion

			# Hash the tag alone and the combined "word/tag" string.
			word = word + "/" + tag
			tag_hash = murmurhash3_32(tag, seed=0)
			word_hash = murmurhash3_32(word, seed=0)

			# Get semantic category.
			try:
				word_cat = word_dict[word_hash]
			except Exception:
				# Best-effort lookup: any ordinary failure means "unknown
				# word". Unlike a bare ``except``, this lets
				# KeyboardInterrupt / SystemExit propagate.
				word_cat = 0
				word_hash = 0

			sen.append((word_hash, tag_hash, word_cat))

		return sen
Beispiel #4
0
    def tagRawSentenceHash(self, rawLine, DICT, word_dict):
        """Tag a raw sentence and return one hashed triple per token.

        Each token's final tag is the conclusion of the node returned by
        ``self.findFiredNode`` when that node is below the root, otherwise
        the tag assigned by ``initializeSentence``. The word is hashed
        together with its tag as ``"word/tag"``.

        Args:
            rawLine: Raw sentence text, forwarded to ``initializeSentence``.
            DICT: Forwarded unchanged to ``initializeSentence``.
            word_dict: Container indexed by the murmurhash3 hash of
                ``"word/tag"``.

        Returns:
            A list of ``(word_hash, tag_hash, word_cat)`` tuples, one per
            token. When the lookup in ``word_dict`` fails, ``word_hash``
            and ``word_cat`` are both 0; ``tag_hash`` is always kept.
        """
        line = initializeSentence(DICT, rawLine)

        sen = []
        wordTags = line.split()

        for i, wordTag in enumerate(wordTags):
            fwObject = FWObject.getFWObject(wordTags, i)
            word, tag = getWordTag(wordTag)
            node = self.findFiredNode(fwObject)

            # A rule below the root fired: its conclusion replaces the
            # initialized tag.
            if node.depth > 0:
                tag = node.conclusion

            # Hash the tag alone and the combined "word/tag" string.
            word = word + "/" + tag
            tag_hash = murmurhash3_32(tag, seed=0)
            word_hash = murmurhash3_32(word, seed=0)

            # Get semantic category.
            try:
                word_cat = word_dict[word_hash]
            except Exception:
                # Best-effort lookup: any ordinary failure means "unknown
                # word". Unlike a bare ``except``, this lets
                # KeyboardInterrupt / SystemExit propagate.
                word_cat = 0
                word_hash = 0

            sen.append((word_hash, tag_hash, word_cat))

        return sen
    def tagRawSentenceGenSim(self, DICT, rawLine):
        """Tag a raw sentence and return its tokens as ``"word/tag"`` strings.

        Args:
            DICT: Forwarded unchanged to ``initializeSentence``.
            rawLine: Raw sentence text, forwarded to ``initializeSentence``.

        Returns:
            A list with one ``"word/tag"`` string per token. The tag is the
            fired node's conclusion when that node lies below the root, and
            the initialized tag otherwise.
        """
        tokens = initializeSentence(DICT, rawLine).split()
        tagged = []

        for position in range(len(tokens)):
            context = FWObject.getFWObject(tokens, position)
            surface, initial_tag = getWordTag(tokens[position])
            fired = self.findFiredNode(context)

            # Depth 0 means only the root fired, so the initialized tag stands.
            final_tag = fired.conclusion if fired.depth > 0 else initial_tag
            tagged.append(surface + "/" + final_tag)

        return tagged
Beispiel #6
0
	def tagRawSentenceGenSim(self, DICT, rawLine):
		"""Return the sentence's tokens as ``"word/tag"`` strings.

		Args:
			DICT: Forwarded unchanged to ``initializeSentence``.
			rawLine: Raw sentence text, forwarded to ``initializeSentence``.

		Returns:
			A list with one ``"word/tag"`` string per token, using the
			fired node's conclusion when the node is below the root and
			the initialized tag when the root itself fired.
		"""
		initialized = initializeSentence(DICT, rawLine)
		pairs = initialized.split()
		result = []

		for idx in range(len(pairs)):
			features = FWObject.getFWObject(pairs, idx)
			token, start_tag = getWordTag(pairs[idx])
			matched = self.findFiredNode(features)

			if matched.depth <= 0:
				# Fired at root: keep the initialized tag.
				chosen = start_tag
			else:
				chosen = matched.conclusion
			result.append(token + "/" + chosen)

		return result
Beispiel #7
0
    def constructSCRDRtreeFromRDRfile(self, rulesFilePath):
        """Build the rule tree under ``self.root`` from a rules file.

        The first line of the file is skipped. Every other line that is not
        blank and does not start with ``"cc"`` is read as
        ``<condition> : <conclusion>``; the number of leading tab characters
        is the node's depth. Relative to the previously added node, a deeper
        node becomes its ``exceptChild`` and a node at the same depth becomes
        its ``elseChild``. For a shallower node the ``father`` links are
        followed upwards until a node of that depth is reached, and the new
        node becomes that node's ``elseChild``.

        Args:
            rulesFilePath: Path of the UTF-8 encoded rules file. If it cannot
                be opened or read, the same path is retried under
                ``../c2xg/c2xg``.

        Raises:
            IOError / OSError: If the file cannot be read at either location.
        """
        self.root = Node(FWObject(False), "NN", None, None, None, [], 0)
        currentNode = self.root
        currentDepth = 0

        # ``with`` guarantees the handle is closed even if reading fails.
        try:
            with codecs.open(rulesFilePath, "r", encoding="utf-8") as rulesFile:
                lines = rulesFile.readlines()
        except (IOError, OSError):
            # Fall back to the location relative to the package directory.
            rulesFilePath = os.path.join("..", "c2xg", "c2xg", rulesFilePath)
            with codecs.open(rulesFilePath, "r", encoding="utf-8") as rulesFile:
                lines = rulesFile.readlines()

        # The first line is skipped: the root node was created above.
        for line in lines[1:]:
            # Depth is the count of leading tab characters.
            depth = len(line) - len(line.lstrip("\t"))

            line = line.strip()
            if not line or line.startswith("cc"):
                continue

            ruleParts = line.split(" : ", 1)
            condition = getCondition(ruleParts[0].strip())
            conclusion = getConcreteValue(ruleParts[1].strip())

            node = Node(condition, conclusion, None, None, None, [], depth)

            if depth > currentDepth:
                currentNode.exceptChild = node
            elif depth == currentDepth:
                currentNode.elseChild = node
            else:
                # Climb back up to the node sitting at the new node's depth.
                while currentNode.depth != depth:
                    currentNode = currentNode.father
                currentNode.elseChild = node

            node.father = currentNode
            currentNode = node
            currentDepth = depth
Beispiel #8
0
	def tagRawSentence(self, rawLine, DICT, word_dict, pos_dict):
		"""Tag a raw sentence and return one index triple per token.

		Args:
			rawLine: Raw sentence text, forwarded to ``initializeSentence``.
			DICT: Forwarded unchanged to ``initializeSentence``.
			word_dict: Queried with the lower-cased word; a hit is itself a
				container read for its ``"index"`` and ``"domain"`` entries.
			pos_dict: Queried with the lower-cased tag.

		Returns:
			A list of ``(word_index, pos_value, domain)`` tuples, one per
			token. A word missing from ``word_dict`` yields 0 for the first
			and last element; a tag missing from ``pos_dict`` yields 0 for
			the middle element.
		"""
		line = initializeSentence(DICT, rawLine)
		sen = []
		wordTags = line.split()
		for i, wordTag in enumerate(wordTags):
			fwObject = FWObject.getFWObject(wordTags, i)
			word, tag = getWordTag(wordTag)
			node = self.findFiredNode(fwObject)

			# A rule below the root fired: use its conclusion. Otherwise
			# (fired at root) keep the initialized tag.
			if node.depth > 0:
				tag = node.conclusion

			current_dict = ct.get(word.lower(), word_dict, default=0)
			# Always fall back to 0 for an unknown tag, so every path treats
			# a missing tag the same way instead of raising on just one.
			pos_value = ct.get(tag.lower(), pos_dict, default=0)

			if current_dict == 0:
				sen.append((0, pos_value, 0))
			else:
				sen.append((ct.get("index", current_dict), pos_value, ct.get("domain", current_dict)))
		return sen
def getCondition(strCondition):
    """Parse a rule condition string into an ``FWObject``.

    The string is split on ``" and "``. For each part, the feature name is
    the text between the first ``"."`` and the first ``" "``, and the value
    comes from ``getConcreteValue``. Recognised feature names are written to
    fixed slots of ``condition.context``; unrecognised names are ignored.

    Args:
        strCondition: Condition text made of parts joined by ``" and "``.

    Returns:
        The populated ``FWObject``; its ``notNoneIds`` gets the indices
        (0-12) of every context slot that is not ``None``.
    """
    # Feature name -> slot index in ``condition.context``.
    slot_of = {
        "prevWord2": 0,
        "prevTag2": 1,
        "prevWord1": 2,
        "prevTag1": 3,
        "word": 4,
        "tag": 5,
        "nextWord1": 6,
        "nextTag1": 7,
        "nextWord2": 8,
        "nextTag2": 9,
        "suffixL2": 10,
        "suffixL3": 11,
        "suffixL4": 12,
    }

    condition = FWObject(False)
    for clause in strCondition.split(" and "):
        clause = clause.strip()
        feature = clause[clause.find(".") + 1:clause.find(" ")]
        value = getConcreteValue(clause)

        slot = slot_of.get(feature)
        if slot is not None:
            condition.context[slot] = value

    for slot in range(13):
        if condition.context[slot] is not None:
            condition.notNoneIds.append(slot)
    return condition
Beispiel #10
0
def getCondition(strCondition):
    """Build an ``FWObject`` from the textual condition of a rule.

    Each ``" and "``-separated part contributes one feature: its name is
    the text between the first ``"."`` and the first ``" "``, its value is
    whatever ``getConcreteValue`` returns for that part. Known names fill a
    fixed position in ``condition.context``; unknown names are skipped.

    Args:
        strCondition: Condition text made of parts joined by ``" and "``.

    Returns:
        The populated ``FWObject``, with ``notNoneIds`` listing the indices
        (0-12) of the context positions that are not ``None``.
    """
    # Position in this tuple == position in ``condition.context``.
    feature_order = (
        "prevWord2", "prevTag2",
        "prevWord1", "prevTag1",
        "word", "tag",
        "nextWord1", "nextTag1",
        "nextWord2", "nextTag2",
        "suffixL2", "suffixL3", "suffixL4",
    )

    condition = FWObject(False)
    for part in strCondition.split(" and "):
        part = part.strip()
        name_start = part.find(".") + 1
        name_end = part.find(" ")
        name = part[name_start:name_end]
        value = getConcreteValue(part)

        if name in feature_order:
            condition.context[feature_order.index(name)] = value

    for index in range(13):
        if condition.context[index] is not None:
            condition.notNoneIds.append(index)
    return condition