def tagRawSentenceHash(self, rawLine, DICT, word_dict):
    """Tag a raw sentence and return hashed (word, tag, category) triples.

    Args:
        rawLine: The raw sentence text; passed to ``initializeSentence``
            together with ``DICT``.
        DICT: Lexicon handed to ``initializeSentence`` (presumably used to
            assign initial tags -- confirm against that helper).
        word_dict: Lookup keyed by the murmurhash3 hash of a word, giving
            the word's category.

    Returns:
        A list with one ``(word_hash, tag_hash, word_cat)`` tuple per
        whitespace-separated token. Words whose hash is not found in
        ``word_dict`` are emitted with ``word_hash == 0`` and
        ``word_cat == 0``.
    """
    line = initializeSentence(DICT, rawLine)
    wordTags = line.split()
    sen = []

    for i, wordTag in enumerate(wordTags):
        fwObject = FWObject.getFWObject(wordTags, i)
        word, tag = getWordTag(wordTag)
        node = self.findFiredNode(fwObject)

        # Only hash the word once and block out-of-lexicon words.
        word_hash = murmurhash3_32(word, seed=0)
        try:
            word_cat = ct.get(word_hash, word_dict)
        except (KeyError, IndexError, TypeError):
            # Lookup failed: treat the word as out-of-lexicon. Only lookup
            # errors are caught so interrupts and real bugs still surface.
            word_cat = 0
            word_hash = 0

        # A node fired below the root overrides the initialized tag;
        # otherwise the initialized tag is kept.
        if node.depth > 0:
            tag = node.conclusion
        sen.append((word_hash, murmurhash3_32(tag, seed=0), word_cat))

    return sen
def tagRawSentence(self, rawLine, DICT, word_dict, pos_dict):
    """Tag a raw sentence and return (word index, tag index, domain) triples.

    Args:
        rawLine: The raw sentence text; passed to ``initializeSentence``
            together with ``DICT``.
        DICT: Lexicon handed to ``initializeSentence`` (presumably used to
            assign initial tags -- confirm against that helper).
        word_dict: Mapping from lower-cased word to a record that has
            ``"index"`` and ``"domain"`` entries.
        pos_dict: Mapping from lower-cased tag to its index.

    Returns:
        A list with one ``(word_index, tag_index, domain)`` tuple per
        whitespace-separated token. Words missing from ``word_dict`` get
        ``word_index == 0`` and ``domain == 0``; tags missing from
        ``pos_dict`` get ``tag_index == 0``.
    """
    line = initializeSentence(DICT, rawLine)
    wordTags = line.split()
    sen = []

    for i, wordTag in enumerate(wordTags):
        fwObject = FWObject.getFWObject(wordTags, i)
        word, tag = getWordTag(wordTag)
        node = self.findFiredNode(fwObject)

        # A node fired below the root overrides the initialized tag;
        # otherwise (fired at root) the initialized tag is kept.
        if node.depth > 0:
            tag = node.conclusion

        current_dict = ct.get(word.lower(), word_dict, default=0)
        if current_dict == 0:
            # Unknown word. The tag lookup uses default=0 here as well, so
            # an unknown tag maps to 0 on every path instead of raising on
            # just one of them.
            sen.append((0, ct.get(tag.lower(), pos_dict, default=0), 0))
        else:
            sen.append((
                ct.get("index", current_dict),
                ct.get(tag.lower(), pos_dict, default=0),
                ct.get("domain", current_dict),
            ))

    return sen
def tagRawSentenceHash(self, rawLine, DICT, word_dict):
    """Tag a raw sentence and return hashed (word/tag, tag, category) triples.

    Args:
        rawLine: The raw sentence text; passed to ``initializeSentence``
            together with ``DICT``.
        DICT: Lexicon handed to ``initializeSentence`` (presumably used to
            assign initial tags -- confirm against that helper).
        word_dict: Lookup keyed by the murmurhash3 hash of the string
            ``"word/tag"``, giving the semantic category.

    Returns:
        A list with one ``(word_hash, tag_hash, word_cat)`` tuple per
        whitespace-separated token, where ``word_hash`` is the hash of
        ``"word/tag"``. Entries not found in ``word_dict`` are emitted with
        ``word_hash == 0`` and ``word_cat == 0``.
    """
    line = initializeSentence(DICT, rawLine)
    wordTags = line.split()
    sen = []

    for i, wordTag in enumerate(wordTags):
        fwObject = FWObject.getFWObject(wordTags, i)
        word, tag = getWordTag(wordTag)
        node = self.findFiredNode(fwObject)

        # A node fired below the root overrides the initialized tag.
        if node.depth > 0:
            tag = node.conclusion

        # Hash the tag on its own and the combined "word/tag" string.
        word = word + "/" + tag
        tag_hash = murmurhash3_32(tag, seed=0)
        word_hash = murmurhash3_32(word, seed=0)

        # Get semantic category; unknown entries are zeroed out.
        try:
            word_cat = word_dict[word_hash]
        except (KeyError, IndexError, TypeError):
            # Only lookup errors are caught so interrupts and real bugs
            # still surface.
            word_cat = 0
            word_hash = 0

        sen.append((word_hash, tag_hash, word_cat))

    return sen
def tagRawSentenceGenSim(self, DICT, rawLine):
    """Tag a raw sentence and return its tokens as ``"word/tag"`` strings.

    Args:
        DICT: Lexicon handed to ``initializeSentence``.
        rawLine: The raw sentence text.

    Returns:
        A list with one ``"word/tag"`` string per whitespace-separated
        token of the initialized sentence.
    """
    tokens = initializeSentence(DICT, rawLine).split()
    tagged = []
    for idx, token in enumerate(tokens):
        context = FWObject.getFWObject(tokens, idx)
        word, initial_tag = getWordTag(token)
        fired = self.findFiredNode(context)
        # Fired at root (depth 0): keep the initialized tag.
        final_tag = fired.conclusion if fired.depth > 0 else initial_tag
        tagged.append(word + "/" + final_tag)
    return tagged
def tagRawSentenceGenSim(self, DICT, rawLine):
    """Return the sentence as a list of ``"word/tag"`` strings.

    The sentence is first initialized via ``initializeSentence(DICT,
    rawLine)``. For each token, the tag comes from the fired node's
    conclusion when that node lies below the root, and from the
    initialized tag otherwise.
    """
    initialized = initializeSentence(DICT, rawLine)
    pieces = initialized.split()
    output = []
    for position in range(len(pieces)):
        features = FWObject.getFWObject(pieces, position)
        token_word, token_tag = getWordTag(pieces[position])
        hit = self.findFiredNode(features)
        chosen = token_tag
        if hit.depth > 0:
            # A rule below the root fired: use its conclusion.
            chosen = hit.conclusion
        output.append(token_word + "/" + chosen)
    return output
def constructSCRDRtreeFromRDRfile(self, rulesFilePath):
    """Build the rule tree rooted at ``self.root`` from a rules file.

    The file is read as UTF-8. Its first line is skipped. Each remaining
    non-blank line that does not start with ``"cc"`` is parsed as
    ``condition : conclusion``, and its number of leading tab characters
    gives the node's depth in the tree.

    Args:
        rulesFilePath: Path to the rules file. If it cannot be opened, the
            same path is retried under ``../c2xg/c2xg/``.

    Raises:
        IOError/OSError: If the file cannot be opened at either location.
    """
    self.root = Node(FWObject(False), "NN", None, None, None, [], 0)
    currentNode = self.root
    currentDepth = 0

    # Read all lines up front; ``with`` guarantees the handle is closed.
    try:
        with codecs.open(rulesFilePath, "r", encoding="utf-8") as rulesFile:
            lines = rulesFile.readlines()
    except (IOError, OSError):
        # Fall back to the path relative to the package directory.
        rulesFilePath = os.path.join("..", "c2xg", "c2xg", rulesFilePath)
        with codecs.open(rulesFilePath, "r", encoding="utf-8") as rulesFile:
            lines = rulesFile.readlines()

    # The first line is skipped, as in the original index loop from 1.
    for rawRule in lines[1:]:
        # Depth is the count of leading tab characters.
        depth = len(rawRule) - len(rawRule.lstrip("\t"))

        line = rawRule.strip()
        if not line or line.startswith("cc"):
            continue

        parts = line.split(" : ", 1)
        condition = getCondition(parts[0].strip())
        conclusion = getConcreteValue(parts[1].strip())

        node = Node(condition, conclusion, None, None, None, [], depth)

        if depth > currentDepth:
            # Deeper than the previous rule: exception of that rule.
            currentNode.exceptChild = node
        elif depth == currentDepth:
            # Same depth: else-branch of the previous rule.
            currentNode.elseChild = node
        else:
            # Shallower: climb back to the node at this depth, then attach
            # as its else-branch.
            while currentNode.depth != depth:
                currentNode = currentNode.father
            currentNode.elseChild = node

        node.father = currentNode
        currentNode = node
        currentDepth = depth
def tagRawSentence(self, rawLine, DICT, word_dict, pos_dict):
    """Tag a raw sentence and return (word index, tag index, domain) triples.

    Args:
        rawLine: The raw sentence text; passed to ``initializeSentence``
            together with ``DICT``.
        DICT: Lexicon handed to ``initializeSentence`` (presumably used to
            assign initial tags -- confirm against that helper).
        word_dict: Mapping from lower-cased word to a record that has
            ``"index"`` and ``"domain"`` entries.
        pos_dict: Mapping from lower-cased tag to its index.

    Returns:
        A list with one ``(word_index, tag_index, domain)`` tuple per
        whitespace-separated token. Words missing from ``word_dict`` get
        ``word_index == 0`` and ``domain == 0``; tags missing from
        ``pos_dict`` get ``tag_index == 0``.
    """
    line = initializeSentence(DICT, rawLine)
    wordTags = line.split()
    sen = []

    for i, wordTag in enumerate(wordTags):
        fwObject = FWObject.getFWObject(wordTags, i)
        word, tag = getWordTag(wordTag)
        node = self.findFiredNode(fwObject)

        # Use the fired node's conclusion unless the root fired, in which
        # case the initialized tag is returned.
        if node.depth > 0:
            tag = node.conclusion

        current_dict = ct.get(word.lower(), word_dict, default=0)
        if current_dict == 0:
            # Unknown word. default=0 on the tag lookup keeps this path
            # consistent with the others instead of raising on an unknown
            # tag.
            sen.append((0, ct.get(tag.lower(), pos_dict, default=0), 0))
        else:
            sen.append((
                ct.get("index", current_dict),
                ct.get(tag.lower(), pos_dict, default=0),
                ct.get("domain", current_dict),
            ))

    return sen
def getCondition(strCondition):
    """Parse a rule condition string into an ``FWObject``.

    The string is split on ``" and "``. For each clause, the attribute
    name is the text between the first ``"."`` and the first space, and
    the value is obtained from ``getConcreteValue``. Recognized attribute
    names fill fixed slots of ``condition.context``; unrecognized names
    are ignored. The indices of all non-``None`` slots are then appended
    to ``condition.notNoneIds``.
    """
    slot_of = {
        "prevWord2": 0,
        "prevTag2": 1,
        "prevWord1": 2,
        "prevTag1": 3,
        "word": 4,
        "tag": 5,
        "nextWord1": 6,
        "nextTag1": 7,
        "nextWord2": 8,
        "nextTag2": 9,
        "suffixL2": 10,
        "suffixL3": 11,
        "suffixL4": 12,
    }

    condition = FWObject(False)
    for clause in strCondition.split(" and "):
        clause = clause.strip()
        start = clause.find(".") + 1
        stop = clause.find(" ")
        key = clause[start:stop]
        value = getConcreteValue(clause)
        slot = slot_of.get(key)
        if slot is not None:
            condition.context[slot] = value

    for slot in range(13):
        if condition.context[slot] is not None:
            condition.notNoneIds.append(slot)
    return condition
def getCondition(strCondition):
    """Build an ``FWObject`` describing the condition in ``strCondition``.

    Each ``" and "``-separated clause names an attribute (the text between
    the first ``"."`` and the first space) and carries a value extracted
    by ``getConcreteValue``. Known attributes are written to their
    position in ``condition.context``; anything else is skipped. Finally,
    every populated context position is recorded in
    ``condition.notNoneIds``.
    """
    # The position in this tuple is the attribute's index in the context.
    attribute_order = (
        "prevWord2", "prevTag2",
        "prevWord1", "prevTag1",
        "word", "tag",
        "nextWord1", "nextTag1",
        "nextWord2", "nextTag2",
        "suffixL2", "suffixL3", "suffixL4",
    )

    condition = FWObject(False)
    for raw_rule in strCondition.split(" and "):
        rule = raw_rule.strip()
        key = rule[rule.find(".") + 1:rule.find(" ")]
        value = getConcreteValue(rule)
        if key in attribute_order:
            condition.context[attribute_order.index(key)] = value

    for position in range(13):
        if condition.context[position] is None:
            continue
        condition.notNoneIds.append(position)
    return condition