def transDictToTagstring(self, content, tag_dict):
    """Render the labeled chunks found in ``content`` as one tag string.

    Chunks returned by ``self.getChunkFromTag`` whose flag is truthy are
    registered as keys of a ``DictionarySearcher`` and then searched for
    in ``content``. Each matched word yields one entry of the form
    ``word:N_tag,N_tag,...`` where ``N`` is the 1-based position of the
    match in the searcher's result list for that word and ``tag`` comes
    from ``tag_dict``. Only matches accepted by ``self.isValidChunk``
    are listed; a word with no accepted match still yields ``word:``.

    Args:
        content: Text handed to ``getChunkFromTag`` and ``searchAll``.
        tag_dict: Mapping indexed by ``str(match start)``. Each value is
            split on ``"-"``; the second part is the tag when the split
            gives exactly two parts, otherwise the tag is empty.

    Returns:
        The per-word entries joined by single spaces, or ``""`` when the
        search reports no matches.
    """
    # Collect the chunks first, then register the flagged ones as keys.
    labeled_chunks = self.getChunkFromTag(content, tag_dict)
    searcher = DictionarySearcher()
    for chunk, is_labeled, _chunk_tag in labeled_chunks:
        if is_labeled:
            searcher.addKey(chunk)

    # Only the match table is used; the second result element is ignored.
    hits, _ = searcher.searchAll(content, "utf-8", True, False)

    word_entries = []
    for key in hits:
        key_len = len(key)
        # NOTE(review): formatting the encoded key with %s presumably
        # targets Python 2 byte strings - confirm before running on Python 3.
        entry = "%s:" % (key.encode("utf-8"))
        separator = ""
        # The occurrence number advances for every match, accepted or not.
        for occurrence, start in enumerate(hits[key], 1):
            if not self.isValidChunk(tag_dict, int(start), key_len):
                continue
            pieces = tag_dict[str(start)].split("-")
            label = pieces[1] if len(pieces) == 2 else ""
            entry += separator + str(occurrence) + "_" + label
            separator = ","
        word_entries.append(entry)

    return " ".join(word_entries)
def transDictToTagstring(self, content, tag_dict):
    """Render the labeled chunks found in ``content`` as one tag string.

    Chunks returned by ``self.getChunkFromTag`` whose flag is truthy are
    registered as keys of a ``DictionarySearcher`` and then searched for
    in ``content``. Each matched word yields one entry of the form
    ``word:N_tag,N_tag,...`` where ``N`` is the 1-based position of the
    match in the searcher's result list for that word and ``tag`` comes
    from ``tag_dict``. Only matches accepted by ``self.isValidChunk``
    are listed; a word with no accepted match still yields ``word:``.

    Args:
        content: Text handed to ``getChunkFromTag`` and ``searchAll``.
        tag_dict: Mapping indexed by ``str(match start)``. Each value is
            split on ``"-"``; the second part is the tag when the split
            gives exactly two parts, otherwise the tag is empty.

    Returns:
        The per-word entries joined by single spaces, or ``""`` when the
        search reports no matches.
    """
    # Collect the chunks first, then register the flagged ones as keys.
    labeled_chunks = self.getChunkFromTag(content, tag_dict)
    searcher = DictionarySearcher()
    for chunk, is_labeled, _chunk_tag in labeled_chunks:
        if is_labeled:
            searcher.addKey(chunk)

    # Only the match table is used; the second result element is ignored.
    hits, _ = searcher.searchAll(content, "utf-8", True, False)

    word_entries = []
    for key in hits:
        key_len = len(key)
        # NOTE(review): formatting the encoded key with %s presumably
        # targets Python 2 byte strings - confirm before running on Python 3.
        entry = "%s:" % (key.encode("utf-8"))
        separator = ""
        # The occurrence number advances for every match, accepted or not.
        for occurrence, start in enumerate(hits[key], 1):
            if not self.isValidChunk(tag_dict, int(start), key_len):
                continue
            pieces = tag_dict[str(start)].split("-")
            label = pieces[1] if len(pieces) == 2 else ""
            entry += separator + str(occurrence) + "_" + label
            separator = ","
        word_entries.append(entry)

    return " ".join(word_entries)