def find_keywords(self):
    """Pick a keyword for every node of every tree in ``self.trees``.

    Roots are handled first and share a single set of already-chosen
    keywords.  Each tree is then walked breadth-first starting from the
    root's children; the descendants of one root share their own set,
    which starts as a copy of the root-level set.

    For each node the keywords attached to the papers in ``node.id_set``
    are read through ``self.database.executeSQL`` and appended to
    ``node.keyword_list``.  ``calc.CalcFatherv2`` is then called with that
    list and the set of already-chosen keywords (presumably so it can
    avoid reusing one -- confirm against ``calc``); its result is stored
    in ``node.keyword`` and added to the set.  One progress line per node
    is printed to stdout.

    A node whose ``id_set`` is empty is not queried at all, because
    ``pid in ()`` is not valid SQL; it is labelled from whatever its
    ``keyword_list`` already holds.
    """
    # Local import: a plain FIFO is enough for a single-threaded walk, and
    # it keeps this method independent of the file's import block.
    from collections import deque

    keyword_sql = "select content from keyword inner join keyword_paper_relation on keyword.id = kid where pid in (%s)"

    def _label_node(node, used_keywords):
        # Load the node's keywords, choose one, record it, report it.
        pid_str = ",".join(str(pid) for pid in node.id_set)
        if pid_str:
            # Only query when there is at least one paper id to look up.
            for row in self.database.executeSQL(keyword_sql % pid_str):
                node.keyword_list.append(row[0])
        node.keyword = calc.CalcFatherv2(node.keyword_list, used_keywords)
        used_keywords.add(node.keyword)
        # Single-argument print calls produce the same output on Python 2
        # and Python 3.
        if node.keyword == '':
            print('%s %s' % ('@@@@', node.keyword_list))
        else:
            print('%s %s %s' % (node.keyword, node.level, node.group_id))

    root_set = set()
    for root in self.trees:
        _label_node(root, root_set)

    for root in self.trees:
        # Every tree starts from its own copy of the root-level keywords,
        # so choices made in one tree do not affect another tree.
        is_used_keywords = set(root_set)
        pending = deque(root.children_nodes)
        while pending:
            node = pending.popleft()
            _label_node(node, is_used_keywords)
            pending.extend(node.children_nodes)

    print('find keywords done.')
# NOTE(review): the original indentation of this fragment was lost; the
# nesting below is reconstructed from data dependencies (kid_set and
# content_list only exist when the query returned rows) -- verify.

# Load every (keyword id, keyword text) pair attached to the papers of
# this (level, cate) bucket.
pid_set = paper_children[level][cate]
pid_str = ','.join(pid_set)
# NOTE(review): an empty pid_set produces "pid in ()" -- confirm callers
# never reach this point with an empty bucket.
sql = ('select kid, content from keyword_paper_relation '
       'inner join keyword on kid = keyword.id '
       'where pid in (' + pid_str + ')')
cursor.execute(sql)
sql_result = cursor.fetchall()
if len(sql_result) > 0:
    kid_set = set(str(record[0]) for record in sql_result)
    content_list = list(record[1] for record in sql_result)

    content = calc.CalcFatherv2(content_list, is_used)
    is_used.add(content)

    # Look up the id of the chosen keyword text so the id is recorded in
    # is_used alongside the text.
    # NOTE(review): content is spliced into the SQL unescaped; a keyword
    # containing a single quote will break this statement.
    sql = "select id from keyword where content = '" + content + "'"
    cursor.execute(sql)
    row = cursor.fetchone()
    if row is not None:
        is_used.add(str(row[0]))

    # Drop everything already recorded in is_used from this bucket's ids.
    for kid in is_used:
        kid_set.discard(kid)
try: sql = 'insert into keyword_hierarchy_relation(group_id, father, child) values(%d, %d, %d)' % ( int(group), father, kid) executeSQL(sql) except Exception, e: if str(e).find('1062') != -1: #duplicate key sql = 'update keyword_hierarchy_relation set father = %d where group_id = %d and child = %d' % ( father, int(group), kid) executeSQL(sql) else: print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@' print sql print e print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@' content = calc.CalcFatherv2(content_list, is_selected) sql = 'select id from keyword where content = "%s"' % content try: result = executeSQL(sql) is_selected.add(content) except Exception, e: print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@' print sql print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@' print "%s %d %d" % (content, father, level) # if content == "": # print "[pids:%s content:%s]" % (pid_set, content_list) if level >= 1 and content != "":