예제 #1
0
파일: papers.py 프로젝트: xiaojimao18/BGLL
    def find_keywords(self):
        """Assign a keyword to every node of every tree in ``self.trees``.

        The work happens in two passes:

        1. Each root gets a keyword; all roots share one set of already-used
           keywords, so the set grows as roots are processed.
        2. Each root's descendants are visited breadth-first. Every tree
           starts from its own copy of the root-level set, so keywords used
           inside one tree do not affect the choices made in another tree.

        For each node, the keyword contents returned by the database for the
        ids in ``node.id_set`` are appended to ``node.keyword_list``, and the
        value returned by ``calc.CalcFatherv2(keyword_list, used)`` is stored
        in ``node.keyword`` and added to the used set. Progress is printed to
        stdout. Returns ``None``.
        """
        # Local import so the method does not rely on a module-level import.
        from collections import deque

        def assign_keyword(node, used_keywords):
            """Load ``node``'s candidate keywords, pick one and record it."""
            pid_str = ",".join(str(pid) for pid in node.id_set)
            # An empty id set would render as ``pid in ()``, which MySQL
            # rejects as a syntax error, so the query is skipped then.
            # NOTE(review): the ids are interpolated into the SQL text;
            # this assumes they are trusted numeric ids - confirm.
            if pid_str:
                sql = "select content from keyword inner join keyword_paper_relation on keyword.id = kid where pid in (%s)" % pid_str
                for row in self.database.executeSQL(sql):
                    node.keyword_list.append(row[0])

            node.keyword = calc.CalcFatherv2(node.keyword_list, used_keywords)
            used_keywords.add(node.keyword)

            # Single-argument print calls emit the same text as the former
            # print statements and are valid on both Python 2 and Python 3.
            if node.keyword == '':
                print('@@@@ %s' % (node.keyword_list,))
            else:
                print('%s %s %s' % (node.keyword, node.level, node.group_id))

        # Pass 1: roots, sharing one set of used keywords.
        root_set = set()
        for root in self.trees:
            assign_keyword(root, root_set)

        # Pass 2: descendants of each root, breadth-first.
        for root in self.trees:
            is_used_keywords = set(root_set)  # per-tree copy
            pending = deque(root.children_nodes)
            while pending:
                node = pending.popleft()
                assign_keyword(node, is_used_keywords)
                pending.extend(node.children_nodes)

        print('find keywords done.')
예제 #2
0
		pid_set = paper_children[level][cate]
		pid_str = ','.join(pid_set)

		sql = 'select kid, content from keyword_paper_relation inner join keyword on kid = keyword.id where pid in (' + pid_str + ')'
		cursor.execute(sql)
		sql_result = cursor.fetchall()
		
		if len(sql_result) != 0:
			kid_set = set()
			content_list = list()

			for row in sql_result:
				kid_set.add(str(row[0]))
				content_list.append(row[1])
		
			content = calc.CalcFatherv2(content_list, is_used)
			is_used.add(content)

			# find the keyword id of the content
			sql = "select id from keyword where content = '" + content + "'"
			cursor.execute(sql)
			row = cursor.fetchone()

			if row is not None:
				is_used.add(str(row[0]))

				# remove the used keyword
				for kid in is_used:
					if kid in kid_set:
						kid_set.remove(kid)
예제 #3
0
파일: paper_2.py 프로젝트: xiaojimao18/BGLL
            try:
                sql = 'insert into keyword_hierarchy_relation(group_id, father, child) values(%d, %d, %d)' % (
                    int(group), father, kid)
                executeSQL(sql)
            except Exception, e:
                if str(e).find('1062') != -1:  #duplicate key
                    sql = 'update keyword_hierarchy_relation set father = %d where group_id = %d and child = %d' % (
                        father, int(group), kid)
                    executeSQL(sql)
                else:
                    print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
                    print sql
                    print e
                    print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'

    content = calc.CalcFatherv2(content_list, is_selected)

    sql = 'select id from keyword where content = "%s"' % content
    try:
        result = executeSQL(sql)
        is_selected.add(content)
    except Exception, e:
        print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
        print sql
        print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'

    print "%s %d %d" % (content, father, level)
    # if content == "":
    # print "[pids:%s content:%s]" % (pid_set, content_list)

    if level >= 1 and content != "":