Ejemplo n.º 1
0
def show_similarity(gid_x, gid_y):
    """Print the similarity between the novel nodes of two gids.

    Each gid is resolved through show_cluster_node(), then the resulting pair
    of nodes is handed to
    NovelSimilarityModule.novel_node_similarity_calculation(). The two values
    it returns are printed on separate lines: the similarity first, then the
    match list.

    Args:
        gid_x: gid of the first cluster.
        gid_y: gid of the second cluster.
    """
    # Resolve x before y so the per-node output appears in argument order.
    node_pair = [show_cluster_node(each_gid) for each_gid in (gid_x, gid_y)]

    calculator = NovelSimilarityModule()
    score, matches = calculator.novel_node_similarity_calculation(*node_pair)
    for value in (score, matches):
        print(value)
    def run(self):
        """Recompute the similarity edges for every gid to be processed.

        For each gid returned by process_gid_collection():

        1. delete_novelclusteredgeinfo(gid) is called on a fresh
           ClusterDBModule (presumably clearing previously stored edges --
           confirm against ClusterDBModule).
        2. The cluster node is loaded; gids without a node, or without any
           related gid, are skipped.
        3. A virtual node is generated for the cluster and for each related
           cluster, and their similarity is calculated.
        4. Every related cluster whose similarity is >= 0.7 becomes a
           ClusterEdgeInfo; the collected edges are passed to
           cluster_edge_update().

        Returns:
            True after all gids have been processed.
        """
        self.logger.info('novel cluster edge module start')

        similarity = NovelSimilarityModule()

        process_gid_list = self.process_gid_collection()
        for index, gid in enumerate(process_gid_list):

            # The delete runs before any skip below, so a gid that is skipped
            # still has delete_novelclusteredgeinfo() applied to it.
            cluster_db = ClusterDBModule()
            cluster_db.delete_novelclusteredgeinfo(gid)

            cluster_node = self.cluster_node_collection(gid)
            if not cluster_node:
                continue
            virtual_node = similarity.virtual_novel_node_generate(cluster_node)
            related_gid_list = self.related_gid_collection(cluster_node)
            if len(related_gid_list) == 0:
                continue

            # The encoded names are used for logging only. 'ignore' drops
            # characters GBK cannot represent, so an unusual title can never
            # abort the whole batch with UnicodeEncodeError.
            book_name = cluster_node.book_name.encode('GBK', 'ignore')
            pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
            self.logger.info('index: {0}/{1}'.format(index, len(process_gid_list)))
            self.logger.info('novel_info: {0}@{1}@{2}, '
                             'chater_number: {3}, related_gid_number: {4}'.format(
                gid, book_name, pen_name,
                len(virtual_node.chapter_list), len(related_gid_list)
            ))

            related_edge_list = []
            for related_gid in related_gid_list:
                related_cluster_node = self.cluster_node_collection(related_gid)
                if not related_cluster_node:
                    continue
                related_virtual_node = similarity.virtual_novel_node_generate(related_cluster_node)
                cluster_similarity = similarity.novel_cluster_similarity_calculation(virtual_node, related_virtual_node)
                # Only pairs at or above the 0.7 threshold are kept as edges.
                if cluster_similarity >= 0.7:
                    cluster_edge = ClusterEdgeInfo(cluster_node.gid, related_cluster_node.gid, cluster_similarity)
                    related_edge_list.append(cluster_edge)

                    # Same lossy encoding as above: log-only values.
                    book_name = related_cluster_node.book_name.encode('GBK', 'ignore')
                    pen_name = related_cluster_node.pen_name.encode('GBK', 'ignore')
                    self.logger.info('novel_info: {0}@{1}@{2}, '
                                     'chapter_number: {3}, similarity: {4}'.format(
                        related_gid, book_name, pen_name,
                        len(related_virtual_node.chapter_list), cluster_similarity
                    ))
            # Called even when no edge passed the threshold (empty list).
            self.cluster_edge_update(gid, related_edge_list)

        self.logger.info('novel cluster edge module end')
        return True
Ejemplo n.º 3
0
def show_cluster_node(gid):
    """Print a cluster's book info and chapter list, then return its virtual node.

    The cluster node for ``gid`` is fetched with
    ClusterEdgeModule.cluster_node_collection() and turned into a virtual
    novel node with NovelSimilarityModule.virtual_novel_node_generate().
    Two lines are printed: the gid with the book and pen names, and a
    comma-separated list of "<chapter title>: <rank>" entries. Text is
    encoded to GBK, ignoring characters GBK cannot represent.

    Args:
        gid: gid of the cluster to show.

    Returns:
        The virtual novel node generated for the cluster.
    """
    # This instance is not used below; the construction is kept in case it
    # has side effects -- TODO confirm whether it can be dropped.
    cluster_db = ClusterDBModule()

    edge_module = ClusterEdgeModule()
    similarity_module = NovelSimilarityModule()

    node = edge_module.cluster_node_collection(gid)
    virtual_node = similarity_module.virtual_novel_node_generate(node)

    encoded_book = node.book_name.encode('GBK', 'ignore')
    encoded_pen = node.pen_name.encode('GBK', 'ignore')
    print('gid: {0}, book_name: {1}, pen_name: {2}'.format(gid, encoded_book, encoded_pen))

    chapter_parts = []
    for chapter in virtual_node.chapter_list:
        encoded_title = chapter.chapter_title.encode('GBK', 'ignore')
        chapter_parts.append('%s: %d' % (encoded_title, chapter.rank))
    print(', '.join(chapter_parts))

    return virtual_node
Ejemplo n.º 4
0
    def run_test(self):
        """Run over the evaluation data and print each cluster's chapters.

        Reads one integer gid per line from ./data/rid.txt (blank lines are
        skipped). For every gid that has a cluster node, prints the gid with
        the book and pen names, then the comma-separated chapter titles of
        the generated virtual novel node, then an empty line. Text is encoded
        to GBK, ignoring characters GBK cannot represent.

        Raises:
            IOError/OSError: if ./data/rid.txt cannot be opened.
            ValueError: if a non-blank line is not an integer.
        """
        # The context manager closes the handle as soon as the gids are read.
        with open('./data/rid.txt', 'r') as gid_file:
            gid_list = [int(line.strip()) for line in gid_file if line.strip()]
        similarity = NovelSimilarityModule()

        for gid in gid_list:
            cluster_node = self.cluster_node_collection(gid)
            if not cluster_node:
                continue
            print('gid: {0}, book_name: {1}, pen_name: {2}'.format(
                gid,
                cluster_node.book_name.encode('GBK', 'ignore'),
                cluster_node.pen_name.encode('GBK', 'ignore')
            ))
            novel_node = similarity.virtual_novel_node_generate(cluster_node)
            print(', '.join('%s' % chapter.chapter_title.encode('GBK', 'ignore') for chapter in novel_node.chapter_list))
            print('')
Ejemplo n.º 5
0
        novel_node = get_novel_node(conn, dir_id)

        id_list = []
        for table_id in xrange(1, 256):
            sql = 'SELECT dir_id FROM novel_cluster_dir_info{0} WHERE book_name = "{1}"'.format(table_id, novel_node.book_name.encode('GBK'))
            cursor = conn.cursor()
            cursor.execute(sql)
            for (id, ) in cursor.fetchall():
                id_list.append(id)
            cursor.close()
        
        if len(id_list) == 0:
            continue
        print_novel_node(novel_node, dir_id)
        similarity = NovelSimilarityModule()
        for id in id_list:
            node = get_novel_node(conn, id)
            print_novel_node(node, id)
            print(similarity.novel_node_similarity_calculation(novel_node, node))
            novel_node = node




    conn.close()


    here()