def run(self):
    """Recompute the similarity edges for every gid selected for processing.

    For each gid from ``self.process_gid_collection()`` the previously stored
    edge info is deleted, the cluster node is loaded and compared against each
    gid from ``self.related_gid_collection()``. Pairs whose similarity is at
    least 0.7 become ``ClusterEdgeInfo`` entries, which are passed to
    ``self.cluster_edge_update()``.

    Returns:
        True, always.
    """
    similarity_threshold = 0.7

    self.logger.info('novel cluster edge module start')
    similarity = NovelSimilarityModule()
    process_gid_list = self.process_gid_collection()
    for index, gid in enumerate(process_gid_list):
        # The delete call runs before the skip checks below, i.e. also for
        # gids that turn out to have no cluster node or no related gids.
        cluster_db = ClusterDBModule()
        cluster_db.delete_novelclusteredgeinfo(gid)

        cluster_node = self.cluster_node_collection(gid)
        if not cluster_node:
            continue
        virtual_node = similarity.virtual_novel_node_generate(cluster_node)
        related_gid_list = self.related_gid_collection(cluster_node)
        if not related_gid_list:
            continue

        # 'ignore' drops characters that GBK cannot represent instead of
        # raising UnicodeEncodeError, so a log line can never abort the run.
        book_name = cluster_node.book_name.encode('GBK', 'ignore')
        pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
        self.logger.info('index: {0}/{1}'.format(index, len(process_gid_list)))
        # NOTE: the 'chater_number' spelling is kept as-is so existing log
        # consumers that match on this text keep working.
        self.logger.info('novel_info: {0}@{1}@{2}, '
                         'chater_number: {3}, related_gid_number: {4}'.format(
                             gid, book_name, pen_name,
                             len(virtual_node.chapter_list), len(related_gid_list)
                         ))

        related_edge_list = []
        for related_gid in related_gid_list:
            related_cluster_node = self.cluster_node_collection(related_gid)
            if not related_cluster_node:
                continue
            related_virtual_node = similarity.virtual_novel_node_generate(related_cluster_node)
            cluster_similarity = similarity.novel_cluster_similarity_calculation(
                virtual_node, related_virtual_node)
            if cluster_similarity >= similarity_threshold:
                cluster_edge = ClusterEdgeInfo(
                    cluster_node.gid, related_cluster_node.gid, cluster_similarity)
                related_edge_list.append(cluster_edge)
                book_name = related_cluster_node.book_name.encode('GBK', 'ignore')
                pen_name = related_cluster_node.pen_name.encode('GBK', 'ignore')
                self.logger.info('novel_info: {0}@{1}@{2}, '
                                 'chapter_number: {3}, similarity: {4}'.format(
                                     related_gid, book_name, pen_name,
                                     len(related_virtual_node.chapter_list),
                                     cluster_similarity
                                 ))
        self.cluster_edge_update(gid, related_edge_list)
    self.logger.info('novel cluster edge module end')
    return True
def show_cluster_node(gid):
    """Print a cluster's names and chapter list and return its virtual node.

    Args:
        gid: id handed to ``ClusterEdgeModule.cluster_node_collection``.

    Returns:
        The object produced by
        ``NovelSimilarityModule.virtual_novel_node_generate`` for the cluster,
        or None when no cluster node is found for ``gid``.
    """
    cluster_edge = ClusterEdgeModule()
    cluster_similarity = NovelSimilarityModule()

    cluster_node = cluster_edge.cluster_node_collection(gid)
    # Other callers of cluster_node_collection guard against a falsy result;
    # without this check a missing gid would fail on the attribute access below.
    if not cluster_node:
        return None

    virtual_novel_node = cluster_similarity.virtual_novel_node_generate(cluster_node)
    book_name = cluster_node.book_name.encode('GBK', 'ignore')
    pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
    print('gid: {0}, book_name: {1}, pen_name: {2}'.format(gid, book_name, pen_name))
    print(', '.join(
        '%s: %d' % (chapter.chapter_title.encode('GBK', 'ignore'), chapter.rank)
        for chapter in virtual_novel_node.chapter_list
    ))
    return virtual_novel_node
def run_test(self):
    """Run over the evaluation data.

    Reads one integer gid per line from ``./data/rid.txt``. For every gid that
    has a cluster node, prints its gid, book name and pen name, then the
    chapter titles of the generated virtual novel node, then an empty line.
    """
    # The context manager closes the file deterministically; blank lines
    # (e.g. a trailing newline) are skipped because int('') raises ValueError.
    with open('./data/rid.txt', 'r') as gid_file:
        gid_list = [int(line.strip()) for line in gid_file if line.strip()]

    similarity = NovelSimilarityModule()
    for gid in gid_list:
        cluster_node = self.cluster_node_collection(gid)
        if not cluster_node:
            continue
        print('gid: {0}, book_name: {1}, pen_name: {2}'.format(
            gid,
            cluster_node.book_name.encode('GBK', 'ignore'),
            cluster_node.pen_name.encode('GBK', 'ignore')
        ))
        novel_node = similarity.virtual_novel_node_generate(cluster_node)
        print(', '.join(
            '%s' % chapter.chapter_title.encode('GBK', 'ignore')
            for chapter in novel_node.chapter_list
        ))
        print('')