def user_post_similar(users):
    """Build the pairwise similarity matrix for the posts of ``users``.

    Every post yielded by ``posts_by_users(users)`` is handed to
    ``idf.append`` (post id plus its content encoded with the ``'U8'`` codec,
    which is Python's alias for UTF-8). ``idf.similar`` is then evaluated for
    the pairs of post ids and the scores are stored in a square matrix.

    Args:
        users: Forwarded unchanged to ``posts_by_users``.

    Returns:
        A ``(post_ids, similar)`` tuple. ``post_ids`` is the list of post ids
        in the order they were yielded. ``similar`` is a float ndarray of
        shape ``(len(post_ids), len(post_ids))`` in which ``similar[a][b]``
        holds the score for ``post_ids[a]`` and ``post_ids[b]``.
    """
    post_ids = []
    for post in posts_by_users(users):
        post_ids.append(post.id)
        idf.append(post.id, post.content.encode('U8'))

    length = len(post_ids)
    # Builtin ``float`` instead of ``np.float``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; both name the same float64 dtype.
    similar = np.zeros((length, length), float)

    # The score is treated as symmetric: each unordered pair is computed once
    # (upper triangle, diagonal included) and mirrored into the lower
    # triangle. Mirroring by position rather than through an id-keyed cache
    # keeps this working when the same post id appears more than once.
    for row, first_id in enumerate(post_ids):
        for col, second_id in enumerate(post_ids):
            if col < row:
                similar[row, col] = similar[col, row]
            else:
                similar[row, col] = idf.similar(first_id, second_id)
    return post_ids, similar
def user_comment_smilar(users):
    """Score every comment by ``users`` against every post by ``users``.

    The post side comes from ``user_post_similar(users)``. Each comment
    yielded by ``comment_by_users(users)`` is passed to ``idf.append``
    (comment id plus its content encoded with the ``'U8'`` codec, Python's
    alias for UTF-8) and then compared to every post id with ``idf.similar``.

    The function name is misspelled ("smilar"); it is kept as is so existing
    callers continue to work.

    Args:
        users: Iterated here and forwarded unchanged to
            ``user_post_similar`` and ``comment_by_users``.

    Returns:
        A ``(post_ids, similar, comment_ids, comment_rela)`` tuple.
        ``post_ids`` and ``similar`` are the values returned by
        ``user_post_similar``. ``comment_ids`` lists the comment ids in the
        order they were yielded. ``comment_rela`` is a float ndarray of shape
        ``(len(comment_ids), len(post_ids))`` in which ``comment_rela[c][p]``
        holds ``idf.similar(comment_ids[c], post_ids[p])``.
    """
    # NOTE(review): ``authors`` is never read below. The lookups are kept
    # only because get_author_id_by_user_id may have side effects that are
    # not visible from here; drop this loop once that is ruled out.
    authors = []
    for user in users:
        author = get_author_id_by_user_id(user)
        if author:
            authors.append(author)

    post_ids, similar = user_post_similar(users)

    comments = [
        (comment.id, comment.content.encode('U8'))
        for comment in comment_by_users(users)
    ]

    # Builtin ``float`` instead of ``np.float``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; both name the same float64 dtype.
    comment_rela = np.zeros((len(comments), len(post_ids)), float)

    for row, (comment_id, content) in enumerate(comments):
        # Each comment is appended right before it is scored, so its scores
        # are computed before any later comment has been appended.
        idf.append(comment_id, content)
        for col, post_id in enumerate(post_ids):
            comment_rela[row, col] = idf.similar(comment_id, post_id)

    comment_ids = [comment_id for comment_id, _ in comments]
    return post_ids, similar, comment_ids, comment_rela