def insert_new_relation(post1):
    """Create similarity relations between a newly added post and the others.

    Every other non-PDF post (one whose ``url`` differs from ``post1.url``)
    is scored against ``post1``. The score is the sum of the content cosine,
    the title cosine and the category point. When the score is above 0.1,
    two ``Relation`` documents are saved, one per direction, both carrying
    the same score.

    Arguments:
    - `post1`: newly added post

    Returns ``None``. Nothing is computed or saved when ``post1`` is a PDF.
    """
    if post1.post_type == "pdf":
        return None

    # The new post's vectors are identical for every candidate, so build
    # them once instead of once per loop iteration.
    # NOTE(review): assumes get_cosine() does not mutate its arguments.
    content_vector1 = text_to_vector(post1.content.lower())
    title_vector1 = text_to_vector(post1.title.lower())

    for post2 in Post.objects():
        # Skip PDFs and the new post itself (matched by url).
        if post2.post_type == "pdf" or post2.url == post1.url:
            continue

        # text similarity
        content_vector2 = text_to_vector(post2.content.lower())
        content_cosine = get_cosine(content_vector1, content_vector2)

        # title similarity
        title_vector2 = text_to_vector(post2.title.lower())
        title_cosine = get_cosine(title_vector1, title_vector2)

        category_point = get_category_point(post1, post2)
        score = content_cosine + title_cosine + category_point

        if score > 0.1:
            # Store the relation in both directions so that a lookup by
            # post1 finds it from either side.
            Relation(post1, post2, score).save()
            Relation(post2, post1, score).save()
def delete_post():
    """Delete the post named in the JSON request body, plus its relations.

    The request body is parsed as JSON and must contain a ``postId`` key.
    Only the author of the post is allowed to delete it.

    Returns a JSON string whose ``status`` is one of:
    - ``"not-found"``: no post exists with the given id,
    - ``"not-allowed"``: the current user is not the post's author,
    - ``"success"``: the post and every relation involving it were deleted.
    """
    post_id = json.loads(request.data)["postId"]

    post = Post.objects(id=post_id).first()
    if post is None:
        # first() gives None for an unknown id; without this guard the
        # author check below would fail with AttributeError.
        return json.dumps({"status": "not-found"})

    author = User.objects(username=current_user.username).first()
    if post.author != author:
        return json.dumps({"status": "not-allowed"})

    # Drop relations in both directions before removing the post itself.
    Relation.objects(post1=post).delete()
    Relation.objects(post2=post).delete()
    Post.objects(id=post_id).delete()
    return json.dumps({"status": "success"})
def build_relation_db():
    """Rebuild the relation collection from scratch.

    The existing relation collection is dropped first. Then every ordered
    pair of non-PDF posts with different ``url`` values is scored as the
    sum of content cosine, title cosine and category point. A relation is
    saved only when that score is above 0.1.

    This takes a lot of time, run this periodically. Eg. once a week or
    every night. Use insert_new_relation() for new posts.

    The number of scored pairs is printed before (0) and after the run.
    """
    Relation.drop_collection()

    counter = 0
    print(counter)

    # Vectorise each non-PDF post once up front. Doing this inside the pair
    # loop repeats the same work for every pairing of the same post.
    # NOTE(review): assumes get_cosine() does not mutate its arguments.
    candidates = [
        (
            post,
            text_to_vector(post.content.lower()),
            text_to_vector(post.title.lower()),
        )
        for post in Post.objects()
        if post.post_type != "pdf"
    ]

    for p1, content_vector1, title_vector1 in candidates:
        for p2, content_vector2, title_vector2 in candidates:
            # A post is never related to itself (matched by url).
            if p1.url == p2.url:
                continue

            counter += 1

            content_cosine = get_cosine(content_vector1, content_vector2)
            title_cosine = get_cosine(title_vector1, title_vector2)
            category_point = get_category_point(p1, p2)
            score = content_cosine + title_cosine + category_point

            if score > 0.1:
                # Both (p1, p2) and (p2, p1) are visited by the loops, so
                # each direction gets its own document.
                Relation(p1, p2, score).save()

    print(counter)
def single_post(seq, slug):
    """Render ``single.html`` for the post whose sequence number is ``seq``.

    ``seq`` is converted with ``int`` before the lookup; ``slug`` is accepted
    but not used in this function. The template receives the value returned
    by ``get_user()``, the first matching post, and at most three relations
    starting at that post, ordered by descending similarity.
    """
    seq_number = int(seq)
    viewer = get_user()
    current = Post.objects(seq=seq_number).first()
    top_related = Relation.objects(post1=current).order_by("-similarity")[:3]

    context = {
        "user": viewer,
        "post": current,
        "related_posts": top_related,
    }
    return render_template('single.html', **context)
def after_tagging_calculation(post1):
    """Add the category point to every stored relation involving ``post1``.

    Relations where ``post1`` is the first post are updated first, then the
    relations where it is the second post. Nothing is updated for PDF posts.

    Arguments:
    - `post1`: tag added to this post

    Returns ``None``.
    """
    def _add_category_point(relations):
        # Increase each relation's stored similarity by the category point
        # of its two posts and persist it.
        # NOTE(review): the point is added on top of the stored value on
        # every call; confirm this does not double count earlier tags.
        for rel in relations:
            bonus = get_category_point(rel.post1, rel.post2)
            rel.similarity += bonus
            rel.save()

    outgoing = Relation.objects(post1=post1)
    if post1.post_type == "pdf":
        return None

    _add_category_point(outgoing)
    _add_category_point(Relation.objects(post2=post1))
def get_related_posts(post1):
    """Return at most four relations starting at ``post1``, most similar first."""
    return Relation.objects(post1=post1).order_by("-similarity")[:4]