Exemple #1
0
def insert_new_relation(post1):
    """Save similarity relations between a newly added post and other posts.

    The score for a pair is the sum of the content cosine, the title
    cosine and the category point. When the score is greater than 0.1 a
    ``Relation`` is saved in both directions (post1 -> post2 and
    post2 -> post1).

    PDF posts are skipped on either side, and a post is never compared
    with a post that has the same URL.

    Arguments:
    - `post1`: newly added post
    """
    if post1.post_type == "pdf":
        return None

    # The vectors for post1 do not depend on the candidate post, so they
    # are built once here instead of once per loop iteration.
    vector1 = text_to_vector(post1.content.lower())
    tvector1 = text_to_vector(post1.title.lower())

    for post2 in Post.objects():
        if post2.post_type == "pdf" or post2.url == post1.url:
            continue

        # text similarity
        vector2 = text_to_vector(post2.content.lower())
        content_cosine = get_cosine(vector1, vector2)
        # title similarity
        tvector2 = text_to_vector(post2.title.lower())
        title_cosine = get_cosine(tvector1, tvector2)

        category_point = get_category_point(post1, post2)
        cosine = content_cosine + title_cosine + category_point

        if cosine > 0.1:
            # Relations are stored in both directions so that either post
            # can be looked up through the post1 field.
            Relation(post1, post2, cosine).save()
            Relation(post2, post1, cosine).save()
Exemple #2
0
def delete_post():
    """Delete the post named in the request body together with its relations.

    Reads a JSON request body and takes the post id from its ``postId``
    key. The post is deleted only when its author is the user whose
    username matches ``current_user.username``.

    Returns a JSON string whose ``status`` is:
    - ``"not-found"`` when no post has the given id,
    - ``"not-allowed"`` when the current user is not the post's author,
    - ``"success"`` after the post and every relation that references it
      (as post1 or post2) have been deleted.
    """
    post_data = json.loads(request.data)
    post_id = post_data["postId"]

    post = Post.objects(id=post_id).first()
    if post is None:
        # .first() yields None for an unknown id; report it instead of
        # failing on the attribute access below.
        return json.dumps({"status": "not-found"})

    author = User.objects(username=current_user.username).first()
    # A missing user must never be treated as the owner, even if the post
    # itself has no author (None != None would be False).
    if author is None or post.author != author:
        return json.dumps({"status": "not-allowed"})

    # Remove relations on both sides before removing the post itself.
    Relation.objects(post1=post).delete()
    Relation.objects(post2=post).delete()
    Post.objects(id=post_id).delete()

    return json.dumps({"status": "success"})
Exemple #3
0
def build_relation_db():
    """
    Rebuild the relation collection from scratch.

    The existing Relation collection is dropped, then every ordered pair
    of non-PDF posts with different URLs is scored as
    content cosine + title cosine + category point.

    Only includes relation when similarity > 0.1

    The number of scored pairs is printed before (0) and after the run.

    This takes a lot of time, run this periodically.
    Eg. once a week or every night.

    Use insert_new_relation() for new posts

    """
    posts = Post.objects()
    posts2 = Post.objects()
    Relation.drop_collection()
    counter = 0
    # Parenthesised form prints the same value on Python 2 and Python 3.
    print(counter)
    for p1 in posts:
        if p1.post_type == "pdf":
            continue

        # p1's vectors are the same for every p2, so build them once per
        # outer iteration rather than once per pair.
        vector1 = text_to_vector(p1.content.lower())
        tvector1 = text_to_vector(p1.title.lower())

        for p2 in posts2:
            if p2.post_type == "pdf" or p1.url == p2.url:
                continue
            counter += 1

            # text similarity
            vector2 = text_to_vector(p2.content.lower())
            content_cosine = get_cosine(vector1, vector2)
            # title similarity
            tvector2 = text_to_vector(p2.title.lower())
            title_cosine = get_cosine(tvector1, tvector2)

            category_point = get_category_point(p1, p2)
            cosine = content_cosine + title_cosine + category_point

            if cosine > 0.1:
                # The reverse direction is saved when the loops reach
                # the (p2, p1) pair.
                Relation(p1, p2, cosine).save()
    print(counter)
Exemple #4
0
def single_post(seq, slug):
    """Render 'single.html' for the post whose ``seq`` equals `seq`.

    `seq` is converted to an int before the lookup; `slug` is accepted but
    not used. The template receives the current user, the post, and the
    three relations starting at that post with the highest similarity.
    """
    seq_number = int(seq)
    viewer = get_user()
    current_post = Post.objects(seq=seq_number).first()

    # Most similar first, limited to three entries.
    ranked = Relation.objects(post1=current_post).order_by("-similarity")
    context = {
        "user": viewer,
        "post": current_post,
        "related_posts": ranked[:3],
    }
    return render_template('single.html', **context)
Exemple #5
0
def after_tagging_calculation(post1):
    """Add the category point to every stored relation involving `post1`.

    Relations that have `post1` in their post1 field are updated first,
    then relations that have it in their post2 field. Nothing is updated
    when `post1` is a PDF post.

    Arguments:
    - `post1`: tag added to this post
    """
    def add_category_point(relations):
        # Raise each relation's similarity by its category point and save.
        for rel in relations:
            bonus = get_category_point(rel.post1, rel.post2)
            rel.similarity += bonus
            rel.save()

    as_first = Relation.objects(post1=post1)
    if post1.post_type == "pdf":
        return None

    add_category_point(as_first)
    add_category_point(Relation.objects(post2=post1))
Exemple #6
0
def after_tagging_calculation(post1):
    """Add the category point to every stored relation involving `post1`.

    Relations that have `post1` in their post1 field are updated first,
    then relations that have it in their post2 field. Nothing is updated
    when `post1` is a PDF post.

    Arguments:
    - `post1`: tag added to this post
    """
    def add_category_point(relations):
        # Raise each relation's similarity by its category point and save.
        for rel in relations:
            bonus = get_category_point(rel.post1, rel.post2)
            rel.similarity += bonus
            rel.save()

    as_first = Relation.objects(post1=post1)
    if post1.post_type == "pdf":
        return None

    add_category_point(as_first)
    add_category_point(Relation.objects(post2=post1))
Exemple #7
0
def insert_new_relation(post1):
    """Save similarity relations between a newly added post and other posts.

    The score for a pair is the sum of the content cosine, the title
    cosine and the category point. When the score is greater than 0.1 a
    ``Relation`` is saved in both directions (post1 -> post2 and
    post2 -> post1).

    PDF posts are skipped on either side, and a post is never compared
    with a post that has the same URL.

    Arguments:
    - `post1`: newly added post
    """
    if post1.post_type == "pdf":
        return None

    # The vectors for post1 do not depend on the candidate post, so they
    # are built once here instead of once per loop iteration.
    vector1 = text_to_vector(post1.content.lower())
    tvector1 = text_to_vector(post1.title.lower())

    for post2 in Post.objects():
        if post2.post_type == "pdf" or post2.url == post1.url:
            continue

        # text similarity
        vector2 = text_to_vector(post2.content.lower())
        content_cosine = get_cosine(vector1, vector2)
        # title similarity
        tvector2 = text_to_vector(post2.title.lower())
        title_cosine = get_cosine(tvector1, tvector2)

        category_point = get_category_point(post1, post2)
        cosine = content_cosine + title_cosine + category_point

        if cosine > 0.1:
            # Relations are stored in both directions so that either post
            # can be looked up through the post1 field.
            Relation(post1, post2, cosine).save()
            Relation(post2, post1, cosine).save()
Exemple #8
0
def build_relation_db():
    """
    Rebuild the relation collection from scratch.

    The existing Relation collection is dropped, then every ordered pair
    of non-PDF posts with different URLs is scored as
    content cosine + title cosine + category point.

    Only includes relation when similarity > 0.1

    The number of scored pairs is printed before (0) and after the run.

    This takes a lot of time, run this periodically.
    Eg. once a week or every night.

    Use insert_new_relation() for new posts

    """
    posts = Post.objects()
    posts2 = Post.objects()
    Relation.drop_collection()
    counter = 0
    # Parenthesised form prints the same value on Python 2 and Python 3.
    print(counter)
    for p1 in posts:
        if p1.post_type == "pdf":
            continue

        # p1's vectors are the same for every p2, so build them once per
        # outer iteration rather than once per pair.
        vector1 = text_to_vector(p1.content.lower())
        tvector1 = text_to_vector(p1.title.lower())

        for p2 in posts2:
            if p2.post_type == "pdf" or p1.url == p2.url:
                continue
            counter += 1

            # text similarity
            vector2 = text_to_vector(p2.content.lower())
            content_cosine = get_cosine(vector1, vector2)
            # title similarity
            tvector2 = text_to_vector(p2.title.lower())
            title_cosine = get_cosine(tvector1, tvector2)

            category_point = get_category_point(p1, p2)
            cosine = content_cosine + title_cosine + category_point

            if cosine > 0.1:
                # The reverse direction is saved when the loops reach
                # the (p2, p1) pair.
                Relation(p1, p2, cosine).save()
    print(counter)
Exemple #9
0
def get_related_posts(post1):
    """Return at most four relations whose post1 field is `post1`.

    Results are ordered by descending similarity.
    """
    ranked = Relation.objects(post1=post1).order_by("-similarity")
    return ranked[:4]
Exemple #10
0
def get_related_posts(post1):
    """Return at most four relations whose post1 field is `post1`.

    Results are ordered by descending similarity.
    """
    ranked = Relation.objects(post1=post1).order_by("-similarity")
    return ranked[:4]