Ejemplo n.º 1
0
def show_post(slug):
    """Render a post page together with its topic-model based related posts.

    Args:
        slug: Slug used to look the post up in ``mongo_col``.

    Returns:
        The rendered ``index.html`` response. ``main_post`` carries the
        post's url, title, slug and content; ``posts`` carries url, title
        and slug of the related posts.

    Raises:
        werkzeug.exceptions.NotFound: If no post has the given slug.
    """
    # Function-scope import so the block does not rely on the module's
    # existing Flask import list also exposing ``abort``.
    from flask import abort

    document = mongo_col.find_one({"slug": slug})
    if document is None:
        # find_one returns None for an unknown slug; without this guard the
        # subscripts below raise TypeError and the client gets a 500.
        abort(404)

    main_post = {
        "url": document["canonical_url"],
        "title": document["title"],
        "slug": document["slug"],
        "content": document["contents"]
    }

    # Preprocessing: markdown -> plain text -> bag of words -> topic weights.
    content = markdown_to_text(main_post["content"])
    text_corpus = make_texts_corpus([content])
    bow = id2word.doc2bow(next(text_corpus))
    # Keep the second element of every item returned by get_document_topics
    # (the topic weight, assuming lda_model is a gensim LdaModel).
    doc_distribution = np.array(
        [doc_top[1] for doc_top in lda_model.get_document_topics(bow=bow)])

    # Recommended posts: the first id returned is skipped (presumably the
    # post itself -- TODO confirm against get_most_similar_documents).
    most_sim_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
    most_sim_ids = [int(id_) for id_ in most_sim_ids]

    posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})
    # NOTE(review): the trailing [1:] drops one more post after the id list
    # was already trimmed above; kept as-is, confirm whether it is intended.
    related_posts = [{
        "url": post["canonical_url"],
        "title": post["title"],
        "slug": post["slug"]
    } for post in posts][1:]

    return render_template('index.html',
                           main_post=main_post,
                           posts=related_posts)
Ejemplo n.º 2
0
def show_post(slug):
    """Render a post page together with its topic-model based related posts.

    Args:
        slug: Slug used to look the post up in ``mongo_col`` and to fetch
            its markdown through ``get_content_of_post``.

    Returns:
        The rendered ``index.html`` response. ``main_post`` carries url,
        title, slug and the HTML produced by ``md2html``; ``posts`` carries
        url, title and slug of the related posts.

    Raises:
        werkzeug.exceptions.NotFound: If no post has the given slug.
    """
    # Function-scope import so the block does not rely on the module's
    # existing Flask import list also exposing ``abort``.
    from flask import abort

    document = mongo_col.find_one({"slug": slug})
    if document is None:
        # find_one returns None for an unknown slug; without this guard the
        # subscripts below raise TypeError and the client gets a 500.
        abort(404)

    md = get_content_of_post(slug)
    main_post = {
        "url": document["url"],
        "title": document["title"],
        "slug": document["slug"],
        "content": md2html(md)
    }

    # Preprocessing. Note that the text fed to markdown_to_text is the
    # md2html output stored just above, not the raw markdown.
    content = markdown_to_text(main_post["content"])
    text_corpus = make_texts_corpus([content])
    bow = id2word.doc2bow(next(text_corpus))
    # Use the previously trained and saved dictionary and LDA model to obtain
    # the document's topic-distribution vector.
    doc_distribution = np.array(
        [doc_top[1] for doc_top in lda_model.get_document_topics(bow=bow)])

    # Recommended posts: the first id returned is skipped (presumably the
    # post itself -- TODO confirm against get_most_similar_documents).
    most_sim_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
    most_sim_ids = [int(id_) for id_ in most_sim_ids]

    posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})
    # NOTE(review): the trailing [1:] drops one more post after the id list
    # was already trimmed above; kept as-is, confirm whether it is intended.
    related_posts = [{
        "url": post["url"],
        "title": post["title"],
        "slug": post["slug"]
    } for post in posts][1:]

    return render_template('index.html',
                           main_post=main_post,
                           posts=related_posts)
Ejemplo n.º 3
0
def update_post(id):
    """Update a post from the submitted form and redirect to its page.

    Reads ``title`` and ``content`` from ``request.form``; the presence of
    an ``is_active`` form key is stored as a boolean. The plain-text
    rendering of the content is stored as ``pp_content``.

    Args:
        id: String form of the post's ``_id``; converted with ``ObjectId``.
            (The name shadows the builtin but is part of the interface.)

    Returns:
        A redirect to ``/posts/<slug>`` of the updated post.

    Raises:
        werkzeug.exceptions.NotFound: If no post has the given id.
    """
    # Function-scope import so the block does not rely on the module's
    # existing Flask import list also exposing ``abort``.
    from flask import abort

    title = request.form['title']
    content = request.form['content']
    is_active = 'is_active' in request.form
    pp_content = markdown_to_text(content)

    post_id = ObjectId(id)
    mongo_col.update_one({"_id": post_id}, {
        "$set": {
            "title": title,
            "content": content,
            "pp_content": pp_content,
            # Previously parsed from the form but never written, so the
            # flag could not be changed through this view.
            "is_active": is_active
        }
    })

    main_post = mongo_col.find_one({"_id": post_id})
    if main_post is None:
        # Nothing matched the id: answer 404 instead of failing on the
        # subscript below.
        abort(404)

    return redirect("/posts/" + main_post["slug"])
Ejemplo n.º 4
0
def search():
    """Search posts by title or, when ``is_content`` is given, by topic.

    The search text comes from the ``search`` form field on POST and from
    the ``search`` query parameter otherwise (empty string when absent).
    If the ``is_content`` query parameter is absent, posts whose title
    contains the text are returned, sorted by title and limited to 30.
    Otherwise the text is run through the same topic-model pipeline as a
    post and the most similar posts are returned.

    Returns:
        The rendered ``list-post.html`` response with ``random_posts``, a
        list of dicts holding idrs, url, title, slug, id, is_active and
        views_count.
    """
    import re

    if request.method == 'POST':
        search_text = request.form['search']
    else:
        # A missing parameter used to yield None, which broke both branches.
        search_text = request.args.get('search', '')

    by_content = request.args.get('is_content') is not None

    if not by_content:
        # Escape the user's text so it is matched literally: raw input such
        # as "C++" or "(" is not a valid pattern and made the query fail.
        posts = mongo_col.find({
            "title": {
                "$regex": re.escape(search_text)
            }
        }).sort("title").limit(30)
    else:
        content = markdown_to_text(search_text)
        text_corpus = make_texts_corpus([content])
        bow = id2word.doc2bow(next(text_corpus))
        doc_distribution = np.array(
            [doc_top[1] for doc_top in lda_model.get_document_topics(bow=bow)])
        # Recommended posts; the first id returned is skipped as before.
        most_sim_ids = list(
            get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
        most_sim_ids = [int(id_) for id_ in most_sim_ids]
        posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})

    random_posts = [{
        "idrs": post["idrs"],
        "url": post["url"],
        "title": post["title"],
        "slug": post["slug"],
        "id": post["_id"],
        "is_active": post["is_active"],
        "views_count": post["views_count"]
    } for post in posts]

    if by_content:
        # NOTE(review): the content branch has always dropped the first
        # fetched post in addition to the first id; kept, confirm intent.
        random_posts = random_posts[1:]

    return render_template('list-post.html', random_posts=random_posts)
Ejemplo n.º 5
0
def search():
    """Find posts topically similar to the submitted search text.

    The ``search`` form field is converted to plain text, turned into a bag
    of words and scored by the topic model; the posts whose ``idrs`` are
    among the most similar documents are rendered with ``search.html``.

    Returns:
        The rendered ``search.html`` response; its ``search`` variable is a
        list of dicts with url, title and slug.
    """
    query = request.form['search']
    print('search text : ', query)

    # Text -> corpus -> bag of words -> topic weights.
    plain_text = markdown_to_text(query)
    corpus = make_texts_corpus([plain_text])
    bag_of_words = id2word.doc2bow(next(corpus))
    topic_weights = []
    for topic_entry in lda_model.get_document_topics(bow=bag_of_words):
        topic_weights.append(topic_entry[1])
    doc_distribution = np.array(topic_weights)

    # Ids of the most similar documents, skipping the first one returned.
    ranked_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))
    neighbour_ids = [int(doc_id) for doc_id in ranked_ids[1:]]

    matches = []
    for post in mongo_col.find({"idrs": {"$in": neighbour_ids}}):
        matches.append({
            "url": post["url"],
            "title": post["title"],
            "slug": post["slug"]
        })

    # The first fetched post is dropped as well before rendering.
    related_posts = matches[1:]
    return render_template('search.html', search=related_posts)
Ejemplo n.º 6
0
def add_post():
    """Create a post from the submitted form and redirect to its page.

    Reads ``title`` and ``content`` from ``request.form``; the presence of
    an ``is_active`` key is stored as a boolean. The post gets a random
    11-character slug and an ``idrs`` equal to the collection's current
    document count.

    Returns:
        A redirect to ``/posts/<slug>`` on success. Any exception is printed
        and answered with the rendered ``not-found.html`` page instead.
    """
    try:
        form = request.form
        title = form['title']
        content = form['content']
        active = 'is_active' in form
        plain_text = markdown_to_text(content)
        slug = get_random_string(11)
        # NOTE(review): Collection.count() is deprecated in recent PyMongo
        # releases -- confirm the pinned driver version still provides it.
        next_idrs = mongo_col.count()

        document = {
            'id': '',
            'title': title,
            'slug': slug,
            'url': '',
            'content': markdownify(content),
            'idrs': next_idrs,
            'pp_content': plain_text,
            'is_active': active,
            'views_count': 0
        }
        mongo_col.insert_one(document)
    except Exception as e:
        print(e)
        return render_template('not-found.html')
    else:
        # Reached only when nothing raised. redirect() is kept inside the
        # guarded region so a failure there is handled as before.
        try:
            return redirect("/posts/" + slug)
        except Exception as e:
            print(e)
            return render_template('not-found.html')
Ejemplo n.º 7
0
def show_post(slug):
    """Render a post page with its related posts and frequent-word tags.

    Increments the post's ``views_count``, tags the post with every word of
    its content that appears in ``major_word.csv`` with a count of at least
    2000, and looks up related posts through the topic model.

    Args:
        slug: Slug used to look the post up in ``mongo_col``.

    Returns:
        The rendered ``index.html`` response with ``main_post`` (url, title,
        slug, HTML content), ``posts`` (url, title, slug of related posts)
        and ``tags`` (dicts with ``text`` and ``total``).

    Raises:
        werkzeug.exceptions.NotFound: If no post has the given slug.
    """
    # Function-scope import so the block does not rely on the module's
    # existing Flask import list also exposing ``abort``.
    from flask import abort

    document = mongo_col.find_one({"slug": slug})
    if document is None:
        # find_one returns None for an unknown slug; without this guard the
        # subscripts below raise TypeError and the client gets a 500.
        abort(404)

    # $inc is applied by the server in one step, so concurrent views no
    # longer overwrite each other's read-modify-write of the counter.
    mongo_col.update_one({"_id": document["_id"]},
                         {"$inc": {"views_count": 1}})

    # Frequent words: first column is the word, second its count.
    # NOTE(review): the CSV is re-read on every request; consider loading
    # it once at start-up if the file does not change while serving.
    data = pd.read_csv("../src/preprocessing/major_word.csv")
    major_words = {}
    for row in data.values:
        if row[1] >= 2000:
            # setdefault keeps the first row of a repeated word, matching
            # the first-match lookup this replaces.
            major_words.setdefault(str(row[0]), int(row[1]))

    # One dict lookup per distinct word instead of a scan of the whole
    # table; dict.fromkeys de-duplicates while keeping first-seen order so
    # the tag order is stable between requests.
    tags = [{
        "text": word,
        "total": major_words[word]
    } for word in dict.fromkeys(document["content"].split())
        if word in major_words]

    main_post = {
        "url": document["url"],
        "title": document["title"],
        "slug": document["slug"],
        "content": md2html(document["content"])
    }

    # Preprocessing. Note that the text fed to markdown_to_text is the
    # md2html output stored just above, not the stored content.
    content = markdown_to_text(main_post["content"])
    text_corpus = make_texts_corpus([content])
    bow = id2word.doc2bow(next(text_corpus))
    # Use the previously trained and saved dictionary and LDA model to obtain
    # the document's topic-distribution vector.
    doc_distribution = np.array(
        [doc_top[1] for doc_top in lda_model.get_document_topics(bow=bow)])

    # Recommended posts: the first id returned is skipped (presumably the
    # post itself -- TODO confirm against get_most_similar_documents).
    most_sim_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
    most_sim_ids = [int(id_) for id_ in most_sim_ids]

    posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})
    # NOTE(review): the trailing [1:] drops one more post after the id list
    # was already trimmed above; kept as-is, confirm whether it is intended.
    related_posts = [{
        "url": post["url"],
        "title": post["title"],
        "slug": post["slug"],
    } for post in posts][1:]

    return render_template('index.html',
                           main_post=main_post,
                           posts=related_posts,
                           tags=tags)
Ejemplo n.º 8
0
#ls
#%%
# Notebook-style scratch cells: inspect a few posts, try markdown_to_text on
# one of them, then back-fill `idrs` and `pp_content` on every document.
# NOTE(review): `col`, `tqdm` and `MongoClient` are not defined or imported
# anywhere in this excerpt, and `client`/`db` are only created in the last
# two lines -- the cells appear to be listed out of execution order.
from src.utils import markdown_to_text

posts = col.find()

# Bare expression: only has a visible effect as a notebook cell output.
type(posts)
# Print the url of the first 11 posts (i runs from 0 to 10 inclusive).
for i, post in enumerate(posts):
    print(post['url'])
    if i == 10:
        break
# Fresh query, because the previous one was partly consumed by the loop.
posts = col.find()
test_post = next(posts)
raaw_content = test_post['content']
print(raaw_content)
# Compare the stored content with its plain-text conversion.
content = markdown_to_text(raaw_content)
print(content)
# Bare expression: notebook display of the sample post's _id.
test_post['_id']
# Give every document a sequential `idrs` (its position in this iteration)
# and store the plain-text version of its content as `pp_content`.
for i, post in tqdm(enumerate(col.find()), total=col.count()):
    try:
        col.update_one({"_id": post["_id"]}, {"$set": {"idrs": i}})
        pp_content = markdown_to_text(post['content'])
        col.update_one({"_id": post["_id"]},
                       {"$set": {
                           "pp_content": pp_content
                       }})
    except Exception as e:
        # Best effort: report the failure and move on to the next document.
        # `idrs` may already have been written for this one.
        print(e)
        continue
# Connection to the local MongoDB server and the 'rsframgia' database.
client = MongoClient('localhost', 27017)
db = client['rsframgia']