def show_post(slug):
    """Render a post page together with its LDA-based related posts.

    Args:
        slug: Value matched against the ``slug`` field of documents in
            ``mongo_col``.

    Returns:
        The rendered ``index.html`` template, or the ``not-found.html``
        template with a 404 status when no document has this slug.
    """
    record = mongo_col.find_one({"slug": slug})
    if record is None:
        # find_one returns None when nothing matches; subscripting it below
        # would raise TypeError and surface as a 500.
        return render_template('not-found.html'), 404

    main_post = {
        "url": record["canonical_url"],
        "title": record["title"],
        "slug": record["slug"],
        "content": record["contents"],
    }

    # preprocessing: markdown -> plain text -> bag-of-words
    content = markdown_to_text(main_post["content"])
    text_corpus = make_texts_corpus([content])
    bow = id2word.doc2bow(next(text_corpus))

    # Only the probabilities are kept (topic ids are discarded), so every
    # topic must be present for position i to mean topic i. Passing
    # minimum_probability=0.0 stops gensim from filtering low-probability
    # topics out of the result.
    doc_distribution = np.array([
        probability
        for _, probability in lda_model.get_document_topics(
            bow=bow, minimum_probability=0.0)
    ])

    # recommender posts; the first ID is skipped
    # (presumably the post itself -- TODO confirm).
    most_sim_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
    most_sim_ids = [int(id_) for id_ in most_sim_ids]
    posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})

    # NOTE(review): one ID was already dropped above; this second [1:] drops
    # one more post from the cursor. Kept as in the original -- confirm it is
    # intended.
    related_posts = [{
        "url": post["canonical_url"],
        "title": post["title"],
        "slug": post["slug"],
    } for post in posts][1:]

    return render_template('index.html',
                           main_post=main_post,
                           posts=related_posts)
def show_post(slug):
    """Render a post page (markdown converted to HTML) with related posts.

    Args:
        slug: Value matched against the ``slug`` field of documents in
            ``mongo_col``; also passed to ``get_content_of_post``.

    Returns:
        The rendered ``index.html`` template, or the ``not-found.html``
        template with a 404 status when no document has this slug.
    """
    record = mongo_col.find_one({"slug": slug})
    if record is None:
        # find_one returns None when nothing matches; subscripting it below
        # would raise TypeError and surface as a 500.
        return render_template('not-found.html'), 404

    md = get_content_of_post(slug)
    main_post = {
        "url": record["url"],
        "title": record["title"],
        "slug": record["slug"],
        "content": md2html(md),
    }

    # preprocessing (note: the text fed to the model is derived from the
    # rendered HTML content, as in the original)
    content = markdown_to_text(main_post["content"])
    text_corpus = make_texts_corpus([content])
    bow = id2word.doc2bow(next(text_corpus))

    # Use the saved dictionary and trained LDA model to obtain the document's
    # topic-distribution vector. Only the probabilities are kept, so every
    # topic must be present for position i to mean topic i;
    # minimum_probability=0.0 stops gensim from filtering topics out.
    doc_distribution = np.array([
        probability
        for _, probability in lda_model.get_document_topics(
            bow=bow, minimum_probability=0.0)
    ])

    # recommender posts; the first ID is skipped
    # (presumably the post itself -- TODO confirm).
    most_sim_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
    most_sim_ids = [int(id_) for id_ in most_sim_ids]
    posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})

    # NOTE(review): one ID was already dropped above; this second [1:] drops
    # one more post from the cursor. Kept as in the original -- confirm it is
    # intended.
    related_posts = [{
        "url": post["url"],
        "title": post["title"],
        "slug": post["slug"],
    } for post in posts][1:]

    return render_template('index.html',
                           main_post=main_post,
                           posts=related_posts)
def update_post(id):
    """Update a post's title and content from the submitted form.

    Stores the raw ``content`` and its ``markdown_to_text`` output
    (``pp_content``) on the document, then redirects to the post page.

    Args:
        id: String form of the document's ``_id``; converted with
            ``ObjectId``. (The name shadows the builtin but is kept because
            it is part of the handler's interface.)

    Returns:
        A redirect to ``/posts/<slug>``, or the ``not-found.html`` template
        with a 404 status when no document has this ``_id``.
    """
    title = request.form['title']
    content = request.form['content']
    # NOTE(review): the original also evaluated `'is_active' in request.form`
    # but never stored the result, so the dead local was removed. Confirm
    # whether "is_active" should be added to the $set below.
    pp_content = markdown_to_text(content)

    post_id = ObjectId(id)
    mongo_col.update_one({"_id": post_id}, {
        "$set": {
            "title": title,
            "content": content,
            "pp_content": pp_content,
        }
    })

    main_post = mongo_col.find_one({"_id": post_id})
    if main_post is None:
        # Nothing matched the id: update_one was a no-op and find_one returned
        # None, which would otherwise raise TypeError on the subscript below.
        return render_template('not-found.html'), 404
    return redirect("/posts/" + main_post["slug"])
def search():
    """Search posts by title, or by LDA topic similarity of the query text.

    The query is read from the ``search`` form field on POST and from the
    ``search`` query-string parameter otherwise. When the ``is_content``
    query-string parameter is absent, titles are matched as a literal
    substring (up to 30 results, sorted by title); when it is present, the
    query text is run through the topic model and similar posts are returned.

    Returns:
        The rendered ``list-post.html`` template with ``random_posts`` set to
        the list of matching post summaries.
    """
    import re  # local import so this block does not rely on a file-level one

    if request.method == 'POST':
        search_text = request.form['search']
    else:
        search_text = request.args.get('search')
    if search_text is None:
        # A GET without ?search= would otherwise pass None into $regex and
        # markdown_to_text.
        search_text = ''

    def _summarize(post):
        # Fields handed to the template for each listed post.
        return {
            "idrs": post["idrs"],
            "url": post["url"],
            "title": post["title"],
            "slug": post["slug"],
            "id": post["_id"],
            "is_active": post["is_active"],
            "views_count": post["views_count"],
        }

    if request.args.get('is_content') is None:
        # Escape the user's text so it is matched literally: raw input such
        # as "c++" or "(" is an invalid or expensive pattern when used
        # directly as a regular expression.
        posts = mongo_col.find({
            "title": {
                "$regex": re.escape(search_text)
            }
        }).sort("title").limit(30)
        random_posts = [_summarize(post) for post in posts]
    else:
        content = markdown_to_text(search_text)
        text_corpus = make_texts_corpus([content])
        bow = id2word.doc2bow(next(text_corpus))
        # Only the probabilities are kept, so every topic must be present for
        # position i to mean topic i; minimum_probability=0.0 stops gensim
        # from filtering topics out.
        doc_distribution = np.array([
            probability
            for _, probability in lda_model.get_document_topics(
                bow=bow, minimum_probability=0.0)
        ])

        # recommender posts
        # NOTE(review): the first similar ID is skipped here and the first
        # fetched post is skipped again below. For a free-text query neither
        # is obviously "the document itself"; kept as in the original --
        # confirm it is intended.
        most_sim_ids = list(
            get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
        most_sim_ids = [int(id_) for id_ in most_sim_ids]
        posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})
        random_posts = [_summarize(post) for post in posts][1:]

    return render_template('list-post.html', random_posts=random_posts)
def search():
    """Find posts whose topic distribution is similar to the submitted text.

    Reads the ``search`` form field, runs it through the same preprocessing
    and LDA pipeline used for posts, and looks up the most similar documents
    by their ``idrs`` value.

    Returns:
        The rendered ``search.html`` template with ``search`` set to the list
        of matching post summaries (url, title, slug).
    """
    search_text = request.form['search']
    print('search text : ', search_text)

    content = markdown_to_text(search_text)
    text_corpus = make_texts_corpus([content])
    bow = id2word.doc2bow(next(text_corpus))

    # Only the probabilities are kept (topic ids are discarded), so every
    # topic must be present for position i to mean topic i. Passing
    # minimum_probability=0.0 stops gensim from filtering low-probability
    # topics out of the result.
    doc_distribution = np.array([
        probability
        for _, probability in lda_model.get_document_topics(
            bow=bow, minimum_probability=0.0)
    ])

    # recommender posts
    # NOTE(review): the first similar ID is skipped here and the first fetched
    # post is skipped again below. For a free-text query neither is obviously
    # "the document itself"; kept as in the original -- confirm it is intended.
    most_sim_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
    most_sim_ids = [int(id_) for id_ in most_sim_ids]
    posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})
    related_posts = [{
        "url": post["url"],
        "title": post["title"],
        "slug": post["slug"],
    } for post in posts][1:]

    return render_template('search.html', search=related_posts)
def add_post():
    """Create a post from the submitted form and redirect to its page.

    Reads ``title`` and ``content`` (and the presence of ``is_active``) from
    the form, generates an 11-character slug with ``get_random_string``, and
    inserts the document with ``views_count`` initialised to 0.

    Returns:
        A redirect to ``/posts/<slug>`` on success; the ``not-found.html``
        template if anything in the process raises.
    """
    import logging  # local import so this block does not rely on a file-level one

    try:
        title = request.form['title']
        content = request.form['content']
        is_active = 'is_active' in request.form
        pp_content = markdown_to_text(content)
        slug = get_random_string(11)
        # Collection.count() is deprecated (removed in PyMongo 4);
        # count_documents({}) is the supported way to count every document.
        # NOTE(review): using the current count as "idrs" can collide after
        # deletions or under concurrent inserts -- confirm this is acceptable.
        idrs = mongo_col.count_documents({})
        mongo_col.insert_one({
            'id': '',
            'title': title,
            'slug': slug,
            'url': '',
            'content': markdownify(content),
            'idrs': idrs,
            'pp_content': pp_content,
            'is_active': is_active,
            'views_count': 0
        })
        return redirect("/posts/" + slug)
    except Exception:
        # Deliberate best-effort boundary (kept from the original), but log
        # the full traceback instead of printing only the message.
        logging.getLogger(__name__).exception("add_post failed")
        return render_template('not-found.html')
def show_post(slug):
    """Render a post page with its view counter bumped, tags and related posts.

    Steps:
      1. Look up the post by slug and increment its ``views_count``.
      2. Build ``tags`` from whitespace-separated tokens of the post content
         that appear in ``major_word.csv`` with a count of at least 2000.
      3. Run the content through the LDA pipeline to find related posts.

    Args:
        slug: Value matched against the ``slug`` field of documents in
            ``mongo_col``.

    Returns:
        The rendered ``index.html`` template, or the ``not-found.html``
        template with a 404 status when no document has this slug.
    """
    record = mongo_col.find_one({"slug": slug})
    if record is None:
        # find_one returns None when nothing matches; subscripting it below
        # would raise TypeError and surface as a 500.
        return render_template('not-found.html'), 404

    # $inc is atomic on the server, so concurrent views are not lost the way
    # they can be with read-then-$set, and it also works when the field is
    # missing.
    mongo_col.update_one({"_id": record["_id"]},
                         {"$inc": {"views_count": 1}})

    # NOTE(review): the CSV is re-read on every request from a path relative
    # to the working directory -- consider loading it once at startup.
    data = pd.read_csv("../src/preprocessing/major_word.csv")
    # word -> count for rows whose second column is >= 2000. setdefault keeps
    # the first occurrence of a word, matching the original's first-match
    # lookup.
    frequent_words = {}
    for row in data.values:
        if row[1] >= 2000:
            frequent_words.setdefault(str(row[0]), int(row[1]))

    # One dict lookup per distinct token replaces a scan of the whole word
    # list, and an empty word list no longer raises IndexError.
    tags = [{
        "text": token,
        "total": frequent_words[token]
    } for token in set(record["content"].split()) if token in frequent_words]
    print(tags)

    main_post = {
        "url": record["url"],
        "title": record["title"],
        "slug": record["slug"],
        "content": md2html(record["content"]),
    }

    # preprocessing (note: the text fed to the model is derived from the
    # rendered HTML content, as in the original)
    content = markdown_to_text(main_post["content"])
    text_corpus = make_texts_corpus([content])
    bow = id2word.doc2bow(next(text_corpus))

    # Use the saved dictionary and trained LDA model to obtain the document's
    # topic-distribution vector. Only the probabilities are kept, so every
    # topic must be present for position i to mean topic i;
    # minimum_probability=0.0 stops gensim from filtering topics out.
    doc_distribution = np.array([
        probability
        for _, probability in lda_model.get_document_topics(
            bow=bow, minimum_probability=0.0)
    ])

    # recommender posts; the first ID is skipped
    # (presumably the post itself -- TODO confirm).
    most_sim_ids = list(
        get_most_similar_documents(doc_distribution, doc_topic_dist))[1:]
    most_sim_ids = [int(id_) for id_ in most_sim_ids]
    posts = mongo_col.find({"idrs": {"$in": most_sim_ids}})

    # NOTE(review): one ID was already dropped above; this second [1:] drops
    # one more post from the cursor. Kept as in the original -- confirm it is
    # intended.
    related_posts = [{
        "url": post["url"],
        "title": post["title"],
        "slug": post["slug"],
    } for post in posts][1:]

    return render_template('index.html',
                           main_post=main_post,
                           posts=related_posts,
                           tags=tags)
#ls #%% from src.utils import markdown_to_text posts = col.find() type(posts) for i, post in enumerate(posts): print(post['url']) if i == 10: break posts = col.find() test_post = next(posts) raaw_content = test_post['content'] print(raaw_content) content = markdown_to_text(raaw_content) print(content) test_post['_id'] for i, post in tqdm(enumerate(col.find()), total=col.count()): try: col.update_one({"_id": post["_id"]}, {"$set": {"idrs": i}}) pp_content = markdown_to_text(post['content']) col.update_one({"_id": post["_id"]}, {"$set": { "pp_content": pp_content }}) except Exception as e: print(e) continue client = MongoClient('localhost', 27017) db = client['rsframgia']