class ArticleService:
    """Assemble the list of articles shown to a logged-in or anonymous user.

    Both public entry points follow the same pipeline: pick a list of
    category names, ask MySQL for the ranked articles of each category
    within the current valid session, then load each article's content
    from MongoDB and wrap it in an ``ArticleShow``.
    """

    # Class-level collaborators: created once when the class body runs and
    # shared by every instance of the service.
    mysql = MySQL()
    mongodb = MongoDB()
    category_service = CategoryService()

    def get_articles_by_current_user_id(self, current_user_id: int):
        """Return the articles for the categories attached to a user.

        Args:
            current_user_id: Id passed to
                ``category_service.get_category_by_current_user_id``.

        Returns:
            A list of ``ArticleShow`` objects, grouped by category in the
            order the categories were returned for the user.
        """
        categories = self.category_service.get_category_by_current_user_id(
            user_id=current_user_id)
        category_names = [category.name for category in categories]
        return self._get_articles_for_categories(category_names)

    def get_articles_no_login(self):
        """Return the articles for the default ``Config.CATEGORIES`` list.

        Returns:
            A list of ``ArticleShow`` objects, grouped by category in the
            order of ``Config.CATEGORIES``.
        """
        return self._get_articles_for_categories(Config.CATEGORIES)

    def _get_articles_for_categories(self, category_names) -> list:
        """Fetch the ranked articles of every category and build the result.

        Args:
            category_names: Sequence of category names to query, in the
                order the resulting articles should appear.

        Returns:
            A list of ``ArticleShow`` objects, one per ranking row.
        """
        valid_session_id = self.mysql.get_valid_session_id()
        # One limit per category, at the same position as the category name.
        limits = get_number_of_articles_per_category(
            Config.NUMBER_OF_ARTICLES, len(category_names))

        article_scores = []
        for position, category_name in enumerate(category_names):
            article_scores.extend(
                self.mysql.fetch_articles_ranking(
                    session_id=valid_session_id,
                    category=category_name,
                    limit=limits[position]))

        return [self._to_article_show(score) for score in article_scores]

    def _to_article_show(self, article_score):
        """Combine one ranking row with its MongoDB document.

        Args:
            article_score: Ranking row exposing ``article_id`` and
                ``audio_path``.

        Returns:
            An ``ArticleShow`` whose content fields come from MongoDB and
            whose ``audio_path`` comes from the ranking row.
        """
        article_mongo = self.mongodb.get_article_by_uuid(
            article_score.article_id)
        return ArticleShow(id=article_mongo.id,
                           url=article_mongo.url,
                           domain=article_mongo.domain,
                           title=article_mongo.title,
                           category=article_mongo.category,
                           time=article_mongo.time,
                           content=article_mongo.content,
                           audio_path=article_score.audio_path)
class ScoreService:
    """Score the articles of a category and record generated audio files.

    A score combines a freshness component, a keyword bonus and an
    adjustment for near-duplicate articles detected via min-hash
    signatures.
    """

    # Class-level collaborators and configuration, shared by all instances.
    mongo = MongoDB()
    mysql = MySQL()
    keyword = Config.SCORE_KEYWORD
    lsh = LSH()

    # Articles whose content length is outside [_MIN, _MAX] are disqualified.
    _MIN_CONTENT_LENGTH = 500
    _MAX_CONTENT_LENGTH = 5000
    # Score magnitude used to push an article out of any ranking.
    _DISQUALIFIED = 99999
    # Freshness score of an article published exactly "now".
    _MAX_TIME_SCORE = 50
    # Bonus when the title contains one of the category keywords.
    _KEYWORD_BONUS = 10
    # Bonus when the article wins against a similar article.
    _SIMILAR_WINNER_BONUS = 20

    def check_contains_keyword(self, text: str, key_list: List[str]):
        """Return True if any entry of ``key_list`` is a substring of ``text``.

        Args:
            text: Text to search in.
            key_list: Candidate substrings.

        Returns:
            ``True`` on the first match, ``False`` if nothing matches or
            ``key_list`` is empty.
        """
        return any(key in text for key in key_list)

    def score_by_category(self, category: str):
        """Compute a ``ScoreInsert`` for every article of ``category``.

        Args:
            category: Category name used to load the articles and to look
                up the keyword list in ``self.keyword``.

        Returns:
            A list with one ``ScoreInsert`` per article, in the order the
            articles were returned by MongoDB. Empty if the category has
            no articles.
        """
        print(
            '\n------------------------------------------------------------------------'
        )
        print('category: ', category)
        articles_by_category = self.mongo.get_articles_by_category(
            category=category)
        print('size: ', len(articles_by_category))
        if not articles_by_category:
            # Nothing to score; avoid building an LSH matrix from no documents.
            return []

        article_contents = [
            article.content for article in articles_by_category
        ]
        matrix = self.lsh.init_matrix(list_docs=article_contents)
        min_hash = self.lsh.min_hashing(matrix=matrix, n_permutation=100)

        # Collected before scoring so the duplicate check below does not
        # depend on the order in which the articles are processed.
        invalid_len_articles = {
            article.id
            for article in articles_by_category
            if not self._has_valid_length(article)
        }
        # A single timestamp keeps the freshness score consistent across
        # all articles of this run.
        now = datetime.datetime.now()

        results = []
        for article in articles_by_category:
            print('article: ', article)
            if not self._has_valid_length(article):
                score = -self._DISQUALIFIED
            else:
                score = self._time_score(article.time, now)
                if self.check_contains_keyword(article.title,
                                               self.keyword[category]):
                    score = score + self._KEYWORD_BONUS
                # NOTE(review): the position is looked up by content rather
                # than taken from the loop. If two articles share the same
                # content and the lookup returns the first match, the later
                # one would be compared using the first one's column and
                # timestamp -- confirm, and consider enumerate() instead.
                index = self.lsh.get_index_of_doc(article.content,
                                                  article_contents)
                score = self._apply_similarity(score, index,
                                               articles_by_category,
                                               min_hash,
                                               invalid_len_articles)
            results.append(
                ScoreInsert(article_id=article.id,
                            url=article.url,
                            category=article.category,
                            domain=article.domain,
                            score=score))
        return results

    def _has_valid_length(self, article):
        """Return True if the article content length is within the limits."""
        content_length = len(article.content)
        return (self._MIN_CONTENT_LENGTH <= content_length
                <= self._MAX_CONTENT_LENGTH)

    def _time_score(self, published, now):
        """Return the freshness component of the score.

        The score is ``_MAX_TIME_SCORE`` for an article published at
        ``now``, 0 for one published at 17:00 of the previous day, and
        negative for anything older.

        Args:
            published: Publication time of the article.
            now: Reference "current" time.
        """
        # Subtracting a timedelta (instead of replacing ``day`` with
        # ``day - 1``) also works on the first day of a month, where the
        # replacement would raise ValueError.
        reference = (now - datetime.timedelta(days=1)).replace(
            hour=17, minute=0, second=0)
        time_second_max = (now - reference).total_seconds()
        time_second = (now - published).total_seconds()
        return ((time_second_max - time_second) / time_second_max *
                self._MAX_TIME_SCORE)

    def _apply_similarity(self, score, index, articles, min_hash,
                          invalid_len_articles):
        """Adjust ``score`` for every article similar to ``articles[index]``.

        For each other article whose signature similarity exceeds
        ``Config.PROBABILITY_MIN_HASHING``:

        * the bonus is added if this article is the older of the two;
        * the bonus is added if this article is newer but the older one
          has an invalid content length;
        * otherwise the article is disqualified.

        Args:
            score: Score accumulated so far.
            index: Position of the scored article in ``articles`` and of
                its column in ``min_hash``.
            articles: All articles of the category.
            min_hash: Signature matrix, one column per article.
            invalid_len_articles: Ids of articles with invalid length.

        Returns:
            The adjusted score.
        """
        threshold = Config.PROBABILITY_MIN_HASHING
        current = articles[index]
        for i, other in enumerate(articles):
            if i == index:
                continue
            sim = self.lsh.jaccard_signature(min_hash[:, index],
                                             min_hash[:, i])
            if sim > threshold:
                if current.time < other.time:
                    score = score + self._SIMILAR_WINNER_BONUS
                elif (current.time > other.time
                      and other.id in invalid_len_articles):
                    score = score + self._SIMILAR_WINNER_BONUS
                else:
                    score = score - self._DISQUALIFIED
                print('------------------------')
                print('sim = ', sim)
                print(current)
                print(other)
                print('------------------------')
        return score

    def check_audio(self, audio: Audio):
        """Store the audio file path when text-to-speech generation succeeded.

        Args:
            audio: Generation outcome exposing ``result`` and ``uuid``. On
                ``result == "Success"`` the path
                ``Config.BASE_AUDIO_DIR + uuid + ".mp3"`` is saved through
                ``mysql.add_audio_path``; otherwise only a message is
                printed.
        """
        if audio.result == "Success":
            print("create gtts successfully with uuid: ", audio.uuid)
            path = Config.BASE_AUDIO_DIR + audio.uuid + ".mp3"
            self.mysql.add_audio_path(audio.uuid, path)
        else:
            print("create gtts failed with uuid: ", audio.uuid)