def main():
    """Print the score ranking, then the most-reviewed stores and most active users.

    The score ranking is printed one line per row; the other two results are
    printed as-is, exactly as their helper functions return them.
    """
    data = load_dataframes()

    # Horizontal rule one column narrower than the terminal.
    separater = "-" * (shutil.get_terminal_size()[0] - 1)
    line_template = "{rank}위: {store}({score}점)"

    # Stores in score order (any minimum-review filtering would live inside
    # sort_stores_by_score, which is not visible here).
    ranked_by_score = sort_stores_by_score(data)
    print("[최고 평점 음식점]")
    print(f"{separater}\n")
    for label, row in ranked_by_score.iterrows():
        # The displayed rank is the row's index label plus one.
        print(line_template.format(rank=label + 1, store=row.store_name, score=row.score))
    print(f"\n{separater}\n\n")

    # Stores that received the most reviews.
    most_reviewed = get_most_reviewed_stores(data)
    print("[가장 많은 리뷰를 받은 음식점]")
    print(f"{separater}\n")
    print(most_reviewed)

    # Users that wrote the most reviews.
    most_active = get_most_active_users(data)
    print("[가장 많은 리뷰를 작성한 유저]")
    print(f"{separater}\n")
    print(most_active)
def main():
    """Print three rankings: top-scored stores, most-reviewed stores, most active users.

    Each section is a title, a horizontal rule, one line per row, and a
    closing rule. The displayed rank is the row's index label plus one.
    """
    data = load_dataframes()
    separater = "-" * (shutil.get_terminal_size()[0] - 1)

    score_line = "{rank}위: {store}({score}점)"
    store_count_line = "{rank}위: {store}({count}개)"
    user_count_line = "{rank}위: {user}({count}개)"

    # Section 1: stores by score.
    by_score = sort_stores_by_score(data)
    print("[최고 평점 음식점]")
    print(f"{separater}\n")
    for label, row in by_score.iterrows():
        print(score_line.format(rank=label + 1, store=row.store_name, score=row.score))
    print(f"\n{separater}\n\n")

    # Section 2: stores by review count.
    by_reviews = get_most_reviewed_stores(data)
    print("[리뷰가 많은 음식점]")
    print(f"{separater}\n")
    for label, row in by_reviews.iterrows():
        print(store_count_line.format(rank=label + 1, store=row.store_name, count=row.cnt))
    print(f"\n{separater}\n\n")

    # Section 3: users by number of reviews written.
    by_activity = get_most_active_users(data)
    print("[리뷰 많이 작성한 유저]")
    print(f"{separater}\n")
    for label, row in by_activity.iterrows():
        print(user_count_line.format(rank=label + 1, user=row.user, count=row.cnt))
    print(f"\n{separater}\n\n")
def main():
    """Print the highest-rated stores, the most-reviewed stores and the most active users.

    Every section prints a title, a rule, one formatted line per row and a
    closing rule. Ranks are derived from each row's index label plus one.
    """
    data = load_dataframes()
    separater = "-" * (shutil.get_terminal_size().columns - 1)

    # Highest-rated stores.
    top_rated = sort_stores_by_score(data)
    print("[가장 평점이 높은 음식점]")
    print(f"{separater}\n")
    rated_line = "{rank}위: {store}({score}점)"
    for label, row in top_rated.iterrows():
        print(rated_line.format(rank=label + 1, store=row.store_name, score=row.score))
    print(f"\n{separater}\n\n")

    # Stores with the most reviews.
    top_reviewed = get_most_reviewed_stores(data)
    print("[가장 많은 리뷰를 받은 음식점]")
    print(f"{separater}\n")
    reviewed_line = "{rank}위: {store}({counts}개)"
    for label, row in top_reviewed.iterrows():
        print(reviewed_line.format(rank=label + 1, store=row.store_name, counts=row.counts))
    print(f"\n{separater}\n\n")

    # Users with the most reviews; the count is passed under the template's
    # "score" placeholder.
    top_users = get_most_active_users(data)
    print("[가장 많은 리뷰를 작성한 유저]")
    print(f"{separater}\n")
    user_line = "{rank}위: id:{user}({score}개)"
    for label, row in top_users.iterrows():
        print(user_line.format(rank=label + 1, user=row.id, score=row.counts))
    print(f"\n{separater}\n\n")
def main(tag_data, ingredi, user_name):
    """Run collaborative and content-based filtering and return both results.

    Args:
        tag_data: Forwarded unchanged to
            ``algorithm.tfidf.contents_based_filtering``.
        ingredi: Forwarded unchanged to the same call.
        user_name: User handed to ``algorithm.collaborative.getRecommendation``.

    Returns:
        A dict with two keys. ``'collaborative_filtering_result'`` maps the
        second element of every recommendation entry to its first element.
        ``'content_filtering_result'`` holds whatever the content-based
        filter returned. The dict is also printed before returning.
    """
    data = load_dataframes()

    # Per-username list of scores. Nothing below reads it: the guard that was
    # meant to consult it (use collaborative filtering only when the user has
    # rating data) is currently disabled, so collaborative filtering always runs.
    joined_scores = pd.merge(data["user"], data["score"], left_on="id", right_on="user_id")
    user_by_score_group = (
        joined_scores.groupby(['username'])['score']
        .agg(lambda g: g.values.tolist())
        .to_dict()
    )

    result = {
        'collaborative_filtering_result': {},
        'content_filtering_result': {},
    }

    # Collaborative filtering: each entry is indexed as (value, key).
    critics = algorithm.collaborative.recipe_score(data)
    recommendations = algorithm.collaborative.getRecommendation(critics, user_name)
    result['collaborative_filtering_result'].update(
        {entry[1]: entry[0] for entry in recommendations}
    )

    # Content-based filtering with the fixed item id and result count.
    content_based = algorithm.tfidf.contents_based_filtering(
        item_id=999999, num=5, tag_data=tag_data, ingredi=ingredi)
    result['content_filtering_result'].update(content_based)

    print(result)
    return result
def main():
    """Load the data frames and build the category and review-tag data.

    Nothing is printed or returned; the computed values are only bound to
    locals inside this function.
    """
    data = load_dataframes()

    # Rule one column narrower than the terminal (not printed in this function).
    width = shutil.get_terminal_size().columns - 1
    rule = "-" * width

    categories = make_category_data(data)
    review_tags = make_tags_from_reviews(data)
def main():
    """Apply the plotting configuration, load the data and draw all six graphs in order."""
    set_config()
    frames = load_dataframes()

    # The tuple order is the drawing order.
    graph_functions = (
        show_store_categories_graph,
        show_store_review_distribution_graph,
        show_store_average_ratings_graph,
        show_user_review_distribution_graph,
        show_user_age_gender_distribution_graph,
        show_stores_distribution_graph,
    )
    for draw in graph_functions:
        draw(frames)
def main():
    """Silence warnings, configure plotting, load the data and draw graphs 0 through 5."""
    warnings.filterwarnings(action='ignore')
    set_config()
    frames = load_dataframes()

    # Graph 0: store categories.
    show_store_categories_graph(frames)
    # Graph 1: distribution of review counts per store.
    show_store_review_distribution_graph(frames)
    # Graph 2: average rating per store.
    show_store_average_ratings_graph(frames)
    # Graph 3: distribution of review counts per user.
    show_user_review_distribution_graph(frames)
    # Graph 4: review distribution by age and gender.
    show_user_age_gender_distribution_graph(frames)
    # Graph 5: distribution of store locations.
    show_stores_distribution_graph(frames)
def main():
    """Configure plotting, load the data and draw only the user gender/age-group graph.

    The other graphs are switched off in this entry point:
    show_store_categories_graph, show_store_review_distribution_graph (3-1),
    show_store_average_ratings_graph (3-2) and
    show_user_review_distribution_graph (3-3).
    """
    set_config()
    frames = load_dataframes()

    # 3-4: user distribution by gender and age group.
    show_user_age_gender_distribution_graph(frames)
def main():
    """Ask for a user number on stdin and run ``UB_DF`` for that user.

    The value typed by the user is converted with ``int``; anything that is
    not an integer literal raises ``ValueError``.
    """
    data = load_dataframes()
    separater = "-" * (shutil.get_terminal_size().columns - 1)

    print("[음식점을 추천해봅시다]")
    print("input user number")
    # Example user number: 75794
    target_user = int(input())

    print(f"{separater}\n")
    UB_DF(data, target_user)
    print(f"{separater}\n")
    print('end')
def main():
    """Print the score ranking taken from the first element of ``sort_stores_by_score``'s result."""
    data = load_dataframes()
    separater = "-" * (shutil.get_terminal_size().columns - 1)

    # Only element 0 of the helper's result is iterated here.
    ranking = sort_stores_by_score(data)[0]
    line_template = "{rank}위: {store}({score}점)"

    print("[최고 평점 음식점]")
    print(f"{separater}\n")
    for label, row in ranking.iterrows():
        # Displayed rank is the row's index label plus one.
        print(line_template.format(rank=label + 1, store=row.store_name, score=row.score))
    print(f"\n{separater}\n\n")
def main(user_id=7, top_n=10):
    """Print the leading rows of both results of ``user_store_recommand`` for one user.

    Args:
        user_id: User passed to ``user_store_recommand``. Defaults to 7, the
            value that used to be hard-coded.
        top_n: Number of leading rows printed from each result. Defaults to
            10, the value that used to be hard-coded.
    """
    data = load_dataframes()

    # The helper returns two objects, bound here as the stores the user has
    # already rated and the predictions for that user.
    already_rated, predictions = user_store_recommand(data, user_id)
    print(already_rated.head(top_n))
    print(predictions.head(top_n))
def main():
    """Print three rankings: best-scored stores, most-reviewed stores, most active users.

    Each section prints a rule, a title, one line per row and a closing rule.
    Scores are rounded to two decimals; ranks are the row's index label plus one.
    """
    data = load_dataframes()
    separater = "-" * (shutil.get_terminal_size().columns - 1)

    # Best-scored stores.
    best_scored = sort_stores_by_score(data)
    score_line = "{rank}위: {store} ({score}점)"
    print(f"{separater}\n")
    print("[최고 평점 음식점]\n")
    for label, row in best_scored.iterrows():
        print(score_line.format(rank=label + 1, store=row.store_name, score=round(row.score, 2)))
    print(f"\n{separater}\n\n")

    # Stores with the most reviews.
    most_reviewed = get_most_reviewed_stores(data)
    review_line = "{rank}위: {store} ({count} reviews)"
    print(f"{separater}\n")
    print("[많은 리뷰 음식점]\n")
    for label, row in most_reviewed.iterrows():
        print(review_line.format(rank=label + 1, store=row.store_name, count=row.counts))
    print(f"\n{separater}\n\n")

    # Users who wrote the most reviews, with gender and age.
    most_active = get_most_active_users(data)
    user_line = "{rank}위: id: {user} gender: {gender} age: {age} ({reviews} reviews)"
    print(f"{separater}\n")
    print("[가장 많은 리뷰를 남겨주신 유저]\n")
    for label, row in most_active.iterrows():
        print(
            user_line.format(
                rank=label + 1,
                user=row.user,
                gender=row.gender,
                age=row.age,
                reviews=row.reviews,
            )
        )
    print(f"\n{separater}\n\n")
def main(keyword='초밥', district="서울특별시 용산구", top_n=3):
    """Recommend menus matching a keyword and the best-scored stores serving the top one.

    Steps:
        1. Count every menu name that contains ``keyword`` and print the
           ``top_n`` most frequent ones.
        2. Collect the stores whose menu name contains the most frequent menu
           and whose address contains ``district``.
        3. Print the ``top_n`` of those stores by score.
        4. Print a ``{store_name: {'latitude': ..., 'longitude': ...}}``
           mapping for the printed stores, taken from each store's first row.

    Changes from the previous version: an empty keyword match no longer
    raises ``IndexError``; the final print shows the whole location mapping
    instead of one fixed store name (which raised ``KeyError`` whenever that
    store was not among the results); and columns are walked together by
    position, where positional counters were previously used as index labels.

    Args:
        keyword: Substring a menu name must contain. Defaults to the
            previously hard-coded value.
        district: Substring a store address must contain. Defaults to the
            previously hard-coded value.
        top_n: How many menus and how many stores to keep. Defaults to 3.
    """
    data = load_dataframes()

    # 1. Frequency of each menu name containing the keyword.
    menu_counts = {}
    for menu_name in data["menus"]["menu_name"]:
        if keyword in menu_name:
            menu_counts[menu_name] = menu_counts.get(menu_name, 0) + 1
    top_menus = sorted(
        menu_counts.items(), key=lambda item: item[1], reverse=True)[:top_n]
    print("** 추천 레시피 **")
    print(top_menus)

    # 2. Stores in the district that serve the most frequent menu.
    stores_menus = stores_by_menus(data)
    candidate_stores = set()
    if top_menus:  # nothing matched the keyword -> no stores to look for
        best_menu = top_menus[0][0]
        rows = zip(
            stores_menus["menu_name"],
            stores_menus["store_name"],
            stores_menus["address"],
        )
        for menu_name, store_name, address in rows:
            # The address is only inspected for stores not collected yet.
            if (best_menu in menu_name
                    and store_name not in candidate_stores
                    and district in address):
                candidate_stores.add(store_name)

    # 3. Score of every candidate store; a later row for the same store wins.
    stores_score = sort_stores_by_score(data)
    store_scores = {}
    if candidate_stores:
        for store_name, score in zip(stores_score["store_name"], stores_score["score"]):
            if store_name in candidate_stores:
                store_scores[store_name] = score
    top_stores = sorted(
        store_scores.items(), key=lambda item: item[1], reverse=True)[:top_n]
    print("** 추천 음식점 **")
    print(top_stores)
    print("-----------------------")

    # 4. Coordinates from the first row of each recommended store.
    wanted = {store_name for store_name, _ in top_stores}
    first_seen = {}
    if wanted:
        rows = zip(
            stores_menus["store_name"],
            stores_menus["latitude"],
            stores_menus["longitude"],
        )
        for store_name, latitude, longitude in rows:
            if store_name in wanted and store_name not in first_seen:
                first_seen[store_name] = {'latitude': latitude, 'longitude': longitude}
                if len(first_seen) == len(wanted):
                    break

    # Keep the mapping in ranking order.
    loc_dict = {
        store_name: first_seen[store_name]
        for store_name, _ in top_stores
        if store_name in first_seen
    }
    print(loc_dict)
from sklearn.feature_extraction.text import TfidfVectorizer
from konlpy.tag import Okt
from konlpy.utils import pprint
from parse import load_dataframes
import pandas as pd
from pandas import Series, DataFrame
import scipy.sparse
import sqlite3

# Script fragment: gathers the 'menu' value of the first 100000 rows of the
# "stores" frame and starts running a noun extractor over them.
# NOTE(review): the fragment appears to be cut off inside the final loop --
# `nouns`, `doc_nouns` and `doc_nouns_list` are never used in the visible part.
mydoclist=[]
data = load_dataframes()
stores_reviews = data["stores"].head(100000)
# Disabled: dropping the rows whose "menu" is an empty string.
# indexD = stores_reviews[stores_reviews["menu"]==""].index
# droped = stores_reviews.drop(indexD)
# One document per row: the row's 'menu' value.
for i, Each_row in stores_reviews.iterrows():
    mydoclist.append(Each_row['menu'])

okt = Okt()
doc_nouns_list = []
count=0
for doc in mydoclist:
    count += 1
    # The counter is incremented before the check, so at most 49999 documents
    # are processed: the 50000th one stops the loop.
    if count==50000:
        break
    # presumably the list of nouns found in the document -- confirm against KoNLPy
    nouns = okt.nouns(doc)
    # NOTE(review): assumed to belong to the loop body; the collapsed source
    # does not show the nesting, and the visible fragment ends here.
    doc_nouns = ''
def main():
    """Apply the plotting configuration and load the data frames; nothing is drawn or returned."""
    set_config()
    frames = load_dataframes()
def main():
    """Apply the plotting configuration, load the data and draw the store-categories graph."""
    set_config()
    frames = load_dataframes()
    show_store_categories_graph(frames)
def main():
    """Run an SVD-based recommendation, then print three rankings.

    The first 10000 review rows are pivoted into a user x store score matrix,
    centred per user, factorised with a rank-12 truncated SVD and
    reconstructed into predicted scores, which are handed to
    ``recommend_movies``. Afterwards the top-scored stores, the most-reviewed
    stores and the most active reviewers are printed.

    ``DataFrame.as_matrix()`` was removed in pandas 1.0, so the matrix is
    taken from ``DataFrame.values`` instead.
    """
    warnings.filterwarnings(action='ignore')
    data = load_dataframes()

    term_w = shutil.get_terminal_size()[0] - 1
    separater = "-" * term_w

    stores_most_scored = sort_stores_by_score(data)
    stores_mostCount_review = get_most_reviewed_stores(data)
    stores_mostCount_user = get_most_active_users(data)

    user_reviews = data["reviews"].head(10000)
    # NOTE(review): user_stores is never read below, while recommend_movies
    # receives user_reviews twice -- confirm whether one of those arguments
    # was meant to be user_stores.
    user_stores = data["stores"].head(10000)

    # user x store table of scores; missing user/store pairs become 0.
    df_user_store_ratings = user_reviews.pivot(
        index='user', columns='store', values='score').fillna(0)

    # Plain NumPy array of the pivot values.
    matrix = df_user_store_ratings.values
    # Per-user mean over every store column (the filled-in zeros included).
    user_ratings_mean = np.mean(matrix, axis=1)
    # Scores with each user's own mean subtracted.
    matrix_user_mean = matrix - user_ratings_mean.reshape(-1, 1)

    # Rank-12 truncated SVD, then reconstruct and add the user means back.
    U, sigma, Vt = svds(matrix_user_mean, k=12)
    sigma = np.diag(sigma)
    svd_user_predicted_ratings = (
        np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
    )
    df_svd_preds = pd.DataFrame(
        svd_user_predicted_ratings, columns=df_user_store_ratings.columns)

    recommend_movies(df_svd_preds, 3019, user_reviews, user_reviews, 100)

    print("[최고 평점 음식점]")
    print(f"{separater}\n")
    for i, store in stores_most_scored.iterrows():
        print("{rank}위: {store}({score}점)".format(
            rank=i + 1, store=store.store_name, score=store.score))
    print(f"\n{separater}\n\n")

    print("[최다 리뷰 음식점]")
    print(f"{separater}\n")
    for i, store in stores_mostCount_review.iterrows():
        print("{rank}위: {store}({counts}개)".format(
            rank=i + 1, store=store.store_name, counts=store.rcount))
    print(f"\n{separater}\n\n")

    print("[최다 리뷰 작성자]")
    print(f"{separater}\n")
    for i, review in stores_mostCount_user.iterrows():
        print("{rank}위: {review}({counts}개)".format(
            rank=i + 1, review=review.user, counts=review.rcount))
    print(f"\n{separater}\n\n")
def main():
    """Silence warnings, load the data and call ``show_matrix`` then ``show_matrix2`` on it."""
    warnings.filterwarnings(action='ignore')
    frames = load_dataframes()
    show_matrix(frames)
    show_matrix2(frames)
def main():
    """Configure plotting, load the data and draw the store review-distribution graph.

    The store-categories graph is switched off in this entry point.
    """
    set_config()
    frames = load_dataframes()
    show_store_review_distribution_graph(frames)
def main():
    """Load the data frames and draw the stores-distribution graph."""
    frames = load_dataframes()
    show_stores_distribution_graph(frames)