def _sql_in_list(values):
    """Render ``values`` as a SQL ``IN`` list, e.g. ``[1, 2]`` -> ``'(1, 2)'``.

    For two or more values the result equals ``str(tuple(values))``.  For a
    single value it omits the trailing comma Python prints for one-element
    tuples (``(5,)``), which is not valid SQL.  ``values`` must not be empty,
    because ``IN ()`` is not valid SQL either; callers check that first.
    """
    return '(' + ', '.join(repr(value) for value in values) + ')'


def _first_column(rows):
    """Return the distinct first-column values of ``rows`` as a list."""
    return list({row[0] for row in rows})


def main():
    """Print recommended video URLs based on recently watched videos.

    Videos found in HISTORY within the last 10 days are compared with every
    video in ROUTINE and EXERCISE via ``dataProcessor.jaccard_similarity``;
    the URLs of the top 15 candidates are passed through
    ``YT.youtube_rating_sort`` and printed one per line.

    When there is no recent history, or the similarity step leaves no
    candidate, up to 15 randomly chosen URLs are used instead.
    """
    # Setting timegap days = 1~14
    timegap = timedelta(days=10)
    cutoff = datetime.now().date() - timegap

    MyDB.execute('select video_num from HISTORY where time >= \'%s\'' % cutoff)
    recent_video_list = _first_column(MyDB.fetchall())

    MyDB.execute(
        'select video_num from ROUTINE UNION select video_num from EXERCISE')
    all_video_list = _first_column(MyDB.fetchall())

    top_video_list = []
    if recent_video_list:
        video_similarity = dataProcessor.jaccard_similarity(
            recent_video_list, all_video_list)
        # Materialise as a list so the emptiness check below is unambiguous
        # whatever sequence type v2v_top_k_video returns.
        top_video_list = list(
            dataProcessor.v2v_top_k_video(video_similarity, 15))

    if top_video_list:
        in_list = _sql_in_list(top_video_list)
        MyDB.execute(
            'select url from ROUTINE where video_num in %s UNION '
            'select url from EXERCISE where video_num in %s'
            % (in_list, in_list))
        video_url_list = _first_column(MyDB.fetchall())
        recommend_video_list = YT.youtube_rating_sort(video_url_list)
    else:
        # Either nothing was watched recently, or (as the original warning
        # noted) recent_video_list == all_video_list and v2v_top_k_video
        # discarded every candidate.  Querying with an empty IN list would be
        # a SQL syntax error, so fall back to a random selection.
        MyDB.execute('select url from ROUTINE UNION select url from EXERCISE')
        all_video_url_list = _first_column(MyDB.fetchall())
        selected_video_url = sorted(all_video_url_list,
                                    key=lambda k: random.random())[:15]
        recommend_video_list = YT.youtube_rating_sort(selected_video_url)

    for url in recommend_video_list:
        print(url)
def _sql_in_list(values):
    """Render ``values`` as a SQL ``IN`` list, e.g. ``[1, 2]`` -> ``'(1, 2)'``.

    For two or more values the result equals ``str(tuple(values))``.  For a
    single value it omits the trailing comma Python prints for one-element
    tuples (``(5,)``), which is not valid SQL.  ``values`` must not be empty,
    because ``IN ()`` is not valid SQL either; callers check that first.
    """
    return '(' + ', '.join(repr(value) for value in values) + ')'


def _first_column(rows):
    """Return the distinct first-column values of ``rows`` as a list."""
    return list({row[0] for row in rows})


def _length_bounds(length):
    """Map a requested length (5, 10, 15, 20, 40, 60) to its SQL range.

    Returns the inclusive ``(low, high)`` pair used in the ``between``
    clause.  Any other value means the user did not select a length and
    yields the widest range.
    """
    bounds = {
        5: (0, 6),
        10: (7, 12),
        15: (13, 17),
        20: (18, 29),
        40: (30, 49),
        60: (50, 240),
    }
    return bounds.get(length, (0, 240))


def _exercise_urls(video_in_list):
    """Return the distinct EXERCISE URLs for an already rendered IN list."""
    MyDB.execute('select url from EXERCISE where video_num in ' +
                 video_in_list)
    return _first_column(MyDB.fetchall())


def _random_subset(items, limit=15):
    """Return up to ``limit`` items of ``items`` in random order."""
    return sorted(items, key=lambda k: random.random())[:limit]


def _videos_by_viewers(crosstab):
    """Return the crosstab's columns ordered by number of viewers, most first.

    ``crosstab`` is a user_num x video_num frequency table.  Counts are
    non-negative, so clipping at 1 turns them into "watched at least once"
    flags before summing per video.
    """
    watched = crosstab.clip(upper=1)
    return list(watched.sum().sort_values(ascending=False).index)


def main(length, bodypart, excer_type, trainer, equipment, timing, level):
    """Print up to 15 EXERCISE video URLs matching the given filters.

    Args:
        length: requested length; anything ``int()`` accepts.  One of
            5, 10, 15, 20, 40, 60 selects a range, any other value selects
            every length.
        bodypart, excer_type, trainer, equipment, timing: filter values.
            The value ``'dc'`` skips the filter; otherwise rows holding
            either the given value or ``'dc'`` match.
        level: exact level to match, or ``'dc'`` to skip the filter.

    With fewer than 15 matching videos, all of them are printed in the order
    returned by ``YT.youtube_rating_sort``.  Otherwise videos are ranked by
    how many users in HISTORY watched them, topped up with randomly chosen
    matches when the history covers fewer than 15 videos.  Nothing is printed
    when no video matches.
    """
    front_length, end_length = _length_bounds(int(length))

    # NOTE(review): the filter values are spliced into the SQL text as-is;
    # if they can come from untrusted input this is injectable.  Switch to
    # bound parameters once MyDB's parameter API is confirmed.
    conditions = [
        'select video_num from EXERCISE where length between %s and %s'
        % (front_length, end_length)
    ]
    in_filters = (
        ('bodypart', bodypart),
        ('excer_type', excer_type),
        ('trainer', trainer),
        ('equipment', equipment),
        ('timing', timing),
    )
    for column, value in in_filters:
        if value != 'dc':
            conditions.append(
                '%s in %s' % (column, _sql_in_list([value, 'dc'])))
    if level != 'dc':
        conditions.append('level = \'' + level + '\'')
    sql = ' and '.join(conditions)

    MyDB.execute(sql)
    exercise_vid_num_list = _first_column(MyDB.fetchall())
    if not exercise_vid_num_list:
        # No video matches; every follow-up query would need an empty IN list.
        return
    vid_in_list = _sql_in_list(exercise_vid_num_list)

    recommend_list = []
    if len(exercise_vid_num_list) < 15:
        recommend_list += YT.youtube_rating_sort(_exercise_urls(vid_in_list))
    else:
        # number of video list >= 15
        MyDB.dic_execute('select * from HISTORY where video_num in ' +
                         vid_in_list)
        history_db = MyDB.dic_fetchall()
        if not history_db:
            # An empty history cannot be turned into a dataframe.
            selected_video_url = _random_subset(_exercise_urls(vid_in_list))
            recommend_list += YT.youtube_rating_sort(selected_video_url)
        else:
            history_db_df = pd.DataFrame(history_db)
            default_crosstab = pd.crosstab(history_db_df.user_num,
                                           history_db_df.video_num)
            for video_num in _videos_by_viewers(default_crosstab):
                MyDB.execute('select url from EXERCISE where video_num = ' +
                             str(video_num))
                row = MyDB.fetchone()
                if row:
                    recommend_list.append(row[0])
            if len(recommend_list) < 15:
                # Top up with random matches that are not already listed.
                selected_video_url = _random_subset(
                    _exercise_urls(vid_in_list))
                temp_yt_url = YT.youtube_rating_sort(selected_video_url)
                recommend_list += [
                    url for url in temp_yt_url if url not in recommend_list
                ]

    for url in recommend_list[:15]:
        print(url)
def _sql_in_list(values):
    """Render ``values`` as a SQL ``IN`` list, e.g. ``[1, 2]`` -> ``'(1, 2)'``.

    For two or more values the result equals ``str(tuple(values))``.  For a
    single value it omits the trailing comma Python prints for one-element
    tuples (``(5,)``), which is not valid SQL.  ``values`` must not be empty,
    because ``IN ()`` is not valid SQL either; callers check that first.
    """
    return '(' + ', '.join(repr(value) for value in values) + ')'


def _first_column(rows):
    """Return the distinct first-column values of ``rows`` as a list."""
    return list({row[0] for row in rows})


def _length_bounds(length):
    """Map a requested length (5, 10, 15, 20, 40, 60) to its SQL range.

    Returns the inclusive ``(low, high)`` pair used in the ``between``
    clause.  Any other value yields the widest range.
    """
    bounds = {
        5: (0, 6),
        10: (7, 12),
        15: (13, 17),
        20: (18, 29),
        40: (30, 49),
        60: (50, 240),
    }
    return bounds.get(length, (0, 240))


def _routine_urls(video_nums):
    """Return the distinct ROUTINE URLs of the (non-empty) ``video_nums``."""
    MyDB.execute('select url from ROUTINE where video_num in ' +
                 _sql_in_list(video_nums))
    return _first_column(MyDB.fetchall())


def _svd_recommend(crosstab, active_user_data, user_num):
    """Run ``dataProcessor.SVD_recommend`` on ``crosstab`` for ``user_num``.

    K (the decomposition size) is two thirds of the number of crosstab rows;
    N is fixed at 10.
    """
    k = int((crosstab.shape[0] / 3) * 2)
    weighted_crosstab = dataProcessor.add_weight(crosstab, active_user_data)
    return dataProcessor.SVD_recommend(crosstab, weighted_crosstab, user_num,
                                       K=k, N=10)


def main(user_num, length, bodypart, timing, level):
    """Print ROUTINE recommendations for ``user_num`` under the given filters.

    Args:
        user_num: identifier handed to ``dataProcessor.get_active_user_data``
            and ``dataProcessor.SVD_recommend``.
        length: requested length; anything ``int()`` accepts.  One of
            5, 10, 15, 20, 40, 60 selects a range, any other value selects
            every length.
        bodypart, timing: filter values.  ``'dc'`` skips the filter;
            otherwise rows holding either the given value or ``'dc'`` match.
        level: exact level to match, or ``'dc'`` to skip the filter.

    With fewer than 15 matching routines, the routines most similar to the
    user's own history are printed (all matches when the user has no history
    or the similarity step keeps nothing).  Otherwise the result of the SVD
    recommender over HISTORY is printed, or a random rating-sorted selection
    when HISTORY has no rows for the matching routines.  Nothing is printed
    when no routine matches.
    """
    front_length, end_length = _length_bounds(int(length))

    # NOTE(review): the filter values are spliced into the SQL text as-is;
    # if they can come from untrusted input this is injectable.  Switch to
    # bound parameters once MyDB's parameter API is confirmed.
    conditions = [
        'select video_num from ROUTINE where length between %s and %s'
        % (front_length, end_length)
    ]
    for column, value in (('bodypart', bodypart), ('timing', timing)):
        if value != 'dc':
            conditions.append(
                '%s in %s' % (column, _sql_in_list([value, 'dc'])))
    if level != 'dc':
        conditions.append('level = \'' + level + '\'')
    sql = ' and '.join(conditions)

    active_user_data = dataProcessor.get_active_user_data(user_num)

    MyDB.execute(sql)
    routine_vid_num_list = _first_column(MyDB.fetchall())
    if not routine_vid_num_list:
        # No routine matches; every follow-up query would need an empty
        # IN list.
        return

    recommend_list = []
    if len(routine_vid_num_list) < 15:
        ac_user_num = active_user_data.iloc[0]['user_num']
        MyDB.execute('select video_num from HISTORY where user_num = ' +
                     str(ac_user_num))
        ac_users_vid_list = _first_column(MyDB.fetchall())

        top_video_list = []
        if ac_users_vid_list:
            # The active user has history: rank routines by similarity to it.
            video_similarity = dataProcessor.jaccard_similarity(
                ac_users_vid_list, routine_vid_num_list)
            top_video_list = list(
                dataProcessor.v2v_top_k_video(video_similarity, 15))

        # Without history, or when the similarity step keeps nothing, use
        # every matching routine instead of querying with an empty IN list.
        chosen_videos = top_video_list or routine_vid_num_list
        recommend_list += YT.youtube_rating_sort(_routine_urls(chosen_videos))
    else:
        MyDB.dic_execute('select * from HISTORY where video_num in ' +
                         _sql_in_list(routine_vid_num_list))
        history_db = MyDB.dic_fetchall()
        if not history_db:
            # An empty history cannot be turned into a dataframe.
            selected_video_url = sorted(
                _routine_urls(routine_vid_num_list),
                key=lambda k: random.random())[:15]
            recommend_list += YT.youtube_rating_sort(selected_video_url)
        else:
            history_db_df = pd.DataFrame(history_db)
            default_crosstab = pd.crosstab(history_db_df.user_num,
                                           history_db_df.video_num)
            # Best case: every matching routine already has a column.
            if len(routine_vid_num_list) != len(
                    default_crosstab.columns.values):
                # Add an all-zero column for each routine nobody played yet.
                not_played = [
                    video_num for video_num in routine_vid_num_list
                    if video_num not in default_crosstab.columns.values
                ]
                default_crosstab = default_crosstab.reindex(
                    columns=list(default_crosstab.columns) + not_played,
                    fill_value=0)
                default_crosstab = default_crosstab.sort_index(axis=1)
                # NOTE(review): this adds an all-zero row when user_num is
                # ALREADY an index label, which duplicates that label.  The
                # test looks inverted (``not in`` expected) - confirm against
                # what SVD_recommend needs before changing it.
                if user_num in default_crosstab.index.values:
                    # DataFrame.append was removed in pandas 2.0; pd.concat
                    # is the equivalent on every pandas version.
                    default_crosstab = pd.concat([
                        default_crosstab,
                        pd.DataFrame(0, index=[user_num],
                                     columns=default_crosstab.columns),
                    ])
                    default_crosstab = default_crosstab.sort_index(axis=0)
            recommend_list += _svd_recommend(default_crosstab,
                                             active_user_data, user_num)

    for url in recommend_list:
        print(url)
def _fitness_level(sex, health_point):
    """Return 'h', 'm' or 'l' for a health point score.

    Users whose sex is ``'f'`` use the thresholds 40 / 33; everyone else
    uses 50 / 44.
    """
    high, middle = (40, 33) if sex == 'f' else (50, 44)
    if health_point >= high:
        return 'h'
    if health_point >= middle:
        return 'm'
    return 'l'


def _random_same_category_urls(bodyparts, sexes, levels):
    """Return up to 15 random same-category URLs, rating-sorted.

    Queries ROUTINE and then EXERCISE for rows whose bodypart, sex and level
    are in the given two-element lists, removes duplicate URLs, picks up to
    15 at random and passes them through ``YT.youtube_rating_sort``.
    """
    query = ('select url from %s where bodypart in %s and sex in %s '
             'and level in %s')
    # Each list always holds two items, so str(tuple(...)) is valid SQL here.
    params = (str(tuple(bodyparts)), str(tuple(sexes)), str(tuple(levels)))
    rows = []
    for table in ('ROUTINE', 'EXERCISE'):
        MyDB.execute(query % ((table,) + params))
        # list() so the two results combine whatever sequence type the
        # cursor returns.
        rows += list(MyDB.fetchall())
    urls = list({row[0] for row in rows})
    selected_video_url = sorted(urls, key=lambda k: random.random())[:15]
    return YT.youtube_rating_sort(selected_video_url)


def _popular_urls(crosstab):
    """Return the URLs of the crosstab's videos, most-watched first.

    ``crosstab`` holds 0/1 watched flags (user_num x video_num), so a column
    sum is the number of users who watched that video.  Videos found in
    neither ROUTINE nor EXERCISE are skipped.
    """
    urls = []
    for video_num in crosstab.sum().sort_values(ascending=False).index:
        MyDB.execute(
            'select url from ROUTINE where video_num = %s UNION '
            'select url from EXERCISE where video_num = %s'
            % (str(video_num), str(video_num)))
        row = MyDB.fetchone()
        if row:
            urls.append(row[0])
    return urls


def _svd_recommend(crosstab, active_user_data, user_num):
    """Run ``dataProcessor.SVD_recommend`` on ``crosstab`` for ``user_num``.

    ``add_weight`` takes the crosstab plus the active user's data and
    returns the weighted crosstab.  K (the decomposition matrix size) is two
    thirds of the number of crosstab rows - adjust it to tune the
    decomposition; N (videos returned) is fixed at 10.
    """
    k = int((crosstab.shape[0] / 3) * 2)
    weighted_crosstab = dataProcessor.add_weight(crosstab, active_user_data)
    return dataProcessor.SVD_recommend(crosstab, weighted_crosstab, user_num,
                                       K=k, N=10)


def main(user_num):
    """Print up to 10 recommended video URLs for ``user_num``.

    The active user's sex, bodypart, health point and label are read from
    ``dataProcessor.get_active_user_data``.  Then:

    * HISTORY empty: random same-category videos.
    * The user has no HISTORY rows (case 1): with fewer than 15 watched
      videos overall, the most-watched videos plus random same-category
      ones; otherwise the SVD recommender on the crosstab extended with an
      all-zero row for the user.
    * The user has HISTORY rows (case 2): with fewer than 15 watched videos,
      the videos most watched by users sharing the user's label plus random
      same-category ones; otherwise the SVD recommender.

    Duplicates are removed, keeping first occurrences, before printing.
    """
    recommend_video_list = []

    active_user_data = dataProcessor.get_active_user_data(user_num)
    profile = active_user_data.iloc[0]
    ac_user_label = profile['label']

    # user_data never holds 'dc', so add it to match the 'dc' rows as well.
    ac_user_bodypart = [profile['bodypart'], 'dc']
    ac_user_sex = [profile['sex'], 'dc']
    ac_user_level = [
        _fitness_level(profile['sex'], profile['health_point']), 'dc'
    ]

    MyDB.dic_execute('select * from HISTORY')
    history_db = MyDB.dic_fetchall()

    if not history_db:
        recommend_video_list += _random_same_category_urls(
            ac_user_bodypart, ac_user_sex, ac_user_level)
    else:
        history_db_df = pd.DataFrame(history_db)
        # Counts are non-negative, so clipping at 1 converts every nonzero
        # value to 1.
        default_crosstab = pd.crosstab(history_db_df.user_num,
                                       history_db_df.video_num).clip(upper=1)

        user_has_history = (default_crosstab.index == int(user_num)).any()
        if not user_has_history:
            # case 1 : default_crosstab has no row for the active user
            # (to recommend fewer than 15, change the value 15)
            if len(default_crosstab.columns) < 15:
                recommend_video_list += _popular_urls(default_crosstab)
                # Append more random URLs (change the size in the helper).
                recommend_video_list += _random_same_category_urls(
                    ac_user_bodypart, ac_user_sex, ac_user_level)
            else:
                # Add the active user's row (all zero) so the crosstab can
                # be weighted.  DataFrame.append was removed in pandas 2.0;
                # pd.concat is the equivalent on every pandas version.
                default_crosstab = pd.concat([
                    default_crosstab,
                    pd.DataFrame(0, index=[user_num],
                                 columns=default_crosstab.columns),
                ])
                recommend_video_list += _svd_recommend(
                    default_crosstab, active_user_data, user_num)
        else:
            # case 2 : the active user has already watched some video.
            MyDB.execute('select user_num from USER where label = ' +
                         str(ac_user_label))
            same_user_group = {row[0] for row in MyDB.fetchall()}
            other_user_group = [
                user for user in default_crosstab.index
                if user not in same_user_group
            ]
            reduced_size_crosstab = default_crosstab.drop(
                index=other_user_group)
            if len(reduced_size_crosstab.columns) < 15:
                recommend_video_list += _popular_urls(reduced_size_crosstab)
                # Append more random URLs (change the size in the helper).
                recommend_video_list += _random_same_category_urls(
                    ac_user_bodypart, ac_user_sex, ac_user_level)
            else:
                recommend_video_list += _svd_recommend(
                    default_crosstab, active_user_data, user_num)

    recommend_video_list = list(OrderedDict.fromkeys(recommend_video_list))
    for url in recommend_video_list[:10]:
        print(url)