# NOTE(review): this assignment is the last statement of a loop whose header
# lies outside the visible source; it stores index i for user u.
u2i[u] = i

print('song to users on %s' % f_triplets_tr)
# creates dict with (song, set of users who have listened to this song)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

print("converting users to indexes")
# converts the userIDs in s2u_tr to their index, one song at a time.
# A bound dict lookup fed to map() is used instead of a comprehension so
# that no nested scope captures u2i before it is deleted below.
for song in s2u_tr:
    s2u_tr[song] = set(map(u2i.__getitem__, s2u_tr[song]))
del u2i  # the id -> index table is not needed past this point

print('user to songs on %s' % f_triplets_tev)
# creates dict (user, songs which he has listened to) based on the
# evaluation set
u2s_v = MSD_util.user_to_songs(f_triplets_tev)

print('Creating predictor..')
_A, _Q = 0.15, 3
### calibrated
### pr=MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt")
### uncalibrated song-based predictor
pr = MSD_rec.PredSI(s2u_tr, _A, _Q)

print('Creating recommender..')
# the input songs to the recommender is from the train_triplets.
cp = MSD_rec.SReco(songs_ordered)
cp.Add(pr)
# the prob. on how to choose different predictors, here we only have one
# predictor so it's just [1.0]
cp.Gamma = [1.0]
# Build the song -> users mapping from the file named by f_triplets_tr.
# Single-argument print(...) emits the same text as the statement form on
# Python 2 and is also valid Python 3.
print(' song to users on %s ' % f_triplets_tr)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Replace every user in each song's collection by its u2i index.
# map() over the bound dict lookup avoids a nested comprehension scope, so
# the `del u2i` below stays legal on Python 2 even inside a function.
print(' converting users to indexes')
for song in s2u_tr:
    s2u_tr[song] = set(map(u2i.__getitem__, s2u_tr[song]))
del u2i  # index table no longer needed

# User -> songs mapping from the file named by f_triplets_tev.
print('user to songs on %s ' % f_triplets_tev)
u2s_v = MSD_util.user_to_songs(f_triplets_tev)

# Predictor built from the index-converted mapping and the two parameters.
print('creating predictor...')
_A, _Q = 0.15, 3
pr = MSD_rec.PredSI(s2u_tr, _A, _Q)

# Recommender over songs_ordered with the single predictor attached.
print('creating recommender..')
cp = MSD_rec.SReco(songs_ordered)
cp.Add(pr)
cp.Gamma = [1.0]  # one entry for the one predictor added above
# NOTE(review): this assignment is the last statement of a loop whose header
# lies outside the visible source; it stores index i for user u.
u2i[u] = i

# Build the song -> users mapping from the file named by f_triplets_tr.
print('song to users on %s' % f_triplets_tr)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Replace every user in each song's collection by its u2i index.
# map() over the bound dict lookup avoids a nested comprehension scope, so
# the `del u2i` below stays legal on Python 2 even inside a function.
print("converting users to indexes")
for song in s2u_tr:
    s2u_tr[song] = set(map(u2i.__getitem__, s2u_tr[song]))
del u2i  # index table no longer needed

# Two user -> songs mappings, one per triplets file.
print('user to songs on %s' % f_triplets_tev)
u2s_v = MSD_util.user_to_songs(f_triplets_tev)
u2s_h = MSD_util.user_to_songs(f_triplets_teh)

print('Creating predictor..')
_A, _Q = 0.15, 3
### calibrated
### pr=MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt")
### uncalibrated
pr = MSD_rec_shefali.PredSI(s2u_tr, _A, _Q)
# with only this, 0.0 for 10 users in contrast to 0.33 in SI
pr2 = MSD_rec_shefali.PredSU(s2u_tr, 0.3, 5)
# paths to data
f_triplets_tr = "../data/train_data.txt"
f_triplets_vv = "../data/valid_visible.txt"
f_triplets_vp = "../data/valid_predict.txt"

# parameters
# _tau is both the number of songs recommended per user and the cutoff
# handed to MSD_rec.mAP below.
_tau = 500


def _most_popular_unheard(ranked_songs, heard, limit):
    """Return the first `limit` songs of `ranked_songs` that are not in `heard`.

    The relative order of `ranked_songs` is preserved. Fewer than `limit`
    songs are returned only when `ranked_songs` runs out.
    """
    picks = []
    if limit <= 0:
        return picks
    for song in ranked_songs:
        if song in heard:
            continue
        picks.append(song)
        if len(picks) >= limit:
            break
    return picks


# Single-argument print(...) emits the same text as the statement form on
# Python 2 and is also valid Python 3.
print('default ordering by popularity')
sys.stdout.flush()
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr, binary=False))

print('user to songs on %s' % f_triplets_vv)
u2s_vv = MSD_util.user_to_songs(f_triplets_vv)
print('user to songs on %s' % f_triplets_vp)
u2s_vp = MSD_util.user_to_songs(f_triplets_vp)

# recommend top N most popular songs (extremely unpersonalized :|)
# The previous version took set(top 500) - heard and turned that set back
# into a list, which discarded the popularity order, and then padded with
# the next songs without checking whether the user already had them.
# Walking songs_ordered directly keeps the ranking and filters every pick.
# all_recs is built in the iteration order of u2s_vv, which is the order
# u2s_vv.keys() yields below.
all_recs = [
    _most_popular_unheard(songs_ordered, u2s_vv[u], _tau) for u in u2s_vv
]

map_all = MSD_rec.mAP(u2s_vv.keys(), all_recs, u2s_vp, _tau)
# Emits a blank line, like a bare Python 2 `print`.
# NOTE(review): the visible source ends on a bare `print`; a message
# (possibly reporting map_all) may have been cut off - confirm.
print('')
def main(argv):
    """Recommend songs to a slice of the Kaggle users and save the result.

    Args:
        argv: Sequence of arguments where ``argv[0]`` is ``user_min``,
            ``argv[1]`` is ``user_max`` and ``argv[2]`` is the output file
            name. With fewer than three items a usage hint is printed and
            the defaults ``1``, ``110000`` and ``"resultfull.txt"`` are used.

    Raises:
        ValueError: If ``user_min`` or ``user_max`` cannot be converted by
            ``int()``.

    ``user_min`` and ``user_max`` are used as slice bounds into the list of
    users loaded from ``kaggle_users.txt``.
    """
    if len(argv) < 3:
        # Deliberately fall back to the full default range instead of
        # exiting, so the script can be run without arguments.
        print(
            "Need more arguments, Example:MSD_subm_rec.py user_min user_max resultFile.txt"
        )
        user_min, user_max, osfile = 1, 110000, "resultfull.txt"
    else:
        user_min, user_max, osfile = argv[0], argv[1], argv[2]
    user_min = int(user_min)
    user_max = int(user_max)
    print("user_min: %d , user_max: %d" % (user_min, user_max))
    sys.stdout.flush()

    # TRIPLETS
    f_triplets_tr = "train_triplets.txt"
    f_triplets_tev = "kaggle_visible_evaluation_triplets.txt"

    print('loading users in %s' % "kaggle_users.txt")
    sys.stdout.flush()
    users_v = list(MSD_util.load_users("kaggle_users.txt"))

    print('default ordering by popularity')
    sys.stdout.flush()
    songs_ordered = MSD_util.sort_dict_dec(
        MSD_util.song_to_count(f_triplets_tr))

    print("loading unique users indexes")
    uu = MSD_util.unique_users(f_triplets_tr)
    # user -> position in uu; a repeated user keeps its last position,
    # exactly as the explicit assignment loop did.
    u2i = {u: i for i, u in enumerate(uu)}

    print('song to users on %s' % f_triplets_tr)
    s2u_tr = MSD_util.song_to_users(f_triplets_tr)

    print("converting users to indexes")
    # Entries are replaced one song at a time so the old and new collections
    # never coexist for the whole mapping. map() over the bound lookup keeps
    # u2i out of any nested scope, so `del u2i` is also valid on Python 2.
    for song in s2u_tr:
        s2u_tr[song] = set(map(u2i.__getitem__, s2u_tr[song]))
    del u2i  # release the index table before building the predictor

    print('user to songs on %s' % f_triplets_tev)
    u2s_v = MSD_util.user_to_songs(f_triplets_tev)

    print('Creating predictor..')
    _A = 0.15
    _Q = 3
    ### calibrated
    ### pr=MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt")
    ### uncalibrated
    pr = MSD_rec.PredSI(s2u_tr, _A, _Q)

    print('Creating recommender..')
    cp = MSD_rec.SReco(songs_ordered)
    cp.Add(pr)
    cp.Gamma = [1.0]  # one entry for the one predictor added above

    r = cp.RecommendToUsers(users_v[user_min:user_max], u2s_v)
    MSD_util.save_recommendations(r, "kaggle_songs.txt", osfile)