예제 #1
0
    u2i[u]=i

# Load the per-song listener collections from the training triplets.
# (Original author's note: a dict of song -> set of users who listened to it.)
print('song to users on %s' % f_triplets_tr)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Replace each user ID stored under a song with the integer that u2i maps
# it to; every song's entry is rebuilt as a fresh set of those integers.
print("converting users to indexes")
for song in s2u_tr:
    s2u_tr[song] = {u2i[user] for user in s2u_tr[song]}

# The ID -> index table is not referenced again in this section.
del u2i

# Per-user listening data read from the evaluation triplets file.
print('user to songs on %s' % f_triplets_tev)
u2s_v = MSD_util.user_to_songs(f_triplets_tev)

print('Creating predictor..')

# Numeric arguments handed to the predictor constructor below.
# NOTE(review): their meaning is defined inside MSD_rec - confirm there.
_A, _Q = 0.15, 3

### calibrated alternative, left disabled by the original author:
### pr=MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt")

### uncalibrated song-based predictor
pr = MSD_rec.PredSI(s2u_tr, _A, _Q)

print('Creating recommender..')
# The recommender receives songs_ordered (per the original comment, the
# songs from the training triplets).
cp = MSD_rec.SReco(songs_ordered)
cp.Add(pr)
# One entry per registered predictor; only one was added, hence [1.0].
cp.Gamma = [1.0]
예제 #2
0
# NOTE: every progress message uses the single-argument parenthesised
# print form, which emits the same text on Python 2 and Python 3 (the
# bare `print '...'` statement is a SyntaxError on Python 3).

# Load the per-song listener collections from the training triplets.
print(' song to users on %s ' % f_triplets_tr)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Replace each user ID stored under a song with the integer that u2i maps
# it to; every song's entry is rebuilt as a set of those integers.
print(' converting users to indexes')
for s in s2u_tr:
    s2u_tr[s] = {u2i[u] for u in s2u_tr[s]}

# The ID -> index table is not referenced again in this section.
del u2i


# Per-user listening data read from the evaluation triplets file.
print('user to songs on %s ' % f_triplets_tev)
u2s_v = MSD_util.user_to_songs(f_triplets_tev)


print('creating predictor...')

# Numeric arguments handed to the predictor constructor below.
# NOTE(review): their meaning is defined inside MSD_rec - confirm there.
_A = 0.15
_Q = 3


pr = MSD_rec.PredSI(s2u_tr, _A, _Q)

print('creating recommender..')
cp = MSD_rec.SReco(songs_ordered)
cp.Add(pr)
# One entry per registered predictor; only one was added, hence [1.0].
cp.Gamma = [1.0]
    u2i[u] = i

# NOTE: every progress message uses the single-argument parenthesised
# print form, which emits the same text on Python 2 and Python 3 (the
# bare `print '...'` statement is a SyntaxError on Python 3).

# Load the per-song listener collections from the training triplets.
print('song to users on %s' % f_triplets_tr)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Replace each user ID stored under a song with the integer that u2i maps
# it to; every song's entry is rebuilt as a set of those integers.
print("converting users to indexes")
for s in s2u_tr:
    s2u_tr[s] = {u2i[u] for u in s2u_tr[s]}

# The ID -> index table is not referenced again in this section.
del u2i

# Per-user listening data from two triplets files (f_triplets_tev and
# f_triplets_teh, both defined outside this section).
print('user to songs on %s' % f_triplets_tev)
u2s_v = MSD_util.user_to_songs(f_triplets_tev)
u2s_h = MSD_util.user_to_songs(f_triplets_teh)

print('Creating predictor..')

# Numeric arguments handed to the PredSI constructor below.
# NOTE(review): their meaning is defined inside the predictor module.
_A = 0.15
_Q = 3
### calibrated alternative, left disabled by the original author:
### pr=MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt")

### uncalibrated
pr = MSD_rec_shefali.PredSI(s2u_tr, _A, _Q)
# Original author's note on PredSU: "with only this, 0.0 for 10 users in
# contrast to 0.33 in SI".
pr2 = MSD_rec_shefali.PredSU(s2u_tr, 0.3, 5)
# paths to data
f_triplets_tr = "../data/train_data.txt"
f_triplets_vv = "../data/valid_visible.txt"
f_triplets_vp = "../data/valid_predict.txt"

# parameters
# Length of every recommendation list; also forwarded to MSD_rec.mAP.
# (The list length used to be a separate hard-coded 500.)
_tau = 500

# NOTE: progress messages use the single-argument parenthesised print
# form, which emits the same text on Python 2 and Python 3.
print('default ordering by popularity')
sys.stdout.flush()
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr, binary=False))

print('user to songs on %s' % f_triplets_vv)
u2s_vv = MSD_util.user_to_songs(f_triplets_vv)
print('user to songs on %s' % f_triplets_vp)
u2s_vp = MSD_util.user_to_songs(f_triplets_vp)

# recommend top N most popular songs (extremely unpersonalized :|)
top_songs = songs_ordered[:_tau]
all_recs = []
for u in u2s_vv:
    already_heard = u2s_vv[u]
    # Filter with a list comprehension rather than a set difference: a set
    # has no defined order, so the previous list(set(...) - ...) handed the
    # songs to the rank-sensitive mAP in arbitrary order instead of in
    # songs_ordered order.
    recs4u = [song for song in top_songs if song not in already_heard]
    n_more = _tau - len(recs4u)
    if n_more > 0:
        # Top up with the next entries of songs_ordered so the list has
        # _tau items again (unchanged from the original: these extras are
        # not filtered against the user's history).
        recs4u.extend(songs_ordered[_tau:_tau + n_more])
    all_recs.append(recs4u)

map_all = MSD_rec.mAP(u2s_vv.keys(), all_recs, u2s_vp, _tau)
# Blank separator line (a bare `print` only does this on Python 2).
print("")
예제 #5
0
def main(argv):
    """Build the song-based recommender and save recommendations to a file.

    Args:
        argv: Command-line arguments *without* the program name, read as
            ``[user_min, user_max, result_file]``. When fewer than three
            items are given, a usage hint is printed and the defaults
            ``1``, ``110000`` and ``"resultfull.txt"`` are used instead.

    Raises:
        ValueError: If ``user_min`` or ``user_max`` cannot be converted
            with ``int()``.

    The users in ``users_v[user_min:user_max]`` (loaded from
    ``kaggle_users.txt``) are passed to the recommender, and the result is
    written through ``MSD_util.save_recommendations``.
    """
    if len(argv) < 3:
        print(
            "Need more arguments, Example:MSD_subm_rec.py user_min user_max resultFile.txt"
        )
        # Not fatal: the original exit() call was disabled, so the run
        # continues with these defaults.
        user_min, user_max, osfile = 1, 110000, "resultfull.txt"
    else:
        user_min, user_max, osfile = argv[0], argv[1], argv[2]

    # Command-line values arrive as strings; the slice below needs ints.
    user_min = int(user_min)
    user_max = int(user_max)

    print("user_min: %d , user_max: %d" % (user_min, user_max))
    sys.stdout.flush()

    # TRIPLETS
    f_triplets_tr = "train_triplets.txt"
    f_triplets_tev = "kaggle_visible_evaluation_triplets.txt"

    print('loading users in %s' % "kaggle_users.txt")
    sys.stdout.flush()
    users_v = list(MSD_util.load_users("kaggle_users.txt"))

    print('default ordering by popularity')
    sys.stdout.flush()
    songs_ordered = MSD_util.sort_dict_dec(
        MSD_util.song_to_count(f_triplets_tr))

    print("loading unique users indexes")
    uu = MSD_util.unique_users(f_triplets_tr)
    # Map every user ID to its position in the iteration order of uu.
    u2i = {u: i for i, u in enumerate(uu)}

    print('song to users on %s' % f_triplets_tr)
    s2u_tr = MSD_util.song_to_users(f_triplets_tr)

    print("converting users to indexes")
    # Rewrite each song's entry in place as a set of integer user indexes;
    # only values are reassigned, so iterating the dict's keys stays valid.
    for s in s2u_tr:
        s2u_tr[s] = {u2i[u] for u in s2u_tr[s]}

    # The ID -> index table is not used again in this function.
    del u2i

    print('user to songs on %s' % f_triplets_tev)
    u2s_v = MSD_util.user_to_songs(f_triplets_tev)

    print('Creating predictor..')

    # Numeric arguments handed to the predictor constructor below.
    # NOTE(review): their meaning is defined inside MSD_rec - confirm there.
    _A = 0.15
    _Q = 3
    ### calibrated alternative, left disabled by the original author:
    ### pr=MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt")

    ### uncalibrated
    pr = MSD_rec.PredSI(s2u_tr, _A, _Q)

    print('Creating recommender..')
    cp = MSD_rec.SReco(songs_ordered)
    cp.Add(pr)
    # One entry per registered predictor; only one was added, hence [1.0].
    cp.Gamma = [1.0]

    r = cp.RecommendToUsers(users_v[user_min:user_max], u2s_v)
    MSD_util.save_recommendations(r, "kaggle_songs.txt", osfile)