Beispiel #1
0
    def __init__(self, root, split, transform_name):
        """Initialise the dataset split and load the point annotations.

        Delegates root/split/transform handling to the parent class, then
        loads the "what's the point" annotation JSON for Pascal VOC 2012.
        """
        super().__init__(root, split, transform_name)

        # NOTE(review): hard-coded cluster path — confirm it is valid in
        # the target environment.
        points_path = os.path.join(
            '/mnt/datasets/public/issam/VOCdevkit/VOC2012',
            'whats_the_point/data',
            "pascal2012_trainval_main.json")
        self.pointsJSON = ut.jload(points_path)
def train_mod_comb(test_set, bucket, lp, s_id2spt, selection, K, q):
    """Train a CF_KOMD_selected model and build per-playlist recommendations.

    Trains the model on ``test_set``/``bucket`` with the given ``selection``,
    collects one recommendation list per playlist, then maps the internal
    song ids back to Spotify ids via the ``s_spt2id.json`` lookup table.
    Returns a dict: playlist id -> list of Spotify ids.
    """
    model = Models.CF_KOMD_selected(q, lp=lp, K=K, items_list=s_id2spt)
    model.train(test_set, bucket, selection)

    raw_recs = {pid: model.get_recommendation(pid, bucket) for pid in test_set}

    # Translate internal song indexes into Spotify track ids.
    spt2id = utils.jload("../data/s_spt2id.json")
    return {
        pid: [spt2id[song] for song in songs]
        for pid, songs in raw_recs.items()
    }
Beispiel #3
0
def load_test_set(dir_name, item_class="track"):
    """Load the challenge test set grouped by number of seed tracks.

    Parameters
    ----------
    dir_name : str
        Directory (with trailing separator) containing the index file and
        ``challenge_set.json``.
    item_class : str
        ``"track"`` (uses ``s_id2spt.json``) or ``"artist"``
        (uses ``/artists/index_of_artist.txt``).

    Returns
    -------
    dict
        seed-size string ('0', '1', '5', '10', '25', '100') -> playlist id
        -> list of item indexes.

    Raises
    ------
    ValueError
        If ``item_class`` is neither "track" nor "artist" (the original
        silently left ``file_name`` unbound, producing a NameError later).
    """
    if item_class == "track":
        file_name = "s_id2spt.json"
    elif item_class == "artist":
        file_name = "/artists/index_of_artist.txt"
    else:
        raise ValueError("unsupported item_class: %r" % (item_class,))

    # BUG FIX: the original opened dir_name + file_name but then loaded
    # plain `file_name` (relative to the CWD) via utils.jload, ignoring
    # the handle it had just opened.  Load from the opened handle instead.
    with open(dir_name + file_name, "r") as F:
        it_idx = json.load(F)
    item_indexes = {t: i for i, t in enumerate(it_idx)}

    test_set = {'0': {}, '1': {}, '5': {}, '10': {}, '25': {}, '100': {}}
    with open(dir_name + "challenge_set.json") as F:
        challenge_struc = json.load(F)

    for p in challenge_struc['playlists']:
        # BUG FIX: the original created the list under p['pid'] but
        # appended under int(p['pid']); use one consistent key.
        pid = p['pid']
        seed = str(p['num_samples'])
        test_set[seed][pid] = []
        for t in p['tracks']:
            test_set[seed][pid].append(item_indexes[t[item_class + '_uri']])

    return test_set
Beispiel #4
0
import sys
import numpy as np
import os
import math
import scipy.sparse as sps
sys.path.append(
    os.path.dirname(os.path.dirname(os.path.dirname(
        os.path.abspath(__file__)))))
import utils
'''
File used to build the kernel between songs: each song is represented by the
set of playlists it belongs to. Each element of the kernel matrix holds the
cosine similarity between the representations of two songs.
'''

#s2p = utils.jload("../data/validation/s2p.json")
s2p = utils.jload("./s2p.json")
for s in s2p:
    s2p[s] = set(s2p[s])

print "Creating s2d...",
utils.fl()
s2d = {}
for ss in s2p:
    s = int(ss)
    if s not in s2d:
        s2d[s] = {}
    for p in s2p[ss]:
        if p not in s2d[s]:
            s2d[s][p] = 1.
        else:
            s2d[s][p] += 1.
import numpy as np
import utils
import utils_matrix
import time
import scipy.sparse as sps
import math

#the script implements a user based recommendation technique based on the output of build_Ku.py.
#it loads some needed files, calculates the global popularity to fill recommendations that are not
#long enought, calculates the rating matrix for the training set and calculates Ku*R. by using
#it takes then the first 500 higher elements for each row and uses them as recommendation

test_seed = '1'
q = 1

s_id2spt = utils.jload("./s_id2spt.json")
####--------------- TEST ---------------####
s2p = utils.jload("./s2p.json")
p2s = utils.jload("./p2s.json")
p2s_test = utils_matrix.load_test_set("./", "track")
Ku = sps.load_npz("./1_0.5.npz")
####--------------- TEST ---------------####

# Global popularity of a song = number of playlists containing it;
# argsort on the negated vector yields song ids from most to least popular.
global_popularity = np.zeros(len(s2p))
for song_id, playlists in s2p.items():
    global_popularity[int(song_id)] = len(playlists)
global_popularity = np.argsort(-global_popularity)

utr_str = "noutr"
utr_id2utr = None
# Playlist id -> dense training index, in ascending pid order.
utr2utr_id = {p: i for i, p in enumerate(sorted(map(int, p2s.keys())))}
    # NOTE(review): this is the tail of a function whose `def` line is not
    # visible in this excerpt — presumably titles_similarity_sparse (called
    # later in the file).  The initialisation of `data` and `col` is also
    # missing; only `row` is initialised here.  Recover the full source
    # before editing.
    row = []

    # Build COO triplets for a binary occurrence matrix:
    # entry (title_id, rep) = 1 for every representation element of a title.
    for title in t2t_id:
        for rep in t2rep[title]:
            data.append(1.0)
            row.append(t2t_id[title])
            col.append(rep)

    A = sps.csr_matrix((data, (row, col)))
    AT = sps.csr_matrix((data, (col, row)))
    # Gram matrix: K[i, j] = co-occurrence count between titles i and j.
    K = (A.dot(AT)).todense()
    # Guard against zero diagonal entries before cosine normalisation.
    for i in range(K.shape[0]):
        if K[i, i] == 0:
            K[i, i] = 1.0
    n = K.shape[0]
    d = np.array([[K[i, i] for i in range(n)]])
    # Cosine normalisation: Kn[i, j] = K[i, j] / sqrt(K[i, i] * K[j, j]).
    Kn = K / np.sqrt(np.dot(d.T, d))

    return Kn


# Build the title-similarity matrix and persist it to disk.
t_id2t = utils.jload("./t_id2t.json")
t2t_id = dict((title, idx) for idx, title in enumerate(t_id2t))

# Title -> set of songs representing it (deduplicated).
t2rep = utils.jload("./t2s.json")
t2rep = {title: set(songs) for title, songs in t2rep.items()}
rep_size = len(utils.jload("./s2p.json"))

sim = titles_similarity_sparse(t2rep, t2t_id, rep_size)
np.save("./S_titles.npy", sim)
Beispiel #7
0
import os
import numpy as np
import utils
import utils_matrix
import time
import scipy.sparse as sps
import time
import math
# BUG FIX: sys.argv is read below but `sys` was never imported in this
# script's import block.
import sys

t = time.time()

#Script used to build the similarity matrix between users; it needs in input the number of the seed i want to use to
# build the matrix (0, 1, 5, 10, 25, 100) and alpha, used to combine two matrices: U_tr to Songs and (U_test to Song) transposed.
# our submission's results are obtained with alpha = 0.5

####--------------- TEST ---------------####
s2p = utils.jload("./s2p.json")
p2s = utils.jload("./p2s.json")
p2s_test = utils_matrix.load_test_set("./", "track")
####--------------- TEST ---------------####

# ####------------ VALIDATION ------------####
# s2p = utils.jload("../data/validation/s2p.json")
# p2s = utils.jload("../data/validation/p2s.json")
# p2s_test = utils.jload("../data/validation/test/p2s.json")
# ####------------ VALIDATION ------------####

# CLI parameters: seed size and the matrix-mixing weight alpha.
test_seed = sys.argv[1]
alpha = float(sys.argv[2])

# NOTE(review): this fragment is garbled.  The `for` loop below has no
# body (the dict comprehension that follows sits at the same indentation,
# an IndentationError), and the trailing `return` belongs to a function
# whose `def` line is not visible in this excerpt.  Recover the original
# source before editing.
if (len(sys.argv) > 3):
    for i in range(3, len(sys.argv), 2):
    recommendation = {
        i: [spt2id[s] for s in recommendation[i]]
        for i in recommendation
    }
    return recommendation


val_set_keys = set()

##------------------TEST------------------###
K = sps.load_npz("K.npz")
print("K loaded")
utils.fl()
q = np.load("q.npy")
p2s_test = utils_matrix.load_test_set("./", "track")
p2s_train = utils.jload("./p2s.json")
p2t = utils.jload("./p2t_c.json")
s2p = utils.jload("./s2p.json")
##----------------END TEST----------------###

s_id2spt = utils.jload("./s_id2spt.json")

# Song -> set of playlists, for O(1) membership tests.
s2p = {song: set(playlists) for song, playlists in s2p.items()}

###---------------BUILD SELECTION---------------###
t2p = utils.jload("./t2p_filt.json")

t_id2t = utils.jload("./t_id2t.json")
t2t_id = {}
# NOTE(review): truncated — the body of this loop (presumably
# `t2t_id[t] = i`) is missing from this excerpt; as written the bare
# `for` header is a syntax error.  Restore it from the full source.
for i, t in enumerate(t_id2t):
p2t_c = {}

# Normalise each named playlist title: lowercase, replace punctuation with
# spaces, collapse repeated spaces, strip, then stem.  Playlists without a
# name map to the empty string and are counted as skipped.
for i, pl in enumerate(chplaylists):
    if "name" not in pl:
        skipped += 1
        p2t_c[pl['pid']] = ''
        continue
    title = pl["name"].lower()
    title = ''.join(" " if ch in punctuation else ch for ch in title)
    title = re.sub(' +', ' ', title)
    title = title.strip()
    title = parsing.stem_text(title)
    ch_t.append(title)
    # Count challenge titles never seen in the training title index.
    count += title not in t2pl
    p2t_c[pl['pid']] = title

json.dump(p2t_c, open("./p2t_c.json", "w"), indent=4)
titles_list = [t for t in t2pl if t != ""]
json.dump(titles_list, open("./t_id2t.json", "w"))

# Title -> concatenated song lists of every playlist bearing that title.
p2s = utils.jload("./p2s.json")
t2s = {}
for title, playlists in t2pl.items():
    if title == '':
        continue
    songs = []
    for p in playlists:
        songs += p2s[str(p)]
    t2s[title] = songs
json.dump(t2s, open("./t2s.json", "w"))
import json
import utils
import utils_matrix
import random
import math

#this script is used to build the recommendation for seed 5, 10 and 25. it takes in input alpha (we used 0.7)
#q (we used 0.4) and the seed we want to build the recommendation for

alpha = float(sys.argv[1])
q = float(sys.argv[2])
test_seed = sys.argv[3]

#p2s_test = utils.jload("../data/validation/test/p2s.json")
p2s_test = utils_matrix.load_test_set("./")
s_id2spt = utils.jload("./s_id2spt.json")
print "loading P...",
utils.fl()
P = sps.load_npz("./P.npz")
print "done"
utils.fl()

print "loading P csc...",
utils.fl()
P_csc = sps.load_npz("./P_csc.npz")
print "done"
utils.fl()

print("prediction with alpha=%f, q=%f started" % (alpha, q))

recommendation = {}