def __init__(self, root, split, transform_name):
    super().__init__(root, split, transform_name)
    # Load the point-annotation JSON for Pascal VOC 2012.
    self.pointsJSON = ut.jload(os.path.join(
        '/mnt/datasets/public/issam/VOCdevkit/VOC2012',
        'whats_the_point/data', "pascal2012_trainval_main.json"))
def train_mod_comb(test_set, bucket, lp, s_id2spt, selection, K, q):
    # Train a CF-KOMD model restricted to the selected items and build one
    # recommendation list per test playlist, mapping internal song ids back
    # to Spotify URIs.
    modello = Models.CF_KOMD_selected(q, lp=lp, K=K, items_list=s_id2spt)
    modello.train(test_set, bucket, selection)
    recommendation = {}
    for i in test_set:
        recommendation[i] = modello.get_recommendation(i, bucket)
    spt2id = utils.jload("../data/s_spt2id.json")
    recommendation = {
        i: [spt2id[s] for s in recommendation[i]]
        for i in recommendation
    }
    return recommendation
def load_test_set(dir_name, item_class="track"):
    # Map every item (track or artist) URI to its index, then group the
    # playlists of the challenge set by the number of seed tracks they expose.
    if item_class == "track":
        file_name = "s_id2spt.json"
    elif item_class == "artist":
        file_name = "/artists/index_of_artist.txt"
    it_idx = utils.jload(dir_name + file_name)
    item_indexes = {t: i for i, t in enumerate(it_idx)}
    test_set = {'0': {}, '1': {}, '5': {}, '10': {}, '25': {}, '100': {}}
    with open(dir_name + "challenge_set.json") as F:
        challenge_struc = json.load(F)
    for p in challenge_struc['playlists']:
        test_set[str(p['num_samples'])][p['pid']] = []
        for t in p['tracks']:
            test_set[str(p['num_samples'])][p['pid']].append(
                item_indexes[t[item_class + '_uri']])
    return test_set
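# Hypothetical usage sketch of load_test_set (the directory layout is an
# assumption, not part of the original code): the returned dict maps the
# seed-size string to a dict pid -> list of seed-item indexes.
def _load_test_set_example(dir_name="./"):
    test_set = load_test_set(dir_name, "track")
    for seed in ("0", "1", "5", "10", "25", "100"):
        print(seed, len(test_set[seed]), "playlists with that many seeds")
    return test_set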
import sys
import os
import math

import numpy as np
import scipy.sparse as sps

sys.path.append(
    os.path.dirname(os.path.dirname(os.path.dirname(
        os.path.abspath(__file__)))))
import utils

'''
Script used to build the kernel between songs: each song is represented by
the set of playlists it belongs to. Each entry of the kernel matrix is the
cosine similarity between the representations of two songs.
'''

#s2p = utils.jload("../data/validation/s2p.json")
s2p = utils.jload("./s2p.json")
for s in s2p:
    s2p[s] = set(s2p[s])

print("Creating s2d...")
utils.fl()
s2d = {}
for ss in s2p:
    s = int(ss)
    if s not in s2d:
        s2d[s] = {}
    for p in s2p[ss]:
        if p not in s2d[s]:
            s2d[s][p] = 1.
        else:
            s2d[s][p] += 1.
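# A minimal, self-contained sketch of the cosine kernel the docstring
# describes (the helper and its variable names are illustrative, not the
# script's actual code): build a sparse song-by-playlist indicator matrix
# and normalize the co-occurrence counts.
def _cosine_kernel_sketch(memberships, n_items, n_features):
    # memberships: dict song index -> iterable of playlist indexes.
    rows, cols, vals = [], [], []
    for item, feats in memberships.items():
        for f in feats:
            rows.append(item)
            cols.append(f)
            vals.append(1.0)
    A = sps.csr_matrix((vals, (rows, cols)), shape=(n_items, n_features))
    K = A.dot(A.T).toarray()          # co-occurrence counts
    d = np.sqrt(np.diag(K))
    d[d == 0] = 1.0                   # guard songs that appear in no playlist
    return K / np.outer(d, d)         # cosine normalization

# Tiny example: songs 0 and 1 share one playlist, song 2 shares none.
# _cosine_kernel_sketch({0: [0, 1], 1: [1, 2], 2: [3]}, n_items=3, n_features=4)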
import math
import time

import numpy as np
import scipy.sparse as sps

import utils
import utils_matrix

# This script implements a user-based recommendation technique based on the
# output of build_Ku.py. It loads the needed files, computes the global
# popularity (used to pad recommendations that are not long enough), builds
# the rating matrix for the training set and computes Ku * R. It then takes
# the 500 highest-scoring elements of each row as the recommendation.

test_seed = '1'
q = 1

s_id2spt = utils.jload("./s_id2spt.json")

####--------------- TEST ---------------####
s2p = utils.jload("./s2p.json")
p2s = utils.jload("./p2s.json")
p2s_test = utils_matrix.load_test_set("./", "track")
Ku = sps.load_npz("./1_0.5.npz")
####--------------- TEST ---------------####

global_popularity = np.zeros(len(s2p))
for s in s2p:
    global_popularity[int(s)] = len(s2p[s])
global_popularity = np.argsort(-global_popularity)

utr_str = "noutr"
utr_id2utr = None
utr2utr_id = {p: i for i, p in enumerate(sorted(list(map(int, p2s.keys()))))}
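# Minimal sketch of the scoring step described in the comment above
# (assumptions: Ku maps test playlists to training playlists and R is a
# sparse train-playlist-by-song rating matrix; names and the padding policy
# are illustrative, not the script's actual code).
def _top_k_sketch(Ku, R, global_popularity, k=500):
    # Score each test playlist as a similarity-weighted sum of the songs of
    # similar training playlists, then keep the k highest-scoring songs.
    scores = Ku.dot(R)                     # sparse, n_test x n_songs
    recs = []
    for i in range(scores.shape[0]):
        row = scores.getrow(i).toarray().ravel()
        order = np.argsort(-row)
        rec = [int(s) for s in order[:k] if row[s] > 0]
        # Pad with globally popular songs when fewer than k scores are positive.
        for s in global_popularity:
            if len(rec) >= k:
                break
            if int(s) not in rec:
                rec.append(int(s))
        recs.append(rec)
    return recs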
    # Build the sparse title-by-item indicator matrix and return the
    # cosine-normalized co-occurrence matrix A * A^T.
    row = []
    for title in t2t_id:
        for rep in t2rep[title]:
            data.append(1.0)
            row.append(t2t_id[title])
            col.append(rep)
    A = sps.csr_matrix((data, (row, col)))
    AT = sps.csr_matrix((data, (col, row)))
    K = (A.dot(AT)).todense()
    # Titles with an empty representation get a unit diagonal so the
    # normalization below never divides by zero.
    for i in range(K.shape[0]):
        if K[i, i] == 0:
            K[i, i] = 1.0
    n = K.shape[0]
    d = np.array([[K[i, i] for i in range(n)]])
    Kn = K / np.sqrt(np.dot(d.T, d))
    return Kn


t_id2t = utils.jload("./t_id2t.json")
t2t_id = {t: i for i, t in enumerate(t_id2t)}
t2rep = utils.jload("./t2s.json")
t2rep = {t: set(rep) for t, rep in t2rep.items()}
rep_size = len(utils.jload("./s2p.json"))

sim = titles_similarity_sparse(t2rep, t2t_id, rep_size)
np.save("./S_titles.npy", sim)
import os
import sys
import math
import time

import numpy as np
import scipy.sparse as sps

import utils
import utils_matrix

t = time.time()

# Script used to build the similarity matrix between users. It takes as input
# the seed size to use (0, 1, 5, 10, 25 or 100) and alpha, the weight used to
# combine two matrices: U_tr-to-Songs and (U_test-to-Songs) transposed.
# Our submission's results were obtained with alpha = 0.5.

####--------------- TEST ---------------####
s2p = utils.jload("./s2p.json")
p2s = utils.jload("./p2s.json")
p2s_test = utils_matrix.load_test_set("./", "track")
####--------------- TEST ---------------####

# ####------------ VALIDATION ------------####
# s2p = utils.jload("../data/validation/s2p.json")
# p2s = utils.jload("../data/validation/p2s.json")
# p2s_test = utils.jload("../data/validation/test/p2s.json")
# ####------------ VALIDATION ------------####

test_seed = sys.argv[1]
alpha = float(sys.argv[2])
if len(sys.argv) > 3:
    for i in range(3, len(sys.argv), 2):
    recommendation = {
        i: [spt2id[s] for s in recommendation[i]]
        for i in recommendation
    }
    return recommendation


val_set_keys = set()

##------------------TEST------------------###
K = sps.load_npz("K.npz")
print("K loaded")
utils.fl()
q = np.load("q.npy")
p2s_test = utils_matrix.load_test_set("./", "track")
p2s_train = utils.jload("./p2s.json")
p2t = utils.jload("./p2t_c.json")
s2p = utils.jload("./s2p.json")
##----------------END TEST----------------###

s_id2spt = utils.jload("./s_id2spt.json")
for s in s2p:
    s2p[s] = set(s2p[s])

###---------------BUILD SELECTION---------------###
t2p = utils.jload("./t2p_filt.json")
t_id2t = utils.jload("./t_id2t.json")
t2t_id = {}
for i, t in enumerate(t_id2t):
p2t_c = {}
for i, pl in enumerate(chplaylists):
    if "name" in pl:
        # Normalize the playlist title: lowercase, map punctuation to spaces,
        # collapse repeated spaces, strip, then stem.
        t = pl["name"].lower()
        t = ''.join(c if c not in punctuation else " " for c in t)
        t = re.sub(' +', ' ', t)
        t = t.strip()
        t = parsing.stem_text(t)
        ch_t.append(t)
        count += t not in t2pl
        p2t_c[pl['pid']] = t
    else:
        skipped += 1
        p2t_c[pl['pid']] = ''
json.dump(p2t_c, open("./p2t_c.json", "w"), indent=4)

titles_list = [t for t in t2pl if t != ""]
json.dump(titles_list, open("./t_id2t.json", "w"))

p2s = utils.jload("./p2s.json")
t2s = {}
for t in t2pl:
    if t != '':
        t2s[t] = []
        for p in t2pl[t]:
            t2s[t] += p2s[str(p)]
json.dump(t2s, open("./t2s.json", "w"))
import sys
import json
import math
import random

import scipy.sparse as sps

import utils
import utils_matrix

# This script builds the recommendations for seeds 5, 10 and 25. It takes as
# input alpha (we used 0.7), q (we used 0.4) and the seed to build the
# recommendations for.

alpha = float(sys.argv[1])
q = float(sys.argv[2])
test_seed = sys.argv[3]

#p2s_test = utils.jload("../data/validation/test/p2s.json")
p2s_test = utils_matrix.load_test_set("./")
s_id2spt = utils.jload("./s_id2spt.json")

print("loading P...")
utils.fl()
P = sps.load_npz("./P.npz")
print("done")
utils.fl()

print("loading P csc...")
utils.fl()
P_csc = sps.load_npz("./P_csc.npz")
print("done")
utils.fl()

print("prediction with alpha=%f, q=%f started" % (alpha, q))
recommendation = {}
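# P.npz and P_csc.npz are presumably the same matrix stored in row- and
# column-oriented layouts: row slicing is cheap on CSR while column slicing
# is cheap on CSC. Illustrative sketch only (random data, not the script's P):
def _csr_csc_access_sketch():
    P_demo = sps.random(1000, 2000, density=0.01, format="csr", random_state=0)
    P_demo_csc = P_demo.tocsc()
    row = P_demo.getrow(42)        # contiguous access on CSR
    col = P_demo_csc.getcol(7)     # contiguous access on CSC
    return row, col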