def load_data(user_item_train_df, user_num, item_num):
    """Wrap the training triplets <user> <item> <rating> in a SLIMatrix.

    Args:
        user_item_train_df: training data as (user, item, rating) triplets.
        user_num: number of users (rows of the rating matrix).
        item_num: number of items (columns of the rating matrix).

    Returns:
        A SLIMatrix built from the sparse user-item rating matrix.
    """
    sparse_ratings = get_rating_mat(user_item_train_df, user_num, item_num)
    return SLIMatrix(sparse_ratings)
def create_SLIM_graph(user_item_spm, l1r=1, l2r=1, test=False, test_set=None):
    """Train a SLIM model on a user-item matrix and return its item-item CSR.

    Args:
        user_item_spm: sparse user-item interaction matrix.
        l1r: L1 regularization weight passed to the SLIM solver.
        l2r: L2 regularization weight passed to the SLIM solver.
        test: when true (and ``test_set`` is given), run evaluation too.
        test_set: optional (users_test, items_test, neg_test) triple.

    Returns:
        The learned item-item similarity matrix in CSR form.
    """
    train_params = {'algo': 'cd', 'nthreads': 32, 'l1r': l1r, 'l2r': l2r}
    slim_model = SLIM()
    train_matrix = SLIMatrix(user_item_spm.tocsr())
    slim_model.train(train_params, train_matrix)
    item_item_spm = slim_model.to_csr()
    # Optional held-out evaluation; users_test is unpacked but unused here.
    if test and test_set is not None:
        _users_test, items_test, neg_test = test_set
        eval_SLIM(slim_model, train_matrix, items_test, neg_test)
    return item_item_spm
# NOTE(review): `param` is presumably an argparse.ArgumentParser created
# earlier in the file (not visible in this chunk) -- confirm.
# Input files: train/validation ratings stored as "<user> <item> <value>" rows.
param.add_argument('--traindata', type=str, default='../../test/AutomotiveTrain.ijv')
param.add_argument('--valdata', type=str, default='../../test/AutomotiveTest.ijv')
# Solver options forwarded to SLIM ('cd' presumably = coordinate descent,
# with L1/L2 regularization weights l1r/l2r -- TODO confirm against SLIM docs).
param.add_argument('--dbglvl', type=int, default=1)
param.add_argument('--nnbrs', type=int, default=0)
param.add_argument('--simtype', type=str, default='cos')
param.add_argument('--algo', type=str, default='cd')
param.add_argument('--nthreads', type=int, default=1)
param.add_argument('--niters', type=int, default=100)
param.add_argument('--l1r', type=float, default=1.)
param.add_argument('--l2r', type=float, default=1.)
param.add_argument('--optTol', type=float, default=1e-7)
config = param.parse_args()

# Load the space-delimited triplet files and wrap them for SLIM; the
# validation matrix reuses the train matrix's id mapping.
traindata = pd.read_csv(config.traindata, delimiter=' ', header=None)
valdata = pd.read_csv(config.valdata, delimiter=' ', header=None)
trainmat = SLIMatrix(traindata)
valmat = SLIMatrix(valdata, trainmat)

# Train, round-trip the model through disk, then write top-10
# recommendations per user to output.txt.
model = SLIM()
model.train(config, trainmat)
model.save_model(modelfname='model.csr', mapfname='map.csv')
model.load_model('model.csr', mapfname='map.csv')
topn = model.predict(trainmat, nrcmds=10, outfile='output.txt')
# Bucket test users by degree (NOTE(review): `test_deg` is computed earlier
# in the file; assumed to hold per-user interaction counts -- confirm):
#   keys 1..9   -> users with exactly that degree,
#   keys 10..90 -> users with degree in [k, k + 10),
#   key 100     -> users with degree >= 100.
for i in range(1, 10):
    test_deg_dict[i] = np.nonzero(test_deg == i)[0]
for i in range(1, 10):
    test_deg_dict[i * 10] = np.nonzero(
        np.logical_and(i * 10 <= test_deg, test_deg < (i + 1) * 10))[0]
test_deg_dict[100] = np.nonzero(test_deg >= 100)[0]
# Sanity check: bucket sizes should sum to the number of test users.
tmp = 0
for key, deg in test_deg_dict.items():
    print(key, len(deg))
    tmp += len(deg)
print(num_users, tmp)

# Train a SLIM item-item model on the user/movie matrix and persist it.
from SLIM import SLIM, SLIMatrix
model = SLIM()
params = {'algo': 'cd', 'nthreads': 16, 'l1r': 2, 'l2r': 1}
trainmat = SLIMatrix(user_movie_spm.tocsr())
model.train(params, trainmat)
model.save_model(modelfname='slim_model.csr', mapfname='slim_map.csr')

# Reload the saved model as a scipy sparse matrix and print basic stats
# about the learned item-item similarity graph.
from slim_load import read_csr
movie_spm = read_csr('slim_model.csr')
print('#edges:', movie_spm.nnz)
print('most similar:', np.max(movie_spm.data))
print('most unsimilar:', np.min(movie_spm.data))
# Row sums of the similarity graph: how many movies have no neighbors.
deg = movie_spm.dot(np.ones((num_movies)))
print(np.sum(deg == 0))
print(len(deg))
print(movie_spm.sum(0))
def run_SLIM(X_train, train_users, X_meta, X_val, X_test, val_dict, test_dict, side_info, eval_style='strong'): # Values for grid-search NDCG_values = [] optimal_model_SLIM = None best_values = None l1_values = [0, 2.5, 5.0, 10.0, 20] l2_values = [0, 5.0, 10.0, 20, 50, 100] al_values = [.5, 1.0, 2.5, 5.0, 10.0] if side_info else [1.0] for l1r, l2r, alpha in itertools.product(l1_values, l2_values, al_values): print('L1: {0}\tL2: {1}\tAlpha: {2}'.format(l1r, l2r, alpha)) # Set up parameters params = {'algo': 'cd', 'nthreads': 16, 'l1r': l1r, 'l2r': l2r} # Build training matrix trainmat = X_train if side_info: trainmat = vstack((trainmat, alpha * X_meta)) trainmat = SLIMatrix(trainmat) # Train model model = SLIM() model.train(params, trainmat) print('Converting out of SLIM format...') # To CSR works, but densifying it crashes sometimes? Very strange # S_SLIM = model.to_csr().todense() # Work-around by writing to disk and reading in model.save_model(modelfname='slim_model.csr', mapfname='slim_map.csr') def read_csr(filename): f = open(filename, 'r') all_rows = [] all_cols = [] all_vals = [] for i, line in enumerate(f.readlines()): strs = line.split(' ') cols = [int(s) for s in strs[1::2]] vals = [float(s) for s in strs[2::2]] all_cols.extend(cols) all_vals.extend(vals) all_rows.extend([i for _ in cols]) all_rows = np.array(all_rows, dtype=np.int64) all_cols = np.array(all_cols, dtype=np.int64) all_vals = np.array(all_vals, dtype=np.float32) mat = coo_matrix((all_vals, (all_rows, all_cols)), shape=(X_train.shape[1], X_train.shape[1])) return mat S_SLIM = read_csr('slim_model.csr') print('... 
done!') S_SLIM = S_SLIM.todense() # Evaluate on validation data print('Evaluating...') val_users = list(val_dict.keys()) val_scores = X_val[val_users, :] @ S_SLIM - 987654321 * X_val[ val_users, :] # Evaluate and pretty print NDCG = util.evaluate(X_val, val_scores, val_dict)[1][100] NDCG_values.append(NDCG) print('\tNDCG@100:\t{0}'.format(NDCG)) if np.max(NDCG_values) == NDCG: optimal_model_SLIM = S_SLIM best_values = (l1r, l2r, alpha) print('Best grid-search values:', best_values) # Compute prediction scores for all test users - subtract already seen items test_users = list(test_dict.keys()) test_scores = X_test[ test_users, :] @ optimal_model_SLIM - 987654321 * X_test[test_users, :] # Evaluate and pretty print results_SLIM = util.evaluate(X_test, test_scores, test_dict) return results_SLIM