Example 1
def load_data(user_item_train_df, user_num, item_num):
    # Read training data stored as <user> <item> <rating> triplets and
    # convert it into a SLIMatrix that the SLIM trainer can consume.
    rating_mat = get_rating_mat(user_item_train_df, user_num, item_num)
    trainmat = SLIMatrix(rating_mat)

    return trainmat
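The get_rating_mat helper is not shown in this example; below is a minimal sketch of what it could look like, assuming the triplet DataFrame has user, item, and rating columns in that order.

# Hypothetical sketch of get_rating_mat: build a user x item sparse rating
# matrix from <user> <item> <rating> triplets (the column order is an assumption).
import numpy as np
from scipy.sparse import csr_matrix

def get_rating_mat(user_item_train_df, user_num, item_num):
    users = user_item_train_df.iloc[:, 0].to_numpy(dtype=np.int64)
    items = user_item_train_df.iloc[:, 1].to_numpy(dtype=np.int64)
    ratings = user_item_train_df.iloc[:, 2].to_numpy(dtype=np.float32)
    return csr_matrix((ratings, (users, items)), shape=(user_num, item_num))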
Example 2
from SLIM import SLIM, SLIMatrix


def create_SLIM_graph(user_item_spm, l1r=1, l2r=1, test=False, test_set=None):
    # Train a SLIM model on the user-item matrix and return the learned
    # item-item similarity matrix as a scipy CSR matrix.
    model = SLIM()
    params = {'algo': 'cd', 'nthreads': 32, 'l1r': l1r, 'l2r': l2r}
    trainmat = SLIMatrix(user_item_spm.tocsr())
    model.train(params, trainmat)
    item_spm = model.to_csr()
    if test and test_set is not None:
        users_test, items_test, neg_test = test_set
        eval_SLIM(model, trainmat, items_test, neg_test)
    return item_spm
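One possible way to call create_SLIM_graph is on a synthetic interaction matrix; the shapes and density below are illustrative only.

# Hypothetical usage of create_SLIM_graph on a random binary user-item matrix.
import numpy as np
from scipy.sparse import random as sparse_random

user_item_spm = sparse_random(1000, 500, density=0.01, format='csr',
                              data_rvs=lambda n: np.ones(n))
item_item_spm = create_SLIM_graph(user_item_spm, l1r=1, l2r=1)
print('learned item-item edges:', item_item_spm.nnz)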
Example 3
import argparse

import pandas as pd
from SLIM import SLIM, SLIMatrix

if __name__ == '__main__':
    # Command-line configuration for training and evaluating a SLIM model.
    param = argparse.ArgumentParser()
    param.add_argument('--traindata',
                       type=str,
                       default='../../test/AutomotiveTrain.ijv')
    param.add_argument('--valdata',
                       type=str,
                       default='../../test/AutomotiveTest.ijv')
    param.add_argument('--dbglvl', type=int, default=1)
    param.add_argument('--nnbrs', type=int, default=0)
    param.add_argument('--simtype', type=str, default='cos')
    param.add_argument('--algo', type=str, default='cd')
    param.add_argument('--nthreads', type=int, default=1)
    param.add_argument('--niters', type=int, default=100)

    param.add_argument('--l1r', type=float, default=1.)
    param.add_argument('--l2r', type=float, default=1.)
    param.add_argument('--optTol', type=float, default=1e-7)

    config = param.parse_args()

    traindata = pd.read_csv(config.traindata, delimiter=' ', header=None)
    valdata = pd.read_csv(config.valdata, delimiter=' ', header=None)

    trainmat = SLIMatrix(traindata)
    valmat = SLIMatrix(valdata, trainmat)

    model = SLIM()
    model.train(config, trainmat)
    model.save_model(modelfname='model.csr', mapfname='map.csv')
    model.load_model('model.csr', mapfname='map.csv')
    topn = model.predict(trainmat, nrcmds=10, outfile='output.txt')
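The script reads whitespace-separated .ijv triplet files; a quick way to smoke-test it is to write tiny files and point --traindata/--valdata at them (the file names and values below are arbitrary).

# Hypothetical toy .ijv files (<row> <col> <value> per line) for the script above.
import pandas as pd

pd.DataFrame([[0, 1, 5.0], [0, 2, 3.0], [1, 0, 4.0]]).to_csv(
    'toy_train.ijv', sep=' ', header=False, index=False)
pd.DataFrame([[0, 0, 2.0], [1, 2, 4.0]]).to_csv(
    'toy_val.ijv', sep=' ', header=False, index=False)
# Then run: python <script>.py --traindata toy_train.ijv --valdata toy_val.ijv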
Example 4
import numpy as np

# Bucket test users by interaction degree: exact counts for 1-9, bands of ten
# for 10-99, and a single bucket for everything >= 100.
for i in range(1, 10):
    test_deg_dict[i] = np.nonzero(test_deg == i)[0]
for i in range(1, 10):
    test_deg_dict[i * 10] = np.nonzero(
        np.logical_and(i * 10 <= test_deg, test_deg < (i + 1) * 10))[0]
test_deg_dict[100] = np.nonzero(test_deg >= 100)[0]
tmp = 0
for key, deg in test_deg_dict.items():
    print(key, len(deg))
    tmp += len(deg)
print(num_users, tmp)

from SLIM import SLIM, SLIMatrix
model = SLIM()
params = {'algo': 'cd', 'nthreads': 16, 'l1r': 2, 'l2r': 1}
trainmat = SLIMatrix(user_movie_spm.tocsr())
model.train(params, trainmat)
model.save_model(modelfname='slim_model.csr', mapfname='slim_map.csr')

from slim_load import read_csr

# Load the learned item-item similarity matrix back as a scipy CSR matrix.
movie_spm = read_csr('slim_model.csr')
print('#edges:', movie_spm.nnz)
print('most similar:', np.max(movie_spm.data))
print('least similar:', np.min(movie_spm.data))

# Out-degree of each movie in the similarity graph.
deg = movie_spm.dot(np.ones((num_movies)))
print(np.sum(deg == 0))
print(len(deg))
print(movie_spm.sum(0))
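Building on the movie_spm matrix loaded above, one can also peek at the strongest neighbours of a single movie (movie id 0 is an arbitrary choice).

# Hypothetical follow-up: inspect the strongest SLIM neighbours of one movie.
movie_id = 0
row = movie_spm.getrow(movie_id).tocoo()
top10 = sorted(zip(row.col, row.data), key=lambda x: -x[1])[:10]
print('top neighbours of movie', movie_id, ':', top10)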
Example 5
import itertools

import numpy as np
from scipy.sparse import coo_matrix, vstack
from SLIM import SLIM, SLIMatrix

import util  # project-local evaluation helpers (provides util.evaluate)


def run_SLIM(X_train,
             train_users,
             X_meta,
             X_val,
             X_test,
             val_dict,
             test_dict,
             side_info,
             eval_style='strong'):
    # Values for grid-search
    NDCG_values = []
    optimal_model_SLIM = None
    best_values = None
    l1_values = [0, 2.5, 5.0, 10.0, 20]
    l2_values = [0, 5.0, 10.0, 20, 50, 100]
    al_values = [.5, 1.0, 2.5, 5.0, 10.0] if side_info else [1.0]
    for l1r, l2r, alpha in itertools.product(l1_values, l2_values, al_values):
        print('L1: {0}\tL2: {1}\tAlpha: {2}'.format(l1r, l2r, alpha))
        # Set up parameters
        params = {'algo': 'cd', 'nthreads': 16, 'l1r': l1r, 'l2r': l2r}

        # Build training matrix
        trainmat = X_train
        if side_info:
            trainmat = vstack((trainmat, alpha * X_meta))
        trainmat = SLIMatrix(trainmat)

        # Train model
        model = SLIM()
        model.train(params, trainmat)
        print('Converting out of SLIM format...')
        # model.to_csr() works, but densifying the result sometimes crashes:
        #   S_SLIM = model.to_csr().todense()
        # Work around this by writing the model to disk and reading it back in.
        model.save_model(modelfname='slim_model.csr', mapfname='slim_map.csr')

        def read_csr(filename):
            all_rows = []
            all_cols = []
            all_vals = []
            with open(filename, 'r') as f:
                for i, line in enumerate(f):
                    strs = line.split(' ')
                    cols = [int(s) for s in strs[1::2]]
                    vals = [float(s) for s in strs[2::2]]
                    all_cols.extend(cols)
                    all_vals.extend(vals)
                    all_rows.extend([i for _ in cols])
            all_rows = np.array(all_rows, dtype=np.int64)
            all_cols = np.array(all_cols, dtype=np.int64)
            all_vals = np.array(all_vals, dtype=np.float32)
            mat = coo_matrix((all_vals, (all_rows, all_cols)),
                             shape=(X_train.shape[1], X_train.shape[1]))
            return mat

        S_SLIM = read_csr('slim_model.csr')
        print('... done!')
        S_SLIM = S_SLIM.todense()

        # Evaluate on validation data
        print('Evaluating...')
        val_users = list(val_dict.keys())
        val_scores = (X_val[val_users, :] @ S_SLIM
                      - 987654321 * X_val[val_users, :])

        # Evaluate and pretty print
        NDCG = util.evaluate(X_val, val_scores, val_dict)[1][100]
        NDCG_values.append(NDCG)

        print('\tNDCG@100:\t{0}'.format(NDCG))
        if np.max(NDCG_values) == NDCG:
            optimal_model_SLIM = S_SLIM
            best_values = (l1r, l2r, alpha)

    print('Best grid-search values:', best_values)

    # Compute prediction scores for all test users, masking already seen items.
    test_users = list(test_dict.keys())
    test_scores = (X_test[test_users, :] @ optimal_model_SLIM
                   - 987654321 * X_test[test_users, :])

    # Evaluate and pretty print
    results_SLIM = util.evaluate(X_test, test_scores, test_dict)
    return results_SLIM
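A toy illustration of the masking trick used in run_SLIM: subtracting a very large constant from already-seen items pushes them to the bottom of the ranking, so the evaluation only considers unseen items.

# Toy example of masking already-seen items with a large negative offset.
import numpy as np

scores = np.array([[0.9, 0.2, 0.7]])      # model scores for one user
seen = np.array([[1.0, 0.0, 0.0]])        # item 0 was already interacted with
masked = scores - 987654321 * seen
print(np.argsort(-masked, axis=1))        # item 0 is now ranked last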