Beispiel #1
0
def train(sppmi_mat, data_dir=DATA_DIR):
    '''
    Fit a CoFacto model, print test-set ranking metrics and save the factors.

    Relies on module-level names defined outside this function: uid2idx,
    sid2idx, N_COMPONENTS, N_JOBS, max_iter, save_dir, lam_theta, lam_beta,
    lam_gamma, c0, c1, DATA_DIR and DATA_SET_NAME.

    Args:
        sppmi_mat: matrix handed unchanged to ``CoFacto.fit`` as its second
            positional argument (presumably an SPPMI co-occurrence matrix,
            going by the name -- confirm against the caller).
        data_dir: directory containing ``data_train.txt``, ``data_dev.txt``
            and ``data_test.txt``.

    Raises:
        IOError: if ``save_dir`` holds no ``CoFacto_K<N_COMPONENTS>_iter*.npz``
            checkpoint after fitting.

    Returns:
        None. Results are printed and the U/V factors are written to
        ``Model_K<N_COMPONENTS>_<DATA_SET_NAME>.npz``.
    '''
    # load file
    n_users = len(uid2idx)
    print('n_users:{}'.format(n_users))
    n_items = len(sid2idx)
    print('n_items:{}'.format(n_items))

    # All three splits share the same (users x items) shape.
    shape = (n_users, n_items)
    train_data = util.load_input_data(
        os.path.join(data_dir, 'data_train.txt'), shape=shape)
    vad_data = util.load_input_data(
        os.path.join(data_dir, 'data_dev.txt'), shape=shape)
    test_data = util.load_input_data(
        os.path.join(data_dir, 'data_test.txt'), shape=shape)

    # train model
    coder = cofacto.CoFacto(n_components=N_COMPONENTS,
                            max_iter=max_iter,
                            batch_size=1000,
                            init_std=0.01,
                            n_jobs=N_JOBS,
                            random_state=98765,
                            save_params=True,
                            save_dir=save_dir,
                            early_stopping=True,
                            verbose=True,
                            lam_theta=lam_theta,
                            lam_beta=lam_beta,
                            lam_gamma=lam_gamma,
                            c0=c0,
                            c1=c1)
    coder.fit(train_data,
              sppmi_mat,
              vad_data=vad_data,
              batch_users=5000,
              k=100)

    # test model
    # Count only this model's checkpoints. Counting every *.npz in save_dir
    # would pick the wrong (or a missing) file as soon as the directory also
    # holds archives for another K or a previously saved final model.
    ckpt_prefix = 'CoFacto_K%d_iter' % N_COMPONENTS
    n_params = len(glob.glob(os.path.join(save_dir, ckpt_prefix + '*.npz')))
    if n_params == 0:
        raise IOError('no %s*.npz checkpoint found in %s'
                      % (ckpt_prefix, save_dir))
    # Checkpoints are assumed to be numbered contiguously from 0, so the
    # last one is iteration (count - 1).
    last_iter_num = n_params - 1
    params = np.load(
        os.path.join(save_dir, '%s%d.npz' % (ckpt_prefix, last_iter_num)))
    U, V = params['U'], params['V']

    # (message template, metric function, cut-off k), reported in this order.
    metrics = (
        ('Test Recall@20: %.4f', rec_eval.recall_at_k, 20),
        ('Test Recall@50: %.4f', rec_eval.recall_at_k, 50),
        ('Test NDCG@100: %.4f', rec_eval.normalized_dcg_at_k, 100),
        ('Test MAP@100: %.4f', rec_eval.map_at_k, 100),
    )
    for template, metric_fn, k in metrics:
        print(template % metric_fn(
            train_data, test_data, U, V, k=k, vad_data=vad_data))

    # save
    # NOTE(review): the model is written to the module-level DATA_DIR, not to
    # the data_dir argument -- confirm this is intended.
    model_save_fn = os.path.join(
        DATA_DIR, 'Model_K{}_{}.npz'.format(N_COMPONENTS, DATA_SET_NAME))
    np.savez(model_save_fn, U=U, V=V)
    print('saved')
Beispiel #2
0
# Unpack the three factor matrices stored in `model`. U is paired with G for
# the user-game evaluation below; C is used by the user-group evaluation.
U = model[0]
C = model[1]
G = model[2]

# No validation split is passed to the metric functions.
vad_data = None
try:
    train_data = T1_train
    test_data = T1_test
    # Single-argument print(...) emits the same text on Python 2 and is also
    # valid Python 3.
    print('Testing USER-GAME MATRIX')
    print('Test Recall@20: %f' % rec_eval.recall_at_k(
        train_data, test_data, U, G, k=20, vad_data=vad_data))
    print('Test Recall@50: %f' % rec_eval.recall_at_k(
        train_data, test_data, U, G, k=50, vad_data=vad_data))
    print('Test NDCG@100: %f' % rec_eval.normalized_dcg_at_k(
        train_data, test_data, U, G, k=100, vad_data=vad_data))
    print('Test MAP@100: %f' % rec_eval.map_at_k(
        train_data, test_data, U, G, k=100, vad_data=vad_data))
except Exception as err:
    # Still best-effort (the script carries on), but no longer swallows
    # KeyboardInterrupt/SystemExit, and the cause is reported.
    print('Error: %s' % err)

try:
    train_data = T2_train
    test_data = T2_test

    print 'Testing USER-GROUP MATRIX'
    print 'Test Recall@20: %f' % rec_eval.recall_at_k(
        train_data, test_data, U, C, k=20, vad_data=vad_data)
    print 'Test Recall@50: %f' % rec_eval.recall_at_k(
        train_data, test_data, U, C, k=50, vad_data=vad_data)
    print 'Test NDCG@100: %f' % rec_eval.normalized_dcg_at_k(
        train_data, test_data, U, C, k=100, vad_data=vad_data)
    print 'Test MAP@100: %f' % rec_eval.map_at_k(
# Load the test split; the second value returned by load_data is not used.
test_data, _ = load_data(os.path.join(DATA_DIR, 'test.csv'))
# Overwrite every stored value with 1 (presumably test_data is a sparse
# matrix and this binarizes it -- confirm against load_data).
test_data.data = np.ones_like(test_data.data)

# In[32]:

# Count only this model's checkpoints. Counting every *.npz in save_dir would
# select the wrong (or a missing) file once the directory holds any other
# archive, e.g. one for a different n_components.
ckpt_prefix = 'CoFacto_K%d_iter' % n_components
n_params = len(glob.glob(os.path.join(save_dir, ckpt_prefix + '*.npz')))
if n_params == 0:
    raise IOError('no %s*.npz checkpoint found in %s'
                  % (ckpt_prefix, save_dir))

# Checkpoints are assumed to be numbered contiguously from 0, so the last one
# is iteration (count - 1).
params = np.load(
    os.path.join(save_dir, '%s%d.npz' % (ckpt_prefix, n_params - 1)))
U, V = params['U'], params['V']

#write file for map

# In[33]:
#user_idx = rec_eval.user_idx_generator(test_data.shape[1], test_data)
#batch_user=user_idx.stop-user_idx.start
#X_pred = rec_eval._make_prediction(train_data,  U, V, user_idx, batch_user,  vad_data=test_data)
#print 'Test Recall@20: %.4f' % rec_eval.recall_at_k(train_data, test_data, U, V, k=20, vad_data=vad_data)
#print 'Test Recall@50: %.4f' % rec_eval.recall_at_k(train_data, test_data, U, V, k=50, vad_data=vad_data)
#print 'Test NDCG@10: %.4f' % rec_eval.normalized_dcg_at_k(train_data, test_data, U, V, k=10, vad_data=vad_data)
# train_data and vad_data are defined outside this fragment.
# Single-argument print(...) emits the same text on Python 2 and is also
# valid Python 3.
print('Test MAP@10: %.4f' % rec_eval.map_at_k(
    train_data, test_data, U, V, k=10, vad_data=vad_data))

# In[34]:

# NOTE(review): the file name hard-codes K100 and ML20M regardless of
# n_components -- confirm it matches the model actually loaded above.
np.savez('CoFactor_K100_ML20M.npz', U=U, V=V)

# In[ ]: