Ejemplo n.º 1
0
pseudo_num = 1

# Result buffers: eight independent zero matrices of shape (loop, iteration),
# one per tracked quantity.
(
    linucb_regret_matrix,
    lints_regret_matrix,
    giro_regret_matrix,
    lse_soft_regret_matrix,
    online_regret_matrix,
    offline_prob_matrix,
    online_prob_matrix,
    online_beta_matrix,
) = (np.zeros((loop, iteration)) for _ in range(8))

# Draw a Gaussian user vector and rescale it to unit Euclidean norm.
user_feature = np.random.normal(size=dimension)
user_feature /= np.linalg.norm(user_feature)

# Build the model; the training call below is currently disabled.
lse_soft_model = LSE_soft(
    dimension, iteration, item_num, user_feature, alpha, sigma,
    step_size_beta, step_size_gamma, weight1, beta, gamma,
)

# lse_soft_regret_list_train, lse_soft_beta_list_train=lse_soft_model.train(train_loops, item_num)

# Test data.
# NOTE(review): the item_features value produced by this first statement is
# overwritten by the very next one. The draw still advances NumPy's global
# random state, so it is kept to leave the random stream unchanged.
item_features = Normalizer().fit_transform(
    np.random.multivariate_normal(
        mean=np.zeros(dimension),
        cov=np.linalg.pinv(np.identity(dimension)),
        size=item_num,
    )
)
# Item features actually used: normal draws of shape (item_num, dimension),
# passed through Normalizer (presumably sklearn's row-wise normalizer --
# confirm against the file's imports).
item_features = Normalizer().fit_transform(
    np.random.normal(size=(item_num, dimension)))

# Payoff of each item is its dot product with the user vector; the best arm
# is the index of the largest payoff.
true_payoffs = np.dot(item_features, user_feature)
best_arm = true_payoffs.argmax()
Ejemplo n.º 2
0
# Experiment constants for this run.
weight1 = 0.01
beta = 5
gamma = 0
loop = 1
train_loops = 100
loop_num = 10

# Draw a Gaussian user vector and rescale it to unit Euclidean norm.
user_feature = np.random.normal(size=dimension)
user_feature /= np.linalg.norm(user_feature)

# One row per repetition, one column per training loop.
beta_matrix = np.zeros((loop_num, train_loops))
regret_matrix = np.zeros((loop_num, train_loops))

for l in range(loop_num):
    # A fresh model is built for every repetition.
    lse_soft_model = LSE_soft(
        dimension, iteration, item_num, user_feature, alpha, sigma,
        step_size_beta, step_size_gamma, weight1, beta, gamma,
    )
    (
        lse_soft_regret_list_train,
        lse_soft_beta_list_train,
        lse_soft_prob_matrix,
        lse_soft_beta_gradient,
    ) = lse_soft_model.train(train_loops, item_num)
    regret_matrix[l] = lse_soft_regret_list_train
    beta_matrix[l] = lse_soft_beta_list_train

# Mean and standard deviation across repetitions (axis 0).
beta_mean = beta_matrix.mean(axis=0)
beta_std = np.std(beta_matrix, axis=0)
regret_mean = regret_matrix.mean(axis=0)
regret_std = np.std(regret_matrix, axis=0)

# Save the mean beta curve; the file name encodes item_num, dimension and
# phase_num.
np.save(
    path + ('lse_soft_offline_beta_mean_item_%s_d_%s_t_%s.npy' %
            (item_num, dimension, phase_num)),
    beta_mean,
)
np.save(