Example #1
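
import numpy as np
import tensorflow as tf

# clk, nn, evl, and opt are the project's own utility modules (click models,
# network construction, evaluation, and optimization); their imports, like
# the variables cutoff, click_model_name, and data, are not shown in this
# snippet.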
max_ranking_size = np.min((cutoff, data.max_query_size()))

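# The click model yields per-rank parameters; under the affine click model
# used here (cf. the 'affine estimator' run name in Example #2), alpha is
# the relevance-dependent slope and beta the relevance-independent offset
# of the click probability at each rank.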
click_model = clk.get_click_model(click_model_name)

alpha, beta = click_model(np.arange(max_ranking_size))

model_params = {'hidden units': [32, 32],}

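# Build the scoring network (two hidden layers of 32 units) and a plain
# SGD optimizer.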
model = nn.init_model(model_params)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

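# Baseline evaluation of the freshly initialized policy; scaling the graded
# labels by 0.25 presumably maps them to relevance probabilities in [0, 1].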
first_results = evl.evaluate_policy(
    model,
    data.test,
    data.test.label_vector*0.25,
    alpha,
    beta,
)

# print(first_results)

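# Train the policy; note that both training and validation collapse the
# affine parameters, passing alpha+beta as the alpha argument and zeros
# as beta.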
model, vali_reward = opt.optimize_policy(
    model, optimizer,
    data_train=data.train,
    train_doc_weights=data.train.label_vector*0.25,
    train_alpha=alpha+beta,
    train_beta=np.zeros_like(beta),
    data_vali=data.validation,
    vali_doc_weights=data.validation.label_vector*0.25,
    vali_alpha=alpha+beta,
    vali_beta=np.zeros_like(beta),
)
Example #2
output = {  # opening line reconstructed; it is cut off in the snippet
    'run name': 'affine estimator',
    'number of updates': n_updates,
    'number of evaluation points': n_eval,
    'update iterations': [int(x) for x in update_points],
    'evaluation iterations': [int(x) for x in eval_points],
    'model hyperparameters': model_params,
    'results': results,
}

if args.pretrained_model:
    output['initial model'] = args.pretrained_model

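# Evaluate the current (logging) policy on the test set with the true
# document weights.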
logging_policy_metrics = evl.evaluate_policy(
    model,
    data.test,
    true_test_doc_weights,
    alpha,
    beta,
)

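# Cache the logging model's scores for every training and validation
# document (first output column of the network).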
train_policy_scores[:] = logging_model(data.train.feature_matrix)[:, 0].numpy()
vali_policy_scores[:] = logging_model(
    data.validation.feature_matrix)[:, 0].numpy()

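# Simulate clicks incrementally: each iteration gathers newly sampled
# clicks, displays, and query frequencies for the next evaluation point
# (the call producing these values is cut off in the snippet).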
i_update = 0
for n_queries_sampled in eval_points:

    (
        new_train_clicks,
        new_train_displays,
        new_train_query_freq,