# --- Policy setup, initial evaluation, and training (script fragment) ---
# Rankings are capped at the smaller of the display cutoff and the largest
# query in the dataset.
max_ranking_size = np.min((cutoff, data.max_query_size()))
# Per-rank click-model parameters for ranks 0..max_ranking_size-1.
# NOTE(review): presumably alpha/beta are position-based examination/noise
# parameters — confirm against the clk module.
click_model = clk.get_click_model(click_model_name)
alpha, beta = click_model(np.arange(max_ranking_size))
# Small two-hidden-layer scoring network, trained with plain SGD.
model_params = {'hidden units': [32, 32],}
model = nn.init_model(model_params)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
# Evaluate the freshly initialized policy on the test split.
# NOTE(review): document weights are label_vector*0.25 here and in the
# training call below — presumably a fixed relevance-to-weight scaling;
# verify against how labels are graded in this dataset.
first_results = evl.evaluate_policy(
    model,
    data.test,
    data.test.label_vector*0.25,
    alpha,
    beta,
)
# print(first_results)
# Optimize the policy. alpha+beta is passed as the alpha term while beta is
# zeroed out — NOTE(review): this reparameterization matches the
# 'affine estimator' run name used later in this file; confirm intended.
# (Call continues past this fragment.)
model, vali_reward = opt.optimize_policy(
    model,
    optimizer,
    data_train=data.train,
    train_doc_weights=data.train.label_vector*0.25,
    train_alpha=alpha+beta,
    train_beta=np.zeros_like(beta),
    data_vali=data.validation,
    vali_doc_weights=data.validation.label_vector*0.25,
    vali_alpha=alpha+beta,
    vali_beta=np.zeros_like(beta),
# Remaining entries of the run metadata/results dict.
# NOTE(review): the opening brace lies outside this fragment — presumably the
# dict is the `output` variable updated just below; confirm.
'run name': 'affine estimator',
'number of updates': n_updates,
'number of evaluation points': n_eval,
# Cast to plain int so the values are JSON-serializable.
'update iterations': [int(x) for x in update_points],
'evaluation iterations': [int(x) for x in eval_points],
'model hyperparameters': model_params,
'results': results,
}
# Record where the starting model came from, when one was supplied.
if args.pretrained_model:
    output['initial model'] = args.pretrained_model
# Evaluate the logging policy on the test split using the true weights.
logging_policy_metrics = evl.evaluate_policy(
    model,
    data.test,
    true_test_doc_weights,
    alpha,
    beta,
)
# Fill the preallocated score buffers in place ([:] assignment) with the
# logging model's score for every train/validation document.
train_policy_scores[:] = logging_model(data.train.feature_matrix)[:, 0].numpy()
vali_policy_scores[:] = logging_model(
    data.validation.feature_matrix)[:, 0].numpy()
i_update = 0
# Simulation loop: one iteration per evaluation point, with
# n_queries_sampled simulated queries accumulated so far.
# NOTE(review): confirm eval_points semantics (cumulative query counts?).
for n_queries_sampled in eval_points:
    # Unpack freshly sampled interaction data; the producing call
    # continues past this fragment.
    (
        new_train_clicks,
        new_train_displays,
        new_train_query_freq,