def main(argv):
    # if len(argv) > 1:
    #     raise app.UsageError('Too many command-line arguments.')
    # runner_lib.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
    seed = 0
    slate_size = 2
    np.random.seed(seed)
    env_config = {
        'num_candidates': 5,
        'slate_size': slate_size,
        'resample_documents': True,
        'seed': seed,
    }
    tmp_base_dir = 'C:/Users/Moham/PycharmProjects/ExpertDrivenRec/testing/'
    # Train, then evaluate, against the same base directory.
    runner = runner_lib.TrainRunner(
        base_dir=tmp_base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        episode_log_file='',
        max_training_steps=50,
        num_iterations=10)
    runner.run_experiment()
    runner = runner_lib.EvalRunner(
        base_dir=tmp_base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        max_eval_episodes=5,
        test_mode=True)
    runner.run_experiment()
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')
    runner_lib.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
    seed = 0
    slate_size = 2
    np.random.seed(seed)
    env_config = {
        'num_candidates': 5,
        'slate_size': slate_size,
        'resample_documents': True,
        'seed': seed,
    }
    runner = runner_lib.TrainRunner(
        base_dir=FLAGS.base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        episode_log_file=FLAGS.episode_log_file,
        max_training_steps=50,
        num_iterations=10)
    runner.run_experiment()
    runner = runner_lib.EvalRunner(
        base_dir=FLAGS.base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        max_eval_episodes=5,
        test_mode=True)
    runner.run_experiment()
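# Both main() variants above pass a create_agent factory that is not shown in
# these snippets. A minimal sketch following the standard RecSim pattern,
# assuming full_slate_q_agent is available from recsim.agents:
from recsim.agents import full_slate_q_agent

def create_agent(sess, environment, eval_mode, summary_writer=None):
    """Builds a FullSlateQAgent bound to the environment's spaces."""
    kwargs = {
        'observation_space': environment.observation_space,
        'action_space': environment.action_space,
        'summary_writer': summary_writer,
        'eval_mode': eval_mode,
    }
    return full_slate_q_agent.FullSlateQAgent(sess, **kwargs)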
def test_recsim_interest_evolution(self):
    num_candidate = 10
    env_config = {
        "num_candidates": num_candidate,
        "slate_size": 3,
        "resample_documents": False,
        "seed": 1,
    }
    env = interest_evolution.create_environment(env_config)
    env = ValueWrapper(env, ValueMode.INNER_PROD)
    obs_preprocessor = make_default_obs_preprocessor(env)
    obs = env.reset()
    state = obs_preprocessor(obs)
    self.assertFalse(state.has_float_features_only)
    self.assertEqual(state.float_features.shape, (1, obs["user"].shape[0]))
    self.assertEqual(state.float_features.dtype, torch.float32)
    self.assertEqual(state.float_features.device, torch.device("cpu"))
    npt.assert_array_almost_equal(obs["user"], state.float_features.squeeze(0))
    doc_float_features = state.candidate_docs.float_features
    self.assertIsNotNone(doc_float_features)
    self.assertEqual(
        doc_float_features.shape, (1, num_candidate, obs["doc"]["0"].shape[0])
    )
    self.assertEqual(doc_float_features.dtype, torch.float32)
    self.assertEqual(doc_float_features.device, torch.device("cpu"))
    for i, v in enumerate(obs["doc"].values()):
        npt.assert_array_almost_equal(v, doc_float_features[0, i])
def make_recsim_env(config):
    DEFAULT_ENV_CONFIG = {
        "num_candidates": 10,
        "slate_size": 2,
        "resample_documents": True,
        "seed": 0,
        "convert_to_discrete_action_space": False,
    }
    env_config = DEFAULT_ENV_CONFIG.copy()
    env_config.update(config)
    env = interest_evolution.create_environment(env_config)
    env = RecSimResetWrapper(env)
    env = RecSimObservationSpaceWrapper(env)
    # Check the merged config, where the key is always present; indexing the
    # raw `config` would raise KeyError whenever the caller omits it.
    if env_config["convert_to_discrete_action_space"]:
        env = MultiDiscreteToDiscreteActionWrapper(env)
    return env
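# Example usage of make_recsim_env (a minimal sketch; overrides are merged
# over DEFAULT_ENV_CONFIG, so only the keys being changed need to be passed):
env = make_recsim_env({"num_candidates": 20, "convert_to_discrete_action_space": True})
obs = env.reset()
obs, reward, terminal, info = env.step(env.action_space.sample())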
def test_recsim_interest_evolution(self):
    num_candidate = 10
    env_config = {
        "num_candidates": num_candidate,
        "slate_size": 3,
        "resample_documents": False,
        "seed": 1,
    }
    env = interest_evolution.create_environment(env_config)
    replay_buffer, inserted = _create_replay_buffer_and_insert(env)
    batch = replay_buffer.sample_transition_batch_tensor(indices=np.array([0]))
    npt.assert_array_almost_equal(
        inserted[0]["observation"]["user"].astype(np.float32),
        batch.state.squeeze(0),
    )
    npt.assert_array_almost_equal(
        inserted[1]["observation"]["user"], batch.next_state.squeeze(0)
    )
    docs = list(inserted[0]["observation"]["doc"].values())
    next_docs = list(inserted[1]["observation"]["doc"].values())
    for i in range(num_candidate):
        npt.assert_array_equal(docs[i], batch.doc.squeeze(0)[i])
        npt.assert_array_equal(next_docs[i], batch.next_doc.squeeze(0)[i])
    npt.assert_array_equal(inserted[0]["action"], batch.action.squeeze(0))
    npt.assert_array_equal(inserted[1]["action"], batch.next_action.squeeze(0))
    # The first stored step has no response yet, so all response fields are zero.
    npt.assert_array_equal([0, 0, 0], batch.response_click.squeeze(0))
    npt.assert_array_equal([0, 0, 0], batch.response_cluster_id.squeeze(0))
    npt.assert_array_equal([0, 0, 0], batch.response_liked.squeeze(0))
    npt.assert_array_equal([0.0, 0.0, 0.0], batch.response_quality.squeeze(0))
    npt.assert_array_equal([0.0, 0.0, 0.0], batch.response_watch_time.squeeze(0))
    resp = inserted[1]["observation"]["response"]
    for i in range(env_config["slate_size"]):
        npt.assert_array_equal(
            resp[i]["click"], batch.next_response_click.squeeze(0)[i]
        )
        npt.assert_array_equal(
            resp[i]["cluster_id"], batch.next_response_cluster_id.squeeze(0)[i]
        )
        npt.assert_array_equal(
            resp[i]["liked"], batch.next_response_liked.squeeze(0)[i]
        )
        npt.assert_array_almost_equal(
            resp[i]["quality"], batch.next_response_quality.squeeze(0)[i]
        )
        npt.assert_array_almost_equal(
            resp[i]["watch_time"], batch.next_response_watch_time.squeeze(0)[i]
        )
def make(self) -> gym.Env:
    env_config = {
        "slate_size": self.slate_size,
        "seed": self.initial_seed,
        "num_candidates": self.num_candidates,
        "resample_documents": self.resample_documents,
    }
    if self.is_interest_exploration:
        env = interest_exploration.create_environment(env_config)
        # Interest exploration uses a constant-zero document value.
        return ValueWrapper(env, lambda user, doc: 0.0)
    if self.single_selection:
        env = interest_evolution.create_environment(env_config)
        return ValueWrapper(env, dot_value_fn)
    else:
        env = create_multiclick_environment(env_config)
        return ValueWrapper(env, multi_selection_value_fn)
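# dot_value_fn and multi_selection_value_fn are defined elsewhere in the
# module; a plausible sketch of the single-selection case, assuming the
# document value is the inner product of user and document features:
import numpy as np

def dot_value_fn(user, doc):
    return np.inner(user, doc)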
def test_create_from_recsim_interest_evolution(self):
    env_config = {
        "num_candidates": 20,
        "slate_size": 3,
        "resample_documents": False,
        "seed": 1,
    }
    env = interest_evolution.create_environment(env_config)
    replay_buffer = ReplayBuffer.create_from_env(
        env, replay_memory_size=100, batch_size=10, store_log_prob=True
    )
    obs = env.reset()
    observation = obs["user"]
    action = env.action_space.sample()
    log_prob = -1.0
    doc_features = np.stack(list(obs["doc"].values()), axis=0)
    next_obs, reward, terminal, _info = env.step(action)
    response = next_obs["response"]
    click = np.array([r["click"] for r in response])
    response_quality = np.stack([r["quality"] for r in response], axis=0)
    response_cluster_id = np.array([r["cluster_id"] for r in response])
    response_watch_time = np.stack([r["watch_time"] for r in response], axis=0)
    response_liked = np.array([r["liked"] for r in response])
    replay_buffer.add(
        observation,
        action,
        reward,
        terminal,
        mdp_id=0,
        sequence_number=0,
        doc=doc_features,
        response_click=click,
        response_cluster_id=response_cluster_id,
        response_quality=response_quality,
        response_liked=response_liked,
        response_watch_time=response_watch_time,
        log_prob=log_prob,
    )
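# Once enough transitions have been added, batches can be read back out; a
# minimal sketch mirroring the sampling call used in the replay-buffer test
# above (assumes the buffer already holds at least one complete transition):
batch = replay_buffer.sample_transition_batch_tensor(indices=np.array([0]))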
env_config = {
    'num_candidates': num_candidates,
    'slate_size': slate_size,
    'resample_documents': True,
    'seed': 0,
}
env_config1 = {
    'num_candidates': num_candidates,
    'slate_size': slate_size,
    'resample_documents': True,
    'seed': 0,
    'reward_function': clicked_quality_reward,
}

# User simulation environment: the interest evolution model presented in the
# SlateQ paper.
recsim_gym_env = interest_evolution.create_environment(env_config1)
results_f = []
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # agent = full_slate_q_agent.FullSlateQAgent(
    #     sess, recsim_gym_env.observation_space, recsim_gym_env.action_space)
    agent = greedy_pctr_agent.GreedyPCTRAgent(
        sess, recsim_gym_env.observation_space, recsim_gym_env.action_space)
    # agent = cluster_bandit_agent.ClusterBanditAgent(
    #     sess, recsim_gym_env.observation_space, recsim_gym_env.action_space)
    for i in range(10):
        steps_f, watch, time_f, q, q_vid, w_vid = evalRun_one_episode(
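# clicked_quality_reward, referenced by env_config1 above, is user-defined and
# not shown in this snippet; a minimal sketch, assuming it sums the quality of
# the clicked responses (RecSim's interest-evolution response objects expose
# `clicked` and `quality` fields, as the built-in clicked_watchtime_reward does
# for watch time):
def clicked_quality_reward(responses):
    return sum(r.quality for r in responses if r.clicked)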