def create_multiclick_environment(env_config):
    """Builds a RecSim gym environment driven by a dependent-click choice model.

    Args:
      env_config: mapping read for the keys "slate_size", "seed",
        "num_candidates" and "resample_documents".

    Returns:
      A `recsim_gym.RecSimGymEnv` wrapping the assembled environment and using
      `interest_evolution.clicked_watchtime_reward` as its reward.
    """

    def choice_model_ctor(*args, **kwargs):
        # Arguments supplied by the caller are accepted but not used; the
        # model is configured from env_config alone.
        size = env_config["slate_size"]
        # Entry i of next_probs is 0.8 ** (i + 1).
        decaying_probs = [0.8**(position + 1) for position in range(size)]
        return choice_model.DependentClickModel(
            next_probs=decaying_probs,
            slate_size=env_config["slate_size"],
            score_scaling=1.0,
        )

    slate_size = env_config["slate_size"]
    seed = env_config["seed"]

    multiclick_user_model = MulticlickIEvUserModel(
        slate_size,
        choice_model_ctor=choice_model_ctor,
        response_model_ctor=interest_evolution.IEvResponse,
        user_state_ctor=UserState,
        seed=seed,
    )
    # The document sampler is seeded with the same value as the user model.
    video_sampler = interest_evolution.UtilityModelVideoSampler(
        doc_ctor=interest_evolution.IEvVideo,
        seed=seed,
    )
    simulator = environment.Environment(
        multiclick_user_model,
        video_sampler,
        env_config["num_candidates"],
        slate_size,
        resample_documents=env_config["resample_documents"],
    )
    return recsim_gym.RecSimGymEnv(
        simulator,
        interest_evolution.clicked_watchtime_reward,
        utils.aggregate_video_cluster_metrics,
        utils.write_video_cluster_metrics,
    )
def test_step(self):
    """Checks that the greedy pCTR agent's slate matches the top-scored documents.

    The expected slate is the set of indices of the `slate_size` highest
    scores produced by `user_model.avg_user_state.score_document`; the
    comparison ignores ordering within the slate.
    """
    # Create a simple user
    slate_size = 2
    num_candidates = 5
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
    user_model = ie.IEUserModel(
        slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse)

    # Create a set of documents
    document_sampler = ie.IETopicDocumentSampler(seed=1)
    ieenv = environment.Environment(
        user_model,
        document_sampler,
        num_candidates,
        slate_size,
        resample_documents=True)

    # Create agent
    agent = greedy_pctr_agent.GreedyPCTRAgent(action_space,
                                              user_model.avg_user_state)

    # This agent doesn't use the previous user response
    observation, documents = ieenv.reset()
    slate = agent.step(1, dict(user=observation, doc=documents))
    scores = [
        user_model.avg_user_state.score_document(doc_obs)
        for doc_obs in documents.values()
    ]
    # Keep as many top scorers as the slate holds, so the expectation tracks
    # slate_size instead of a literal 2.
    expected_slate = sorted(np.argsort(scores)[-slate_size:])
    self.assertAllEqual(sorted(slate), expected_slate)
def test_env():
    """Builds the movie LTS gym environment, takes one step and prints results.

    Reads 'movie_embedding_features.csv' and the data loaded by
    `data_preprocess.load_data` from a hard-coded relative dataset directory,
    then prints the first five entries of the post-step user observation and
    the environment's observation and action spaces.
    """
    path = '../master_capston/the-movies-dataset/'

    # Parse the feature CSV once. The user sampler gets its own copy, taken
    # before either consumer is constructed, so the document sampler and the
    # user sampler still work on independent frames.
    features_embedding_movies = pd.read_csv(
        os.path.join(path, 'movie_embedding_features.csv'))
    user_features_embedding_movies = features_embedding_movies.copy()

    sampler = LTSDocumentSampler(dataset=features_embedding_movies)

    # Number of items in each recommendation slate returned by the agent.
    slate_size = 3
    # Presumably the number of candidate documents offered to the agent for
    # each slate -- TODO confirm.
    num_candidates = 10

    format_data = data_preprocess.load_data(path)
    positive_user_ids, positive_history_data = data_preprocess.get_user_positive(
        format_data)
    user_sampler = LTSStaticUserSampler(positive_user_ids,
                                        positive_history_data,
                                        user_features_embedding_movies)
    lts_user_model = UserModel(user_sampler, slate_size, LTSResponse)

    ltsenv = environment.Environment(lts_user_model,
                                     sampler,
                                     num_candidates,
                                     slate_size,
                                     resample_documents=True)
    lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)

    # The initial observation is not inspected; reset() is called before the
    # first step.
    lts_gym_env.reset()

    recommendation_slate_0 = [0, 1, 2]
    observation_1, _, _, _ = lts_gym_env.step(recommendation_slate_0)
    print(observation_1['user'][:5])
    print(lts_gym_env.observation_space)
    print(lts_gym_env.action_space)


# test_doc_model()
# test_user_model()
# test_env()
def create_env():
    """Builds the LTS gym environment with a slate of 3 out of 10 candidates.

    Returns:
      The `recsim_gym.RecSimGymEnv` wrapping the environment, using
      `clicked_engagement_reward` as its reward.
    """
    slate_size = 3
    num_candidates = 10
    ltsenv = environment.Environment(LTSUserModel(slate_size),
                                     LTSDocumentSampler(),
                                     num_candidates,
                                     slate_size,
                                     resample_documents=True)
    lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)
    # Previously the environment was built and then dropped, so callers
    # always received None.
    return lts_gym_env
def setUp(self):
    """Creates the environment under test and stores it on `self._environment`.

    Also records the slate size (2) and candidate count (20) used to build it.
    """
    super(EnvironmentTest, self).setUp()
    self._slate_size, self._num_candidates = 2, 20

    ie_user_model = ie.IEUserModel(
        self._slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse,
    )
    topic_sampler = ie.IETopicDocumentSampler()
    # resample_documents is left at the Environment default.
    self._environment = environment.Environment(
        ie_user_model,
        topic_sampler,
        self._num_candidates,
        self._slate_size,
    )
def build_restaurant_recs_recsim_env(config):
    """Assembles the restaurant recommendation recsim_gym environment.

    Args:
      config: configuration passed to `build_user_components` and
        `build_document_components`; its `reward_weights` attribute supplies
        the weights for the reward.

    Returns:
      A `recsim_gym.RecSimGymEnv` whose reward is `weighted_reward` with the
      configured weights bound.
    """
    # Only the user model is needed here; the first component is discarded.
    _unused, user_model = build_user_components(config)
    restaurants, restaurant_sampler = build_document_components(config)

    # Every restaurant is a candidate, one is recommended per step, and the
    # candidate set is not resampled.
    simulator = environment.Environment(
        user_model,
        restaurant_sampler,
        len(restaurants),
        1,
        resample_documents=False,
    )
    weighted = functools.partial(weighted_reward, weights=config.reward_weights)
    return recsim_gym.RecSimGymEnv(simulator, weighted)
def create_environment(env_config):
    """Builds the long-term satisfaction gym environment.

    Args:
      env_config: mapping read for the keys 'slate_size', 'num_candidates'
        and 'resample_documents'.

    Returns:
      A `recsim_gym.RecSimGymEnv` using `clicked_engagement_reward`.
    """
    slate_size = env_config['slate_size']
    lts_user = LTSUserModel(
        slate_size,
        user_state_ctor=LTSUserState,
        response_model_ctor=LTSResponse,
    )
    doc_sampler = LTSDocumentSampler()
    simulator = environment.Environment(
        lts_user,
        doc_sampler,
        env_config['num_candidates'],
        slate_size,
        resample_documents=env_config['resample_documents'],
    )
    return recsim_gym.RecSimGymEnv(simulator, clicked_engagement_reward)
def create_environment(env_config):
    """Builds the simple sequential testing gym environment.

    Args:
      env_config: mapping read for the keys 'num_candidates', 'slate_size',
        'seed', 'starting_probs' and 'resample_documents'.

    Returns:
      A `recsim_gym.RecSimGymEnv` using `total_reward`, with metric
      aggregation and metric writing replaced by no-ops.

    Raises:
      ValueError: if 'num_candidates' is below 4.
    """
    num_candidates = env_config['num_candidates']
    if num_candidates < 4:
        raise ValueError('num_candidates must be at least 4.')

    # Class-level setting: the largest document id is num_candidates - 1.
    SimpleSequentialResponse.MAX_DOC_ID = num_candidates - 1

    def _skip_aggregation(_, __, ___):
        return None

    def _skip_writing(_, __):
        return None

    slate_size = env_config['slate_size']
    seed = env_config['seed']
    sequential_user = SimpleSequentialUserModel(
        slate_size,
        seed=seed,
        starting_probs=env_config['starting_probs'],
    )
    doc_sampler = SimpleSequentialDocumentSampler(seed=seed)
    simple_seq_env = environment.Environment(
        sequential_user,
        doc_sampler,
        num_candidates,
        slate_size,
        resample_documents=env_config['resample_documents'],
    )
    return recsim_gym.RecSimGymEnv(
        simple_seq_env, total_reward, _skip_aggregation, _skip_writing)
def create_multiclick_environment(env_config, choice_model):
    """Builds a multiclick gym environment rewarded by `total_clicks_reward`.

    As a side effect, `IEDocument.NUM_CLUSTERS` and `IEResponse.NUM_CLUSTERS`
    are set to the document sampler's `num_clusters`.

    Args:
      env_config: mapping read for the keys 'seed', 'slate_size',
        'num_candidates' and 'resample_documents'.
      choice_model: forwarded to `MulticlickIEUserModel` as `choice_model`.

    Returns:
      A `recsim_gym.RecSimGymEnv` with the video-cluster metric aggregator and
      writer from `utils` attached.
    """
    seed = env_config['seed']
    topic_sampler = IETopicDocumentSampler(seed=seed)

    # Keep the class-level cluster counts in sync with the sampler.
    cluster_count = topic_sampler.num_clusters
    IEDocument.NUM_CLUSTERS = cluster_count
    IEResponse.NUM_CLUSTERS = topic_sampler.num_clusters

    slate_size = env_config['slate_size']
    multiclick_user = MulticlickIEUserModel(
        slate_size,
        user_state_ctor=IEUserState,
        response_model_ctor=IEResponse,
        seed=seed,
        choice_model=choice_model,
    )
    simulator = environment.Environment(
        multiclick_user,
        topic_sampler,
        env_config['num_candidates'],
        slate_size,
        resample_documents=env_config['resample_documents'],
    )
    return recsim_gym.RecSimGymEnv(
        simulator,
        total_clicks_reward,
        utils.aggregate_video_cluster_metrics,
        utils.write_video_cluster_metrics,
    )
def test_slate_indices_and_length(self):
    """Checks the random agent's slate has `slate_size` entries, all valid indices."""
    slate_size, num_candidates = 2, 100

    # Initialize agent
    choices_per_slot = num_candidates * np.ones((slate_size,))
    action_space = spaces.MultiDiscrete(choices_per_slot)
    video_user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse,
    )
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    # Create a set of documents
    video_sampler = iev.IEvVideoSampler()
    simulator = environment.Environment(
        video_user_model, video_sampler, num_candidates, slate_size)

    # Test that slate indices in correct range and length is correct
    user_obs, doc_obs = simulator.reset()
    slate = agent.step(1, {'user': user_obs, 'doc': doc_obs})
    self.assertLen(slate, slate_size)
    self.assertAllInSet(slate, range(num_candidates))
def test_step(self):
    """Checks that a random agent seeded with 0 returns the slate [2, 0]."""
    # Create a simple user
    slate_size = 2
    video_user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse,
    )

    # Create a candidate_set with 5 items
    num_candidates = 5
    video_sampler = iev.IEvVideoSampler()
    simulator = environment.Environment(
        video_user_model, video_sampler, num_candidates, slate_size)

    # Create agent
    choices_per_slot = num_candidates * np.ones((slate_size,))
    action_space = spaces.MultiDiscrete(choices_per_slot)
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    # This agent doesn't use the previous user response
    user_obs, doc_obs = simulator.reset()
    slate = agent.step(1, {'user': user_obs, 'doc': doc_obs})
    self.assertAllEqual(slate, [2, 0])
def create_environment(env_config):
    """Builds the interest evolution gym environment.

    Args:
      env_config: mapping read for the keys 'slate_size', 'seed',
        'num_candidates', 'resample_documents' and 'reward_function'.

    Returns:
      A `recsim_gym.RecSimGymEnv` using the configured reward function and the
      video-cluster metric aggregator and writer from `utils`.
    """
    slate_size = env_config['slate_size']
    seed = env_config['seed']

    video_user_model = IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialProportionalChoiceModel,
        response_model_ctor=IEvResponse,
        user_state_ctor=IEvUserState,
        seed=seed,
    )
    # The document sampler is seeded with the same value as the user model.
    video_sampler = IEvVideoSampler(doc_ctor=IEvVideo, seed=seed)

    simulator = environment.Environment(
        video_user_model,
        video_sampler,
        env_config['num_candidates'],
        slate_size,
        resample_documents=env_config['resample_documents'],
    )
    return recsim_gym.RecSimGymEnv(
        simulator,
        env_config['reward_function'],
        utils.aggregate_video_cluster_metrics,
        utils.write_video_cluster_metrics,
    )
def _build_components(deterministic_transitions=False):
    """Builds the toy restaurant recsim components.

    Args:
      deterministic_transitions: when true, the transition matrix comes from
        `_always_moving_transition_matrix`; otherwise from
        `restaurant_toy_recsim.TransitionMatrix.RandomMatrix`.

    Returns:
      A tuple (config, user_sampler, user_model, restaurants,
      document_sampler, recsim_env).
    """
    rec_types = [
        restaurant_toy_recsim.RestaurantType.JUNK,
        restaurant_toy_recsim.RestaurantType.HEALTHY,
    ]
    user_states = ['Neutral', 'UnhealthySt', 'HealthySt']
    num_states, num_actions = len(user_states), len(rec_types)

    if deterministic_transitions:
        make_transition_matrix = _always_moving_transition_matrix
    else:
        make_transition_matrix = restaurant_toy_recsim.TransitionMatrix.RandomMatrix

    # Build the transition matrix before drawing the random rewards, matching
    # the order in which the two are evaluated as constructor arguments.
    transitions = make_transition_matrix(num_states, num_actions)
    rewards = np.random.rand(num_states, num_actions)
    user_config = restaurant_toy_recsim.UserConfig(
        user_states_names=user_states,
        state_transition_matrix=transitions,
        reward_matrix=rewards,
    )

    seeds = restaurant_toy_recsim.SimulationSeeds(2, 5)
    config = restaurant_toy_recsim.EnvConfig(user_config, rec_types, seeds)

    user_sampler, user_model = restaurant_toy_recsim.build_user_components(config)
    restaurants, document_sampler = (
        restaurant_toy_recsim.build_document_components(config))

    # One candidate per recommendation type, a single-item slate, and no
    # resampling of the candidate set.
    simulator = environment.Environment(
        user_model,
        document_sampler,
        num_candidates=num_actions,
        slate_size=1,
        resample_documents=False,
    )
    recsim_env = recsim_gym.RecSimGymEnv(
        simulator, restaurant_toy_recsim.rating_reward)

    return (config, user_sampler, user_model, restaurants, document_sampler,
            recsim_env)
num_candidates = 15 format_data = data_preprocess.load_data(path) # print(format_data.head()) # print(format_data.shape) features_embedding_movies = pd.read_csv( os.path.join(path, 'movie_embedding_features.csv')) positive_user_ids, positive_history_data = data_preprocess.get_user_positive( format_data) user_sampler = LTSStaticUserSampler(positive_user_ids, positive_history_data, features_embedding_movies) LTSUserModel = UserModel(user_sampler, slate_size, LTSResponse) ltsenv = environment.Environment(LTSUserModel, sampler, num_candidates, slate_size, resample_documents=True) lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward) def create_agent(sess, environment, eval_mode, summary_writer=None): return Actor_Critic_Agent(environment.observation_space, environment.action_space) tmp_base_dir = 'detmp/recsim/' # runner = runner_lib.EvalRunner( # base_dir=tmp_base_dir, # create_agent_fn=create_agent,