Example #1
def create_multiclick_environment(env_config):
    """Creates an interest evolution environment."""
    def choice_model_ctor(*args, **kwargs):
        return choice_model.DependentClickModel(
            next_probs=[0.8**(i + 1) for i in range(env_config["slate_size"])],
            slate_size=env_config["slate_size"],
            score_scaling=1.0,
        )

    user_model = MulticlickIEvUserModel(
        env_config["slate_size"],
        choice_model_ctor=choice_model_ctor,
        response_model_ctor=interest_evolution.IEvResponse,
        user_state_ctor=UserState,
        seed=env_config["seed"],
    )

    document_sampler = interest_evolution.UtilityModelVideoSampler(
        doc_ctor=interest_evolution.IEvVideo, seed=env_config["seed"])

    ievenv = environment.Environment(
        user_model,
        document_sampler,
        env_config["num_candidates"],
        env_config["slate_size"],
        resample_documents=env_config["resample_documents"],
    )

    return recsim_gym.RecSimGymEnv(
        ievenv,
        interest_evolution.clicked_watchtime_reward,
        utils.aggregate_video_cluster_metrics,
        utils.write_video_cluster_metrics,
    )
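
A minimal usage sketch for the factory above; the env_config keys mirror the ones the function reads, and the concrete values here are illustrative:

env_config = {
    "slate_size": 3,
    "num_candidates": 10,
    "resample_documents": True,
    "seed": 42,
}
env = create_multiclick_environment(env_config)
observation = env.reset()
# Recommend the first slate_size candidates and advance the simulation one step.
observation, reward, done, _ = env.step(list(range(env_config["slate_size"])))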
Example #2
  def test_step(self):
    # Create a simple user
    slate_size = 2
    num_candidates = 5
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
    user_model = ie.IEUserModel(
        slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse)

    # Create a set of documents
    document_sampler = ie.IETopicDocumentSampler(seed=1)
    ieenv = environment.Environment(
        user_model,
        document_sampler,
        num_candidates,
        slate_size,
        resample_documents=True)

    # Create agent
    agent = greedy_pctr_agent.GreedyPCTRAgent(action_space,
                                              user_model.avg_user_state)

    # This agent doesn't use the previous user response
    observation, documents = ieenv.reset()
    slate = agent.step(1, dict(user=observation, doc=documents))
    scores = [
        user_model.avg_user_state.score_document(doc_obs)
        for doc_obs in list(documents.values())
    ]
    expected_slate = sorted(np.argsort(scores)[-2:])
    self.assertAllEqual(sorted(slate), expected_slate)
Example #3
def test_env():
    path = '../master_capston/the-movies-dataset/'
    features_embedding_movies = pd.read_csv(
        os.path.join(path, 'movie_embedding_features.csv'))
    sampler = LTSDocumentSampler(dataset=features_embedding_movies)

    # the number of items in the slate the agent returns
    slate_size = 3

    # presumably the number of candidate items the agent can choose from for each slate
    num_candidates = 10

    format_data = data_preprocess.load_data(path)
    # print(format_data.head())
    # print(format_data.shape)

    positive_user_ids, positive_history_data = data_preprocess.get_user_positive(
        format_data)
    user_sampler = LTSStaticUserSampler(positive_user_ids,
                                        positive_history_data,
                                        features_embedding_movies)

    LTSUserModel = UserModel(user_sampler, slate_size, LTSResponse)

    ltsenv = environment.Environment(LTSUserModel,
                                     sampler,
                                     num_candidates,
                                     slate_size,
                                     resample_documents=True)
    lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)

    observation_0 = lts_gym_env.reset()
    # print(observation_0['user'][:5])
    # print('Observation 0')
    # print('Available documents')
    # doc_strings = ['doc_id ' + key + " kaleness " + str(value) for key, value
    #                in observation_0['doc'].items()]
    # print('\n'.join(doc_strings))

    recommendation_slate_0 = [0, 1, 2]
    observation_1, reward, done, _ = lts_gym_env.step(recommendation_slate_0)

    print(observation_1['user'][:5])
    # print('Noisy user state observation')
    # print(observation_0['user'])

    print(lts_gym_env.observation_space)
    print(lts_gym_env.action_space)


# test_doc_model()

# test_user_model()
# test_env()
Example #4
def create_env():
    slate_size = 3
    num_candidates = 10
    ltsenv = environment.Environment(LTSUserModel(slate_size),
                                     LTSDocumentSampler(),
                                     num_candidates,
                                     slate_size,
                                     resample_documents=True)

    lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)
    return lts_gym_env
Example #5
  def setUp(self):
    super(EnvironmentTest, self).setUp()
    self._slate_size = 2
    self._num_candidates = 20
    user_model = ie.IEUserModel(self._slate_size,
                                user_state_ctor=ie.IEUserState,
                                response_model_ctor=ie.IEResponse)
    document_sampler = ie.IETopicDocumentSampler()
    self._environment = environment.Environment(user_model,
                                                document_sampler,
                                                self._num_candidates,
                                                self._slate_size)
Example #6
def build_restaurant_recs_recsim_env(config):
    """Returns a recsim_gym environment object."""
    _, user_model = build_user_components(config)
    restaurants, restaurant_sampler = build_document_components(config)
    env = environment.Environment(user_model=user_model,
                                  document_sampler=restaurant_sampler,
                                  num_candidates=len(restaurants),
                                  slate_size=1,
                                  resample_documents=False)
    reward_aggregator = functools.partial(weighted_reward,
                                          weights=config.reward_weights)
    return recsim_gym.RecSimGymEnv(env, reward_aggregator)
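
Note that functools.partial pre-binds the configured weights into weighted_reward, so the resulting callable takes only the user responses, matching the single-argument reward-aggregator signature RecSimGymEnv invokes on each step.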
Example #7
def create_environment(env_config):
    """Creates a long-term satisfaction environment."""

    user_model = LTSUserModel(env_config['slate_size'],
                              user_state_ctor=LTSUserState,
                              response_model_ctor=LTSResponse)

    document_sampler = LTSDocumentSampler()

    ltsenv = environment.Environment(
        user_model,
        document_sampler,
        env_config['num_candidates'],
        env_config['slate_size'],
        resample_documents=env_config['resample_documents'])

    return recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)
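
A short episode loop against this environment, a sketch that assumes the LTS user's time budget eventually ends the episode; the config values are placeholders:

env = create_environment({
    'slate_size': 2,
    'num_candidates': 10,
    'resample_documents': True,
})
observation = env.reset()
done = False
while not done:
    # Fixed slate of the first two candidates; a real agent would rank here.
    observation, reward, done, _ = env.step([0, 1])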
Example #8
def create_environment(env_config):
    """Creates an simple sequential testing environment."""
    if env_config['num_candidates'] < 4:
        raise ValueError('num_candidates must be at least 4.')

    SimpleSequentialResponse.MAX_DOC_ID = env_config['num_candidates'] - 1
    user_model = SimpleSequentialUserModel(
        env_config['slate_size'],
        seed=env_config['seed'],
        starting_probs=env_config['starting_probs'])
    document_sampler = SimpleSequentialDocumentSampler(seed=env_config['seed'])
    simple_seq_env = environment.Environment(
        user_model,
        document_sampler,
        env_config['num_candidates'],
        env_config['slate_size'],
        resample_documents=env_config['resample_documents'])

    return recsim_gym.RecSimGymEnv(simple_seq_env, total_reward,
                                   lambda _, __, ___: None, lambda _, __: None)
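
The two trailing lambdas are no-op stand-ins for RecSimGymEnv's metrics-aggregator and metrics-writer hooks (three and two arguments, respectively), useful when an experiment has no cluster metrics to log.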
Example #9
def create_multiclick_environment(env_config, choice_model):
    document_sampler = IETopicDocumentSampler(seed=env_config['seed'])
    IEDocument.NUM_CLUSTERS = document_sampler.num_clusters
    IEResponse.NUM_CLUSTERS = document_sampler.num_clusters

    user_model = MulticlickIEUserModel(env_config['slate_size'],
                                       user_state_ctor=IEUserState,
                                       response_model_ctor=IEResponse,
                                       seed=env_config['seed'],
                                       choice_model=choice_model)

    ieenv = environment.Environment(
        user_model,
        document_sampler,
        env_config['num_candidates'],
        env_config['slate_size'],
        resample_documents=env_config['resample_documents'])
    return recsim_gym.RecSimGymEnv(ieenv, total_clicks_reward,
                                   utils.aggregate_video_cluster_metrics,
                                   utils.write_video_cluster_metrics)
Example #10
  def test_slate_indices_and_length(self):
    # Initialize agent
    slate_size = 2
    num_candidates = 100
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))

    user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse)
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    # Create a set of documents
    document_sampler = iev.IEvVideoSampler()
    ievenv = environment.Environment(user_model, document_sampler,
                                     num_candidates, slate_size)

    # Test that slate indices in correct range and length is correct
    observation, documents = ievenv.reset()
    slate = agent.step(1, dict(user=observation, doc=documents))
    self.assertLen(slate, slate_size)
    self.assertAllInSet(slate, range(num_candidates))
Example #11
  def test_step(self):
    # Create a simple user
    slate_size = 2
    user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse)

    # Create a candidate_set with 5 items
    num_candidates = 5
    document_sampler = iev.IEvVideoSampler()
    ievsim = environment.Environment(user_model, document_sampler,
                                     num_candidates, slate_size)

    # Create agent
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    # This agent doesn't use the previous user response
    observation, documents = ievsim.reset()
    slate = agent.step(1, dict(user=observation, doc=documents))
    self.assertAllEqual(slate, [2, 0])
Example #12
def create_environment(env_config):
    """Creates an interest evolution environment."""

    user_model = IEvUserModel(
        env_config['slate_size'],
        choice_model_ctor=choice_model.MultinomialProportionalChoiceModel,
        response_model_ctor=IEvResponse,
        user_state_ctor=IEvUserState,
        seed=env_config['seed'])

    document_sampler = IEvVideoSampler(doc_ctor=IEvVideo,
                                       seed=env_config['seed'])

    ievenv = environment.Environment(
        user_model,
        document_sampler,
        env_config['num_candidates'],
        env_config['slate_size'],
        resample_documents=env_config['resample_documents'])

    return recsim_gym.RecSimGymEnv(ievenv, env_config['reward_function'],
                                   utils.aggregate_video_cluster_metrics,
                                   utils.write_video_cluster_metrics)
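
Unlike the other factories here, this one reads the reward function out of env_config. A sketch of the call, assuming clicked_watchtime_reward is in scope as in Example #1:

env = create_environment({
    'slate_size': 2,
    'num_candidates': 10,
    'resample_documents': True,
    'seed': 0,
    'reward_function': clicked_watchtime_reward,  # assumed import, as in Example #1
})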
Example #13
def _build_components(deterministic_transitions=False):
    """Returns recsim components."""
    rec_types = [
        restaurant_toy_recsim.RestaurantType.JUNK,
        restaurant_toy_recsim.RestaurantType.HEALTHY
    ]
    user_states = ['Neutral', 'UnhealthySt', 'HealthySt']
    num_states = len(user_states)
    num_actions = len(rec_types)

    transition_matrix_constructor = (
        _always_moving_transition_matrix if deterministic_transitions else
        restaurant_toy_recsim.TransitionMatrix.RandomMatrix)

    user_config = restaurant_toy_recsim.UserConfig(
        user_states_names=user_states,
        state_transition_matrix=transition_matrix_constructor(
            num_states, num_actions),
        reward_matrix=np.random.rand(num_states, num_actions))

    seeds = restaurant_toy_recsim.SimulationSeeds(2, 5)
    config = restaurant_toy_recsim.EnvConfig(user_config, rec_types, seeds)
    user_sampler, user_model = restaurant_toy_recsim.build_user_components(
        config)
    restaurants, document_sampler = restaurant_toy_recsim.build_document_components(
        config)

    env = environment.Environment(user_model,
                                  document_sampler,
                                  num_candidates=num_actions,
                                  slate_size=1,
                                  resample_documents=False)

    recsim_env = recsim_gym.RecSimGymEnv(env,
                                         restaurant_toy_recsim.rating_reward)
    return (config, user_sampler, user_model, restaurants, document_sampler,
            recsim_env)
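
A sketch of consuming the returned tuple; since slate_size is 1, each action is a single-element slate:

(config, user_sampler, user_model, restaurants, document_sampler,
 recsim_env) = _build_components(deterministic_transitions=True)
observation = recsim_env.reset()
# Recommend the first of the two restaurant "documents" each step.
observation, reward, done, _ = recsim_env.step([0])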
Example #14
num_candidates = 15

format_data = data_preprocess.load_data(path)
# print(format_data.head())
# print(format_data.shape)

features_embedding_movies = pd.read_csv(
    os.path.join(path, 'movie_embedding_features.csv'))
positive_user_ids, positive_history_data = data_preprocess.get_user_positive(
    format_data)
user_sampler = LTSStaticUserSampler(positive_user_ids, positive_history_data,
                                    features_embedding_movies)
LTSUserModel = UserModel(user_sampler, slate_size, LTSResponse)
ltsenv = environment.Environment(LTSUserModel,
                                 sampler,
                                 num_candidates,
                                 slate_size,
                                 resample_documents=True)
lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)


def create_agent(sess, environment, eval_mode, summary_writer=None):
    return Actor_Critic_Agent(environment.observation_space,
                              environment.action_space)


tmp_base_dir = 'detmp/recsim/'

# runner = runner_lib.EvalRunner(
#   base_dir=tmp_base_dir,
#   create_agent_fn=create_agent,