def _save_test_set_predictions(self, agent: BanditAgent) -> None:
    """Rank the arms of every test observation and write the results out.

    Every record of ``self.test_data_frame`` is annotated with
    ``self.embeddings_for_metadata`` and passed through
    ``self._prepare_for_agent``; ``agent.rank`` is then called once per
    observation. The rankings, their probabilities and the arm scores sorted
    in descending order are added as columns to the CSV read from
    ``self.test_data_frame_path``. A ``trained`` column flags rows whose
    (user, item) pair also appears in ``self.get_data_frame_interactions()``.
    Finally the scores are plotted and the frame is handed to
    ``self._to_csv_test_set_predictions``.

    Args:
        agent: Agent whose ``rank`` method orders the arms of each observation.
    """
    print("Saving test set predictions...")

    obs: List[Dict[str, Any]] = self.test_data_frame.to_dict("records")

    self.clean()

    config = self.project_config
    arms_column = config.available_arms_column_name
    item_column = config.item_column.name
    # Either the embeddings or None; the same value is attached to every record.
    item_metadata = self.embeddings_for_metadata

    for ob in tqdm(obs, total=len(obs)):
        ob[ITEM_METADATA_KEY] = item_metadata
        # An empty list of available arms is replaced by the record's own item.
        if arms_column and len(ob[arms_column]) == 0:
            ob[arms_column] = [ob[item_column]]

    (
        arm_contexts_list,
        arms_list,
        arm_indices_list,
        arm_scores_list,
    ) = self._prepare_for_agent(agent, obs)

    sorted_actions_list = []
    proba_actions_list = []
    for arm_contexts, arms, arm_indices, arm_scores in tqdm(
        zip(arm_contexts_list, arms_list, arm_indices_list, arm_scores_list),
        total=len(arm_contexts_list),
    ):
        sorted_actions, proba_actions = agent.rank(
            arms, arm_indices, arm_contexts, arm_scores
        )
        sorted_actions_list.append(sorted_actions)
        proba_actions_list.append(proba_actions)

    action_scores_list = [
        sorted(action_scores, reverse=True) for action_scores in arm_scores_list
    ]

    # The records are no longer needed; release them before loading the CSV.
    del obs

    # Create evaluation file
    df = pd.read_csv(self.test_data_frame_path)
    df["sorted_actions"] = sorted_actions_list
    df["prob_actions"] = proba_actions_list
    df["action_scores"] = action_scores_list

    # Join with train interaction information. One row per (user, item) pair
    # keeps the left merge from multiplying test rows, and ``assign`` builds a
    # new frame instead of adding a column to the one returned by the getter.
    key_columns = [config.user_column.name, item_column]
    df_train = (
        self.get_data_frame_interactions()
        .drop_duplicates(subset=key_columns)
        .assign(trained=1)
    )
    df = df.merge(df_train, on=key_columns, how="left")
    # Pairs without a match in the training interactions get 0.
    df["trained"] = df["trained"].fillna(0)

    self.plot_scores(
        [score for arm_scores in arm_scores_list for score in arm_scores]
    )
    self._to_csv_test_set_predictions(df)
def _act(self, agent: BanditAgent, ob: dict) -> int:
    """Return the agent's action for a single observation.

    The observation is wrapped in a one-element list so that it can go through
    ``self._prepare_for_agent``; the first (and only) entry of each returned
    list is then forwarded to ``agent.act``.

    Args:
        agent: Agent that selects the action.
        ob: The observation to act on.

    Returns:
        Whatever ``agent.act`` returns for this observation.
    """
    prepared = self._prepare_for_agent(agent, [ob])
    # The second element (the arms themselves) is not needed by ``agent.act``.
    contexts_batch, _, indices_batch, scores_batch = prepared
    return agent.act(indices_batch[0], contexts_batch[0], scores_batch[0])
def create_agent(self) -> BanditAgent:
    """Build a ``BanditAgent`` around the configured bandit policy.

    The policy class is obtained by passing ``self.bandit_policy_class`` to
    ``load_attr``. It is instantiated with the trained module as its
    ``reward_model`` plus every entry of ``self.bandit_policy_params`` as a
    keyword argument.

    Returns:
        A ``BanditAgent`` wrapping the newly created policy.
    """
    policy_cls = load_attr(self.bandit_policy_class, Type[BanditPolicy])
    return BanditAgent(
        policy_cls(
            reward_model=self.get_trained_module(),
            **self.bandit_policy_params,
        )
    )
def _save_test_set_predictions(self, agent: BanditAgent) -> None:
    """Rank the arms of the (optionally sampled) test set and write the results.

    When ``self.sample_size_eval`` is set and smaller than the number of test
    rows, only a sample of that size (seeded with ``self.seed``) is evaluated.
    Every selected record is annotated with ``self.embeddings_for_metadata``
    and passed through ``self._prepare_for_agent``; ``agent.rank`` is then
    called once per observation. The rankings, their probabilities and the arm
    scores sorted in descending order are added as columns to the CSV read
    from ``self.test_data_frame_path``. Two further columns are added:
    ``trained`` flags (user, item) pairs present in
    ``self.get_data_frame_interactions()``, and ``item_indexed`` is a boolean
    derived from ``self.index_mapping``. Finally the scores are plotted and the
    frame is handed to ``self._to_csv_test_set_predictions``.

    Args:
        agent: Agent whose ``rank`` method orders the arms of each observation.
    """
    print("Saving test set predictions...")

    # Decide once whether to subsample, so that the observations and the CSV
    # frame below always follow the same branch.
    should_sample = bool(
        self.sample_size_eval
        and len(self.test_data_frame) > self.sample_size_eval
    )

    test_frame = self.test_data_frame
    if should_sample:
        test_frame = test_frame.sample(
            self.sample_size_eval, random_state=self.seed
        )
    obs: List[Dict[str, Any]] = test_frame.to_dict("records")

    # Either the embeddings or None; the same value is attached to every record.
    item_metadata = self.embeddings_for_metadata
    for ob in tqdm(obs, total=len(obs)):
        ob[ITEM_METADATA_KEY] = item_metadata

    print("...prepare_for_agent")
    (
        arm_contexts_list,
        arms_list,
        arm_indices_list,
        arm_scores_list,
    ) = self._prepare_for_agent(agent, obs)
    print("...")

    sorted_actions_list = []
    proba_actions_list = []
    for arm_contexts, arms, arm_indices, arm_scores in tqdm(
        zip(arm_contexts_list, arms_list, arm_indices_list, arm_scores_list),
        total=len(arm_contexts_list),
    ):
        sorted_actions, proba_actions = agent.rank(
            arms, arm_indices, arm_contexts, arm_scores
        )
        sorted_actions_list.append(sorted_actions)
        proba_actions_list.append(proba_actions)

    action_scores_list = [
        sorted(action_scores, reverse=True) for action_scores in arm_scores_list
    ]

    # The records are no longer needed; release them before loading the CSV.
    del obs

    # Create evaluation file
    df = pd.read_csv(self.test_data_frame_path)
    if should_sample:
        # Same size and seed as the observation sample above.
        # NOTE(review): this lines up with ``obs`` only if the CSV has the
        # same number of rows, in the same order, as ``self.test_data_frame``.
        df = df.sample(self.sample_size_eval, random_state=self.seed)

    df["sorted_actions"] = sorted_actions_list
    df["prob_actions"] = proba_actions_list
    df["action_scores"] = action_scores_list

    # Join with train interaction information. One row per (user, item) pair
    # keeps the left merge from multiplying test rows, and ``assign`` builds a
    # new frame instead of writing a column into a selection of another frame.
    user_column = self.project_config.user_column.name
    item_column = self.project_config.item_column.name
    key_columns = [user_column, item_column]
    df_train = (
        self.get_data_frame_interactions()[key_columns]
        .drop_duplicates()
        .assign(trained=1)
    )
    df = df.merge(df_train, on=key_columns, how="left")
    # Pairs without a match in the training interactions get 0.
    df["trained"] = df["trained"].fillna(0)

    # Add indexed information: True when the item's mapped index is positive.
    # NOTE(review): presumably index 0 is reserved for unknown items - confirm.
    item_index = self.index_mapping[item_column]
    df["item_indexed"] = df[item_column].apply(
        lambda i: item_index[str(i)] > 0
    )

    scores = [score for arm_scores in arm_scores_list for score in arm_scores]
    self.plot_scores(scores)
    self._to_csv_test_set_predictions(df)