def _get_arm_scores(self, agent: BanditAgent, ob_dataset: Dataset) -> List[float]:
    """Score every row of ``ob_dataset`` with the agent's reward model.

    Runs ``agent.bandit.reward_model`` directly (no torchbearer Trial),
    moving each batch to ``self.torch_device`` and collecting the
    flattened recommendation scores.

    Args:
        agent: bandit agent whose ``bandit.reward_model`` produces scores.
        ob_dataset: dataset of observation rows to score.

    Returns:
        One float score per row, in dataset order.
    """
    batch_sampler = FasterBatchSampler(ob_dataset, self.batch_size, shuffle=False)
    generator = NoAutoCollationDataLoader(
        ob_dataset,
        batch_sampler=batch_sampler,
        num_workers=self.generator_workers,
        # Pinning host memory only helps (and is only valid) for CUDA transfers.
        pin_memory=self.pin_memory if self.device == "cuda" else False,
    )

    model = agent.bandit.reward_model
    model.to(self.torch_device)
    model.eval()

    scores: List[float] = []
    with torch.no_grad():
        # The unused enumerate index from the original loop was dropped.
        for x, _ in tqdm(generator, total=len(generator)):
            # The loader may yield a single tensor or a list/tuple of inputs.
            input_params = x if isinstance(x, (list, tuple)) else [x]
            input_params = [
                t.to(self.torch_device) if isinstance(t, torch.Tensor) else t
                for t in input_params
            ]
            scores_tensor: torch.Tensor = model.recommendation_score(*input_params)
            scores.extend(scores_tensor.cpu().numpy().reshape(-1).tolist())

    return scores
def _direct_estimator_predict(self, df):
    """Predict rewards for ``df`` using the trained direct estimator.

    The frame is copied, preprocessed, and index-transformed with the
    estimator's own config, wrapped in an ``InteractionsDataset``, and
    pushed through a torchbearer ``Trial`` in eval mode.

    Returns:
        numpy array with the first output column — one reward per row.
    """
    estimator = self.direct_estimator

    prepared_df = preprocess_interactions_data_frame(
        df.copy(), estimator.project_config
    )
    transform_with_indexing(
        prepared_df, estimator.index_mapping, estimator.project_config
    )

    dataset = InteractionsDataset(
        data_frame=prepared_df,
        embeddings_for_metadata=estimator.embeddings_for_metadata,
        project_config=estimator.project_config,
        index_mapping=estimator.index_mapping,
    )
    loader = NoAutoCollationDataLoader(
        dataset,
        batch_sampler=FasterBatchSampler(
            dataset, estimator.batch_size, shuffle=False
        ),
    )

    # Dummy criterion: the Trial is used for inference only, never training.
    trial = Trial(
        estimator.get_trained_module(),
        criterion=lambda *args: torch.zeros(
            1, device=estimator.torch_device, requires_grad=True
        ),
    )
    trial = trial.with_generators(val_generator=loader)
    trial = trial.to(estimator.torch_device).eval()

    with torch.no_grad():
        predictions: torch.Tensor = trial.predict(
            verbose=0, data_key=torchbearer.VALIDATION_DATA
        )
        rewards: np.ndarray = predictions[:, 0].cpu().numpy()

    return rewards
def _get_arm_scores(self, agent: BanditAgent, ob_dataset: Dataset) -> List[float]:
    """Score every row of ``ob_dataset`` with the agent's reward model.

    Wraps the reward model in a torchbearer ``Trial`` (with a dummy
    criterion — inference only) and returns the flattened per-row scores.
    """
    sampler = FasterBatchSampler(ob_dataset, self.batch_size, shuffle=False)
    generator = NoAutoCollationDataLoader(
        ob_dataset,
        batch_sampler=sampler,
        num_workers=self.generator_workers,
        pin_memory=self.pin_memory if self.device == "cuda" else False,
    )

    # Dummy criterion: the Trial is only used for prediction, not training.
    trial = Trial(
        agent.bandit.reward_model,
        criterion=lambda *args: torch.zeros(
            1, device=self.torch_device, requires_grad=True
        ),
    )
    trial = trial.with_test_generator(generator).to(self.torch_device).eval()

    with torch.no_grad():
        model_output: Union[torch.Tensor, Tuple[torch.Tensor]] = trial.predict(
            verbose=0
        )

    # The model may return a bare tensor or a nested tuple of tensors.
    if isinstance(model_output, torch.Tensor):
        scores_tensor: torch.Tensor = model_output
    else:
        scores_tensor = model_output[0][0]

    scores: List[float] = scores_tensor.cpu().numpy().reshape(-1).tolist()
    return scores
def get_test_generator(self) -> DataLoader:
    """Build an unshuffled DataLoader over ``self.test_dataset``.

    Memory pinning is enabled only when running on CUDA, where it speeds
    up host-to-device transfers.

    Returns:
        A ``NoAutoCollationDataLoader`` yielding test batches in order.
    """
    batch_sampler = FasterBatchSampler(
        self.test_dataset, self.batch_size, shuffle=False
    )
    return NoAutoCollationDataLoader(
        self.test_dataset,
        batch_sampler=batch_sampler,
        num_workers=self.generator_workers,
        # `True if cond else False` simplified to the boolean expression itself.
        pin_memory=self.device == "cuda",
    )
def get_val_generator(self) -> Optional[DataLoader]:
    """Build an unshuffled DataLoader over ``self.val_dataset``.

    Returns:
        ``None`` when the validation frame is empty, otherwise a
        ``NoAutoCollationDataLoader`` over the validation dataset.
    """
    if len(self.val_data_frame) == 0:
        return None

    # Pin host memory only when transfers go to a CUDA device.
    pin = self.pin_memory if self.device == "cuda" else False
    sampler = FasterBatchSampler(self.val_dataset, self.batch_size, shuffle=False)
    return NoAutoCollationDataLoader(
        self.val_dataset,
        batch_sampler=sampler,
        num_workers=self.generator_workers,
        pin_memory=pin,
    )
def get_test_generator(self, df) -> Optional[DataLoader]:
    """Wrap ``df`` in an ``InteractionsDataset`` and return an unshuffled loader.

    The dataset is built with the model-training run's config, metadata
    embeddings, and index mapping.
    """
    dataset = InteractionsDataset(
        data_frame=df,
        embeddings_for_metadata=self.model_training.embeddings_for_metadata,
        project_config=self.model_training.project_config,
        index_mapping=self.model_training.index_mapping,
    )

    sampler = FasterBatchSampler(dataset, self.batch_size, shuffle=False)
    # Pin host memory only when transfers go to a CUDA device.
    pin = self.pin_memory if self.device == "cuda" else False
    return NoAutoCollationDataLoader(
        dataset,
        batch_sampler=sampler,
        num_workers=self.generator_workers,
        pin_memory=pin,
    )
def fill_ps(self, df: pd.DataFrame, pool: Pool):
    """Fill ``df[self.propensity_score_column]`` in place via the policy estimator.

    A copy of ``df`` is preprocessed and index-transformed with the
    policy estimator's config; the trained policy module is run through a
    torchbearer ``Trial`` (inference only) to obtain per-item
    log-probabilities, which are exponentiated and mapped to per-row
    propensity scores with ``_get_ps_from_probas`` on the worker pool.

    Args:
        df: interactions frame; mutated by adding the propensity column.
        pool: multiprocessing pool used to fan out the per-row score mapping.
    """
    estimator = self.policy_estimator

    policy_estimator_df = preprocess_interactions_data_frame(
        df.copy(), estimator.project_config
    )
    transform_with_indexing(
        policy_estimator_df,
        estimator.index_mapping,
        estimator.project_config,
    )

    if self.available_arms_column:
        # Re-index the available-arm ids with the estimator's item mapping
        # so they line up with the model's output indices.
        policy_estimator_df[self.available_arms_column] = policy_estimator_df[
            self.available_arms_column
        ].map(
            functools.partial(
                map_array,
                mapping=estimator.index_mapping[
                    estimator.project_config.item_column.name
                ],
            )
        )

    dataset = InteractionsDataset(
        data_frame=policy_estimator_df,
        embeddings_for_metadata=estimator.embeddings_for_metadata,
        project_config=estimator.project_config,
        index_mapping=estimator.index_mapping,
    )
    batch_sampler = FasterBatchSampler(
        dataset, estimator.batch_size, shuffle=False
    )
    data_loader = NoAutoCollationDataLoader(dataset, batch_sampler=batch_sampler)

    # Dummy criterion: the Trial is used for inference only, never training.
    # (Leftover IPython debugging stubs removed.)
    trial = (
        Trial(
            estimator.get_trained_module(),
            criterion=lambda *args: torch.zeros(
                1, device=estimator.torch_device, requires_grad=True
            ),
        )
        .with_generators(val_generator=data_loader)
        .to(estimator.torch_device)
        .eval()
    )

    with torch.no_grad():
        log_probas: torch.Tensor = trial.predict(
            verbose=0, data_key=torchbearer.VALIDATION_DATA
        )
        probas: np.ndarray = torch.exp(log_probas).cpu().numpy()

    item_indices = policy_estimator_df[self.item_column]
    params = (
        zip(item_indices, probas, policy_estimator_df[self.available_arms_column])
        if self.available_arms_column
        else zip(item_indices, probas)
    )
    df[self.propensity_score_column] = list(
        tqdm(pool.starmap(_get_ps_from_probas, params), total=len(df))
    )