Example #1
    def _get_arm_scores(self, agent: BanditAgent,
                        ob_dataset: Dataset) -> List[float]:
        batch_sampler = FasterBatchSampler(ob_dataset,
                                           self.batch_size,
                                           shuffle=False)
        generator = NoAutoCollationDataLoader(
            ob_dataset,
            batch_sampler=batch_sampler,
            num_workers=self.generator_workers,
            pin_memory=self.pin_memory if self.device == "cuda" else False,
        )

        model = agent.bandit.reward_model
        model.to(self.torch_device)
        model.eval()
        scores = []

        with torch.no_grad():
            for x, _ in tqdm(generator, total=len(generator)):
                # The loader may yield a single tensor or a list/tuple of inputs.
                input_params = x if isinstance(x, (list, tuple)) else [x]
                input_params = [
                    t.to(self.torch_device) if isinstance(t, torch.Tensor) else t
                    for t in input_params
                ]

                scores_tensor: torch.Tensor = model.recommendation_score(
                    *input_params)
                scores.extend(scores_tensor.cpu().numpy().reshape(-1).tolist())

        return scores
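For reference, a minimal self-contained sketch of the same scoring pattern with a plain PyTorch DataLoader. ToyRewardModel and its recommendation_score method are assumptions standing in for the bandit's reward model; only the eval/no_grad/device-move loop mirrors the code above.

import torch
from torch.utils.data import DataLoader, TensorDataset

class ToyRewardModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 1)

    def recommendation_score(self, x: torch.Tensor) -> torch.Tensor:
        # Stand-in scoring head (assumption, not the MARS-Gym model).
        return torch.sigmoid(self.linear(x))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = TensorDataset(torch.randn(16, 4), torch.zeros(16))
loader = DataLoader(dataset, batch_size=4, shuffle=False)

model = ToyRewardModel().to(device)
model.eval()

scores = []
with torch.no_grad():
    for x, _ in loader:
        batch_scores = model.recommendation_score(x.to(device))
        scores.extend(batch_scores.cpu().numpy().reshape(-1).tolist())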
Example #2
    def _direct_estimator_predict(self, df):
        _df = preprocess_interactions_data_frame(
            df.copy(), self.direct_estimator.project_config)
        transform_with_indexing(_df, self.direct_estimator.index_mapping,
                                self.direct_estimator.project_config)

        dataset = InteractionsDataset(
            data_frame=_df,
            embeddings_for_metadata=self.direct_estimator.embeddings_for_metadata,
            project_config=self.direct_estimator.project_config,
            index_mapping=self.direct_estimator.index_mapping,
        )
        batch_sampler = FasterBatchSampler(dataset,
                                           self.direct_estimator.batch_size,
                                           shuffle=False)
        data_loader = NoAutoCollationDataLoader(dataset,
                                                batch_sampler=batch_sampler)

        # Prediction-only torchbearer Trial: the criterion is a dummy that
        # always returns a zero loss, since no training or backprop happens.
        trial = (
            Trial(
                self.direct_estimator.get_trained_module(),
                criterion=lambda *args: torch.zeros(
                    1,
                    device=self.direct_estimator.torch_device,
                    requires_grad=True,
                ),
            )
            .with_generators(val_generator=data_loader)
            .to(self.direct_estimator.torch_device)
            .eval()
        )

        with torch.no_grad():
            rewards_tensor: torch.Tensor = trial.predict(
                verbose=0, data_key=torchbearer.VALIDATION_DATA)
        rewards: np.ndarray = rewards_tensor[:, 0].cpu().numpy()

        return rewards
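The dummy-criterion Trial pattern above can be exercised end to end with toy inputs. In this sketch the linear model and random features are assumptions replacing the trained direct estimator and its InteractionsDataset-backed loader; the Trial construction and predict call follow the source.

import torch
import torchbearer
from torchbearer import Trial
from torch.utils.data import DataLoader, TensorDataset

model = torch.nn.Linear(4, 1)
loader = DataLoader(TensorDataset(torch.randn(8, 4), torch.zeros(8)),
                    batch_size=4)

trial = (
    Trial(
        model,
        # Dummy zero loss: Trial expects a criterion even when only predicting.
        criterion=lambda *args: torch.zeros(1, requires_grad=True),
    )
    .with_generators(val_generator=loader)
    .eval()
)

with torch.no_grad():
    preds: torch.Tensor = trial.predict(verbose=0,
                                        data_key=torchbearer.VALIDATION_DATA)
rewards = preds[:, 0].cpu().numpy()  # one reward estimate per row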
Example #3
    def _get_arm_scores(self, agent: BanditAgent, ob_dataset: Dataset) -> List[float]:
        batch_sampler = FasterBatchSampler(ob_dataset, self.batch_size, shuffle=False)
        generator = NoAutoCollationDataLoader(
            ob_dataset,
            batch_sampler=batch_sampler,
            num_workers=self.generator_workers,
            pin_memory=self.pin_memory if self.device == "cuda" else False,
        )

        trial = (
            Trial(
                agent.bandit.reward_model,
                criterion=lambda *args: torch.zeros(
                    1, device=self.torch_device, requires_grad=True
                ),
            )
            .with_test_generator(generator)
            .to(self.torch_device)
            .eval()
        )

        with torch.no_grad():
            model_output: Union[torch.Tensor, Tuple[torch.Tensor]] = trial.predict(
                verbose=0
            )

        scores_tensor: torch.Tensor = (
            model_output if isinstance(model_output, torch.Tensor)
            else model_output[0][0]
        )
        scores: List[float] = scores_tensor.cpu().numpy().reshape(-1).tolist()

        return scores
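The tensor-vs-tuple branch guards against models that return several outputs, in which case the score tensor is taken from the first element of the first tuple. A stand-alone illustration of that branch (the helper name flatten_scores and the toy inputs are assumptions, not from the source):

from typing import List, Tuple, Union
import torch

def flatten_scores(model_output: Union[torch.Tensor, Tuple]) -> List[float]:
    # Mirror the branch above: a bare tensor is used as-is, otherwise the
    # score tensor is assumed to sit at model_output[0][0].
    scores_tensor = (model_output if isinstance(model_output, torch.Tensor)
                     else model_output[0][0])
    return scores_tensor.cpu().numpy().reshape(-1).tolist()

flatten_scores(torch.tensor([[0.25], [0.75]]))         # [0.25, 0.75]
flatten_scores(((torch.tensor([0.5, 0.125]), None),))  # [0.5, 0.125]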
Example #4
    def get_test_generator(self) -> DataLoader:
        batch_sampler = FasterBatchSampler(self.test_dataset,
                                           self.batch_size,
                                           shuffle=False)
        return NoAutoCollationDataLoader(
            self.test_dataset,
            batch_sampler=batch_sampler,
            num_workers=self.generator_workers,
            pin_memory=self.device == "cuda",
        )
Example #5
    def get_val_generator(self) -> Optional[DataLoader]:
        if len(self.val_data_frame) == 0:
            return None
        batch_sampler = FasterBatchSampler(self.val_dataset,
                                           self.batch_size,
                                           shuffle=False)
        return NoAutoCollationDataLoader(
            self.val_dataset,
            batch_sampler=batch_sampler,
            num_workers=self.generator_workers,
            pin_memory=self.pin_memory if self.device == "cuda" else False,
        )
Example #6
    def get_test_generator(self, df) -> Optional[DataLoader]:
        dataset = InteractionsDataset(
            data_frame=df,
            embeddings_for_metadata=self.model_training.embeddings_for_metadata,
            project_config=self.model_training.project_config,
            index_mapping=self.model_training.index_mapping,
        )

        batch_sampler = FasterBatchSampler(dataset,
                                           self.batch_size,
                                           shuffle=False)

        return NoAutoCollationDataLoader(
            dataset,
            batch_sampler=batch_sampler,
            num_workers=self.generator_workers,
            pin_memory=self.pin_memory if self.device == "cuda" else False,
        )
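Examples #4 to #6 all build the same kind of loader: a sequential batch sampler passed via batch_sampler=, which in a standard torch DataLoader is mutually exclusive with batch_size and shuffle. A plain-PyTorch sketch of the equivalent construction (not MARS-Gym code; the built-in BatchSampler stands in for FasterBatchSampler):

import torch
from torch.utils.data import (BatchSampler, DataLoader, SequentialSampler,
                              TensorDataset)

dataset = TensorDataset(torch.randn(10, 3), torch.zeros(10))
# Sequential (shuffle=False) batching, matching how FasterBatchSampler is
# configured in the examples above.
batch_sampler = BatchSampler(SequentialSampler(dataset), batch_size=4,
                             drop_last=False)
loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=0,
                    pin_memory=torch.cuda.is_available())

for x, _ in loader:
    print(x.shape)  # batches of up to 4 rows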
Example #7
    def fill_ps(self, df: pd.DataFrame, pool: Pool):
        policy_estimator_df = preprocess_interactions_data_frame(df.copy(), self.policy_estimator.project_config)
        transform_with_indexing(
            policy_estimator_df,
            self.policy_estimator.index_mapping,
            self.policy_estimator.project_config,
        )

        if self.available_arms_column:
            policy_estimator_df[self.available_arms_column] = policy_estimator_df[
                self.available_arms_column
            ].map(
                functools.partial(
                    map_array,
                    mapping=self.policy_estimator.index_mapping[
                        self.policy_estimator.project_config.item_column.name
                    ],
                )
            )

        dataset = InteractionsDataset(
            data_frame=policy_estimator_df,
            embeddings_for_metadata=self.policy_estimator.embeddings_for_metadata,
            project_config=self.policy_estimator.project_config,
            index_mapping=self.policy_estimator.index_mapping
        )
        batch_sampler = FasterBatchSampler(
            dataset, self.policy_estimator.batch_size, shuffle=False
        )
        data_loader = NoAutoCollationDataLoader(dataset, batch_sampler=batch_sampler)
        trial = (
            Trial(
                self.policy_estimator.get_trained_module(),
                criterion=lambda *args: torch.zeros(
                    1, device=self.policy_estimator.torch_device, requires_grad=True
                ),
            )
            .with_generators(val_generator=data_loader)
            .to(self.policy_estimator.torch_device)
            .eval()
        )

        with torch.no_grad():
            log_probas: torch.Tensor = trial.predict(
                verbose=0, data_key=torchbearer.VALIDATION_DATA
            )
        probas: np.ndarray = torch.exp(log_probas).cpu().numpy()

        item_indices = policy_estimator_df[self.item_column]

        params = (
            zip(item_indices, probas, policy_estimator_df[self.available_arms_column])
            if self.available_arms_column
            else zip(item_indices, probas)
        )

        df[self.propensity_score_column] = list(
            tqdm(pool.starmap(_get_ps_from_probas, params), total=len(df))
        )
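fill_ps parallelises the propensity-score lookup with pool.starmap over one (item, probas[, available_arms]) tuple per row; note that Pool.starmap blocks and returns a finished list, so the surrounding tqdm only wraps an already-materialised result. The helper _get_ps_from_probas is not shown in this listing. Purely to illustrate the expected contract, a hypothetical version might look like the sketch below; the renormalisation over available arms is an assumption, and the function must live at module level so it can be pickled for the worker pool.

import numpy as np

# HYPOTHETICAL: the real _get_ps_from_probas is not part of this listing.
# Expected contract: probability mass of the logged item, optionally
# renormalised over the arms that were actually available.
def _get_ps_from_probas_sketch(item_idx: int, probas: np.ndarray,
                               available_arms=None) -> float:
    if available_arms is not None:
        denom = probas[available_arms].sum()
        return float(probas[item_idx] / denom) if denom > 0 else 0.0
    return float(probas[item_idx])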