def _predict_contexts(self, contexts: np.ndarray, is_predict: bool,
                      seeds: Optional[np.ndarray] = None,
                      start_index: Optional[int] = None) -> List:

    # Copy the learning policy objects
    lp_list = deepcopy(self.lp_list)

    # Identify the cluster for each context to predict
    cluster_predictions = self.kmeans.predict(contexts)

    # Obtain a prediction for each context
    predictions = [None] * len(contexts)
    for index, row in enumerate(contexts):
        row_2d = row[np.newaxis, :]
        cluster = cluster_predictions[index]

        # Set the random state
        lp_list[cluster].rng = create_rng(seed=seeds[index])

        # Predict based on the cluster
        if is_predict:
            predictions[index] = lp_list[cluster].predict(row_2d)
        else:
            predictions[index] = lp_list[cluster].predict_expectations(row_2d)

    # Return the list of predictions
    return predictions
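Before this method runs, the surrounding policy has already fit a clustering model over the historical contexts and one learning policy per cluster. As a minimal, self-contained sketch of the routing step, assuming scikit-learn's KMeans and invented data (the kmeans object below stands in for self.kmeans above, and the labels merely select which per-cluster learner would be used):

import numpy as np
from sklearn.cluster import KMeans

# Hypothetical historical contexts: two well-separated groups
historical = np.vstack([np.random.rand(20, 2), np.random.rand(20, 2) + 5.0])

# Fit k-means, mirroring the clustering model held by the policy
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42).fit(historical)

# New contexts are routed to the learner of their predicted cluster
new_contexts = np.array([[0.5, 0.5], [5.5, 5.5]])
cluster_labels = kmeans.predict(new_contexts)
print(cluster_labels)  # e.g. [0 1]; label order depends on initialization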
def _predict_contexts(self, contexts: np.ndarray, is_predict: bool,
                      seeds: Optional[np.ndarray] = None,
                      start_index: Optional[int] = None) -> List:

    # Copy learning policy object and set random state
    lp = deepcopy(self.lp)

    # Create an empty list of predictions
    predictions = [None] * len(contexts)

    # For each row in the given contexts
    for index, row in enumerate(contexts):

        # Get random generator
        lp.rng = create_rng(seed=seeds[index])

        # Calculate the distances from the historical contexts
        # Row is 1D, so convert it to a 2D array for cdist using newaxis
        # Finally, reshape to flatten the output distances list
        row_2d = row[np.newaxis, :]
        distances_to_row = cdist(self.contexts, row_2d,
                                 metric=self.metric).reshape(-1)

        # Find the k nearest neighbor indices
        indices = np.argpartition(distances_to_row, self.k - 1)[:self.k]

        predictions[index] = self._get_nhood_predictions(lp, indices,
                                                         row_2d, is_predict)

    # Return the list of predictions
    return predictions
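In isolation, the k-nearest lookup works as in the sketch below, with invented contexts and an invented k. np.argpartition is used rather than a full np.argsort because only the k smallest distances are needed, not their order:

import numpy as np
from scipy.spatial.distance import cdist

# Hypothetical historical contexts and a single query row
contexts = np.array([[0.0, 0.0], [1.0, 1.0], [3.0, 3.0], [0.5, 0.0]])
row_2d = np.array([[0.1, 0.1]])
k = 2

# cdist returns an (n, 1) matrix of distances; flatten it to 1D
distances = cdist(contexts, row_2d, metric='euclidean').reshape(-1)

# argpartition places the k smallest distances in the first k slots
# (in arbitrary order) without a full sort
indices = np.argpartition(distances, k - 1)[:k]
print(indices, distances[indices])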
def _predict_contexts(self, contexts: np.ndarray, is_predict: bool,
                      seeds: Optional[np.ndarray] = None,
                      start_index: Optional[int] = None) -> List:

    # Copy learning policy object
    lp = deepcopy(self.lp)

    # Create an empty list of predictions
    predictions = [None] * len(contexts)

    # For each row in the given contexts
    for index, row in enumerate(contexts):

        # Get random generator
        lp.rng = create_rng(seed=seeds[index])

        # Calculate the distances from the historical contexts
        # Row is 1D, so convert it to a 2D array for cdist using newaxis
        # Finally, reshape to flatten the output distances list
        row_2d = row[np.newaxis, :]
        distances_to_row = cdist(self.contexts, row_2d,
                                 metric=self.metric).reshape(-1)

        # Find the neighbor indices within the radius
        # np.where with a condition returns a tuple whose first element
        # is an array of indices
        indices = np.where(distances_to_row <= self.radius)

        # If neighbors exist
        if indices[0].size > 0:
            predictions[index] = self._get_nhood_predictions(lp, indices,
                                                             row_2d, is_predict)
        else:
            # When there are no neighbors
            predictions[index] = self._get_no_nhood_predictions(lp, is_predict)

    # Return the list of predictions
    return predictions
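The radius variant swaps the top-k selection for a boolean filter. A small sketch with invented values shows why the code indexes indices[0]: np.where on a condition returns a one-element tuple of index arrays, and an empty first array signals that the no-neighborhood fallback should run:

import numpy as np
from scipy.spatial.distance import cdist

contexts = np.array([[0.0, 0.0], [1.0, 1.0], [3.0, 3.0]])
row_2d = np.array([[0.2, 0.2]])
radius = 1.5

distances = cdist(contexts, row_2d, metric='euclidean').reshape(-1)

# np.where on a boolean condition returns a tuple of index arrays
indices = np.where(distances <= radius)
if indices[0].size > 0:
    print("neighbors:", indices[0])          # here: [0 1]
else:
    print("no neighbors within the radius")  # fallback branch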
        next_max_arm = argmax(arm_to_exp)
        next_max_exp = arm_to_exp[next_max_arm]
        print("Next max arm: ", next_max_arm, " with expectation: ", next_max_exp)

        # Regret between the best and the second best decision
        regret = max_exp - next_max_exp
        print("Regret: ", regret, " margin: ", self.margin)

        # Return the arm with the maximum expectation
        # if and only if the regret beats the given operational margin
        return max_arm if regret >= self.margin else next_max_arm


# Random number generator
rng = create_rng(123456)

# Arms
options = [1, 2]

# Historical data of layout decisions and corresponding rewards
layouts = np.array([1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1])
revenues = np.array([10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10])

# Custom greedy learning policy with a high operational cost margin
greedy = CustomGreedy(rng, options, 1, 5.0)

# Learn from previous layout decisions and the revenues they generated
greedy.fit(decisions=layouts, rewards=revenues)

# Predict the next best layout decision
prediction = greedy.predict()
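Stripped of the bandit machinery, the margin rule above reduces to comparing the top two expectations. The following sketch uses invented expectation values to show the decision in isolation:

# Hypothetical arm expectations after fitting
arm_to_exp = {1: 11.1, 2: 15.2}
margin = 5.0

# Best and second-best arms by expectation
ranked = sorted(arm_to_exp, key=arm_to_exp.get, reverse=True)
max_arm, next_max_arm = ranked[0], ranked[1]
regret = arm_to_exp[max_arm] - arm_to_exp[next_max_arm]

# Commit to the best arm only when the regret beats the operational margin
choice = max_arm if regret >= margin else next_max_arm
print(choice)  # regret 4.1 < margin 5.0, so the second-best arm (1) is chosen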