Example #1
0
    def _predict_contexts(self, contexts: np.ndarray, is_predict: bool,
                          seeds: Optional[np.ndarray] = None, start_index: Optional[int] = None) -> List:
        """Return a prediction (or arm expectations) for every row in contexts.

        Each context is first assigned to its nearest k-means cluster, and the
        learning policy trained for that cluster produces the result.
        """

        # Work on a copy so the fitted per-cluster policies stay untouched
        cluster_lps = deepcopy(self.lp_list)

        # Map every incoming context to its k-means cluster
        assigned_clusters = self.kmeans.predict(contexts)

        results = [None] * len(contexts)
        for row_index, context_row in enumerate(contexts):
            context_2d = context_row[np.newaxis, :]
            assigned = assigned_clusters[row_index]

            # Seed the cluster's policy per row for reproducibility
            cluster_lps[assigned].rng = create_rng(seed=seeds[row_index])

            # Delegate to predict or predict_expectations as requested
            if is_predict:
                results[row_index] = cluster_lps[assigned].predict(context_2d)
            else:
                results[row_index] = cluster_lps[assigned].predict_expectations(context_2d)

        # List with one prediction per input context
        return results
Example #2
0
    def _predict_contexts(self,
                          contexts: np.ndarray,
                          is_predict: bool,
                          seeds: Optional[np.ndarray] = None,
                          start_index: Optional[int] = None) -> List:
        """Return a prediction (or arm expectations) for every row in contexts,
        computed from each row's k nearest historical neighbors."""

        # Operate on a copy so the stored learning policy is not mutated
        policy = deepcopy(self.lp)

        results = [None] * len(contexts)
        for row_index, context_row in enumerate(contexts):

            # Per-row random state for reproducibility
            policy.rng = create_rng(seed=seeds[row_index])

            # cdist requires 2D input, so lift the row to shape (1, d);
            # reshape(-1) flattens the resulting distance column
            context_2d = context_row[np.newaxis, :]
            distances = cdist(self.contexts, context_2d,
                              metric=self.metric).reshape(-1)

            # Indices of the k smallest distances (order within the k is arbitrary)
            neighbors = np.argpartition(distances, self.k - 1)[:self.k]

            results[row_index] = self._get_nhood_predictions(
                policy, neighbors, context_2d, is_predict)

        # List with one prediction per input context
        return results
Example #3
0
    def _predict_contexts(self,
                          contexts: np.ndarray,
                          is_predict: bool,
                          seeds: Optional[np.ndarray] = None,
                          start_index: Optional[int] = None) -> List:
        """Return a prediction (or arm expectations) for every row in contexts,
        computed from the historical neighbors within self.radius."""

        # Operate on a copy so the stored learning policy is not mutated
        policy = deepcopy(self.lp)

        results = [None] * len(contexts)
        for row_index, context_row in enumerate(contexts):

            # Per-row random state for reproducibility
            policy.rng = create_rng(seed=seeds[row_index])

            # cdist requires 2D input, so lift the row to shape (1, d);
            # reshape(-1) flattens the resulting distance column
            context_2d = context_row[np.newaxis, :]
            distances = cdist(self.contexts, context_2d,
                              metric=self.metric).reshape(-1)

            # np.where on a condition yields a tuple; its first element
            # is the array of indices whose distance is within the radius
            in_radius = np.where(distances <= self.radius)

            if in_radius[0].size > 0:
                results[row_index] = self._get_nhood_predictions(
                    policy, in_radius, context_2d, is_predict)
            else:
                # No historical context close enough: use the fallback
                results[row_index] = self._get_no_nhood_predictions(
                    policy, is_predict)

        # List with one prediction per input context
        return results
Example #4
0
        next_max_arm = argmax(arm_to_exp)
        next_max_exp = arm_to_exp[next_max_arm]
        print("Next max arm: ", next_max_arm, " with expectation: ",
              next_max_exp)

        # Regret between best and the second best decision
        regret = max_exp - next_max_exp
        print("Regret: ", regret, " margin: ", self.margin)

        # Return the arm with maximum expectation
        # if and only if the regret beats the given operational margin
        return max_arm if regret >= self.margin else next_max_arm


# Random number generator with a fixed seed for reproducible results
rng = create_rng(123456)

# The two candidate arms (layout choices)
options = [1, 2]

# Historical data: which layout was shown and the revenue each showing earned
layouts = np.array([1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1])
revenues = np.array([10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10])

# Custom greedy learning policy with high operational cost margin
# NOTE(review): positional args presumably (rng, arms, ?, margin) with
# margin=5.0 matching the regret-vs-margin check above — confirm against
# CustomGreedy's __init__, which is not visible here
greedy = CustomGreedy(rng, options, 1, 5.0)

# Train the policy on the historical layout decisions and their revenues
greedy.fit(decisions=layouts, rewards=revenues)

# Predict the next best layouts decision