def pick_action(self, context):
    # context contains all context_vectors for all arms
    self.context = context
    self.means = np.array([
        self._compute_mean(self.theta_hat[i], context[i])
        for i in range(self.k_arms)
    ])
    return random_argmax(self.means)
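random_argmax and _compute_mean are defined elsewhere; a minimal sketch, assuming random tie-breaking among equal maxima and a linear reward model (both are assumptions, not necessarily the source implementation):

import numpy as np

def random_argmax(values):
    # break ties uniformly at random among the maximal entries
    values = np.asarray(values)
    return np.random.choice(np.flatnonzero(values == values.max()))

def compute_mean_linear(theta_hat, context_vector):
    # assumed linear reward model: the estimated mean is the dot product of the
    # learned parameters with the arm's context vector (the source's
    # _compute_mean may use another link, e.g. logistic)
    return float(np.dot(theta_hat, context_vector))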
def observe_reward(self, arm_idx, reward):
    self.arms_data[arm_idx].append(reward)
    n = len(self.arms_data[arm_idx])
    # incremental running-mean update of the arm's estimated mean
    self.estimated_means[arm_idx] = reward / n + (
        n - 1) * self.estimated_means[arm_idx] / n
    self.pulls[arm_idx] += 1
    self.best_arm = random_argmax(self.estimated_means)
    self._t += 1
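The assignment above is the standard incremental (running) mean, new_mean = reward / n + (n - 1) * old_mean / n; a quick standalone check of the recurrence:

import numpy as np

rewards = [1, 0, 1, 1]
mean = 0.0
for n, r in enumerate(rewards, start=1):
    mean = r / n + (n - 1) * mean / n
assert np.isclose(mean, np.mean(rewards))  # both give 0.75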
def pick_action(self):
    # try each arm once then compute UCBs
    if self._t > self.k_arms:
        self._UCBs = [
            self._bound_function(arm_idx) for arm_idx in self._arm_idxs
        ]
        arm_idx = random_argmax(self._UCBs)
    else:
        arm_idx = next(self._try_each_arm)
    return arm_idx
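_bound_function, _arm_idxs and _try_each_arm are defined elsewhere in the class; presumably _arm_idxs is range(k_arms) and _try_each_arm yields each arm index once for the initial round. A minimal sketch of one plausible bound (the classic UCB1 form; the source's actual bound may differ):

import numpy as np

def ucb1_bound(estimated_mean, t, pulls):
    # classic UCB1 index: empirical mean plus a sqrt(2 ln t / n) exploration bonus
    return estimated_mean + np.sqrt(2 * np.log(t) / pulls)

print(ucb1_bound(0.6, t=100, pulls=10))  # ~1.56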
def pick_action(self):
    # try each arm once then compute UCBs
    if self._t > self.k_arms:
        self.Vs = self.sq_sums - self.estimated_means**2 + self.radius(
            self._t, self.pulls)
        self._UCBs = self.estimated_means + np.sqrt(
            (np.log(self._t) / self.pulls) * self.Vs)
        arm_idx = random_argmax(self._UCBs)
    else:
        arm_idx = next(self._try_each_arm)
    return arm_idx
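Assuming sq_sums holds the per-arm running mean of squared rewards, Vs above is the empirical variance (E[X^2] - E[X]^2) plus the radius term, as in the UCB-Tuned index; radius(t, pulls) presumably returns a vectorized exploration bonus of the sqrt(ln t / n) family. A quick standalone check of the variance identity used here:

import numpy as np

rewards = np.array([1, 0, 1, 1, 0])
mean_of_squares = np.mean(rewards**2)   # the role assumed for sq_sums
mean = np.mean(rewards)
assert np.isclose(mean_of_squares - mean**2, np.var(rewards))  # 0.24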
def pick_action(self):
    # try each arm once then compute UCBs
    if self._t > self.k_arms:
        self._UCBs = self.estimated_means + self.radius(self._t, self.pulls)
        arm_idx = random_argmax(self._UCBs)
        if self.keep_history:
            self.UCB_history.append(self._UCBs.copy())
            self.means_history.append(self.estimated_means.copy())
            self.pulls_history.append(self.pulls.copy())
    else:
        arm_idx = next(self._try_each_arm)
    return arm_idx
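A sketch of how an agent with this pick_action/observe_reward interface could be driven; the class name, constructor and Bernoulli rewards below are illustrative assumptions, not taken from the source:

import numpy as np

true_means = np.array([0.2, 0.5, 0.8])
agent = UCBAgent(k_arms=len(true_means))  # hypothetical constructor
for _ in range(1000):
    arm_idx = agent.pick_action()
    reward = np.random.binomial(1, true_means[arm_idx])
    agent.observe_reward(arm_idx, reward)
print(agent.best_arm)  # usually arm 2 after enough pulls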
def generate_context(self):
    """Generates a context vector of indicators and computes each arm's
    current real reward probability.
    """
    context = []
    context_vector = bernuolli(self.context_options)
    if self.add_bias:
        context_vector = np.append([1], context_vector)
    for i in range(self.k_arms):
        context.append(context_vector)
    # pull all arms to generate the current means and rewards and find the
    # optimal arm; the agent/policy/algorithm sees only the context, the
    # means are not revealed
    self.current_rewards = [
        arm.pull(context_vector)
        for arm, context_vector in zip(self.arms, context)
    ]
    self.current_means = [arm.get_current_mean() for arm in self.arms]
    self.current_optimal_arm = random_argmax(self.current_means)
    self.current_optimal_mean = self.current_means[self.current_optimal_arm]
    return context
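bernuolli (spelling as in the source) is not shown; presumably it draws one Bernoulli indicator per entry of context_options. A minimal sketch under that assumption:

import numpy as np

def bernuolli(probabilities):
    # assumed helper: one independent 0/1 indicator per feature probability
    probabilities = np.asarray(probabilities, dtype=float)
    return (np.random.random(probabilities.shape) < probabilities).astype(int)

print(bernuolli([0.5, 0.2, 0.9]))  # e.g. array([1, 0, 1])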
def pick_action(self):
    if self.keep_history:
        self.prior_data_history.append(self.prior_data.copy())
        self.means_history.append(self.estimated_means.copy())
        self.pulls_history.append(self.pulls.copy())
    return random_argmax(self._sample_from_arms())
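_sample_from_arms and prior_data are defined elsewhere; a minimal sketch assuming Beta-Bernoulli Thompson sampling, where prior_data holds per-arm (alpha, beta) success/failure counts (one common choice, not necessarily the source's):

import numpy as np

def sample_from_arms(prior_data):
    # assumed Beta-Bernoulli posteriors: prior_data[i] = (alpha_i, beta_i)
    # counts for arm i; draw one posterior sample per arm
    return np.array([np.random.beta(a, b) for a, b in prior_data])

print(sample_from_arms([(1, 1), (5, 2), (2, 8)]))  # one draw per arm

pick_action then selects the arm whose posterior draw is largest, which is the Thompson-sampling rule.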