def optimal(self, epsilon=10**-9):
    """Compute, for every pair of scores, the turn total at which to stop.

    The game is solved with ``markov.value_iteration`` over states
    ``(i, j, k)``: ``i`` is the banked score of the player to move, ``j``
    the opponent's banked score and ``k`` the points accumulated during
    the current turn. Two actions are evaluated in each state, labelled
    ``True`` (roll one die) and ``False`` (bank ``k`` and pass the turn).

    ``epsilon`` is forwarded unchanged to ``markov.value_iteration``
    (presumably its convergence tolerance -- confirm against that helper).

    Returns a ``goal`` x ``goal`` nested list where entry ``[i][j]`` is the
    smallest ``k`` in ``range(goal)`` whose computed entry is falsy, or
    ``goal`` when there is no such ``k``.
    """
    goal = self.values['goal']

    def win_chance(table, mine, theirs, pending):
        """Win probability of the player to move, with finished games resolved.
        """
        # Banking the pending points would already reach the goal.
        if mine + pending >= goal:
            return 1.0
        if theirs >= goal:
            return 0.0
        return table[mine][theirs][pending]

    def action_probs(indexes, probabilities):
        """Return ``[(True, p_roll), (False, p_hold)]`` for one state.
        """
        own, opp, turn = indexes
        wrong = self.data.get(['dice', 'wrong'])
        sides = self.values['dice']

        # Accumulate face by face (same order as the faces) so the floating
        # point result does not depend on a different summation strategy.
        roll = 0.0
        for face in range(1, sides + 1):
            if face in wrong:
                # Losing face: turn points are dropped, opponent moves next.
                roll += (1.0 - win_chance(probabilities, opp, own, 0))
            else:
                roll += win_chance(probabilities, own, opp, turn + face)
        roll = roll / sides

        # Holding banks the turn points and hands the move to the opponent.
        hold = 1 - win_chance(probabilities, opp, own + turn, 0)
        return [(True, roll), (False, hold)]

    # Each callable gives the size of one state dimension from the indexes
    # of the previous ones; the turn total only needs ``goal - i`` slots.
    dimensions = [
        lambda: goal,
        lambda i: goal,
        lambda i, j: goal - i,
    ]
    # NOTE(review): the trailing True presumably asks for the chosen action
    # label per state rather than its value -- confirm in markov.
    policy = markov.value_iteration(dimensions, epsilon, action_probs, True)

    # NOTE(review): k runs over range(goal) although the third dimension is
    # declared as goal - i above; confirm the shape value_iteration returns.
    thresholds = []
    for i in range(goal):
        row = []
        for j in range(goal):
            try:
                stop_at = min(
                    k for k in range(goal) if not policy[i][j][k]
                )
            except ValueError:
                # min() received nothing: no falsy entry for this (i, j).
                stop_at = goal
            row.append(stop_at)
        thresholds.append(row)
    return thresholds
def optimal(self, epsilon=10**-5, max_dice=50):
    """Determine optimal play using a Markov process solved by value iteration.

    States are ``(i, j)``: ``i`` is the score of the player to move and
    ``j`` the opponent's score. The actions evaluated in each state are
    "roll ``k`` dice" for every ``k`` in ``1..max_dice``; after any action
    the turn passes to the opponent.

    Parameters:
        epsilon: forwarded unchanged to ``markov.value_iteration``
            (presumably its convergence tolerance -- confirm in markov).
        max_dice: largest number of dice considered for a single action.

    Returns:
        Whatever ``markov.value_iteration`` returns for this model; its
        exact structure is defined by that helper.

    Raises:
        ZeroDivisionError: if ``self.dice`` is 0 (as in the original code).
    """
    goal = self.goal
    sides = self.dice

    def pwin(probabilities, i, j):
        """Win probability of the player to move, with finished games resolved.
        """
        if i >= goal:
            return 1.0
        if j >= goal:
            return 0.0
        return probabilities[i][j]

    # dice_probs[k] is iterated below as (result, prob) pairs -- presumably
    # the distribution of points gained with k dice when no losing face
    # shows; confirm against dices.dice_probability.
    dice_probs = dices.dice_probability(sides, max_dice, self.wrong)

    # Probability that at least one of k dice shows a losing face. It only
    # depends on k, so it is computed once here rather than for every state
    # on every sweep. float() keeps the ratio from truncating to 0 under
    # Python 2 integer division; it is a no-op on Python 3.
    safe_ratio = float(sides - len(self.wrong)) / sides
    bust_chance = [1 - safe_ratio ** k for k in range(1, max_dice + 1)]

    def action_probs(indexes, probabilities):
        """Return ``[(k, p_win)]`` for rolling ``k = 1..max_dice`` dice.
        """
        i, j = indexes
        # A bust leaves both scores unchanged and hands over the turn; this
        # outcome is identical for every k, so evaluate it only once.
        after_bust = 1 - pwin(probabilities, j, i)

        probs = []
        for k, bust in enumerate(bust_chance, start=1):
            roll = bust * after_bust
            for result, prob in dice_probs[k]:
                roll += prob * (1 - pwin(probabilities, j, i + result))
            probs.append((k, roll))
        return probs

    # Each callable gives the size of one state dimension.
    return markov.value_iteration([
        lambda: goal,
        lambda i: goal,
    ], epsilon, action_probs, True)