def algorithm(action):
    """Score ``action`` by its reward/cost ratio plus a cost-aware bonus.

    Returns ``MAX`` when ``action.mean_cost`` is exactly ``0.0``, so the
    ratio below is never evaluated with a zero mean cost.  Otherwise
    returns::

        mean_reward / mean_cost
            + c * ((1 + 1 / min_cost) * u) / (min_cost - u)

    with ``u = ucb(action.parent.N, action.N)``.

    ``MAX``, ``ucb``, ``c`` and ``min_cost`` are not defined in this
    block; they are resolved from the enclosing scope.
    """
    if action.mean_cost == 0.0:
        return MAX

    bonus = ucb(action.parent.N, action.N)
    ratio = action.mean_reward / action.mean_cost

    # NOTE(review): nothing here guards against min_cost == 0 or
    # min_cost == bonus, either of which makes a divisor zero -- confirm
    # that callers / ``ucb`` rule those cases out.
    numerator = c * ((1. + 1. / min_cost) * bonus)
    denominator = min_cost - bonus
    return ratio + numerator / denominator
def algorithm(action):
    """Score ``action`` as its value plus a budget-scaled bonus.

    Returns ``MAX`` when ``action.mean_cost`` is exactly ``0.0``.
    Otherwise returns::

        action.V + c0 * action.parent.budget * ucb(action.parent.N, action.N)

    ``MAX``, ``c0`` and ``ucb`` are not defined in this block; they are
    resolved from the enclosing scope.
    """
    if action.mean_cost == 0.0:
        return MAX

    value = action.V
    # The bonus weight is c0 multiplied by the parent's ``budget``.
    weight = c0 * action.parent.budget
    bonus = weight * ucb(action.parent.N, action.N)
    return value + bonus
def algorithm(action):
    """Score ``action`` as its value plus a ``c``-scaled bonus.

    Returns ``action.V + c * ucb(action.parent.N, action.N)``.

    ``c`` and ``ucb`` are not defined in this block; they are resolved
    from the enclosing scope.
    """
    value = action.V
    bonus = c * ucb(action.parent.N, action.N)
    return value + bonus