def filter(self, interactions: Iterable[Interaction]) -> Iterable[Interaction]:
    """Relocate interactions from the end of the sequence to randomized earlier positions.

    The incoming iterable is materialized into a list. Then, for each
    ``k`` in ``range(int(len / (spacing + 1)))``, the current last element
    is removed and re-inserted at index ``k * spacing + offset`` where
    ``offset`` is drawn from ``rng.randint(0, spacing)``.

    Args:
        interactions: The interactions to reorder. The iterable is fully
            consumed; the caller's object is not mutated because a new
            list is built from it.

    Returns:
        A new list holding the same interactions in their new order.
    """
    # A fresh generator is built from the stored seed on every call.
    # NOTE(review): presumably this makes repeated calls reproducible --
    # confirm against CobaRandom.
    random_source = CobaRandom(self._seed)
    reordered = list(interactions)

    gap = self._spacing
    n_moves = int(len(reordered) / (gap + 1))

    for move in range(n_moves):
        # The target index is computed (and the random number drawn) before
        # the tail element is popped, matching left-to-right argument
        # evaluation of a single insert(..., pop()) call.
        # NOTE(review): randint bounds are assumed inclusive -- confirm.
        target = move * gap + random_source.randint(0, gap)
        tail = reordered.pop()
        reordered.insert(target, tail)

    return reordered
def __init__(self,
    n_interactions: int = 500,
    n_actions: int = 10,
    n_features: int = 10,
    context_features: bool = True,
    action_features: bool = True,
    sparse: bool = False,
    seed: int = 1) -> None:
    """Configure a synthetic simulation with randomly generated linear rewards.

    Three callables (context, actions, rewards) are built according to the
    ``context_features`` / ``action_features`` flags and handed to the
    parent initializer together with ``n_interactions``:

    * neither flag: no context, one-hot actions, and a reward equal to a
      fixed per-action mean plus noise scaled by ``1 / n_actions``.
    * context only: random context vector, one-hot actions, and a reward
      equal to the dot product of the context with that action's
      normalized weight vector.
    * actions only: no context, a random number of normalized random
      action vectors, and a reward equal to the dot product of a single
      fixed weight vector with the chosen action.
    * both flags: random context, random normalized action vectors, and a
      reward equal to the dot product of context and action.

    Args:
        n_interactions: Forwarded unchanged to the parent initializer.
        n_actions: Number of one-hot actions when ``action_features`` is
            false. Not used to size the action set when ``action_features``
            is true (that size comes from ``r.randint(2, 10)``).
        n_features: Length of every randomly generated feature vector.
        context_features: Whether interactions carry a context vector.
        action_features: Whether actions are random feature vectors rather
            than one-hot indicators.
        sparse: When true, every feature vector is emitted as an
            ``(indices, values)`` pair of tuples instead of a plain tuple.
        seed: Seed for the random generator that drives all of the above.
    """
    self._n_bandits = n_actions
    self._n_features = n_features
    self._context_features = context_features
    self._action_features = action_features
    self._seed = seed

    r = CobaRandom(seed)

    context: Callable[[int], Context]
    actions: Callable[[int, Context], Sequence[Action]]
    rewards: Callable[[int, Context, Action], float]

    def sparsify(x):
        # Encode ONE feature vector: (indices, values) if sparse, else a tuple.
        return (tuple(range(len(x))), tuple(x)) if sparse else tuple(x)

    def unsparse(x):
        # Recover the values of ONE feature vector produced by sparsify.
        return x[1] if sparse else x

    def normalize(X):
        # Scale the values so that they sum to one.
        total = sum(X)
        return [x / total for x in X]

    def one_hot_actions():
        # One indicator tuple per action: action k has its 1 at position k.
        return [
            tuple(1 if j == k else 0 for j in range(n_actions))
            for k in range(n_actions)
        ]

    if not context_features and not action_features:
        means = [m / n_actions + 1 / (2 * n_actions) for m in r.randoms(n_actions)]
        actions_features = one_hot_actions()

        context = lambda i: None
        # Each action is encoded on its own. Encoding the whole list at once
        # would, in sparse mode, yield a single (indices, values) pair whose
        # "values" are all the actions, and the reward lookup below would
        # then fail. The outer tuple keeps dense output unchanged.
        actions = lambda i, c: tuple(sparsify(af) for af in actions_features)
        rewards = lambda i, c, a: means[unsparse(a).index(1)] + (r.random() - .5) / n_actions

    elif context_features and not action_features:
        # Normalizing each weight vector to sum to one is intended to keep
        # the reward in [0,1].
        # NOTE(review): this relies on r.randoms yielding values in [0,1] -- confirm.
        bandit_thetas = [r.randoms(n_features) for _ in range(n_actions)]
        bandit_thetas = [normalize(theta) for theta in bandit_thetas]
        actions_features = one_hot_actions()

        context = lambda i: sparsify(r.randoms(n_features))
        actions = lambda i, c: [sparsify(af) for af in actions_features]
        rewards = lambda i, c, a: sum(
            cc * t for cc, t in zip(unsparse(c), bandit_thetas[unsparse(a).index(1)])
        )

    elif not context_features and action_features:
        theta = r.randoms(n_features)

        context = lambda i: None
        actions = lambda i, c: [
            sparsify(normalize(r.randoms(n_features))) for _ in range(r.randint(2, 10))
        ]
        rewards = lambda i, c, a: float(sum(cc * t for cc, t in zip(theta, unsparse(a))))

    else:
        context = lambda i: sparsify(r.randoms(n_features))
        actions = lambda i, c: [
            sparsify(normalize(r.randoms(n_features))) for _ in range(r.randint(2, 10))
        ]
        rewards = lambda i, c, a: sum(cc * t for cc, t in zip(unsparse(c), unsparse(a)))

    super().__init__(n_interactions, context, actions, rewards)