예제 #1
0
    def filter(self,
               interactions: Iterable[Interaction]) -> Iterable[Interaction]:
        """Move interactions from the tail of the sequence into earlier slots.

        The input is materialized into a list. One element is taken off the
        end for every ``self._spacing + 1`` interactions, and the i-th element
        taken is re-inserted at index ``i * self._spacing`` plus an offset
        obtained from ``randint(0, self._spacing)`` on a generator seeded with
        ``self._seed``.

        Args:
            interactions: The interactions to rearrange.

        Returns:
            A new list with the same interactions in their rearranged order.
        """
        generator = CobaRandom(self._seed)
        reordered = list(interactions)

        gap = self._spacing
        # The number of moves is fixed up front from the initial length.
        n_moves = int(len(reordered) / (gap + 1))

        for move in range(n_moves):
            # Draw the target slot before popping, as the random draw comes
            # first in the sequence of operations.
            target = move * gap + generator.randint(0, gap)
            tail = reordered.pop()
            reordered.insert(target, tail)

        return reordered
예제 #2
0
    def __init__(self,
                 n_interactions: int = 500,
                 n_actions: int = 10,
                 n_features: int = 10,
                 context_features: bool = True,
                 action_features: bool = True,
                 sparse: bool = False,
                 seed: int = 1) -> None:
        """Build a synthetic simulation whose rewards are linear in its features.

        The two feature flags select one of four reward models:

        * neither flag: one-hot actions, each with a fixed random mean reward
          to which fresh noise is added on every reward call.
        * context features only: one-hot actions, each owning a normalized
          random weight vector that is dotted with the random context.
        * action features only: randomly generated, normalized action vectors
          dotted with a single fixed random weight vector.
        * both flags: the random context is dotted with the chosen action's
          normalized random feature vector.

        Args:
            n_interactions: Number of interactions; forwarded to the parent
                initializer.
            n_actions: Number of one-hot actions. Only used when
                ``action_features`` is False; otherwise the number of actions
                per interaction is drawn with ``randint(2, 10)``.
            n_features: Length of the generated context, weight and action
                feature vectors (unused when both flags are False).
            context_features: If False, every context is ``None``.
            action_features: If False, actions are one-hot vectors.
            sparse: If True, every feature vector is emitted as an
                ``(indices, values)`` pair instead of a plain tuple of values.
            seed: Seed for the random generator that drives the simulation.
        """

        self._n_bandits = n_actions
        self._n_features = n_features
        self._context_features = context_features
        self._action_features = action_features
        self._seed = seed

        r = CobaRandom(seed)

        context: Callable[[int], Context]
        actions: Callable[[int, Context], Sequence[Action]]
        rewards: Callable[[int, Context, Action], float]

        def sparsify(x):
            """Encode one feature vector in the requested (sparse/dense) form."""
            return (tuple(range(len(x))), tuple(x)) if sparse else tuple(x)

        def unsparse(x):
            """Recover the plain values from a vector produced by sparsify."""
            return x[1] if sparse else x

        def normalize(X):
            """Scale X so that its entries sum to one."""
            total = sum(X)  # hoisted: previously recomputed for every element
            return [x / total for x in X]

        def one_hot_actions():
            """Return one one-hot tuple of length n_actions per action."""
            return [
                tuple(int(j == k) for j in range(n_actions))
                for k in range(n_actions)
            ]

        if not context_features and not action_features:

            means = [
                m / n_actions + 1 / (2 * n_actions)
                for m in r.randoms(n_actions)
            ]

            # Each action must be sparsified on its own. Sparsifying the whole
            # list produced an (indices, all_actions) pair when sparse=True,
            # which the reward function below cannot decode.
            bandit_actions = tuple(sparsify(af) for af in one_hot_actions())

            context = lambda i: None
            actions = lambda i, c: bandit_actions
            rewards = lambda i, c, a: means[unsparse(a).index(1)] + (
                r.random() - .5) / n_actions

        elif context_features and not action_features:
            # normalizing allows us to make sure our reward is in [0,1]
            bandit_thetas = [
                normalize(r.randoms(n_features)) for _ in range(n_actions)
            ]

            actions_features = one_hot_actions()

            context = lambda i: sparsify(r.randoms(n_features))
            actions = lambda i, c: [sparsify(af) for af in actions_features]
            rewards = lambda i, c, a: sum(
                cc * t for cc, t in zip(
                    unsparse(c), bandit_thetas[unsparse(a).index(1)]))

        elif not context_features:
            # Action features only.
            theta = r.randoms(n_features)

            context = lambda i: None
            actions = lambda i, c: [
                sparsify(normalize(r.randoms(n_features)))
                for _ in range(r.randint(2, 10))
            ]
            rewards = lambda i, c, a: float(
                sum(cc * t for cc, t in zip(theta, unsparse(a))))

        else:
            # Both context and action features.
            context = lambda i: sparsify(r.randoms(n_features))
            actions = lambda i, c: [
                sparsify(normalize(r.randoms(n_features)))
                for _ in range(r.randint(2, 10))
            ]
            rewards = lambda i, c, a: sum(
                cc * t for cc, t in zip(unsparse(c), unsparse(a)))

        super().__init__(n_interactions, context, actions, rewards)