Example #1
    def filter(self, items: Iterable[Any]) -> Sequence[Any]:

        rng = CobaRandom(self._seed)

        if self._max_count == 0:
            return []

        if self._max_count is None:
            return Take(self._count).filter(rng.shuffle(list(items)))

        W = 1
        items = iter(items)
        reservoir = rng.shuffle(list(islice(items, self._max_count)))

        try:
            while True:
                [r1, r2, r3] = rng.randoms(3)
                W = W * math.exp(math.log(r1) / (self._max_count or 1))
                S = math.floor(math.log(r2) / math.log(1 - W))
                reservoir[int(r3 * self._max_count - .001)] = next(
                    islice(items, S, S + 1))
        except StopIteration:
            pass

        return Take(self._count).filter(reservoir)
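
The filter above is a single-pass reservoir sample (Algorithm L): it seeds the reservoir with the first max_count items and then replaces random slots after geometrically distributed skips. A minimal standalone sketch of the same technique, independent of coba's CobaRandom and Take helpers (the name reservoir_sample is illustrative):

import math
import random
from itertools import islice
from typing import Iterable, List, TypeVar

T = TypeVar("T")

def reservoir_sample(items: Iterable[T], k: int, seed: int = 1) -> List[T]:
    """Return k items sampled uniformly from a single pass over `items` (Algorithm L)."""
    if k <= 0:
        return []

    rng = random.Random(seed)
    it = iter(items)

    reservoir = list(islice(it, k))              # the first k items seed the reservoir
    if len(reservoir) < k:
        return reservoir                         # the stream had fewer than k items

    w = math.exp(math.log(rng.random()) / k)     # running weight that controls skip length
    while True:
        # number of items to skip before the next replacement
        skip = math.floor(math.log(rng.random()) / math.log(1 - w))
        try:
            item = next(islice(it, skip, skip + 1))
        except StopIteration:
            break                                # stream exhausted during the skip
        reservoir[rng.randrange(k)] = item       # overwrite a uniformly chosen slot
        w *= math.exp(math.log(rng.random()) / k)

    return reservoir

# e.g. reservoir_sample(range(10_000), 5) returns 5 values chosen uniformly at random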
Example #2
    def test_regression_learning(self):
        vw = VowpalMediator().init_learner("--quiet", 1)

        n_features = 10
        n_examples = 1000

        rng = CobaRandom(1)

        weights = rng.randoms(n_features)
        rows = [rng.randoms(n_features) for _ in range(n_examples)]
        labels = [sum([w * r for w, r in zip(weights, row)]) for row in rows]

        examples = list(zip(rows, labels))

        self.assertEqual(0, vw.predict(vw.make_example({'x': rows[0]}, None)))

        pred_errs = []
        for row, label in examples[int(.9 * n_examples):]:
            pred_errs.append(
                vw.predict(vw.make_example({"x": row}, None)) - label)

        pre_learn_mse = sum([e**2 for e in pred_errs]) / len(pred_errs)

        for row, label in examples[0:int(.9 * n_examples)]:
            vw.learn(vw.make_example({"x": row}, str(label)))

        pred_errs = []

        for row, label in examples[int(.9 * n_examples):]:
            pred_errs.append(
                vw.predict(vw.make_example({"x": row}, None)) - label)

        post_learn_mse = sum([e**2 for e in pred_errs]) / len(pred_errs)

        self.assertNotAlmostEqual(0, pre_learn_mse, places=2)
        self.assertAlmostEqual(0, post_learn_mse, places=2)
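
The test's structure is: measure squared error on a held-out 10% slice, run one online pass of learning over the first 90%, then measure again. A hedged sketch of that evaluate/learn/evaluate pattern with the learner abstracted behind plain callables (predict and learn here are placeholders, not the VowpalMediator API):

from typing import Callable, Sequence, Tuple

Row = Sequence[float]

def pre_post_mse(examples: Sequence[Tuple[Row, float]],
                 predict: Callable[[Row], float],
                 learn: Callable[[Row, float], None],
                 holdout: float = .1) -> Tuple[float, float]:
    """Return test-set MSE before and after one online pass over the training split."""
    n_train = int((1 - holdout) * len(examples))
    train, test = examples[:n_train], examples[n_train:]

    def mse() -> float:
        return sum((predict(x) - y) ** 2 for x, y in test) / len(test)

    pre = mse()           # error of the untrained model (should be far from 0)
    for x, y in train:
        learn(x, y)       # single online pass, mirroring vw.learn above
    post = mse()          # error after learning (should be close to 0)

    return pre, post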
Example #3
    def __init__(self,
                 n_interactions: int,
                 n_actions: int = 10,
                 n_context_features: int = 10,
                 n_action_features: int = 10,
                 n_exemplars: int = 10,
                 kernel: Literal['linear', 'polynomial',
                                 'exponential'] = 'exponential',
                 degree: int = 2,
                 gamma: float = 1,
                 seed: int = 1) -> None:
        """Instantiate a KernelSyntheticSimulation.

        Args:
            n_interactions: The number of interactions the simulation should have.
            n_actions: The number of actions each interaction should have.
            n_context_features: The number of features each context should have.
            n_action_features: The number of features each action should have.
            n_exemplars: The number of exemplar action, context pairs.
            kernel: The family of the kernel basis functions.
            degree: The degree of the polynomial kernel; only used when kernel is 'polynomial'.
            gamma: The gamma parameter of the exponential kernel; only used when kernel is 'exponential'.
            seed: The random number seed used to generate all features, weights and noise in the simulation.
        """

        self._args = (n_interactions, n_actions, n_context_features,
                      n_action_features, n_exemplars, kernel, degree, gamma,
                      seed)

        self._n_actions = n_actions
        self._n_context_features = n_context_features
        self._n_action_features = n_action_features
        self._n_exemplars = n_exemplars
        self._seed = seed
        self._kernel = kernel
        self._degree = degree
        self._gamma = gamma

        rng = CobaRandom(seed)

        #if there are no features then we are unable to define exemplars
        if n_action_features + n_context_features == 0: n_exemplars = 0

        feat_gen = lambda n: tuple(rng.gausses(n, 0, .75))
        one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

        self._exemplars = [[
            feat_gen(n_action_features + n_context_features)
            for _ in range(n_exemplars)
        ] for _ in range(1 if n_action_features else n_actions)]
        weight_count = n_actions if n_exemplars == 0 else n_exemplars
        self._weights = [1 - 2 * w for w in rng.randoms(weight_count)]

        self._bias = 0

        if kernel == 'polynomial':
            # scale each exemplar so its dot-product with F falls in [0, upper_bound],
            # which keeps higher-order polynomials reasonably well behaved
            upper_bound = (1.5)**(1 / degree) - 1
            self._exemplars = [[[upper_bound * ee / sum(e) for ee in e]
                                for e in E] for E in self._exemplars]

        def context(index: int) -> Context:
            return feat_gen(n_context_features) if n_context_features else None

        def actions(index: int, context: Context) -> Sequence[Action]:
            return [feat_gen(n_action_features) for _ in range(n_actions)
                    ] if n_action_features else one_hot_acts

        def reward(index: int, context: Context, action: Action) -> float:

            if n_exemplars == 0:
                return self._bias + self._weights[action.index(1)]

            #handles None context
            context = context or []

            if n_action_features:
                f = list(context) + list(action)
                W = self._weights
                E = self._exemplars[0]
            else:
                f = list(context)
                W = self._weights
                E = self._exemplars[action.index(1)]

            if kernel == "linear":
                K = lambda x1, x2: self._linear_kernel(x1, x2)
            if kernel == "polynomial":
                K = lambda x1, x2: self._polynomial_kernel(
                    x1, x2, self._degree)
            if kernel == "exponential":
                K = lambda x1, x2: self._exponential_kernel(
                    x1, x2, self._gamma)

            return self._bias + sum([w * K(e, f) for w, e in zip(W, E)])

        rewards = [
            reward(i, c, a) for i in range(100) for c in [context(i)]
            for a in actions(i, c)
        ]

        m = mean(rewards)
        s = (max(rewards) - min(rewards)) or 1

        self._bias = 0.5 - m / s
        self._weights = [w / s for w in self._weights]

        super().__init__(n_interactions, context, actions, reward)
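
The reward above is a weighted sum of kernel evaluations between the current features and fixed exemplars. The kernel helpers themselves (_linear_kernel, _polynomial_kernel, _exponential_kernel) are not shown; the sketch below gives standard textbook forms of those three families, which may differ in constants from coba's actual implementation:

import math
from typing import Sequence

# Illustrative stand-ins for the three kernel families named in KernelSyntheticSimulation.

def linear_kernel(x1: Sequence[float], x2: Sequence[float]) -> float:
    return sum(a * b for a, b in zip(x1, x2))

def polynomial_kernel(x1: Sequence[float], x2: Sequence[float], degree: int) -> float:
    # a higher degree gives a more sharply non-linear reward surface
    return (1 + linear_kernel(x1, x2)) ** degree

def exponential_kernel(x1: Sequence[float], x2: Sequence[float], gamma: float) -> float:
    # decays with the Euclidean distance between the two feature vectors
    distance = math.sqrt(sum((a - b) ** 2 for a, b in zip(x1, x2)))
    return math.exp(-distance / gamma)

With kernels of this shape, each term in the reward closure above is just w * K(exemplar, features), summed over the exemplars and shifted by the bias.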
Example #4
    def __init__(self,
                 n_interactions: int,
                 n_actions: int = 10,
                 n_context_features: int = 10,
                 n_action_features: int = 10,
                 reward_features: Sequence[str] = ["a", "xa"],
                 seed: int = 1) -> None:
        """Instantiate a LinearSyntheticSimulation.

        Args:
            n_interactions: The number of interactions the simulation should have.
            n_actions: The number of actions each interaction should have.
            n_context_features: The number of features each context should have.
            n_action_features: The number of features each action should have.
            reward_features: The features in the simulation's linear reward function.
            seed: The random number seed used to generate all features, weights and noise in the simulation.
        """

        self._args = (n_interactions, n_actions, n_context_features,
                      n_action_features, reward_features, seed)

        self._n_actions = n_actions
        self._n_context_features = n_context_features
        self._n_action_features = n_action_features
        self._reward_features = reward_features
        self._seed = seed

        if not self._n_context_features:
            reward_features = list(
                set(filter(None,
                           [f.replace('x', '') for f in reward_features])))

        if not self._n_action_features:
            reward_features = list(
                set(filter(None,
                           [f.replace('a', '') for f in reward_features])))

        rng = CobaRandom(seed)
        feat_encoder = InteractionsEncoder(reward_features)

        # To keep high-order polynomial features well behaved we center the
        # context and action features at 1 and give them a small amount of
        # variance. Then, in post-processing, we shift and re-scale the reward
        # so it is centered in and roughly fills [0,1].
        max_degree = max([len(f)
                          for f in reward_features]) if reward_features else 1
        feat_gen = lambda n: tuple([
            g * rng.choice([1, -1])
            for g in rng.gausses(n, mu=1, sigma=1 / (2 * max_degree))
        ])
        one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

        feature_count = len(
            feat_encoder.encode(x=[1] * n_context_features,
                                a=[1] * n_action_features))
        weight_parts = 1 if n_action_features else n_actions
        weight_count = 1 if feature_count == 0 else feature_count

        self._weights = [[1 - 2 * w for w in rng.randoms(weight_count)]
                         for _ in range(weight_parts)]

        self._bias = 0
        self._clip = False

        def context(index: int) -> Context:
            return feat_gen(n_context_features) if n_context_features else None

        def actions(index: int, context: Context) -> Sequence[Action]:
            return [feat_gen(n_action_features) for _ in range(n_actions)
                    ] if n_action_features else one_hot_acts

        def reward(index: int, context: Context, action: Action) -> float:

            F = feat_encoder.encode(x=context, a=action) or [1]
            W = self._weights[0 if n_action_features else action.index(1)]

            return self._bias + sum([w * f for w, f in zip(W, F)])

        rewards = [
            reward(i, c, a) for i in range(100) for c in [context(i)]
            for a in actions(i, c)
        ]

        m = mean(rewards)
        s = (max(rewards) - min(rewards)) or 1

        self._bias = 0.5 - m / s
        self._weights = [[w / s for w in W] for W in self._weights]
        self._clip = True

        super().__init__(n_interactions, context, actions, reward)
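
Here the reward is linear in the expanded features returned by InteractionsEncoder(reward_features), where a string such as "xa" requests products of one context feature with one action feature. A simplified, dense-only stand-in for that expansion (not coba's actual implementation) shows what the encoder is expected to produce:

from itertools import combinations_with_replacement, product
from typing import Dict, List, Sequence

def encode_interactions(specs: Sequence[str],
                        x: Sequence[float],
                        a: Sequence[float]) -> List[float]:
    """Expand dense context (x) and action (a) features into interaction terms.

    A spec like "xa" produces every product of one context feature with one
    action feature; "xx" produces all degree-2 context monomials, and so on.
    This is a simplified stand-in for coba's InteractionsEncoder.
    """
    sources: Dict[str, Sequence[float]] = {"x": x, "a": a}
    out: List[float] = []
    for spec in specs:
        # one combination group per namespace, e.g. "xxa" -> x taken 2 at a time, a taken 1 at a time
        groups = [combinations_with_replacement(sources[ns], spec.count(ns))
                  for ns in sorted(set(spec))]
        for combo in product(*groups):
            term = 1.0
            for group in combo:
                for value in group:
                    term *= value
            out.append(term)
    return out

# e.g. encode_interactions(["a", "xa"], x=[2.0, 3.0], a=[5.0])
#      -> [5.0, 10.0, 15.0]  (the "a" term, then x1*a1 and x2*a1)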
Example #5
    def __init__(self,
                 n_interactions: int = 500,
                 n_actions: int = 10,
                 n_features: int = 10,
                 context_features: bool = True,
                 action_features: bool = True,
                 sparse: bool = False,
                 seed: int = 1) -> None:

        self._n_bandits = n_actions
        self._n_features = n_features
        self._context_features = context_features
        self._action_features = action_features
        self._seed = seed

        r = CobaRandom(seed)

        context: Callable[[int], Context]
        actions: Callable[[int, Context], Sequence[Action]]
        rewards: Callable[[int, Context, Action], float]

        sparsify = lambda x: (tuple(range(len(x))), tuple(x)
                              ) if sparse else tuple(x)
        unsparse = lambda x: x[1] if sparse else x
        normalize = lambda X: [x / sum(X) for x in X]

        if not context_features and not action_features:

            means = [
                m / n_actions + 1 / (2 * n_actions)
                for m in r.randoms(n_actions)
            ]

            actions_features = []
            for i in range(n_actions):
                action = [0] * n_actions
                action[i] = 1
                actions_features.append(tuple(action))

            context = lambda i: None
            actions = lambda i, c: sparsify(actions_features)
            rewards = lambda i, c, a: means[unsparse(a).index(1)] + (r.random(
            ) - .5) / n_actions

        if context_features and not action_features:
            #normalizing allows us to make sure our reward is in [0,1]
            bandit_thetas = [r.randoms(n_features) for _ in range(n_actions)]
            theta_totals = [sum(theta) for theta in bandit_thetas]
            bandit_thetas = [[
                t / norm for t in theta
            ] for theta, norm in zip(bandit_thetas, theta_totals)]

            actions_features = []
            for i in range(n_actions):
                action = [0] * n_actions
                action[i] = 1
                actions_features.append(tuple(action))

            context = lambda i: sparsify(r.randoms(n_features))
            actions = lambda i, c: [sparsify(af) for af in actions_features]
            rewards = lambda i, c, a: sum([
                cc * t for cc, t in zip(unsparse(c), bandit_thetas[unsparse(a).
                                                                   index(1)])
            ])

        if not context_features and action_features:

            theta = r.randoms(n_features)

            context = lambda i: None
            actions = lambda i, c: [
                sparsify(normalize(r.randoms(n_features)))
                for _ in range(r.randint(2, 10))
            ]
            rewards = lambda i, c, a: float(
                sum([cc * t for cc, t in zip(theta, unsparse(a))]))

        if context_features and action_features:

            context = lambda i: sparsify(r.randoms(n_features))
            actions = lambda i, c: [
                sparsify(normalize(r.randoms(n_features)))
                for _ in range(r.randint(2, 10))
            ]
            rewards = lambda i, c, a: sum(
                [cc * t for cc, t in zip(unsparse(c), unsparse(a))])

        super().__init__(n_interactions, context, actions, rewards)
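
Each branch keeps the reward roughly inside [0,1] by normalizing either the sampled action features or the per-action weight vectors. The sketch below (plain random, independent of coba) checks that argument for the context-features-only branch: with theta normalized to sum to 1 and context features in [0,1), the dot product is a weighted average and cannot leave [0,1]:

import random

rng = random.Random(1)
n_features = 10

# per-action weights normalized to sum to 1, as bandit_thetas is above
theta = [rng.random() for _ in range(n_features)]
total = sum(theta)
theta = [t / total for t in theta]

for _ in range(5):
    context = [rng.random() for _ in range(n_features)]   # features in [0, 1)
    reward = sum(c * t for c, t in zip(context, theta))   # weighted average of the features
    assert 0.0 <= reward <= 1.0
    print(round(reward, 3))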
Example #6
    def test_cb_adf_learning(self):
        learner = VowpalArgsLearner()

        n_actions = 3
        n_features = 10
        n_examples = 2000

        rng = CobaRandom(11111)

        contexts = [rng.randoms(n_features) for _ in range(n_examples)]

        pre_learn_rewards = []
        for context in contexts[:int(.9 * n_examples)]:

            actions = [rng.randoms(n_features) for _ in range(n_actions)]
            rewards = [
                sum([a * c for a, c in zip(action, context)])
                for action in actions
            ]
            rewards = [int(r == max(rewards)) for r in rewards]

            pre_learn_rewards.append(
                rng.choice(rewards,
                           learner.predict(context, actions)[0]))

        for context in contexts[:int(.9 * n_examples)]:

            actions = [rng.randoms(n_features) for _ in range(n_actions)]
            rewards = [
                sum([a * c for a, c in zip(action, context)])
                for action in actions
            ]
            rewards = [int(r == max(rewards)) for r in rewards]

            probs, info = learner.predict(context, actions)
            choice = rng.choice(list(range(n_actions)), probs)

            learner.learn(context, actions[choice], rewards[choice],
                          probs[choice], info)

        post_learn_rewards = []

        for context in contexts[int(.9 * n_examples):]:
            actions = [rng.randoms(n_features) for _ in range(n_actions)]
            rewards = [
                sum([a * c for a, c in zip(action, context)])
                for action in actions
            ]
            rewards = [int(r == max(rewards)) for r in rewards]

            post_learn_rewards.append(
                rng.choice(rewards,
                           learner.predict(context, actions)[0]))

        average_pre_learn_reward = sum(pre_learn_rewards) / len(
            pre_learn_rewards)
        average_post_learn_reward = sum(post_learn_rewards) / len(
            post_learn_rewards)

        self.assertAlmostEqual(.33, average_pre_learn_reward, places=2)
        self.assertAlmostEqual(.78, average_post_learn_reward, places=2)
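
The 0.33 baseline follows directly from the reward construction: exactly one of the three actions gets reward 1 in each round, so a uniform-random policy earns about 1/3 on average, while the 0.78 post-learning value reflects how often the trained policy selects that action. A standalone sketch of the reward construction and the uniform-random baseline (no VowpalArgsLearner involved):

import random

rng = random.Random(11111)
n_actions, n_features, n_rounds = 3, 10, 2000

total = 0.0
for _ in range(n_rounds):
    context = [rng.random() for _ in range(n_features)]
    actions = [[rng.random() for _ in range(n_features)] for _ in range(n_actions)]

    # reward is 1 only for the action whose dot product with the context is largest
    scores = [sum(a * c for a, c in zip(action, context)) for action in actions]
    rewards = [int(s == max(scores)) for s in scores]

    total += rewards[rng.randrange(n_actions)]   # uniform-random action choice

print(round(total / n_rounds, 2))   # ~0.33, matching the pre-learning assertion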