def filter(self, items: Iterable[Any]) -> Sequence[Any]:
    rng = CobaRandom(self._seed)

    if self._max_count == 0:
        return []

    if self._max_count is None:
        return Take(self._count).filter(rng.shuffle(list(items)))

    # Reservoir sampling via Algorithm L: after filling the reservoir we
    # jump ahead S items at a time rather than visiting every item.
    # Note that CobaRandom.shuffle returns the shuffled list.
    W         = 1
    items     = iter(items)
    reservoir = rng.shuffle(list(islice(items, self._max_count)))

    try:
        while True:
            [r1, r2, r3] = rng.randoms(3)
            W = W * math.exp(math.log(r1) / self._max_count)
            S = math.floor(math.log(r2) / math.log(1 - W))
            # the -.001 guards against r3 == 1 indexing out of range
            reservoir[int(r3 * self._max_count - .001)] = next(islice(items, S, S + 1))
    except StopIteration:
        pass

    return Take(self._count).filter(reservoir)
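# For reference, a self-contained sketch of the same skip-ahead technique
# (Li's Algorithm L) outside coba's pipe abstractions. The function name
# and the use of the stdlib random module are illustrative assumptions.
import math
import random
from itertools import islice
from typing import Any, Iterable, List

def reservoir_sample(items: Iterable[Any], k: int, seed: int = 1) -> List[Any]:
    rng       = random.Random(seed)
    items     = iter(items)
    reservoir = list(islice(items, k))  # fill the reservoir with the first k items

    if len(reservoir) < k:
        return reservoir

    W = math.exp(math.log(rng.random()) / k)
    try:
        while True:
            # skip S items outright; Algorithm L guarantees none of the
            # skipped items would have entered the reservoir
            S = math.floor(math.log(rng.random()) / math.log(1 - W))
            item = next(islice(items, S, S + 1))
            reservoir[rng.randrange(k)] = item
            W *= math.exp(math.log(rng.random()) / k)
    except StopIteration:
        pass

    return reservoir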
def test_regression_learning(self):
    vw = VowpalMediator().init_learner("--quiet", 1)

    n_features = 10
    n_examples = 1000

    rng = CobaRandom(1)

    # create a noiseless linear regression problem with known weights
    weights  = rng.randoms(n_features)
    rows     = [rng.randoms(n_features) for _ in range(n_examples)]
    labels   = [sum([w * r for w, r in zip(weights, row)]) for row in rows]
    examples = list(zip(rows, labels))

    self.assertEqual(0, vw.predict(vw.make_example({"x": rows[0]}, None)))

    # measure test MSE on the final 10% of examples before any learning
    pred_errs = []
    for row, label in examples[int(.9 * n_examples):]:
        pred_errs.append(vw.predict(vw.make_example({"x": row}, None)) - label)
    pre_learn_mse = sum([e**2 for e in pred_errs]) / len(pred_errs)

    # train on the first 90% of examples
    for row, label in examples[0:int(.9 * n_examples)]:
        vw.learn(vw.make_example({"x": row}, str(label)))

    # measure test MSE again on the same held-out 10%
    pred_errs = []
    for row, label in examples[int(.9 * n_examples):]:
        pred_errs.append(vw.predict(vw.make_example({"x": row}, None)) - label)
    post_learn_mse = sum([e**2 for e in pred_errs]) / len(pred_errs)

    self.assertNotAlmostEqual(0, pre_learn_mse, places=2)
    self.assertAlmostEqual(0, post_learn_mse, places=2)
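# Independent of VW, the test's logic is that online updates against a
# linear target should drive held-out MSE toward zero. A minimal pure-
# Python sketch of the same check, with a hand-rolled LMS update standing
# in for VW's learner (all names here are illustrative):
import random

def mse_before_and_after(seed: int = 1, n_features: int = 10, n_examples: int = 1000, lr: float = 0.1):
    rng    = random.Random(seed)
    true_w = [rng.random() for _ in range(n_features)]
    rows   = [[rng.random() for _ in range(n_features)] for _ in range(n_examples)]
    labels = [sum(w * x for w, x in zip(true_w, row)) for row in rows]

    w     = [0.0] * n_features
    split = int(.9 * n_examples)

    def held_out_mse():
        errs = [sum(wi * xi for wi, xi in zip(w, row)) - y for row, y in zip(rows[split:], labels[split:])]
        return sum(e * e for e in errs) / len(errs)

    before = held_out_mse()

    # one online least-squares pass over the training split
    for row, y in zip(rows[:split], labels[:split]):
        grad = 2 * (sum(wi * xi for wi, xi in zip(w, row)) - y)
        w    = [wi - lr * grad * xi for wi, xi in zip(w, row)]

    after = held_out_mse()
    return before, after  # expect after to be orders of magnitude smaller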
def __init__(self,
    n_interactions: int,
    n_actions: int = 10,
    n_context_features: int = 10,
    n_action_features: int = 10,
    n_exemplars: int = 10,
    kernel: Literal['linear', 'polynomial', 'exponential'] = 'exponential',
    degree: int = 2,
    gamma: float = 1,
    seed: int = 1) -> None:
    """Instantiate a KernelSyntheticSimulation.

    Args:
        n_interactions: The number of interactions the simulation should have.
        n_actions: The number of actions each interaction should have.
        n_context_features: The number of features each context should have.
        n_action_features: The number of features each action should have.
        n_exemplars: The number of exemplar context-action pairs.
        kernel: The family of the kernel basis functions.
        degree: This argument is only relevant when using polynomial kernels.
        gamma: This argument is only relevant when using exponential kernels.
        seed: The random number seed used to generate all features, weights and noise in the simulation.
    """

    self._args = (n_interactions, n_actions, n_context_features, n_action_features, n_exemplars, kernel, degree, gamma, seed)

    self._n_actions          = n_actions
    self._n_context_features = n_context_features
    self._n_action_features  = n_action_features
    self._n_exemplars        = n_exemplars
    self._seed               = seed
    self._kernel             = kernel
    self._degree             = degree
    self._gamma              = gamma

    rng = CobaRandom(seed)

    # if there are no features then we are unable to define exemplars
    if n_action_features + n_context_features == 0:
        n_exemplars = 0

    feat_gen     = lambda n: tuple(rng.gausses(n, 0, .75))
    one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

    # one exemplar set shared across actions when actions have features,
    # otherwise one exemplar set per action
    self._exemplars = [[feat_gen(n_action_features + n_context_features) for _ in range(n_exemplars)] for _ in range(1 if n_action_features else n_actions)]

    weight_count  = n_actions if n_exemplars == 0 else n_exemplars
    self._weights = [1 - 2 * w for w in rng.randoms(weight_count)]

    self._bias = 0

    if kernel == 'polynomial':
        # this ensures the dot-product between F and an exemplar is in [0,upper_bound],
        # which keeps higher-order polynomials reasonably well behaved
        upper_bound = (1.5)**(1 / degree) - 1
        self._exemplars = [[[upper_bound * ee / sum(e) for ee in e] for e in E] for E in self._exemplars]

    def context(index: int) -> Context:
        return feat_gen(n_context_features) if n_context_features else None

    def actions(index: int, context: Context) -> Sequence[Action]:
        return [feat_gen(n_action_features) for _ in range(n_actions)] if n_action_features else one_hot_acts

    def reward(index: int, context: Context, action: Action) -> float:

        if n_exemplars == 0:
            return self._bias + self._weights[action.index(1)]

        # handles None context
        context = context or []

        if n_action_features:
            f = list(context) + list(action)
            W = self._weights
            E = self._exemplars[0]
        else:
            f = list(context)
            W = self._weights
            E = self._exemplars[action.index(1)]

        if kernel == "linear":
            K = lambda x1, x2: self._linear_kernel(x1, x2)
        if kernel == "polynomial":
            K = lambda x1, x2: self._polynomial_kernel(x1, x2, self._degree)
        if kernel == "exponential":
            K = lambda x1, x2: self._exponential_kernel(x1, x2, self._gamma)

        return self._bias + sum([w * K(e, f) for w, e in zip(W, E)])

    # sample 100 interactions to estimate the reward location and scale,
    # then shift and re-scale so rewards roughly center and fill [0,1]
    rewards = [reward(i, c, a) for i in range(100) for c in [context(i)] for a in actions(i, c)]

    m = mean(rewards)
    s = (max(rewards) - min(rewards)) or 1

    self._bias    = 0.5 - m / s
    self._weights = [w / s for w in self._weights]

    super().__init__(n_interactions, context, actions, reward)
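# The _linear_kernel, _polynomial_kernel, and _exponential_kernel helpers
# are not part of this excerpt. Under the standard textbook definitions
# they would look roughly like the sketch below (an assumption about
# their form, not coba's verbatim implementation).
import math
from typing import Sequence

def linear_kernel(x1: Sequence[float], x2: Sequence[float]) -> float:
    # k(x1, x2) = <x1, x2>
    return sum(a * b for a, b in zip(x1, x2))

def polynomial_kernel(x1: Sequence[float], x2: Sequence[float], degree: int) -> float:
    # k(x1, x2) = (1 + <x1, x2>)^degree
    return (1 + linear_kernel(x1, x2)) ** degree

def exponential_kernel(x1: Sequence[float], x2: Sequence[float], gamma: float) -> float:
    # k(x1, x2) = exp(-||x1 - x2|| / gamma)
    dist = math.sqrt(sum((a - b) ** 2 for a, b in zip(x1, x2)))
    return math.exp(-dist / gamma)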
def __init__(self,
    n_interactions: int,
    n_actions: int = 10,
    n_context_features: int = 10,
    n_action_features: int = 10,
    reward_features: Sequence[str] = ["a", "xa"],
    seed: int = 1) -> None:
    """Instantiate a LinearSyntheticSimulation.

    Args:
        n_interactions: The number of interactions the simulation should have.
        n_actions: The number of actions each interaction should have.
        n_context_features: The number of features each context should have.
        n_action_features: The number of features each action should have.
        reward_features: The features in the simulation's linear reward function.
        seed: The random number seed used to generate all features, weights and noise in the simulation.
    """

    self._args = (n_interactions, n_actions, n_context_features, n_action_features, reward_features, seed)

    self._n_actions          = n_actions
    self._n_context_features = n_context_features
    self._n_action_features  = n_action_features
    self._reward_features    = reward_features
    self._seed               = seed

    if not self._n_context_features:
        reward_features = list(set(filter(None, [f.replace('x', '') for f in reward_features])))

    if not self._n_action_features:
        reward_features = list(set(filter(None, [f.replace('a', '') for f in reward_features])))

    rng          = CobaRandom(seed)
    feat_encoder = InteractionsEncoder(reward_features)

    # to try and make sure high-order polynomials are well behaved
    # we center our context and action features on 1 and give them
    # a very small amount of variance. Then, in post processing, we
    # shift and re-scale our reward to center and fill in [0,1].
    max_degree = max([len(f) for f in reward_features]) if reward_features else 1
    feat_gen   = lambda n: tuple([g * rng.choice([1, -1]) for g in rng.gausses(n, mu=1, sigma=1 / (2 * max_degree))])

    one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

    feature_count = len(feat_encoder.encode(x=[1] * n_context_features, a=[1] * n_action_features))
    weight_parts  = 1 if n_action_features else n_actions
    weight_count  = 1 if feature_count == 0 else feature_count

    self._weights = [[1 - 2 * w for w in rng.randoms(weight_count)] for _ in range(weight_parts)]
    self._bias    = 0
    self._clip    = False

    def context(index: int) -> Context:
        return feat_gen(n_context_features) if n_context_features else None

    def actions(index: int, context: Context) -> Sequence[Action]:
        return [feat_gen(n_action_features) for _ in range(n_actions)] if n_action_features else one_hot_acts

    def reward(index: int, context: Context, action: Action) -> float:
        F = feat_encoder.encode(x=context, a=action) or [1]
        W = self._weights[0 if n_action_features else action.index(1)]

        return self._bias + sum([w * f for w, f in zip(W, F)])

    rewards = [reward(i, c, a) for i in range(100) for c in [context(i)] for a in actions(i, c)]

    m = mean(rewards)
    s = (max(rewards) - min(rewards)) or 1

    self._bias    = 0.5 - m / s
    self._weights = [[w / s for w in W] for W in self._weights]
    self._clip    = True

    super().__init__(n_interactions, context, actions, reward)
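# The reward above is linear in the features produced by InteractionsEncoder.
# Assuming its spec strings work as follows -- 'x' names context features,
# 'a' names action features, and a multi-character term such as "xa" takes
# products of one feature from each named set -- a simplified cross-product
# version looks like this (coba's actual encoder may deduplicate symmetric
# terms; this is a sketch of the idea, not its verbatim code):
import math
from itertools import product
from typing import List, Sequence

def encode_interactions(spec: Sequence[str], x: Sequence[float], a: Sequence[float]) -> List[float]:
    feats: List[float] = []
    for term in spec:
        # "a" -> the action features; "xa" -> every x_i * a_j product; etc.
        sets = [x if ch == 'x' else a for ch in term]
        feats.extend(math.prod(combo) for combo in product(*sets))
    return feats

# e.g. encode_interactions(["a", "xa"], x=(2,), a=(3, 5)) == [3, 5, 6, 10]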
def __init__(self,
    n_interactions: int = 500,
    n_actions: int = 10,
    n_features: int = 10,
    context_features: bool = True,
    action_features: bool = True,
    sparse: bool = False,
    seed: int = 1) -> None:

    self._n_bandits        = n_actions
    self._n_features       = n_features
    self._context_features = context_features
    self._action_features  = action_features
    self._seed             = seed

    r = CobaRandom(seed)

    context: Callable[[int], Context]
    actions: Callable[[int, Context], Sequence[Action]]
    rewards: Callable[[int, Context, Action], float]

    sparsify  = lambda x: (tuple(range(len(x))), tuple(x)) if sparse else tuple(x)
    unsparse  = lambda x: x[1] if sparse else x
    normalize = lambda X: [x / sum(X) for x in X]

    if not context_features and not action_features:
        # classic k-armed bandit: fixed per-arm means plus uniform noise
        means = [m / n_actions + 1 / (2 * n_actions) for m in r.randoms(n_actions)]

        actions_features = []
        for i in range(n_actions):
            action = [0] * n_actions
            action[i] = 1
            actions_features.append(tuple(action))

        context = lambda i: None
        actions = lambda i, c: [sparsify(af) for af in actions_features]
        rewards = lambda i, c, a: means[unsparse(a).index(1)] + (r.random() - .5) / n_actions

    if context_features and not action_features:
        # one theta per arm; normalizing allows us to make sure our reward is in [0,1]
        bandit_thetas = [r.randoms(n_features) for _ in range(n_actions)]
        theta_totals  = [sum(theta) for theta in bandit_thetas]
        bandit_thetas = [[t / norm for t in theta] for theta, norm in zip(bandit_thetas, theta_totals)]

        actions_features = []
        for i in range(n_actions):
            action = [0] * n_actions
            action[i] = 1
            actions_features.append(tuple(action))

        context = lambda i: sparsify(r.randoms(n_features))
        actions = lambda i, c: [sparsify(af) for af in actions_features]
        rewards = lambda i, c, a: sum([cc * t for cc, t in zip(unsparse(c), bandit_thetas[unsparse(a).index(1)])])

    if not context_features and action_features:
        # a single shared theta; reward is theta dot action-features
        theta = r.randoms(n_features)

        context = lambda i: None
        actions = lambda i, c: [sparsify(normalize(r.randoms(n_features))) for _ in range(r.randint(2, 10))]
        rewards = lambda i, c, a: float(sum([cc * t for cc, t in zip(theta, unsparse(a))]))

    if context_features and action_features:
        # reward is the context/action-features inner product
        context = lambda i: sparsify(r.randoms(n_features))
        actions = lambda i, c: [sparsify(normalize(r.randoms(n_features))) for _ in range(r.randint(2, 10))]
        rewards = lambda i, c, a: sum([cc * t for cc, t in zip(unsparse(c), unsparse(a))])

    super().__init__(n_interactions, context, actions, rewards)
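# For orientation, the four flag combinations above correspond to four
# familiar bandit settings. A hypothetical usage sketch (the class name
# DebugSimulation is assumed for illustration, not taken from this code):
fixed_arms    = DebugSimulation(context_features=False, action_features=False)  # k-armed bandit
per_arm_theta = DebugSimulation(context_features=True,  action_features=False)  # linear reward, one theta per arm
shared_theta  = DebugSimulation(context_features=False, action_features=True)   # one theta over action features
dot_product   = DebugSimulation(context_features=True,  action_features=True)   # reward = <context, action>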
def test_cb_adf_learning(self):
    learner = VowpalArgsLearner()

    n_actions  = 3
    n_features = 10
    n_examples = 2000

    rng = CobaRandom(11111)

    contexts = [rng.randoms(n_features) for _ in range(n_examples)]

    def make_actions_and_rewards(context):
        # actions are random feature vectors; the action with the largest
        # dot product against the context receives reward 1, the rest 0
        actions = [rng.randoms(n_features) for _ in range(n_actions)]
        rewards = [sum([a * c for a, c in zip(action, context)]) for action in actions]
        rewards = [int(r == max(rewards)) for r in rewards]
        return actions, rewards

    # expected reward of the untrained learner on the training contexts
    pre_learn_rewards = []
    for context in contexts[:int(.9 * n_examples)]:
        actions, rewards = make_actions_and_rewards(context)
        pre_learn_rewards.append(rng.choice(rewards, learner.predict(context, actions)[0]))

    # train on the first 90% of contexts
    for context in contexts[:int(.9 * n_examples)]:
        actions, rewards = make_actions_and_rewards(context)

        probs, info = learner.predict(context, actions)
        choice      = rng.choice(list(range(n_actions)), probs)

        learner.learn(context, actions[choice], rewards[choice], probs[choice], info)

    # expected reward of the trained learner on the held-out 10%
    post_learn_rewards = []
    for context in contexts[int(.9 * n_examples):]:
        actions, rewards = make_actions_and_rewards(context)
        post_learn_rewards.append(rng.choice(rewards, learner.predict(context, actions)[0]))

    average_pre_learn_reward  = sum(pre_learn_rewards) / len(pre_learn_rewards)
    average_post_learn_reward = sum(post_learn_rewards) / len(post_learn_rewards)

    # an untrained policy picks uniformly over 3 actions (~1/3);
    # learning should do considerably better than that
    self.assertAlmostEqual(.33, average_pre_learn_reward, places=2)
    self.assertAlmostEqual(.78, average_post_learn_reward, places=2)
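# For context, VowpalArgsLearner wraps VW's --cb_explore_adf reduction.
# A rough sketch of the same interaction written directly against the
# vowpalwabbit package's multiline text format; the arguments and
# namespaces below are assumptions, not the ones coba actually passes.
from vowpalwabbit import pyvw

vw = pyvw.vw("--cb_explore_adf --epsilon 0.05 --quiet")

# ADF examples are multiline: one shared (context) line, then one line
# per action; the chosen action carries a "0:cost:probability" label.
vw.learn([
    "shared | x0:0.1 x1:0.7",
    "0:-1.0:0.33 | a0:0.9 a1:0.2",  # chosen action; reward 1 encoded as cost -1
    "| a0:0.3 a1:0.8",
    "| a0:0.5 a1:0.5",
])

probs = vw.predict([
    "shared | x0:0.1 x1:0.7",
    "| a0:0.9 a1:0.2",
    "| a0:0.3 a1:0.8",
    "| a0:0.5 a1:0.5",
])  # one probability per action under epsilon-greedy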