def __init__(self,
    n_interactions: int,
    n_actions: int = 10,
    n_context_features: int = 10,
    n_action_features: int = 10,
    n_exemplars: int = 10,
    kernel: Literal['linear', 'polynomial', 'exponential'] = 'exponential',
    degree: int = 2,
    gamma: float = 1,
    seed: int = 1) -> None:
    """Instantiate a KernelSyntheticSimulation.

    Rewards are a bias plus a weighted sum of kernel evaluations between the
    current context/action feature vector and a fixed set of random exemplars.

    Args:
        n_interactions: The number of interactions the simulation should have.
        n_actions: The number of actions each interaction should have.
        n_context_features: The number of features each context should have.
        n_action_features: The number of features each action should have.
        n_exemplars: The number of exemplar action, context pairs.
        kernel: The family of the kernel basis functions.
        degree: This argument is only relevant when using polynomial kernels.
        gamma: This argument is only relevant when using exponential kernels.
        seed: The random number seed used to generate all features, weights and noise in the simulation.
    """

    # Remember the raw constructor arguments (presumably for repr/pickle support
    # elsewhere in the class — TODO confirm against the rest of the file).
    self._args = (n_interactions, n_actions, n_context_features, n_action_features, n_exemplars, kernel, degree, gamma, seed)

    self._n_actions          = n_actions
    self._n_context_features = n_context_features
    self._n_action_features  = n_action_features
    self._n_exemplars        = n_exemplars
    self._seed               = seed
    self._kernel             = kernel
    self._degree             = degree
    self._gamma              = gamma

    rng = CobaRandom(seed)

    # if there are no features then we are unable to define exemplars
    if n_action_features + n_context_features == 0:
        n_exemplars = 0

    # All synthetic features are i.i.d. draws from N(0, 0.75).
    feat_gen = lambda n: tuple(rng.gausses(n, 0, .75))

    one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

    # One shared exemplar set when actions have features (the action features
    # are part of the kernel input); otherwise one exemplar set per one-hot action.
    self._exemplars = [[feat_gen(n_action_features + n_context_features) for _ in range(n_exemplars)] for _ in range(1 if n_action_features else n_actions)]

    # With no exemplars the reward degenerates to one weight per action.
    weight_count = n_actions if n_exemplars == 0 else n_exemplars

    # Weights are drawn uniformly from [-1, 1).
    self._weights = [1 - 2 * w for w in rng.randoms(weight_count)]
    self._bias = 0

    if kernel == 'polynomial':
        # this ensures the dot-product between F and an exemplar is in [0,upper_bound]
        # This ensures that higher-order polynomials will remain reasonably well behaved
        upper_bound = (1.5)**(1 / degree) - 1
        self._exemplars = [[[upper_bound * ee / sum(e) for ee in e] for e in E] for E in self._exemplars]

    def context(index: int) -> Context:
        # Fresh random context per interaction; None when contexts are featureless.
        return feat_gen(n_context_features) if n_context_features else None

    def actions(index: int, context: Context) -> Sequence[Action]:
        # Fresh random action features per interaction, or the fixed one-hot actions.
        return [feat_gen(n_action_features) for _ in range(n_actions)] if n_action_features else one_hot_acts

    def reward(index: int, context: Context, action: Action) -> float:

        if n_exemplars == 0:
            # Featureless case: reward is simply the chosen one-hot action's weight.
            return self._bias + self._weights[action.index(1)]

        # handles None context
        context = context or []

        if n_action_features:
            # Kernel input is the concatenated context+action features; shared exemplars.
            f = list(context) + list(action)
            W = self._weights
            E = self._exemplars[0]
        else:
            # Kernel input is the context alone; exemplars chosen by one-hot action index.
            f = list(context)
            W = self._weights
            E = self._exemplars[action.index(1)]

        if kernel == "linear":
            K = lambda x1, x2: self._linear_kernel(x1, x2)
        if kernel == "polynomial":
            K = lambda x1, x2: self._polynomial_kernel(x1, x2, self._degree)
        if kernel == "exponential":
            K = lambda x1, x2: self._exponential_kernel(x1, x2, self._gamma)

        # Weighted sum of kernel evaluations against each exemplar.
        return self._bias + sum([w * K(e, f) for w, e in zip(W, E)])

    # Sample 100 interactions to estimate the reward distribution's location/scale...
    rewards = [reward(i, c, a) for i in range(100) for c in [context(i)] for a in actions(i, c)]

    m = mean(rewards)
    s = (max(rewards) - min(rewards)) or 1  # `or 1` guards the division when all sampled rewards are equal

    # ...then shift/scale so rewards roughly center and fill [0,1]. This works
    # because reward() reads self._bias/self._weights at call time, so later
    # calls see the rescaled values.
    self._bias    = 0.5 - m / s
    self._weights = [w / s for w in self._weights]

    super().__init__(n_interactions, context, actions, reward)
def __init__(self,
    n_interactions: int,
    n_actions: int = 10,
    n_context_features: int = 10,
    n_action_features: int = 10,
    seed: int = 1) -> None:
    """Instantiate an MLPSyntheticSimulation.

    Rewards come from a one-hidden-layer network: random Gaussian hidden
    weights with a sigmoid activation, followed by a random linear output layer.

    Args:
        n_interactions: The number of interactions the simulation should have.
        n_actions: The number of actions each interaction should have.
        n_context_features: The number of features each context should have.
        n_action_features: The number of features each action should have.
        seed: The random number seed used to generate all features, weights and noise in the simulation.
    """

    # Remember the raw constructor arguments (presumably for repr/pickle support
    # elsewhere in the class — TODO confirm against the rest of the file).
    self._args = (n_interactions, n_actions, n_context_features, n_action_features, seed)

    self._n_actions          = n_actions
    self._n_context_features = n_context_features
    self._n_action_features  = n_action_features
    self._seed               = seed

    rng = CobaRandom(seed)

    input_layer_size  = n_context_features + n_action_features
    hidden_layer_size = 50

    self._bias = 0

    if input_layer_size:
        # One hidden-weight matrix shared across actions when actions have
        # features (actions feed the network input); otherwise one matrix per
        # one-hot action.
        hidden_weights    = [[rng.gausses(input_layer_size, 0, 1.5) for _ in range(hidden_layer_size)] for _ in range(1 if n_action_features else n_actions)]
        hidden_activation = lambda x: 1 / (1 + math.exp(-x))  # sigmoid activation
        hidden_output     = lambda inputs, weights: hidden_activation(sum([i * w for i, w in zip(inputs, weights)]))
        self._output_weights = rng.gausses(hidden_layer_size)
    else:
        # No input features at all: one output weight per one-hot action.
        self._output_weights = rng.gausses(n_actions)

    def context(index: int) -> Context:
        # Fresh random context per interaction; None when contexts are featureless.
        return tuple(rng.gausses(n_context_features)) if n_context_features else None

    def actions(index: int, context: Context) -> Sequence[Action]:
        if n_action_features:
            # Fresh random action feature vectors per interaction.
            return [(rng.gausses(n_action_features)) for _ in range(n_actions)]
        else:
            return OneHotEncoder().fit_encodes(range(n_actions))

    def reward(index: int, context: Context, action: Action) -> float:

        # handles None context
        context = context or []

        if not n_action_features and not n_context_features:
            # Featureless case: reward is simply the chosen action's output weight.
            return self._bias + self._output_weights[action.index(1)]

        if n_action_features:
            # Network input is the concatenated context+action; shared hidden layer.
            I = list(context) + list(action)
            W = self._output_weights
            H = hidden_weights[0]
        else:
            # Network input is the context alone; hidden layer chosen by action index.
            I = list(context)
            W = self._output_weights
            H = hidden_weights[action.index(1)]

        # Forward pass: sigmoid hidden layer, then linear output layer.
        hidden_outputs = [hidden_output(I, h) for h in H]

        return self._bias + sum([w * hout for w, hout in zip(W, hidden_outputs)])

    # Sample 100 interactions to estimate the reward distribution's location/scale...
    rewards = [reward(i, c, a) for i in range(100) for c in [context(i)] for a in actions(i, c)]

    m = mean(rewards)
    s = (max(rewards) - min(rewards)) or 1  # `or 1` guards the division when all sampled rewards are equal

    # ...then shift/scale so rewards roughly center and fill [0,1]. This works
    # because reward() reads self._bias/self._output_weights at call time, so
    # later calls see the rescaled values.
    self._bias           = 0.5 - m / s
    self._output_weights = [w / s for w in self._output_weights]

    super().__init__(n_interactions, context, actions, reward)
def __init__(self,
    n_interactions: int,
    n_actions: int = 10,
    n_context_features: int = 10,
    n_action_features: int = 10,
    reward_features: Sequence[str] = ["a", "xa"],
    seed: int = 1) -> None:
    """Instantiate a LinearSyntheticSimulation.

    Rewards are linear in interaction terms (e.g. 'a', 'xa') built from the
    context ('x') and action ('a') features by an InteractionsEncoder.

    Args:
        n_interactions: The number of interactions the simulation should have.
        n_actions: The number of actions each interaction should have.
        n_context_features: The number of features each context should have.
        n_action_features: The number of features each action should have.
        reward_features: The features in the simulation's linear reward function.
        seed: The random number seed used to generate all features, weights and noise in the simulation.
    """

    # NOTE(review): the mutable default for reward_features is safe here only
    # because it is never mutated below (it is rebound, not modified in place).

    # Remember the raw constructor arguments (presumably for repr/pickle support
    # elsewhere in the class — TODO confirm against the rest of the file).
    self._args = (n_interactions, n_actions, n_context_features, n_action_features, reward_features, seed)

    self._n_actions          = n_actions
    self._n_context_features = n_context_features
    self._n_action_features  = n_action_features
    self._reward_features    = reward_features
    self._seed               = seed

    # Strip context ('x') terms from the reward features when contexts are featureless.
    if not self._n_context_features:
        reward_features = list(set(filter(None, [f.replace('x', '') for f in reward_features])))

    # Strip action ('a') terms from the reward features when actions are featureless.
    if not self._n_action_features:
        reward_features = list(set(filter(None, [f.replace('a', '') for f in reward_features])))

    rng          = CobaRandom(seed)
    feat_encoder = InteractionsEncoder(reward_features)

    # to try and make sure high-order polynomials are well behaved
    # we center our context and action features on 1 and give them
    # a very small amount of variance. Then, in post processing, we
    # shift and re-scale our reward to center and fill in [0,1].
    max_degree = max([len(f) for f in reward_features]) if reward_features else 1
    feat_gen   = lambda n: tuple([g * rng.choice([1, -1]) for g in rng.gausses(n, mu=1, sigma=1 / (2 * max_degree))])

    one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

    # Probe the encoder once with dummy inputs to learn how many interaction
    # terms a full context/action pair produces.
    feature_count = len(feat_encoder.encode(x=[1] * n_context_features, a=[1] * n_action_features))

    # One weight vector shared across actions when actions have features,
    # otherwise one weight vector per one-hot action.
    weight_parts = 1 if n_action_features else n_actions
    weight_count = 1 if feature_count == 0 else feature_count

    # Weights are drawn uniformly from [-1, 1).
    self._weights = [[1 - 2 * w for w in rng.randoms(weight_count)] for _ in range(weight_parts)]

    self._bias = 0
    self._clip = False

    def context(index: int) -> Context:
        # Fresh random context per interaction; None when contexts are featureless.
        return feat_gen(n_context_features) if n_context_features else None

    def actions(index: int, context: Context) -> Sequence[Action]:
        # Fresh random action features per interaction, or the fixed one-hot actions.
        return [feat_gen(n_action_features) for _ in range(n_actions)] if n_action_features else one_hot_acts

    def reward(index: int, context: Context, action: Action) -> float:

        # `or [1]` falls back to a constant feature when encoding yields nothing.
        F = feat_encoder.encode(x=context, a=action) or [1]
        W = self._weights[0 if n_action_features else action.index(1)]

        return self._bias + sum([w * f for w, f in zip(W, F)])

    # Sample 100 interactions to estimate the reward distribution's location/scale...
    rewards = [reward(i, c, a) for i in range(100) for c in [context(i)] for a in actions(i, c)]

    m = mean(rewards)
    s = (max(rewards) - min(rewards)) or 1  # `or 1` guards the division when all sampled rewards are equal

    # ...then shift/scale so rewards roughly center and fill [0,1]. This works
    # because reward() reads self._bias/self._weights at call time, so later
    # calls see the rescaled values.
    self._bias    = 0.5 - m / s
    self._weights = [[w / s for w in W] for W in self._weights]
    self._clip    = True  # NOTE(review): presumably enables reward clipping in the base class — confirm

    super().__init__(n_interactions, context, actions, reward)