def test_sparse_xa_with_strings(self):
    """Cross sparse context and action features when a context value is a string.

    The expected keys show the string value "z" embedded in the feature name
    ("x1z...") while numeric values are multiplied together.
    """
    encoder = InteractionsEncoder(["xa"])

    actual = encoder.encode(x={"1": "z", "2": 2}, a={"1": 3, "2": 4})
    expected = {"x1za1": 3, "x1za2": 4, "x2a1": 6, "x2a2": 8}

    self.assertEqual(expected, actual)
def test_sparse_xa_with_numeric_keys(self):
    """Cross sparse features keyed by integers, with one string-valued context entry.

    The expected output uses the same string feature names as the
    string-keyed variant of this test.
    """
    encoder = InteractionsEncoder(["xa"])

    actual = encoder.encode(x={1: "z", 2: 2}, a={1: 3, 2: 4})
    expected = {"x1za1": 3, "x1za2": 4, "x2a1": 6, "x2a2": 8}

    self.assertEqual(expected, actual)
def test_sparse_xxa(self):
    """Third-order sparse interaction: context crossed with itself and with the action.

    Only unordered context pairs are expected (x1x1, x1x2, x2x2), each
    combined with both action features.
    """
    encoder = InteractionsEncoder(["xxa"])

    actual = encoder.encode(x={"1": 1, "2": 2}, a={"1": 3, "2": 4})
    expected = {
        "x1x1a1": 3,
        "x1x1a2": 4,
        "x1x2a1": 6,
        "x1x2a2": 8,
        "x2x2a1": 12,
        "x2x2a2": 16,
    }

    self.assertEqual(expected, actual)
def test_singular_numeric_xa(self):
    """A dense tuple context crossed with a scalar action gives a dense list.

    The encoding is performed twice to check that repeated calls on the
    same encoder return equal results.
    """
    encoder = InteractionsEncoder(["xa"])

    first = encoder.encode(x=(1, 2, 3), a=2)
    second = encoder.encode(x=(1, 2, 3), a=2)

    self.assertEqual([2, 4, 6], first)
    self.assertEqual(first, second)
def test_string_tuple(self):
    """A tuple context containing a string is expected to encode to a dict.

    The encoding is performed twice to check that repeated calls on the
    same encoder return equal results.
    """
    encoder = InteractionsEncoder(["xa"])

    first = encoder.encode(x=('d', 2), a=2)
    second = encoder.encode(x=('d', 2), a=2)

    expected = {'x0da0': 2, 'x1a0': 4}

    self.assertEqual(expected, first)
    self.assertEqual(first, second)
def test_dense_interaction_xx_encode_performance(self):
    """Time 100 dense 'xx' encodings of a 100-element list against a fixed budget."""
    encoder = InteractionsEncoder(["xx"])
    dense_x = list(range(100))

    def encode_once():
        return encoder.encode(x=dense_x)

    elapsed = timeit.timeit(encode_once, number=100)

    # best observed was 0.03
    self.assertLess(elapsed, 0.3)
def test_sparse_interaction_xx_encode_performance(self):
    """Time 100 sparse 'xx' encodings of a 100-entry dict against a fixed budget."""
    encoder = InteractionsEncoder(["xx"])
    sparse_x = {str(i): i for i in range(100)}

    def encode_once():
        return encoder.encode(x=sparse_x)

    elapsed = timeit.timeit(encode_once, number=100)

    # best observed was 0.09
    self.assertLess(elapsed, 0.9)
def test_sparse_interaction_xxa_encode_performance(self):
    """Time 50 'xxa' encodings (100-entry sparse x, 3-element dense a) against a fixed budget."""
    encoder = InteractionsEncoder(["xxa"])
    sparse_x = {str(i): i for i in range(100)}
    dense_a = [1, 2, 3]

    def encode_once():
        return encoder.encode(x=sparse_x, a=dense_a)

    elapsed = timeit.timeit(encode_once, number=50)

    # best observed was 0.20
    self.assertLess(elapsed, 2.0)
def test_dense_x_a_xa_xxa(self):
    """Dense encoding with several interaction terms at once.

    The comparison ignores ordering (assertCountEqual); the expected values
    are grouped by term below purely for readability. The encoding is done
    twice to check that repeated calls return equal results.
    """
    encoder = InteractionsEncoder(["x", "a", "xa", "xxa"])

    first = encoder.encode(x=[1, 2, 3], a=[1, 2])
    second = encoder.encode(x=[1, 2, 3], a=[1, 2])

    expected = (
        [1, 2, 3]                                   # x
        + [1, 2]                                    # a
        + [1, 2, 3, 2, 4, 6]                        # x * a
        + [1, 2, 3, 4, 6, 9, 2, 4, 6, 8, 12, 18]    # x * x * a
    )

    self.assertCountEqual(expected, first)
    self.assertEqual(first, second)
def test_sparse_interaction_abc_encode_performance(self):
    """Time 25 'aabc' encodings (100-entry sparse a, dense b and c) against a fixed budget."""
    encoder = InteractionsEncoder(["aabc"])

    sparse_a = {str(i): i for i in range(100)}
    dense_b = [1, 2]
    dense_c = [2, 3]

    def encode_once():
        return encoder.encode(a=sparse_a, b=dense_b, c=dense_c)

    elapsed = timeit.timeit(encode_once, number=25)

    # best observed was 0.17
    self.assertLess(elapsed, 1.7)
def test_sparse_meta_x_a(self):
    """A SparseWithMeta-wrapped context is expected to encode like a plain dict."""
    encoder = InteractionsEncoder(["x", "a"])

    wrapped_x = SparseWithMeta({"1": 1, "2": 2})
    actual = encoder.encode(x=wrapped_x, a={"1": 3, "2": 4})

    expected = {"x1": 1, "x2": 2, "a1": 3, "a2": 4}

    self.assertEqual(expected, actual)
def __init__(self, alpha: float = 1, X: Sequence[str] = ['a', 'ax']) -> None:
    """Instantiate a LinUCBLearner.

    Args:
        alpha: This parameter controls the exploration rate of the algorithm. A value of 0 will cause actions
            to be selected based on the current best point estimate (i.e., no exploration) while a value of inf
            means that actions will be selected based solely on the bounds of the action point estimates (i.e.,
            we will always take actions that have the largest bound on their point estimate).
        X: Feature set interactions to use when calculating action value estimates. Context features
            are indicated by x's while action features are indicated by a's. For example, xaa means to cross
            the features between context and actions and actions.
    """
    PackageChecker.numpy("LinUCBLearner.__init__")

    # The default for X is a single list object shared by every call. Keep a
    # private copy so that nothing done to this learner's state can alter the
    # default (or a list still owned by the caller).
    X = list(X)

    self._alpha = alpha
    self._X = X
    self._X_encoder = InteractionsEncoder(X)

    # Model state is created lazily once the feature dimension is known.
    self._theta = None
    self._A_inv = None
def learn(self, context: Context, action: Action, reward: float, probability: float, info: Info) -> None:
    """Update the linear model with one observed (context, action, reward) triple.

    Args:
        context: The context the action was taken in (dense only).
        action: The action that was taken (dense only).
        reward: The reward observed for taking the action.
        probability: Not used by this method.
        info: Not used by this method.

    Raises:
        CobaException: If either the context or the action is a dict.
    """
    import numpy as np

    if any(isinstance(item, dict) for item in (action, context)):
        raise CobaException("Sparse data cannot be handled by this algorithm.")

    if context:
        context = list(Flatten().filter([list(context)]))[0]
    else:
        # Without context features every 'x' is dropped from the interaction
        # terms, and terms that become empty are discarded.
        stripped_terms = [term.replace('x', '') for term in self._X]
        self._X_encoder = InteractionsEncoder(list(set(filter(None, stripped_terms))))
        context = []

    encoded = self._X_encoder.encode(x=context, a=action)
    features: np.ndarray = np.array([1] + encoded).T  # leading 1 is the intercept term

    if self._A_inv is None:
        # First call: size the model to the observed feature dimension.
        n_features = features.shape[0]
        self._theta = np.zeros((n_features))
        self._A_inv = np.identity(n_features)

    predicted = self._theta @ features
    w = self._A_inv @ features
    v = features @ w
    denominator = 1 + v

    # Rank-one update of the inverse matrix followed by the weight update.
    self._A_inv = self._A_inv - np.outer(w, w) / denominator
    self._theta = self._theta + (reward - predicted) / denominator * w
def predict(self, context: Context, actions: Sequence[Action]) -> Probs:
    """Return a probability for each action, spread evenly over the highest-valued ones.

    Each action's value is its point estimate plus alpha times the square
    root of its bound term; all actions tied for the maximum value share
    the probability mass equally and every other action gets 0.

    Args:
        context: The context to choose an action in (dense only).
        actions: The actions to choose between (dense only).

    Returns:
        One probability per action, in the order the actions were given.

    Raises:
        CobaException: If the context or the first action is a dict.
    """
    import numpy as np #type: ignore

    if any(isinstance(item, dict) for item in (actions[0], context)):
        raise CobaException("Sparse data cannot be handled by this algorithm.")

    if context:
        context = list(Flatten().filter([list(context)]))[0]
    else:
        # Without context features every 'x' is dropped from the interaction
        # terms, and terms that become empty are discarded.
        stripped_terms = [term.replace('x', '') for term in self._X]
        self._X_encoder = InteractionsEncoder(list(set(filter(None, stripped_terms))))
        context = []

    # One column per action; the leading 1 in each row is the intercept term.
    rows = [[1] + self._X_encoder.encode(x=context, a=action) for action in actions]
    features: np.ndarray = np.array(rows).T

    if self._A_inv is None:
        # First call: size the model to the observed feature dimension.
        n_features = features.shape[0]
        self._theta = np.zeros(n_features)
        self._A_inv = np.identity(n_features)

    point_estimate = self._theta @ features
    point_bounds = np.diagonal(features.T @ self._A_inv @ features)
    action_values = point_estimate + self._alpha * np.sqrt(point_bounds)

    best = np.where(action_values == np.amax(action_values))[0]
    n_best = len(best)

    return [int(index in best) / n_best for index in range(len(actions))]
def test_multiple_sparse_with_only_empty(self):
    """With b and c both None, no 'ab' or 'abc' interactions are expected for a sparse a."""
    encoder = InteractionsEncoder(['ab', 'abc'])

    actual = encoder.encode(a={'A': 2}, b=None, c=None)

    self.assertEqual({}, actual)
def test_multiple_sparse_with_empty_and_non_empty(self):
    """With c None only the 'ab' interaction is expected to produce a feature."""
    encoder = InteractionsEncoder(['ab', 'abc'])

    actual = encoder.encode(a={'A': 2}, b='b', c=None)
    expected = {'aAb0b': 2}

    self.assertEqual(expected, actual)
def test_multiple_dense_with_only_empty(self):
    """With b and c both None, a dense a is expected to yield an empty list."""
    encoder = InteractionsEncoder(['ab', 'abc'])

    actual = encoder.encode(a=2, b=None, c=None)

    self.assertEqual([], actual)
def test_singular_string_abc(self):
    """Three scalar namespaces crossed together give the single product 2 * 3 * 4."""
    encoder = InteractionsEncoder(["abc"])

    actual = encoder.encode(a=2, b=3, c=4)

    self.assertEqual([24], actual)
def test_sparse_xa_is_string(self):
    """A sparse context crossed with a bare string action.

    The expected keys carry the action as "a0a" and keep the context values.
    """
    encoder = InteractionsEncoder(["xa"])

    actual = encoder.encode(x={"1": 1, "2": 2}, a="a")
    expected = {"x1a0a": 1, "x2a0a": 2}

    self.assertEqual(expected, actual)
def test_singular_string_a(self):
    """Only the 'a' term is requested, so the supplied context is expected to be ignored."""
    encoder = InteractionsEncoder(["a"])

    actual = encoder.encode(x=["a"], a="d")
    expected = {"a0d": 1}

    self.assertEqual(expected, actual)
def test_string_numeric_xa(self):
    """A numeric context crossed with string actions keeps the numeric value per action key."""
    encoder = InteractionsEncoder(["xa"])

    actual = encoder.encode(x=[2], a=["d", "e"])
    expected = {"x0a0d": 2, "x0a1e": 2}

    self.assertEqual(expected, actual)
def test_string_xa(self):
    """A string context crossed with string actions gives one feature of value 1 per action."""
    encoder = InteractionsEncoder(["xa"])

    actual = encoder.encode(x=["a"], a=["d", "e"])
    expected = {"x0aa0d": 1, "x0aa1e": 1}

    self.assertEqual(expected, actual)
def __init__(self,
             n_interactions: int,
             n_actions: int = 10,
             n_context_features: int = 10,
             n_action_features: int = 10,
             reward_features: Sequence[str] = ["a", "xa"],
             seed: int = 1) -> None:
    """Instantiate a LinearSyntheticSimulation.

    Args:
        n_interactions: The number of interactions the simulation should have.
        n_actions: The number of actions each interaction should have.
        n_context_features: The number of features each context should have.
        n_action_features: The number of features each action should have.
        reward_features: The features in the simulation's linear reward function.
        seed: The random number seed used to generate all features, weights and noise in the simulation.
    """
    self._args = (n_interactions, n_actions, n_context_features, n_action_features, reward_features, seed)

    self._n_actions = n_actions
    self._n_context_features = n_context_features
    self._n_action_features = n_action_features
    self._reward_features = reward_features
    self._seed = seed

    # When a namespace has no features, drop its letter from every reward term
    # and discard terms that become empty. De-duplication preserves first-seen
    # order (dict.fromkeys) rather than going through a set: set iteration
    # order for strings depends on hash randomization, which would let the
    # same seed pair weights with different features in different runs.
    if not self._n_context_features:
        reward_features = list(dict.fromkeys(filter(None, [f.replace('x', '') for f in reward_features])))

    if not self._n_action_features:
        reward_features = list(dict.fromkeys(filter(None, [f.replace('a', '') for f in reward_features])))

    rng = CobaRandom(seed)
    feat_encoder = InteractionsEncoder(reward_features)

    #to try and make sure high-order polynomials are well behaved
    #we center our context and action features on 1 and give them
    #a very small amount of variance. Then, in post processing, we
    #shift and re-scale our reward to center and fill in [0,1].
    max_degree = max([len(f) for f in reward_features]) if reward_features else 1
    feat_gen = lambda n: tuple([g * rng.choice([1, -1]) for g in rng.gausses(n, mu=1, sigma=1 / (2 * max_degree))])
    one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

    # Encode an all-ones example just to learn how many features the encoder emits.
    feature_count = len(feat_encoder.encode(x=[1] * n_context_features, a=[1] * n_action_features))
    # Without action features each (one-hot) action gets its own weight vector.
    weight_parts = 1 if n_action_features else n_actions
    weight_count = 1 if feature_count == 0 else feature_count

    self._weights = [[1 - 2 * w for w in rng.randoms(weight_count)] for _ in range(weight_parts)]

    self._bias = 0
    self._clip = False

    def context(index: int) -> Context:
        return feat_gen(n_context_features) if n_context_features else None

    def actions(index: int, context: Context) -> Sequence[Action]:
        return [feat_gen(n_action_features) for _ in range(n_actions)] if n_action_features else one_hot_acts

    def reward(index: int, context: Context, action: Action) -> float:
        # Fall back to a single constant feature when the encoder emits nothing.
        F = feat_encoder.encode(x=context, a=action) or [1]
        W = self._weights[0 if n_action_features else action.index(1)]

        return self._bias + sum([w * f for w, f in zip(W, F)])

    # Sample rewards from 100 generated interactions to estimate their mean
    # and range, then fold that shift/scale into the bias and weights.
    rewards = [reward(i, c, a) for i in range(100) for c in [context(i)] for a in actions(i, c)]

    m = mean(rewards)
    s = (max(rewards) - min(rewards)) or 1  # guard against a zero range

    self._bias = 0.5 - m / s
    self._weights = [[w / s for w in W] for W in self._weights]
    self._clip = True

    super().__init__(n_interactions, context, actions, reward)
def test_dense_meta_x_a(self):
    """A DenseWithMeta-wrapped context is expected to encode like a plain list."""
    encoder = InteractionsEncoder(["x", "a"])

    wrapped_x = DenseWithMeta([1, 2, 3])
    actual = encoder.encode(x=wrapped_x, a=[1, 2])

    self.assertEqual([1, 2, 3, 1, 2], actual)
def test_empty_interactions_sparse(self):
    """An encoder with no interaction terms is expected to return an empty dict for sparse input."""
    encoder = InteractionsEncoder([])

    actual = encoder.encode(a={'A': 2}, b=None, c=None)

    self.assertEqual({}, actual)
def test_singular_string_xa(self):
    """Two bare multi-character strings crossed together give one feature of value 1."""
    encoder = InteractionsEncoder(["xa"])

    actual = encoder.encode(x="abc", a="dbc")
    expected = {"x0abca0dbc": 1}

    self.assertEqual(expected, actual)
def test_empty_interactions_dense(self):
    """An encoder with no interaction terms is expected to return an empty list for dense input."""
    encoder = InteractionsEncoder([])

    actual = encoder.encode(a=2, b=None, c=None)

    self.assertEqual([], actual)
def test_sparse_x_a_numeric_keys(self):
    """First-order sparse features keyed by integers are expected to get string names."""
    encoder = InteractionsEncoder(["x", "a"])

    actual = encoder.encode(x={1: 1, 2: 2}, a={1: 3, 2: 4})
    expected = {"x1": 1, "x2": 2, "a1": 3, "a2": 4}

    self.assertEqual(expected, actual)
def process(self, environment: Environment, interactions: Iterable[SimulatedInteraction]) -> Dict[Any, Any]:
    """Compute descriptive statistics for a classification-style environment.

    The correct label of each interaction is taken to be the action whose
    reward equals 1. Statistics that need scikit-learn are attempted first
    and skipped if CobaExit is raised; the remaining statistics use only
    the standard library. The result is merged on top of whatever
    SimpleEnvironmentTask reports for the same inputs.

    Args:
        environment: The environment the interactions came from.
        interactions: The interactions to summarize.

    Returns:
        A dict of statistic name to value.
    """
    # The interactions are iterated here and again by SimpleEnvironmentTask
    # below. Materialize them once so that a one-shot iterator is not already
    # exhausted by the time of the second pass.
    interactions = list(interactions)

    contexts, actions, rewards = zip(*[(i.context, i.actions, i.kwargs["rewards"]) for i in interactions])
    env_statistics = {}

    try:
        PackageChecker.sklearn("ClassEnvironmentTask.process")

        import numpy as np
        import scipy.sparse as sp
        import scipy.stats as st
        from sklearn.feature_extraction import FeatureHasher
        from sklearn.tree import DecisionTreeClassifier
        from sklearn.model_selection import cross_val_score
        from sklearn.metrics import pairwise_distances
        from sklearn.decomposition import TruncatedSVD, PCA

        X = [InteractionsEncoder('x').encode(x=c, a=[]) for c in contexts]
        Y = [a[r.index(1)] for a, r in zip(actions, rewards)]
        C = collections.defaultdict(list)
        clf = DecisionTreeClassifier(random_state=1)

        # Sparse (dict) contexts are hashed into a fixed-width sparse matrix.
        if isinstance(X[0], dict):
            X = FeatureHasher(n_features=2**14, input_type="dict").fit_transform(X)

        if len(Y) > 5:
            scores = cross_val_score(clf, X, Y, cv=5)
            env_statistics["bayes_rate_avg"] = round(scores.mean(), 4)
            env_statistics["bayes_rate_iqr"] = round(st.iqr(scores), 4)

        svd = TruncatedSVD(n_components=8) if sp.issparse(X) else PCA()
        svd.fit(X)
        env_statistics["PcaVarExplained"] = svd.explained_variance_ratio_[:8].tolist()

        # Group the examples by label, then compute one centroid per label.
        for x, y in zip(X, Y):
            C[y].append(x)

        if sp.issparse(X):
            centroids = sp.vstack([sp.csr_matrix(sp.vstack(c).mean(0)) for c in C.values()])
        else:
            centroids = np.vstack([np.vstack(c).mean(0) for c in C.values()])

        centroid_order = list(C.keys())
        centroid_index = [centroid_order.index(y) for y in Y]
        centroid_dists = pairwise_distances(X, centroids)
        closest_index = centroid_dists.argmin(1)
        # Fraction of examples whose nearest centroid is their own label's centroid.
        cluster_purity = (closest_index == centroid_index).mean()

        env_statistics["centroid_purity"] = round(cluster_purity, 4)
        env_statistics["centroid_distance"] = round(median(centroid_dists[range(centroid_dists.shape[0]), centroid_index]), 4)

    except CobaExit:
        # Deliberate best effort: the statistics above are simply omitted.
        # NOTE(review): presumably raised by PackageChecker when scikit-learn
        # is not installed -- confirm.
        pass

    labels = set()
    features = set()
    feat_cnts = []
    label_cnts = collections.defaultdict(int)

    for c, a, f in zip(contexts, actions, rewards):
        inter_label = a[f.index(1)]
        # Sparse contexts contribute their keys, dense ones their positions.
        inter_feats = c.keys() if isinstance(c, dict) else range(len(c))

        labels.add(inter_label)
        features.update(inter_feats)
        feat_cnts.append(len(inter_feats))
        label_cnts[inter_label] += 1

    env_statistics["action_cardinality"] = len(labels)
    env_statistics["context_dimensions"] = len(features)
    env_statistics["context_median_nz"] = median(feat_cnts)
    env_statistics["imbalance_ratio"] = round(max(label_cnts.values()) / min(label_cnts.values()), 4)

    return {**SimpleEnvironmentTask().process(environment, interactions), **env_statistics}
def test_string_x(self):
    """Only the 'x' term is requested, so string context values encode to value 1 and the action is ignored."""
    encoder = InteractionsEncoder(["x"])

    actual = encoder.encode(x=["a", "b", "c"], a=["d", "e"])
    expected = {"x0a": 1, "x1b": 1, "x2c": 1}

    self.assertEqual(expected, actual)