Example #1
    def __init__(self,
                 n_interactions: int,
                 n_actions: int = 10,
                 n_context_features: int = 10,
                 n_action_features: int = 10,
                 n_exemplars: int = 10,
                 kernel: Literal['linear', 'polynomial',
                                 'exponential'] = 'exponential',
                 degree: int = 2,
                 gamma: float = 1,
                 seed: int = 1) -> None:
        """Instantiate a KernelSyntheticSimulation.

        Args:
            n_interactions: The number of interactions the simulation should have.
            n_actions: The number of actions each interaction should have.
            n_context_features: The number of features each context should have.
            n_action_features: The number of features each action should have.
            n_exemplars: The number of exemplar (context, action) pairs.
            kernel: The family of the kernel basis functions.
            degree: The degree used by the polynomial kernel (only relevant when kernel='polynomial').
            gamma: The scale parameter used by the exponential kernel (only relevant when kernel='exponential').
            seed: The random number seed used to generate all features, weights and noise in the simulation.
        """

        self._args = (n_interactions, n_actions, n_context_features,
                      n_action_features, n_exemplars, kernel, degree, gamma,
                      seed)

        self._n_actions = n_actions
        self._n_context_features = n_context_features
        self._n_action_features = n_action_features
        self._n_exemplars = n_exemplars
        self._seed = seed
        self._kernel = kernel
        self._degree = degree
        self._gamma = gamma

        rng = CobaRandom(seed)

        #if there are no features then we are unable to define exemplars
        if n_action_features + n_context_features == 0: n_exemplars = 0

        feat_gen = lambda n: tuple(rng.gausses(n, 0, .75))
        one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

        #one set of exemplar points shared across actions when action features
        #exist; otherwise one set of exemplar points per one-hot action
        self._exemplars = [
            [feat_gen(n_action_features + n_context_features) for _ in range(n_exemplars)]
            for _ in range(1 if n_action_features else n_actions)
        ]

        weight_count = n_actions if n_exemplars == 0 else n_exemplars
        self._weights = [1 - 2 * w for w in rng.randoms(weight_count)]  #uniform in (-1,1]

        self._bias = 0  #placeholder; replaced after the reward normalization below

        if kernel == 'polynomial':
            #rescale exemplars so the dot-product between features and an exemplar
            #falls in [0, upper_bound], where (1 + upper_bound)**degree == 1.5;
            #this keeps higher-order polynomial kernels reasonably well behaved
            upper_bound = (1.5)**(1 / degree) - 1
            self._exemplars = [
                [[upper_bound * ee / sum(e) for ee in e] for e in E]
                for E in self._exemplars
            ]

        def context(index: int) -> Context:
            return feat_gen(n_context_features) if n_context_features else None

        def actions(index: int, context: Context) -> Sequence[Action]:
            if n_action_features:
                return [feat_gen(n_action_features) for _ in range(n_actions)]
            else:
                return one_hot_acts

        def reward(index: int, context: Context, action: Action) -> float:

            if n_exemplars == 0:
                return self._bias + self._weights[action.index(1)]

            #handles None context
            context = context or []

            if n_action_features:
                f = list(context) + list(action)
                W = self._weights
                E = self._exemplars[0]
            else:
                f = list(context)
                W = self._weights
                E = self._exemplars[action.index(1)]

            if kernel == "linear":
                K = lambda x1, x2: self._linear_kernel(x1, x2)
            elif kernel == "polynomial":
                K = lambda x1, x2: self._polynomial_kernel(x1, x2, self._degree)
            elif kernel == "exponential":
                K = lambda x1, x2: self._exponential_kernel(x1, x2, self._gamma)

            return self._bias + sum([w * K(e, f) for w, e in zip(W, E)])

        #sample 100 interactions to estimate the reward distribution
        rewards = [
            reward(i, c, a)
            for i in range(100)
            for c in [context(i)]
            for a in actions(i, c)
        ]

        m = mean(rewards)
        s = (max(rewards) - min(rewards)) or 1  #guard against a zero range

        #shift and scale the reward function so that the sampled rewards
        #are centered on 0.5 with a range of approximately 1
        self._bias = 0.5 - m / s
        self._weights = [w / s for w in self._weights]

        super().__init__(n_interactions, context, actions, reward)
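The kernel helpers called above (_linear_kernel, _polynomial_kernel, _exponential_kernel) are not part of this snippet. Below is a minimal sketch of what they plausibly compute given how they are called; the bodies are assumptions, not the library's actual implementation:

import math

#hypothetical sketch; the real methods are defined elsewhere in the class
def linear_kernel(F1, F2):
    #plain dot product
    return sum(f1 * f2 for f1, f2 in zip(F1, F2))

def polynomial_kernel(F1, F2, degree):
    #(1 + <F1,F2>)**degree, consistent with the upper_bound math above
    return (1 + linear_kernel(F1, F2))**degree

def exponential_kernel(F1, F2, gamma):
    #exp(-||F1 - F2|| / gamma)
    return math.exp(-math.sqrt(sum((f1 - f2)**2 for f1, f2 in zip(F1, F2))) / gamma)

A minimal usage sketch, assuming this is coba's KernelSyntheticSimulation:

#hypothetical usage
sim = KernelSyntheticSimulation(n_interactions=500, n_actions=5, kernel='polynomial', degree=3)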
Example #2
    def __init__(self,
                 n_interactions: int,
                 n_actions: int = 10,
                 n_context_features: int = 10,
                 n_action_features: int = 10,
                 seed: int = 1) -> None:
        """Instantiate an MLPSythenticSimulation.

        Args:
            n_interactions: The number of interactions the simulation should have.
            n_actions: The number of actions each interaction should have.
            n_context_features: The number of features each context should have.
            n_action_features: The number of features each action should have.
            seed: The random number seed used to generate all features, weights and noise in the simulation.
        """

        self._args = (n_interactions, n_actions, n_context_features,
                      n_action_features, seed)

        self._n_actions = n_actions
        self._n_context_features = n_context_features
        self._n_action_features = n_action_features
        self._seed = seed

        rng = CobaRandom(seed)

        input_layer_size = n_context_features + n_action_features
        hidden_layer_size = 50
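        #the reward function below is thus a one-hidden-layer MLP:
        #input features -> 50 sigmoid units -> one linear output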

        self._bias = 0  #placeholder; replaced after the reward normalization below

        if input_layer_size:
            hidden_weights = [
                [rng.gausses(input_layer_size, 0, 1.5) for _ in range(hidden_layer_size)]
                for _ in range(1 if n_action_features else n_actions)
            ]

            #sigmoid activation
            hidden_activation = lambda x: 1 / (1 + math.exp(-x))
            hidden_output = lambda inputs, weights: hidden_activation(sum(i * w for i, w in zip(inputs, weights)))

            self._output_weights = rng.gausses(hidden_layer_size)
        else:
            self._output_weights = rng.gausses(n_actions)

        def context(index: int) -> Context:
            return tuple(rng.gausses(n_context_features)) if n_context_features else None

        def actions(index: int, context: Context) -> Sequence[Action]:
            if n_action_features:
                return [rng.gausses(n_action_features) for _ in range(n_actions)]
            else:
                return OneHotEncoder().fit_encodes(range(n_actions))

        def reward(index: int, context: Context, action: Action) -> float:

            #handles None context
            context = context or []

            if not n_action_features and not n_context_features:
                return self._bias + self._output_weights[action.index(1)]

            if n_action_features:
                I = list(context) + list(action)
                W = self._output_weights
                H = hidden_weights[0]
            else:
                I = list(context)
                W = self._output_weights
                H = hidden_weights[action.index(1)]

            hidden_outputs = [hidden_output(I, h) for h in H]

            return self._bias + sum([w * hout for w, hout in zip(W, hidden_outputs)])

        #sample 100 interactions to estimate the reward distribution
        rewards = [
            reward(i, c, a)
            for i in range(100)
            for c in [context(i)]
            for a in actions(i, c)
        ]

        m = mean(rewards)
        s = (max(rewards) - min(rewards)) or 1  #guard against a zero range

        #shift and scale the reward function so that the sampled rewards
        #are centered on 0.5 with a range of approximately 1
        self._bias = 0.5 - m / s
        self._output_weights = [w / s for w in self._output_weights]

        super().__init__(n_interactions, context, actions, reward)
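Here the reward is bias + sum_j(w_j * sigmoid(h_j . input)), with one hidden-weight set per action when actions are one-hot. A minimal usage sketch, assuming this is coba's MLPSyntheticSimulation:

#hypothetical usage
sim = MLPSyntheticSimulation(n_interactions=1000, n_actions=5, n_context_features=4, n_action_features=0)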
Example #3
    def __init__(self,
                 n_interactions: int,
                 n_actions: int = 10,
                 n_context_features: int = 10,
                 n_action_features: int = 10,
                 reward_features: Sequence[str] = ["a", "xa"],
                 seed: int = 1) -> None:
        """Instantiate a LinearSyntheticSimulation.

        Args:
            n_interactions: The number of interactions the simulation should have.
            n_actions: The number of actions each interaction should have.
            n_context_features: The number of features each context should have.
            n_action_features: The number of features each action should have.
            reward_features: The features in the simulation's linear reward function.
            seed: The random number seed used to generate all features, weights and noise in the simulation.
        """

        self._args = (n_interactions, n_actions, n_context_features,
                      n_action_features, reward_features, seed)

        self._n_actions = n_actions
        self._n_context_features = n_context_features
        self._n_action_features = n_action_features
        self._reward_features = reward_features
        self._seed = seed

        #when there are no context features, drop 'x' terms from the reward features
        if not self._n_context_features:
            reward_features = list(set(filter(None, [f.replace('x', '') for f in reward_features])))

        #when there are no action features, drop 'a' terms from the reward features
        if not self._n_action_features:
            reward_features = list(set(filter(None, [f.replace('a', '') for f in reward_features])))

        rng = CobaRandom(seed)
        feat_encoder = InteractionsEncoder(reward_features)
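        #e.g., with reward_features ['a','xa'] the encoder presumably emits the raw
        #action features plus every context*action cross term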

        #to try and make sure high-order polynomials are well behaved
        #we center our context and action features on 1 and give them
        #a very small amount of variance. Then, in post processing, we
        #shift and re-scale our reward to center and fill in [0,1].
        max_degree = max([len(f) for f in reward_features]) if reward_features else 1
        feat_gen = lambda n: tuple(
            g * rng.choice([1, -1]) for g in rng.gausses(n, mu=1, sigma=1 / (2 * max_degree))
        )
        one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

        feature_count = len(feat_encoder.encode(x=[1] * n_context_features, a=[1] * n_action_features))
        weight_parts = 1 if n_action_features else n_actions
        weight_count = 1 if feature_count == 0 else feature_count

        self._weights = [[1 - 2 * w for w in rng.randoms(weight_count)] for _ in range(weight_parts)]

        self._bias = 0  #placeholder; replaced after the reward normalization below
        self._clip = False

        def context(index: int) -> Context:
            return feat_gen(n_context_features) if n_context_features else None

        def actions(index: int, context: Context) -> Sequence[Action]:
            if n_action_features:
                return [feat_gen(n_action_features) for _ in range(n_actions)]
            else:
                return one_hot_acts

        def reward(index: int, context: Context, action: Action) -> float:

            #an empty encoding falls back to a single constant feature
            F = feat_encoder.encode(x=context, a=action) or [1]
            W = self._weights[0 if n_action_features else action.index(1)]

            return self._bias + sum([w * f for w, f in zip(W, F)])

        #sample 100 interactions to estimate the reward distribution
        rewards = [
            reward(i, c, a)
            for i in range(100)
            for c in [context(i)]
            for a in actions(i, c)
        ]

        m = mean(rewards)
        s = (max(rewards) - min(rewards)) or 1  #guard against a zero range

        #shift and scale the reward function so that the sampled rewards
        #are centered on 0.5 with a range of approximately 1
        self._bias = 0.5 - m / s
        self._weights = [[w / s for w in W] for W in self._weights]
        self._clip = True  #presumably clips rewards that still fall outside [0,1]

        super().__init__(n_interactions, context, actions, reward)
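A minimal usage sketch, assuming this is coba's LinearSyntheticSimulation; in a reward_features string, 'x' refers to context features and 'a' to action features:

#hypothetical usage
sim = LinearSyntheticSimulation(
    n_interactions=1000,
    n_actions=3,
    n_context_features=2,
    n_action_features=2,
    reward_features=['a', 'xa'],  #action terms plus context*action cross terms
)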