Example #1
class MetaRmp(object):
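    # Regret-matching meta-policy over a fixed set of base policies.
    # Note: this example appears to be a mix-in; it also uses self.model,
    # self.template, self.loss, self.optimizer and self.policy, which must
    # be supplied by a cooperating base class that is not shown here.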
    def __init__(self, policies, *args, use_cumulative_values=False, **kwargs):
        super(MetaRmp, self).__init__(*args, **kwargs)
        self.policies = policies
        self.meta_qregrets = ResourceVariable(tf.zeros([len(policies), 1]),
                                              trainable=False)
        self.use_cumulative_values = use_cumulative_values

    def num_policies(self):
        return len(self.policies)

    def meta_policy(self):
        return cpea.rm_policy(self.meta_qregrets)

    def policy_activation(self, predictions):
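        # Blend the outputs of all base policies, weighted by the current
        # meta-policy.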
        policies = tf.stack([policy(predictions) for policy in self.policies],
                            axis=-1)
        meta_policy = tf.reshape(self.meta_policy(),
                                 [1, 1, self.num_policies()])
        return tf.reduce_sum(policies * meta_policy, axis=-1)

    def __call__(self, inputs, rewards):
        return ContextualKofnGame(
            tf.squeeze(self.template.prob_ith_element_is_sampled), rewards,
            self.policy(inputs))

    def loss_and_grad(self, inputs, rewards):
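        # Computes the model loss and gradient under the meta-mixed policy,
        # along with the instantaneous meta-regret of each base policy
        # (its expected value minus the meta-policy's expected value).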
        meta_policy = self.meta_policy()
        with tf.GradientTape() as tape:
            predictions = self.model(inputs)

            policies = tf.stack(
                [policy(predictions) for policy in self.policies], axis=-1)
            expanded_meta_policy = tf.reshape(self.meta_policy(),
                                              [1, 1, self.num_policies()])
            policy = tf.reduce_sum(policies * expanded_meta_policy, axis=-1)

            r = tf.stop_gradient(
                ContextualKofnGame(
                    tf.squeeze(self.template.prob_ith_element_is_sampled),
                    rewards, policy).cfv)
            loss_value = self.loss(predictions, policy, r)

        evs = tf.reduce_mean(tf.reduce_sum(tf.expand_dims(r, axis=-1) *
                                           policies,
                                           axis=1),
                             axis=0,
                             keepdims=True)
        inst_r = evs - tf.matmul(evs, meta_policy)
        grad = tape.gradient(loss_value, self.model.variables)
        return loss_value, (zip(grad, self.model.variables), inst_r)

    def apply(self, grad):
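        # Apply the model gradients, then accumulate the instantaneous
        # meta-regrets, clipping the cumulative regrets at zero (RM+-style).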
        self.optimizer.apply_gradients(grad[0])
        self.meta_qregrets.assign(tf.maximum(grad[1] + self.meta_qregrets,
                                             0.0))
        return self
Example #2
 def __init__(self, name, init_value, lower_limit, upper_limit, step_size=1e-6):
     global __all_variables__
     ResourceVariable.__init__(self, init_value, dtype=atfi.fptype(), trainable=True)
     self.init_value = init_value
     self.par_name = name
     self.step_size = step_size
     self.lower_limit = lower_limit
     self.upper_limit = upper_limit
     self.prev_value = None
     self.fixed = False
     self.error = 0.
     self.positive_error = 0.
     self.negative_error = 0.
     self.fitted_value = init_value
Example #3
 def __init__(self,
              new_variable_optimizer,
              use_locking=False,
              name=None,
              var_list=[]):
     super(CompositeOptimizer,
           self).__init__(use_locking,
                          type(self).__name__ if name is None else name)
     self._new_opt = new_variable_optimizer
     self._optimizers = None
     self.initializer = None
     self._num_updates = ResourceVariable(0, name='num_updates')
     if len(var_list) > 0:
         self._create_slots(var_list)
Example #4
class FitParameter:
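    # A named scalar fit parameter backed by a ResourceVariable, carrying
    # limits, a step size, and fields for the fitted value and fit errors.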
    def __init__(self,
                 name,
                 init_value,
                 lower_limit,
                 upper_limit,
                 step_size=1e-6):
        self.var = ResourceVariable(init_value,
                                    shape=(),
                                    name=name,
                                    dtype=atfi.fptype(),
                                    trainable=True)
        self.init_value = init_value
        self.name = name
        self.step_size = step_size
        self.lower_limit = lower_limit
        self.upper_limit = upper_limit
        self.prev_value = None
        self.fixed = False
        self.error = 0.0
        self.positive_error = 0.0
        self.negative_error = 0.0
        self.fitted_value = init_value

    def update(self, value):
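        # Assign only when the value actually changed, avoiding redundant
        # writes to the underlying variable.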
        if value != self.prev_value:
            self.var.assign(value)
            self.prev_value = value

    def __call__(self):
        return self.var

    def fix(self):
        self.fixed = True

    def float(self):
        self.fixed = False

    def setFixed(self, fixed):
        self.fixed = fixed

    def floating(self):
        """
        Return True if the parameter is not fixed and its step size is greater than 0.
        """
        return self.step_size > 0 and not self.fixed

    def numpy(self):
        return self.var.numpy()
Example #5
    def test_l1_mrr_linear_multiple_outputs(self):
        num_dimensions = 2
        num_players = 5
        num_examples = 10

        x = np.concatenate([
            np.random.normal(size=[num_examples, num_dimensions - 1]),
            np.ones([num_examples, 1])
        ],
                           axis=1).astype('float32')
        y = np.random.normal(
            size=[num_examples, num_players]).astype('float32')

        w = ResourceVariable(tf.zeros([num_dimensions, num_players]))

        loss = tf.reduce_mean(tf.keras.losses.mse(y, tf.matmul(x, w)))
        optimizer = CompositeOptimizer(
            lambda var: rm_optimizers.RmL1AmrrVariableOptimizer(var,
                                                                scale=1000.0),
            var_list=[w])

        self.assertEqual(0.0, tf.reduce_sum(tf.abs(w)).numpy())
        self.assertAlmostEqual(0.86844116, loss.numpy(), places=6)
        for t in range(10):
            with tf.GradientTape() as tape:
                loss = tf.reduce_mean(tf.keras.losses.mse(y, tf.matmul(x, w)))
            grad = tape.gradient(loss, [w])
            optimizer.apply_gradients(zip(grad, [w]))
            if t > 1:
                self.assertLess(loss.numpy(), 0.86844116)
        self.assertAlmostEqual(0.85839784, loss.numpy(), places=6)

        # Compare this to rm_optimizers.RmL1VariableOptimizer:
        w = ResourceVariable(tf.zeros([num_dimensions, num_players]))

        loss = tf.reduce_mean(tf.keras.losses.mse(y, tf.matmul(x, w)))
        optimizer = CompositeOptimizer(
            lambda var: rm_optimizers.RmL1VariableOptimizer(var, scale=1000.0),
            var_list=[w])

        self.assertEqual(0.0, tf.reduce_sum(tf.abs(w)).numpy())
        self.assertAlmostEqual(0.86844116, loss.numpy(), places=6)
        for t in range(10):
            with tf.GradientTape() as tape:
                loss = tf.reduce_mean(tf.keras.losses.mse(y, tf.matmul(x, w)))
            grad = tape.gradient(loss, [w])
            optimizer.apply_gradients(zip(grad, [w]))
        self.assertAlmostEqual(97.47674, loss.numpy(), places=4)
Example #6
    def test_inf_linear_single_output(self):
        num_dimensions = 2
        num_players = 1
        num_examples = 10

        x = np.concatenate([
            np.random.normal(size=[num_examples, num_dimensions - 1]),
            np.ones([num_examples, 1])
        ],
                           axis=1).astype('float32')
        y = np.random.normal(
            size=[num_examples, num_players]).astype('float32')

        w = ResourceVariable(tf.zeros([num_dimensions, num_players]))

        loss = tf.reduce_mean(tf.keras.losses.mse(y, tf.matmul(x, w)))
        optimizer = CompositeOptimizer(
            lambda var: rm_optimizers.RmInfVariableOptimizer(var, scale=0.8))

        self.assertEqual(0.0, tf.reduce_sum(tf.abs(w)).numpy())
        self.assertAlmostEqual(1.1386044, loss.numpy(), places=7)
        for t in range(50):
            with tf.GradientTape() as tape:
                loss = tf.reduce_mean(tf.keras.losses.mse(y, tf.matmul(x, w)))
            grad = tape.gradient(loss, [w])
            optimizer.apply_gradients(zip(grad, [w]))
            if t > 1:
                self.assertLess(loss.numpy(), 1.1386044)
        self.assertAlmostEqual(0.5145497, loss.numpy(), places=6)
        self.assertGreater(tf.reduce_sum(tf.abs(w)), 0.8)
Example #7
 def __init__(self,
              name,
              init_value,
              lower_limit,
              upper_limit,
              step_size=1e-6):
     self.var = ResourceVariable(init_value,
                                 shape=(),
                                 name=name,
                                 dtype=atfi.fptype(),
                                 trainable=True)
     self.init_value = init_value
     self.name = name
     self.step_size = step_size
     self.lower_limit = lower_limit
     self.upper_limit = upper_limit
     self.prev_value = None
     self.fixed = False
     self.error = 0.0
     self.positive_error = 0.0
     self.negative_error = 0.0
     self.fitted_value = init_value
Example #8
class CompositeOptimizer(optimizer.Optimizer):
    @classmethod
    def combine(cls, *new_variable_optimizer, **kwargs):
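        # Convenience constructor: pair the i-th variable with the i-th
        # per-variable optimizer factory.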
        return cls(lambda var, i: new_variable_optimizer[i](var), **kwargs)

    def __init__(self,
                 new_variable_optimizer,
                 use_locking=False,
                 name=None,
                 var_list=[]):
        super(CompositeOptimizer,
              self).__init__(use_locking,
                             type(self).__name__ if name is None else name)
        self._new_opt = new_variable_optimizer
        self._optimizers = None
        self.initializer = None
        self._num_updates = ResourceVariable(0, name='num_updates')
        if len(var_list) > 0:
            self._create_slots(var_list)

    def variables(self):
        return sum([list(opt.variables()) for opt in self._optimizers],
                   [self._num_updates])

    def _create_slots(self, var_list):
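        # Lazily build one optimizer per variable; if the factory accepts a
        # second argument, the variable's index is passed as well.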
        if self._optimizers is None:
            self._optimizers = []
            initializers = [self._num_updates.initializer]
            pass_i = (len(
                signature(self._new_opt, follow_wrapped=False).parameters) > 1)
            for i in range(len(var_list)):
                var = var_list[i]
                self._optimizers.append(
                    (self._new_opt(var, i) if pass_i else self._new_opt(var)))
                initializers.append(self._optimizers[-1].initializer)
            self.initializer = tf.group(*initializers)
        return self.initializer

    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
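        # Route each gradient to its variable's own optimizer and increment
        # the shared update counter.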
        grads, var_list = zip(*grads_and_vars)
        if self._optimizers is None: self._create_slots(var_list)
        updates = []
        for i in range(len(grads)):
            updates.append(self._apply_gradients(self._optimizers[i],
                                                 grads[i]))
        updates.append(
            self._num_updates.assign_add(1, use_locking=self._use_locking))
        return tf.group(*updates)

    def _apply_gradients(self, optimizer, grad):
        return optimizer.dense_update(grad, self._num_updates)
Example #9
class TabularAdaNormalHedgeCurrent(TabularCfrCurrent):
    @classmethod
    def zeros(cls, num_info_sets, num_actions, *args, **kwargs):
        return cls(tf.zeros([num_info_sets, num_actions]),
                   tf.zeros([num_info_sets, num_actions]), *args, **kwargs)

    @classmethod
    def load(cls, name):
        return cls(*np.load('{}.npy'.format(name)))

    def save(self, name):
        np.save(name, [self.regrets, self._counts])
        return self

    def graph_save(self, name, sess):
        np.save(name, sess.run([self.regrets, self._counts]))
        return self

    def __init__(self, regrets, counts=None, degree=-1):
        self._counts = ResourceVariable(
            tf.zeros_like(regrets) if counts is None else counts)
        self._degree = degree
        super().__init__(regrets)

    def positive_projection(self, v):
        return general_normal_hedge_dt_positive_projection(v,
                                                           self._counts,
                                                           degree=self._degree)

    def update_with_cfv(self, cfv, rm_plus=False):
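        # Same cumulative-regret update as plain regret matching, but also
        # accumulate the absolute instantaneous regrets in _counts, which
        # parameterize the NormalHedge-style positive projection.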
        evs = cpea.utility(self.policy(), cfv)
        regrets = cfv - evs

        r = self.regrets + regrets
        if rm_plus:
            r = tf.nn.relu(r)
        self._has_updated = True
        return evs, tf.group(self.regrets.assign(r),
                             self._counts.assign_add(tf.abs(regrets)))

    @property
    def variables(self):
        return super().variables + [self._counts]
Example #10
    def test_inf_single_column_weights_only(self):
        num_dimensions = 2
        num_players = 1
        max_value = 0.5

        w = ResourceVariable(tf.zeros([num_dimensions, num_players]))

        loss = tf.reduce_mean(w + max_value)
        optimizer = CompositeOptimizer(
            lambda var: rm_optimizers.RmInfVariableOptimizer(var,
                                                             scale=max_value))

        self.assertEqual(max_value, loss.numpy())
        with tf.GradientTape() as tape:
            loss = tf.reduce_mean(w + max_value)
        grad = tape.gradient(loss, [w])
        optimizer.apply_gradients(zip(grad, [w]))
        loss = tf.reduce_mean(w + max_value)
        self.assertEqual(0.0, loss.numpy())
Example #11
    def test_nn_two_column_weights_only(self):
        num_dimensions = 3
        num_players = 2

        w = ResourceVariable(tf.zeros([num_dimensions, num_players]))

        y = 0.1

        loss = tf.reduce_mean(tf.keras.losses.mse(tf.fill(w.shape, y), w))
        optimizer = CompositeOptimizer(
            lambda var: rm_optimizers.RmNnVariableOptimizer(var, scale=1.0))

        self.assertAlmostEqual(y * y, loss.numpy())
        for i in range(20):
            with tf.GradientTape() as tape:
                loss = tf.reduce_mean(
                    tf.keras.losses.mse(tf.fill(w.shape, y), w))
            grad = tape.gradient(loss, [w])
            optimizer.apply_gradients(zip(grad, [w]))
        loss = tf.reduce_mean(tf.keras.losses.mse(tf.fill(w.shape, y), w))
        self.assertAlmostEqual(0.0, loss.numpy())
Example #12
 def __init__(self, regrets):
     self.regrets = ResourceVariable(regrets)
     self._has_updated = False
     self._policy = cpea.normalized(self.positive_projection(self.regrets),
                                    axis=1)
Example #13
class TabularCfrCurrent(object):
    @classmethod
    def zeros(cls, num_info_sets, num_actions, *args, **kwargs):
        return cls(tf.zeros([num_info_sets, num_actions]), *args, **kwargs)

    @classmethod
    def load(cls, name):
        return cls(np.load('{}.npy'.format(name)))

    def __init__(self, regrets):
        self.regrets = ResourceVariable(regrets)
        self._has_updated = False
        self._policy = cpea.normalized(self.positive_projection(self.regrets),
                                       axis=1)

    def positive_projection(self, v):
        return rm_positive_projection(v)

    def save(self, name):
        np.save(name, self.regrets)
        return self

    def graph_save(self, name, sess):
        np.save(name, sess.run(self.regrets))
        return self

    def num_info_sets(self):
        return tf.shape(self.regrets)[0]

    def num_actions(self):
        return tf.shape(self.regrets)[1]

    def clear(self):
        self.regrets.assign(tf.zeros_like(self.regrets))

    def copy(self):
        return self.__class__(self.regrets)

    def policy(self):
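        # In eager mode, lazily recompute the normalized policy, and only
        # after the regrets have changed.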
        if tf.executing_eagerly() and self._has_updated:
            self._policy = cpea.normalized(self.positive_projection(
                self.regrets),
                                           axis=1)
            self._has_updated = False
        return self._policy

    def update(self, env, **kwargs):
        return self.update_with_cfv(env(self.policy()), **kwargs)

    def update_with_cfv(self, cfv, rm_plus=False):
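        # Regret-matching update: add the instantaneous counterfactual
        # regrets to the cumulative regrets, clipping at zero when rm_plus
        # is set (CFR+).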
        evs = cpea.utility(self.policy(), cfv)
        regrets = cfv - evs

        r = self.regrets + regrets
        if rm_plus:
            r = tf.nn.relu(r)
        self._has_updated = True
        return evs, self.regrets.assign(r)

    @property
    def variables(self):
        return [self.regrets]

    @property
    def initializer(self):
        return tf.group(*[v.initializer for v in self.variables])
Example #14
 def __init__(self, cur, policy_sum, t=0):
     self._cur = cur
     self.policy_sum = ResourceVariable(policy_sum)
     self.t = ResourceVariable(t)
Example #15
class TabularCfr(object):
    @classmethod
    def zeros(cls,
              num_info_sets,
              num_actions,
              *args,
              cur_cls=TabularCfrCurrent,
              **kwargs):
        return cls(cur_cls.zeros(num_info_sets, num_actions),
                   tf.zeros([num_info_sets, num_actions]), *args, **kwargs)

    @classmethod
    def load(cls, name, cur_cls=TabularCfrCurrent):
        return cls(cur_cls.load('{}.cur'.format(name)),
                   *np.load('{}.npy'.format(name)))

    def __init__(self, cur, policy_sum, t=0):
        self._cur = cur
        self.policy_sum = ResourceVariable(policy_sum)
        self.t = ResourceVariable(t)

    def save(self, name):
        self._cur.save('{}.cur'.format(name))
        np.save(name, [self.policy_sum, self.t])
        return self

    def graph_save(self, name, sess):
        self._cur.graph_save('{}.cur'.format(name), sess)
        np.save(name, sess.run([self.policy_sum, self.t]))
        return self

    @property
    def num_info_sets(self):
        return self._cur.num_info_sets

    @property
    def num_actions(self):
        return self._cur.num_actions

    def clear(self):
        self._cur.clear()
        self.policy_sum.assign(tf.zeros_like(self.policy_sum))
        self.t.assign(0)

    def copy(self, copy_t=False):
        if copy_t:
            return self.__class__(self._cur.copy(), self.policy_sum, self.t)
        else:
            return self.__class__(self._cur.copy(), self.policy_sum)

    @property
    def cur(self):
        return self._cur.policy

    def avg(self):
        return cpea.normalized(self.policy_sum, axis=1)

    def policy(self, mix_avg=1.0):
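        # Mixture of the current and average strategies: mix_avg=0 returns
        # the current strategy, mix_avg=1 the average strategy.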
        use_cur = mix_avg < 1
        pol = 0.0
        if use_cur:
            cur = self.cur()
            pol = (1.0 - mix_avg) * cur

        use_avg = mix_avg > 0
        if use_avg:
            avg = self.avg()
            pol = mix_avg * avg + pol
        return pol

    def update(self, env, mix_avg=0.0, rm_plus=False, next_policy_sum=None):
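        # Play the mixed strategy against the environment, update the
        # current regrets, advance t, and add the current strategy to the
        # policy sum (linearly weighted when rm_plus is set, uniformly
        # otherwise).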
        cur = self.cur()
        policy = (1.0 - mix_avg) * cur

        use_avg = mix_avg > 0
        if use_avg:
            avg = self.avg()
            policy = policy + mix_avg * avg

        evs, update_current = self._cur.update_with_cfv(env(policy),
                                                        rm_plus=rm_plus)
        update_t = self.t.assign_add(1)

        if next_policy_sum is None:
            next_policy_sum = (linear_avg_next_policy_sum
                               if rm_plus else uniform_avg_next_policy_sum)

        update_policy_sum = self.policy_sum.assign(
            next_policy_sum(self.policy_sum, cur,
                            tf.cast(self.t + 1, tf.float32)))
        return evs, tf.group(update_policy_sum, update_current, update_t)

    @property
    def variables(self):
        return self._cur.variables + [self.policy_sum, self.t]

    @property
    def initializer(self):
        return tf.group(*[v.initializer for v in self.variables])
Example #16
 def __init__(self, regrets, counts=None, degree=-1):
     self._counts = ResourceVariable(
         tf.zeros_like(regrets) if counts is None else counts)
     self._degree = degree
     super().__init__(regrets)
Example #17
 def _get_or_make_slot(self, val, name, **kwargs):
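     # Store val in a non-trainable ResourceVariable slot under the given name.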
     self._slots[name] = ResourceVariable(val, trainable=False, **kwargs)
     return self._slots[name]
Example #18
 def __init__(self, policies, *args, use_cumulative_values=False, **kwargs):
     super(MetaRmp, self).__init__(*args, **kwargs)
     self.policies = policies
     self.meta_qregrets = ResourceVariable(tf.zeros([len(policies), 1]),
                                           trainable=False)
     self.use_cumulative_values = use_cumulative_values