Example #1
0
    def testSecondOrderGradientCalculation(self):
        """Checks the decayed gradient estimate for second_order_gradient pruning."""
        hparams_spec = ",".join([
            "prune_option=second_order_gradient",
            "gradient_decay_rate=0.5",
        ])
        pruning_hparams = pruning.get_pruning_hparams().parse(hparams_spec)
        tf.logging.info(pruning_hparams)

        weights = tf.Variable(tf.linspace(1.0, 10.0, 10), name="weights")
        _ = pruning.apply_mask(weights, prune_option="second_order_gradient")

        pruner = pruning.Pruning(pruning_hparams)
        update_old = pruner.old_weight_update_op()
        update_old_old = pruner.old_old_weight_update_op()
        update_gradient = pruner.gradient_update_op()

        with self.cached_session() as session:
            tf.global_variables_initializer().run()
            session.run(update_old)
            session.run(update_old_old)
            # Double the weights so there is a nonzero change to estimate from.
            session.run(tf.assign(weights, tf.math.scalar_mul(2.0, weights)))
            session.run(update_gradient)

            old_weight = pruning.get_old_weights()[0]
            old_old_weight = pruning.get_old_old_weights()[0]
            gradient = pruning.get_gradients()[0]

            # Expected: decay_rate * normalized original weights.
            expected = tf.math.scalar_mul(
                0.5, tf.nn.l2_normalize(tf.linspace(1.0, 10.0, 10)))
            self.assertAllEqual(gradient.eval(), expected.eval())
            self.assertAllEqual(old_weight.eval(), old_old_weight.eval())
Example #2
0
def get_checkpoint_init_fn():
    """Returns the checkpoint init_fn if the checkpoint is provided."""
    if not FLAGS.fine_tune_checkpoint:
        return None

    variables_to_restore = slim.get_variables_to_restore()
    global_step_reset = tf.assign(tf.train.get_or_create_global_step(), 0)
    # When restoring from a floating point model, the min/max values for
    # quantized weights and activations are not present. We instruct slim
    # to ignore variables that are missing during restoration by setting
    # ignore_missing_vars=True.
    slim_init_fn = slim.assign_from_checkpoint_fn(
        FLAGS.fine_tune_checkpoint,
        variables_to_restore,
        ignore_missing_vars=True)

    def init_fn(sess):
        slim_init_fn(sess)
        # When restoring from a floating point model, reset the global step
        # to zero so exponential decay yields reasonable learning rates.
        sess.run(global_step_reset)

    return init_fn
    def _setup_graph(self,
                     n_inp,
                     n_out,
                     drop_frac,
                     start_iter=1,
                     end_iter=4,
                     freq_iter=2):
        """Sets up a trivial training procedure for sparse training.

        Builds a single masked fully connected layer on random input, wraps
        SGD in a SparseSETOptimizer, and zeroes out roughly half of the mask
        entries at random.

        Args:
          n_inp: int, number of input features.
          n_out: int, number of output units.
          drop_frac: float, drop fraction passed to the sparse optimizer.
          start_iter: int, first iteration for mask updates.
          end_iter: int, last iteration for mask updates.
          freq_iter: int, frequency (in iterations) of mask updates.

        Returns:
          Tuple (sess, train_op, mask, weight, global_step).
        """
        tf.reset_default_graph()
        optim = tf.train.GradientDescentOptimizer(0.1)
        sparse_optim = sparse_optimizers.SparseSETOptimizer(
            optim, start_iter, end_iter, freq_iter, drop_fraction=drop_frac)
        x = tf.random.uniform((1, n_inp))
        y = layers.masked_fully_connected(x, n_out, activation_fn=None)
        # There is one masked layer to be trained.
        weight = pruning.get_weights()[0]
        mask = pruning.get_masks()[0]
        # Around half of the values of the mask is set to zero with `mask_update`.
        mask_update = tf.assign(
            mask,
            tf.constant(np.random.choice([0, 1],
                                         size=(n_inp, n_out),
                                         p=[1. / 2, 1. / 2]),
                        dtype=tf.float32))
        loss = tf.reduce_mean(y)
        # Fix: the global step was previously fetched twice; the call is
        # idempotent, so a single fetch suffices.
        global_step = tf.train.get_or_create_global_step()
        train_op = sparse_optim.minimize(loss, global_step)

        # Init
        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)
        sess.run([mask_update])

        return sess, train_op, mask, weight, global_step
Example #4
0
def gen_train_op(cost, params, step, iters, flags):
    """Build the generator train op.

    Args:
      cost: scalar generator loss tensor.
      params: list of generator variables to optimize.
      step: current step tensor (or int), used for LR decay.
      iters: total number of iterations, the decay horizon.
      flags: object providing lr_decay ('linear' | 'quadratic' | 'none'),
        lr_g, beta1, beta2 and optional weight_decay_g.

    Returns:
      The training op; when weight decay is enabled, an op that also decays
      all 'weights' variables after the Adam step.

    Raises:
      ValueError: if flags.lr_decay is not a supported mode.
    """
    if flags.lr_decay == 'linear':
        step_lr = (1. - (tf.cast(step, tf.float32) / iters))
    elif flags.lr_decay == 'quadratic':
        step_lr = ((1. - (tf.cast(step, tf.float32) / iters))**2)
    elif flags.lr_decay == 'none':
        step_lr = 1.
    else:
        # Fix: an unknown mode previously fell through every branch and
        # crashed later with an opaque NameError on step_lr.
        raise ValueError('Unsupported lr_decay mode: %r' % flags.lr_decay)
    train_op = tf.train.AdamOptimizer(step_lr * flags.lr_g, flags.beta1,
                                      flags.beta2).minimize(
                                          cost,
                                          var_list=params,
                                          colocate_gradients_with_ops=True)

    if flags.weight_decay_g is not None:
        # Decoupled weight decay: shrink weights after the Adam update.
        decay = (step_lr * flags.weight_decay_g)
        with tf.control_dependencies([train_op]):
            weights = [p for p in params if 'weights' in p.name]
            decayed = [w - (decay * w) for w in weights]
            decay_op = tf.group(
                *[tf.assign(w, d) for w, d in zip(weights, decayed)])
        train_op = decay_op

    return train_op
Example #5
0
    def testAppendGradientsWithLossScaleWithtNan(self):
        """NaN/Inf gradients: apply op is skipped and the loss scale halves."""
        counter = tf.Variable(0)
        training_ops = []
        make_apply_ops = lambda: [tf.assign(counter, counter + 1)]
        params = variable_mgr_util.AutoLossScaleParams(
            enable_auto_loss_scale=True,
            loss_scale=tf.Variable(4, dtype=tf.float32),
            loss_scale_normal_steps=tf.Variable(10),
            inc_loss_scale_every_n=10,
            is_chief=True)
        variable_mgr_util.append_gradients_with_loss_scale(
            training_ops,
            make_apply_ops,
            params,
            grad_has_inf_nan=tf.constant(True))

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(training_ops)
            # The variable update must be skipped entirely.
            self.assertEqual(sess.run(counter), 0)
            # The loss scale is halved and the normal-step count reset.
            self.assertEqual(sess.run(params.loss_scale), 2)
            self.assertEqual(sess.run(params.loss_scale_normal_steps), 0)
def vq_discrete_bottleneck(x, hparams):
  """Simple vector quantized discrete bottleneck.

  Quantizes activations to the nearest of 2**hparams.bottleneck_bits
  codebook entries and updates the codebook with an exponential moving
  average (EMA).

  Args:
    x: activation tensor; reshaped to [-1, hparams.hidden_size] before
      quantization (assumes the last axis is hparams.hidden_size --
      TODO confirm against callers).
    hparams: HParams providing bottleneck_bits, hidden_size, beta, decay,
      epsilon, and the codebook variables means / ema_means / ema_count.

  Returns:
    discrete: one-hot code assignments, shape x_shape[:-1] + [bottleneck_size].
    loss: scalar commitment loss, hparams.beta * e_loss.
  """
  tf.logging.info("Using EMA with beta = {}".format(hparams.beta))
  bottleneck_size = 2**hparams.bottleneck_bits
  x_shape = common_layers.shape_list(x)
  x = tf.reshape(x, [-1, hparams.hidden_size])
  # x_means_hot: one-hot nearest-codebook assignments; e_loss: commitment loss.
  x_means_hot, e_loss = vq_nearest_neighbor(
      x, hparams)
  means, ema_means, ema_count = (hparams.means, hparams.ema_means,
                                 hparams.ema_count)

  # Update the ema variables
  updated_ema_count = moving_averages.assign_moving_average(
      ema_count,
      tf.reduce_sum(x_means_hot, axis=0),
      hparams.decay,
      zero_debias=False)

  # Sum of the inputs assigned to each codebook entry.
  dw = tf.matmul(x_means_hot, x, transpose_a=True)
  updated_ema_means = moving_averages.assign_moving_average(
      ema_means, dw, hparams.decay, zero_debias=False)
  n = tf.reduce_sum(updated_ema_count, axis=-1, keepdims=True)
  # Laplace-smooth the counts so the division below never hits zero.
  updated_ema_count = (
      (updated_ema_count + hparams.epsilon) /
      (n + bottleneck_size * hparams.epsilon) * n)
  # pylint: disable=g-no-augmented-assignment
  updated_ema_means = updated_ema_means / tf.expand_dims(
      updated_ema_count, axis=-1)
  # pylint: enable=g-no-augmented-assignment
  with tf.control_dependencies([e_loss]):
    update_means = tf.assign(means, updated_ema_means)
    # Chain the codebook update into the loss so it runs as a side effect
    # whenever the loss is evaluated.
    with tf.control_dependencies([update_means]):
      loss = hparams.beta * e_loss

  discrete = tf.reshape(x_means_hot, x_shape[:-1] + [bottleneck_size])
  return discrete, loss
Example #7
0
def set_vars(var_to_value_dict: dict) -> None:
    """Set the values of given tf.Variables in a single session call.

    Reuses (or lazily creates) a per-variable `setter` assign op fed through
    a placeholder, so repeated calls do not add new nodes to the graph.
    Equivalent to the following, but more efficient and does not bloat the tf graph:
    tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()]
    """
    assert_tf_initialized()
    ops = []
    feed_dict = {}

    for var, value in var_to_value_dict.items():
        assert is_tf_expression(var)
        setter_name = var.name.replace(":0", "/setter:0")

        try:
            # Reuse the setter op if this variable already has one.
            setter = tf.get_default_graph().get_tensor_by_name(setter_name)
        except KeyError:
            # First call for this variable: build a setter op next to it.
            with absolute_name_scope(var.name.split(":")[0]):
                with tf.control_dependencies(None):  # ignore surrounding control_dependencies
                    setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter")

        ops.append(setter)
        # Feed the new value through the setter's placeholder input.
        feed_dict[setter.op.inputs[1]] = value

    run(ops, feed_dict)
Example #8
0
File: krylov.py Project: cthl/sqgn
    def _prepare_solve(self, rs, ps):
        """Builds the ops that initialize the Krylov solver state.

        Sets the initial residuals `rs` and search directions `ps` (both to
        the RHS `b` when starting from a zero guess, otherwise to `b - A@z`),
        and resets the r^T r accumulator and the indefiniteness flag.

        Args:
          rs: list of residual variables.
          ps: list of search-direction variables.

        Returns:
          A grouped op performing all initializations.
        """
        ops = []

        if self._conf['zero_guess']:
            # With a zero initial guess the residual equals the RHS.
            for r, b, p in zip(rs, self._bs, ps):
                ops.append(tf.assign(r, b))
                ops.append(tf.assign(p, b))
        else:
            for r, b, p, Az in zip(rs, self._bs, ps, self._Azs):
                # Fix: build the initial residual b - Az once and reuse it;
                # previously two identical subgraphs were created per variable.
                r0 = b - Az
                ops.append(tf.assign(r, r0))
                ops.append(tf.assign(p, r0))

        # Reset the scalar r^T r accumulator.
        ops.append(tf.assign(self._rTr, tf.zeros(shape=[], dtype=rs[0].dtype)))
        # Clear the "matrix looked indefinite" flag.
        ops.append(tf.assign(self._indefinite, False))

        return tf.group(ops)
Example #9
0
# _*_ coding utf-8 _*_
# Author:94342
# Time:  2020/9/1717:09
# File:  New03.py
# Engine:PyCharm


import tensorflow.compat.v1 as tf


if __name__ == '__main__':
    tf.compat.v1.disable_eager_execution()

    # Classic TF1 counter demo: each run of `process` increments v1 by one.
    v1 = tf.Variable(0, name='counter')
    one = tf.constant(1)
    temp = tf.add(v1, one)
    process = tf.assign(v1, temp)
    # Fix: tf.initialize_all_variables() is deprecated; use the
    # global_variables_initializer() replacement.
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        print(sess.run(v1))
        for _ in range(3):  # loop index was unused
            sess.run(process)
            print(sess.run(v1))
Example #10
0
    def _forward(self, x, y, model_params, init_states, is_training=False):
        """Computes the LSTM language-model losses.

        Args:
          x: [batch_size, num_steps], input batch.
          y: [batch_size, num_steps], output batch.
          model_params: a `dict` of params to use, with keys 'w_emb',
            'w_lstm', 'w_soft'.
          init_states: a `dict` of initial state variables, keys 'c' and 'h'.
          is_training: if `True`, will apply dropout and regularizations.

        Returns:
          reg_loss: scalar, loss plus regularization terms (equal to `loss`
            when not training).
          loss: scalar, cross-entropy loss.
        """
        w_emb = model_params['w_emb']
        w_lstm = model_params['w_lstm']
        w_soft = model_params['w_soft']
        prev_c = init_states['c']
        prev_h = init_states['h']

        emb = tf.nn.embedding_lookup(w_emb, x)
        if is_training:
            # Input (embedding) dropout with a mask shared across time steps.
            emb = tf.layers.dropout(
                emb,
                self.params.drop_i,
                [self.params.batch_size, 1, self.params.emb_size],
                training=True)

            # Per-layer dropout masks for the middle layers only; the first
            # and last entries are None (no mask).
            layer_masks = [None]
            for _ in range(1, self.params.num_layers - 1):
                mask = _gen_mask(
                    [self.params.batch_size, self.params.hidden_size],
                    self.params.drop_l)
                layer_masks.append(mask)
            layer_masks.append(None)
        else:
            layer_masks = [None] * self.params.num_layers

        out_c, out_h, all_h = _lstm(emb, prev_c, prev_h, w_lstm, layer_masks)
        top_h = all_h[-1]
        if is_training:
            # Output dropout on the top layer's hidden states.
            top_h = tf.layers.dropout(
                top_h,
                self.params.drop_o,
                [self.params.batch_size, 1, self.params.emb_size],
                training=True)

        # Persist the final LSTM states into the init-state variables so the
        # next batch continues from where this one ended.
        carry_on = []
        for var, val in zip(prev_c + prev_h, out_c + out_h):
            carry_on.append(tf.assign(var, val))

        logits = tf.einsum('bnh,vh->bnv', top_h, w_soft)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
        loss = tf.reduce_mean(loss)  # TODO(hyhieu): watch for num_steps

        reg_loss = loss  # loss + regularization_terms, for training only
        if is_training:
            # L2 weight reg
            reg_loss += self.params.weight_decay * tf.add_n(
                [tf.reduce_sum(w**2) for w in tf.trainable_variables()])

            # activation L2 reg
            reg_loss += self.params.alpha * tf.add_n(
                [tf.reduce_mean(h**2) for h in all_h[:-1]])

            # activation slowness L2 reg
            reg_loss += self.params.beta * tf.add_n([
                tf.reduce_mean((h[:, 1:, :] - h[:, :-1, :])**2)
                for h in all_h[:-1]
            ])

        # Tie the state carry-over to the returned losses so it always runs.
        with tf.control_dependencies(carry_on):
            loss = tf.identity(loss)
            if is_training:
                reg_loss = tf.identity(reg_loss)

        return reg_loss, loss
Example #11
0
    def __init__(self, net_params, batch_size, num_classes):
        """
        Defines the TensorFlow model, loss, optimisation and accuracy. Then
        loads the MXNET weights into the model.

        net_params: dict mapping layer names to shared float arrays holding
            the pretrained weights (converted to numpy below).
        batch_size: int, stored on the instance for later use.
        num_classes: int, number of output classes.
        """
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        # Convert the C++ shared float arrays into numpy arrays.
        for key in net_params.keys():
            net_params[key] = _utils.convert_shared_float_array_to_numpy(
                net_params[key])

        _tf.reset_default_graph()

        self.num_classes = num_classes
        self.batch_size = batch_size

        # 28x28 single-channel drawing input.
        self.input = _tf.placeholder(_tf.float32, [None, 28, 28, 1])

        self.one_hot_labels = _tf.placeholder(_tf.int32,
                                              [None, self.num_classes])

        # Weights
        weights = {
            'drawing_conv0_weight':
            _tf.Variable(_tf.zeros([3, 3, 1, 16]),
                         name='drawing_conv0_weight'),
            'drawing_conv1_weight':
            _tf.Variable(_tf.zeros([3, 3, 16, 32]),
                         name='drawing_conv1_weight'),
            'drawing_conv2_weight':
            _tf.Variable(_tf.zeros([3, 3, 32, 64]),
                         name='drawing_conv2_weight'),
            'drawing_dense0_weight':
            _tf.Variable(_tf.zeros([576, 128]), name='drawing_dense0_weight'),
            'drawing_dense1_weight':
            _tf.Variable(_tf.zeros([128, self.num_classes]),
                         name='drawing_dense1_weight')
        }

        # Biases
        biases = {
            'drawing_conv0_bias':
            _tf.Variable(_tf.zeros([16]), name='drawing_conv0_bias'),
            'drawing_conv1_bias':
            _tf.Variable(_tf.zeros([32]), name='drawing_conv1_bias'),
            'drawing_conv2_bias':
            _tf.Variable(_tf.zeros([64]), name='drawing_conv2_bias'),
            'drawing_dense0_bias':
            _tf.Variable(_tf.zeros([128]), name='drawing_dense0_bias'),
            'drawing_dense1_bias':
            _tf.Variable(_tf.zeros([self.num_classes]),
                         name='drawing_dense1_bias')
        }

        # Three conv -> relu -> 2x2 max-pool stages.
        conv_1 = _tf.nn.conv2d(self.input,
                               weights["drawing_conv0_weight"],
                               strides=1,
                               padding='SAME')
        conv_1 = _tf.nn.bias_add(conv_1, biases["drawing_conv0_bias"])
        relu_1 = _tf.nn.relu(conv_1)
        pool_1 = _tf.nn.max_pool2d(relu_1,
                                   ksize=[1, 2, 2, 1],
                                   strides=[1, 2, 2, 1],
                                   padding='VALID')

        conv_2 = _tf.nn.conv2d(pool_1,
                               weights["drawing_conv1_weight"],
                               strides=1,
                               padding='SAME')
        conv_2 = _tf.nn.bias_add(conv_2, biases["drawing_conv1_bias"])
        relu_2 = _tf.nn.relu(conv_2)
        pool_2 = _tf.nn.max_pool2d(relu_2,
                                   ksize=[1, 2, 2, 1],
                                   strides=[1, 2, 2, 1],
                                   padding='VALID')

        conv_3 = _tf.nn.conv2d(pool_2,
                               weights["drawing_conv2_weight"],
                               strides=1,
                               padding='SAME')
        conv_3 = _tf.nn.bias_add(conv_3, biases["drawing_conv2_bias"])
        relu_3 = _tf.nn.relu(conv_3)
        pool_3 = _tf.nn.max_pool2d(relu_3,
                                   ksize=[1, 2, 2, 1],
                                   strides=[1, 2, 2, 1],
                                   padding='VALID')

        # Flatten the data to a 1-D vector for the fully connected layer
        fc1 = _tf.reshape(pool_3, (-1, 576))

        fc1 = _tf.nn.xw_plus_b(fc1,
                               weights=weights["drawing_dense0_weight"],
                               biases=biases["drawing_dense0_bias"])

        fc1 = _tf.nn.relu(fc1)

        out = _tf.nn.xw_plus_b(fc1,
                               weights=weights["drawing_dense1_weight"],
                               biases=biases["drawing_dense1_bias"])
        softmax_out = _tf.nn.softmax(out)

        self.predictions = softmax_out

        # Loss
        self.cost = _tf.losses.softmax_cross_entropy(
            logits=out,
            onehot_labels=self.one_hot_labels,
            reduction=_tf.losses.Reduction.NONE)

        # Optimizer
        self.optimizer = _tf.train.AdamOptimizer(learning_rate=0.001).minimize(
            self.cost)

        # Predictions
        # NOTE(review): correct_prediction is computed but never stored or
        # used below -- confirm accuracy is reported elsewhere.
        correct_prediction = _tf.equal(_tf.argmax(self.predictions, 1),
                                       _tf.argmax(self.one_hot_labels, 1))

        self.sess = _tf.Session()
        self.sess.run(_tf.global_variables_initializer())

        # Assign the initialised weights from C++ to tensorflow
        layers = [
            'drawing_conv0_weight', 'drawing_conv0_bias',
            'drawing_conv1_weight', 'drawing_conv1_bias',
            'drawing_conv2_weight', 'drawing_conv2_bias',
            'drawing_dense0_weight', 'drawing_dense0_bias',
            'drawing_dense1_weight', 'drawing_dense1_bias'
        ]

        for key in layers:
            if 'bias' in key:
                # Biases transfer directly with no layout change.
                self.sess.run(
                    _tf.assign(
                        _tf.get_default_graph().get_tensor_by_name(key + ":0"),
                        net_params[key]))
            else:
                if 'drawing_dense0_weight' in key:
                    '''
                    To make output of CoreML pool3 (NCHW) compatible with TF (NHWC).
                    Decompose FC weights to NCHW. Transpose to NHWC. Reshape back to FC.
                    '''
                    coreml_128_576 = net_params[key]
                    coreml_128_576 = _np.reshape(coreml_128_576,
                                                 (128, 64, 3, 3))
                    coreml_128_576 = _np.transpose(coreml_128_576,
                                                   (0, 2, 3, 1))
                    coreml_128_576 = _np.reshape(coreml_128_576, (128, 576))
                    self.sess.run(
                        _tf.assign(
                            _tf.get_default_graph().get_tensor_by_name(key +
                                                                       ":0"),
                            _np.transpose(coreml_128_576, (1, 0))))
                elif 'dense' in key:
                    # Other dense weights only need the CoreML->TF conversion.
                    dense_weights = _utils.convert_dense_coreml_to_tf(
                        net_params[key])
                    self.sess.run(
                        _tf.assign(
                            _tf.get_default_graph().get_tensor_by_name(key +
                                                                       ":0"),
                            dense_weights))
                else:
                    # TODO: Call _utils.convert_conv2d_coreml_to_tf when #2513 is merged
                    # Conv weights: transpose CoreML (O, I, H, W) to TF (H, W, I, O).
                    self.sess.run(
                        _tf.assign(
                            _tf.get_default_graph().get_tensor_by_name(key +
                                                                       ":0"),
                            _np.transpose(net_params[key], (2, 3, 1, 0))))
Example #12
0
 def _resource_apply_dense(self, grad, handle):
     """Applies an Adafactor-style update to a dense variable.

     Maintains a factored (row/column) or full second-moment estimate,
     optionally rescales by parameter scale, clips the update RMS, applies
     optional momentum (beta1), and supports bfloat16 parameter encoding
     and simulated quantization.

     Args:
       grad: gradient tensor for the variable.
       handle: the variable (resource handle) to update.

     Returns:
       A grouped op applying the variable and slot updates.
     """
     var = handle
     grad = tf.to_float(grad)
     # epsilon1 keeps the second-moment estimate strictly positive.
     grad_squared = tf.square(grad) + self._epsilon1
     grad_squared_mean = tf.reduce_mean(grad_squared)
     decay_rate = self._call_if_callable(self._decay_rate)
     update_scale = self._call_if_callable(self._learning_rate)
     update_scale = tf.convert_to_tensor(update_scale, name="update_scale")
     update_scale = tf.cast(update_scale,
                            grad_squared_mean.dtype.base_dtype)
     old_val = var
     # bfloat16 parameters are stored encoded; decode to float for the math.
     if var.dtype.base_dtype == tf.bfloat16:
         old_val = tf.to_float(self._parameter_encoding.decode(old_val))
     if self._multiply_by_parameter_scale:
         update_scale *= tf.to_float(self._parameter_scale(old_val))
     # HACK: Make things dependent on grad.
     # This confounds the XLA rewriter and keeps it from fusing computations
     # across different variables.  This fusion is a bad for HBM usage, since
     # it causes the gradients to persist in memory.
     decay_rate += grad_squared_mean * 1e-30
     update_scale += grad_squared_mean * 1e-30
     # END HACK
     mixing_rate = 1.0 - decay_rate
     shape = var.get_shape().as_list()
     updates = []
     if self._should_use_factored_second_moment_estimate(shape):
         # Factored estimate: keep only row and column means of grad^2.
         grad_squared_row_mean = tf.reduce_mean(grad_squared, -1)
         grad_squared_col_mean = tf.reduce_mean(grad_squared, -2)
         vr = self.get_slot(var, "vr")
         new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
         vc = self.get_slot(var, "vc")
         new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
         vr_update = tf.assign(vr, new_vr, use_locking=self._use_locking)
         vc_update = tf.assign(vc, new_vc, use_locking=self._use_locking)
         updates = [vr_update, vc_update]
         long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
         r_factor = tf.rsqrt(new_vr / long_term_mean)
         c_factor = tf.rsqrt(new_vc)
         # Outer product of the row/column factors rescales the gradient.
         x = grad * tf.expand_dims(r_factor, -1) * tf.expand_dims(
             c_factor, -2)
     else:
         # Full second-moment estimate (small variables).
         v = self.get_slot(var, "v")
         new_v = decay_rate * v + mixing_rate * grad_squared
         v_update = tf.assign(v, new_v, use_locking=self._use_locking)
         updates = [v_update]
         x = grad * tf.rsqrt(new_v)
     if self._clipping_threshold is not None:
         # Clip the update so its RMS does not exceed the threshold.
         clipping_denom = tf.maximum(
             1.0,
             reduce_rms(x) / self._clipping_threshold)
         x /= clipping_denom
     subtrahend = update_scale * x
     if self._beta1:
         # Momentum: exponential moving average of the updates.
         m = self.get_slot(var, "m")
         new_m = self._beta1 * tf.to_float(m) + (1.0 -
                                                 self._beta1) * subtrahend
         subtrahend = new_m
         new_m = common_layers.cast_like(new_m, var)
         updates.append(tf.assign(m, new_m, use_locking=self._use_locking))
     new_val = tf.to_float(old_val) - subtrahend
     if var.dtype.base_dtype == tf.bfloat16:
         new_val = self._parameter_encoding.encode(new_val,
                                                   self._quantization_noise)
     if self._simulated_quantize_bits:
         new_val = quantization.simulated_quantize(
             var - subtrahend, self._simulated_quantize_bits,
             self._quantization_noise)
     new_val = tf.cast(new_val, var.dtype)
     var_update = tf.assign(var, new_val, use_locking=self._use_locking)
     updates = [var_update] + updates
     return tf.group(*updates)
Example #13
0
    def __init__(self, args):
        self.args = args
        dense = tf.layers.dense

        inputs = tf.placeholder(shape=(args.batch_size, None),
                                dtype=tf.int32,
                                name='inputs')
        time_inputs = tf.placeholder(shape=(args.batch_size, None),
                                     dtype=tf.int32,
                                     name='time_inputs')
        mask = tf.placeholder(shape=(args.batch_size, None),
                              dtype=tf.float32,
                              name='inputs_mask')
        seq_length = tf.placeholder(shape=args.batch_size,
                                    dtype=tf.float32,
                                    name='seq_length')

        self.s_inputs = s_inputs = tf.placeholder(shape=args.batch_size,
                                                  dtype=tf.int32,
                                                  name='s_inputs')
        self.d_inputs = d_inputs = tf.placeholder(shape=args.batch_size,
                                                  dtype=tf.int32,
                                                  name='d_inputs')

        self.input_form = [inputs, time_inputs, mask, seq_length]

        decoder_inputs = tf.concat(
            [tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32), inputs],
            axis=1)
        decoder_targets = tf.concat(
            [inputs,
             tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32)],
            axis=1)
        decoder_mask = tf.concat(
            [mask,
             tf.zeros(shape=(args.batch_size, 1), dtype=tf.float32)],
            axis=1)

        x_size = out_size = args.map_size[0] * args.map_size[1]
        embeddings = tf.Variable(tf.random_uniform(
            [x_size, args.x_latent_size], -1.0, 1.0),
                                 dtype=tf.float32)
        encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, inputs)
        decoder_inputs_embedded = tf.nn.embedding_lookup(
            embeddings, decoder_inputs)

        time_embeddings = tf.Variable(tf.random_uniform(
            [49, args.x_latent_size], -1.0, 1.0),
                                      dtype=tf.float32)
        encoder_time_inputs_embedded = tf.nn.embedding_lookup(
            time_embeddings, time_inputs)

        time_mean = tf.reduce_mean(encoder_time_inputs_embedded, axis=1)
        mu_c_delta = dense(time_mean, args.rnn_size, activation=None)
        stack_mu_c_delta = tf.stack([mu_c_delta] * args.mem_num, axis=1)
        log_sigma_sq_c_delta = dense(time_mean, args.rnn_size, activation=None)
        stack_log_sigma_sq_c_delta = tf.stack([log_sigma_sq_c_delta] *
                                              args.mem_num,
                                              axis=1)

        with tf.variable_scope("encoder"):
            encoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
            _, encoder_final_state = tf.nn.dynamic_rnn(
                encoder_cell,
                encoder_inputs_embedded,
                sequence_length=seq_length,
                dtype=tf.float32,
            )

        with tf.variable_scope("clusters"):
            mu_c = tf.get_variable("mu_c", [args.mem_num, args.rnn_size],
                                   initializer=tf.random_uniform_initializer(
                                       0.0, 1.0))
            log_sigma_sq_c = tf.get_variable(
                "sigma_sq_c", [args.mem_num, args.rnn_size],
                initializer=tf.constant_initializer(0.0),
                trainable=False)
            log_pi_prior = tf.get_variable(
                "log_pi_prior",
                args.mem_num,
                initializer=tf.constant_initializer(0.0),
                trainable=False)
            pi_prior = tf.nn.softmax(log_pi_prior)

            init_mu_c = tf.placeholder(shape=(args.mem_num, args.rnn_size),
                                       dtype=tf.float32,
                                       name='init_mu_c')
            init_sigma_c = tf.placeholder(shape=(args.mem_num, args.rnn_size),
                                          dtype=tf.float32,
                                          name='init_sigma_c')
            init_pi = tf.placeholder(shape=args.mem_num,
                                     dtype=tf.float32,
                                     name='init_pi')
            self.cluster_init = [init_mu_c, init_sigma_c, init_pi]

            self.init_mu_c_op = tf.assign(mu_c, init_mu_c)
            self.init_sigma_c_op = tf.assign(log_sigma_sq_c, init_sigma_c)
            self.init_pi_op = tf.assign(log_pi_prior, init_pi)

            self.mu_c = mu_c
            self.sigma_c = log_sigma_sq_c
            self.pi = pi_prior

            stack_mu_c = tf.stack([mu_c] * args.batch_size, axis=0)
            stack_log_sigma_sq_c = tf.stack([log_sigma_sq_c] * args.batch_size,
                                            axis=0)

            stack_mu_c += stack_mu_c_delta
            stack_log_sigma_sq_c += stack_log_sigma_sq_c_delta

        with tf.variable_scope("latent"):
            mu_z = dense(encoder_final_state, args.rnn_size,
                         activation=None)  # shape=(128, 256)
            log_sigma_sq_z = dense(encoder_final_state,
                                   args.rnn_size,
                                   activation=None)  # shape=(128, 256)

            eps_z = tf.random_normal(shape=tf.shape(log_sigma_sq_z),
                                     mean=0,
                                     stddev=1,
                                     dtype=tf.float32)
            z = mu_z + tf.sqrt(tf.exp(log_sigma_sq_z)) * eps_z

            stack_mu_z = tf.stack([mu_z] * args.mem_num, axis=1)
            stack_log_sigma_sq_z = tf.stack([log_sigma_sq_z] * args.mem_num,
                                            axis=1)
            stack_z = tf.stack([z] * args.mem_num, axis=1)
            self.batch_post_embedded = z

        with tf.variable_scope("sd_attention"):
            s_embeddings = tf.Variable(tf.random_uniform(
                [x_size, args.rnn_size], -1.0, 1.0),
                                       dtype=tf.float32)
            d_embeddings = tf.Variable(tf.random_uniform(
                [x_size, args.rnn_size], -1.0, 1.0),
                                       dtype=tf.float32)
            s = tf.nn.embedding_lookup(s_embeddings, s_inputs)
            d = tf.nn.embedding_lookup(d_embeddings, d_inputs)
            sd = tf.concat([s, d], axis=1)
            hsd1 = dense(sd, args.rnn_size, activation=tf.nn.relu)
            sd_logits = dense(hsd1, args.mem_num, activation=tf.nn.relu)
            sd_att = tf.nn.softmax(sd_logits)

        # for batch_latent_loss
        with tf.variable_scope("attention"):
            att_logits = -tf.reduce_sum(
                tf.square(stack_z - stack_mu_c) / tf.exp(stack_log_sigma_sq_c),
                axis=-1)
            att = tf.nn.softmax(att_logits) + 1e-10
            self.batch_att = att

        def generation(h):
            with tf.variable_scope("generation", reuse=tf.AUTO_REUSE):
                with tf.variable_scope("decoder"):
                    decoder_init_state = h
                    decoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
                    decoder_outputs, _ = tf.nn.dynamic_rnn(
                        decoder_cell,
                        decoder_inputs_embedded,
                        initial_state=decoder_init_state,
                        sequence_length=seq_length,
                        dtype=tf.float32,
                    )
                with tf.variable_scope("outputs"):
                    out_w = tf.get_variable(
                        "out_w", [out_size, args.rnn_size], tf.float32,
                        tf.random_normal_initializer(stddev=0.02))
                    out_b = tf.get_variable(
                        "out_b", [out_size],
                        tf.float32,
                        initializer=tf.constant_initializer(0.0))

                    batch_rec_loss = tf.reduce_mean(
                        decoder_mask * tf.reshape(
                            tf.nn.sampled_softmax_loss(
                                weights=out_w,
                                biases=out_b,
                                labels=tf.reshape(decoder_targets,
                                                  [-1, 1]),  # shape=(None, 1)
                                inputs=tf.reshape(
                                    decoder_outputs,
                                    [-1, args.rnn_size]),  # shape=(None, 256)
                                num_sampled=args.neg_size,
                                num_classes=out_size),
                            [args.batch_size, -1]),
                        axis=-1)
                    target_out_w = tf.nn.embedding_lookup(
                        out_w, decoder_targets)
                    target_out_b = tf.nn.embedding_lookup(
                        out_b, decoder_targets)
                    batch_likelihood = tf.reduce_mean(
                        decoder_mask * tf.log_sigmoid(
                            tf.reduce_sum(decoder_outputs * target_out_w, -1) +
                            target_out_b),
                        axis=-1,
                        name="batch_likelihood")

                    batch_latent_loss = 0.5 * tf.reduce_sum(
                        att * tf.reduce_mean(
                            stack_log_sigma_sq_c + tf.exp(stack_log_sigma_sq_z)
                            / tf.exp(stack_log_sigma_sq_c) +
                            tf.square(stack_mu_z - stack_mu_c) /
                            tf.exp(stack_log_sigma_sq_c),
                            axis=-1),
                        axis=-1) - 0.5 * tf.reduce_mean(1 + log_sigma_sq_z,
                                                        axis=-1)
                    batch_cate_loss = tf.reduce_mean(
                        tf.reduce_mean(att, axis=0) *
                        tf.log(tf.reduce_mean(att, axis=0)))
                return batch_rec_loss, batch_latent_loss, batch_cate_loss, batch_likelihood

        if args.eval:
            sd_z = tf.matmul(
                tf.one_hot(tf.argmax(sd_att, axis=-1),
                           depth=args.mem_num,
                           axis=-1), mu_c)
            # sd_z = tf.matmul(tf.one_hot(tf.argmax(sd_att, axis=-1), depth=args.mem_num, axis=-1), mu_c+tf.reduce_mean(stack_mu_c_delta, 0))
            results = generation(sd_z)
            self.batch_likelihood = results[-1]
        else:
            results = generation(z)
            self.batch_likelihood = results[-1]
            self.rec_loss = rec_loss = tf.reduce_mean(results[0])
            self.latent_loss = latent_loss = tf.reduce_mean(results[1])
            self.cate_loss = cate_loss = results[2]

            self.sd_loss = sd_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=att,
                                                           logits=sd_logits))

            self.loss = loss = rec_loss + latent_loss + 0.1 * cate_loss
            self.pretrain_loss = pretrain_loss = rec_loss

            all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            sd_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope='sd_attention')
            cluster_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             scope='clusters')
            vae_vars = list(set(all_vars) - set(sd_vars) - set(cluster_vars))

            self.pretrain_op = tf.train.AdamOptimizer(
                args.learning_rate).minimize(pretrain_loss, var_list=vae_vars)
            self.train_op = tf.train.AdamOptimizer(
                args.learning_rate).minimize(loss, var_list=vae_vars)
            self.sd_train_op = tf.train.AdamOptimizer(
                args.learning_rate).minimize(sd_loss, var_list=sd_vars)

        saver = tf.train.Saver(tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES),
                               max_to_keep=100)
        self.save, self.restore = saver.save, saver.restore
  def run_update_step(self, session, step_number=None):
    """Runs one compression update step inside `session`.

    Reads the global step (unless `step_number` is given), and while inside
    the [begin_compression_step, end_compression_step) window performs the
    initial matrix decomposition (once) and then decrements alpha toward 0,
    persisting the last-update step in a TF variable.

    Args:
      session: tf.Session used to fetch/assign the compression state
        variables (alpha, a/b/c matrices, last alpha update step).
      step_number: optional int step; if None it is read from
        `self._global_step`, or defaults to 1 when the interval check is
        disabled.
    """
    logging.info('running run_update_step self._global_step is %s name is %s',
                 self._global_step, self.a_matrix_tfvar.op.name)
    # TODO(wanxin): Resolve tensor infetchable issue and update mask here.
    if step_number is None:
      if self._spec.run_update_interval_check != 0:
        logging.info(
            'running run_update_step step_num is null self.globalstep is %s',
            self._global_step)
        # Fetch the current global step from the graph.
        step_number = session.run(self._global_step)
        logging.info('running run_update_step step_num is %s', step_number)
      else:
        # Interval checking disabled: treat every call as step 1.
        step_number = 1

    logging.info(
        'In compression op.run_update_step: '
        'step_number is %s, begin, end and update_count are: %s %s %s ',
        step_number, self._spec.begin_compression_step,
        self._spec.end_compression_step, self.run_update_count)
    # Only act inside the compression window; an end step of -1 means the
    # window never closes.
    if (step_number >= self._spec.begin_compression_step and
        (step_number < self._spec.end_compression_step or
         self._spec.end_compression_step == -1)):
      logging.info(
          'In compression op.run_update_step:'
          'step_number is %s, begin, end and update_count are: %s %s %s ',
          step_number, self._spec.begin_compression_step,
          self._spec.end_compression_step, self.run_update_count)
      self.run_update_count += 1
      logging.info('inside compression interval')

      # Need to persist these python state variables in TF as if a task gets
      # aborted things get out of sync.
      self._last_update_step = session.run(self._last_alpha_update_step)
      logging.info(
          'In compression op.run_update_step: '
          'step_number is %s, begin, end, update_count, last_alpha_update'
          ' are: %s %s %s %s', step_number, self._spec.begin_compression_step,
          self._spec.end_compression_step, self.run_update_count,
          self._last_update_step)
      # A persisted value of -1 marks "never updated": run the one-time
      # initial decomposition of the (pruned) weight matrix.
      if self._last_update_step == -1:
        logging.info(
            'In compression op.run_update_step: step_number is %s, '
            'begin, end, update_count are: %s %s %s ', step_number,
            self._spec.begin_compression_step, self._spec.end_compression_step,
            self.run_update_count)
        print('inside compression interval: initial decomposition step')
        a_matrix = session.run(self.a_matrix_tfvar)
        # Compress the masked (pruned) weights, not the raw ones.
        pruned_a_matrix = session.run(
            tf.multiply(self.a_matrix_tfvar, self.mask))
        logging.info(
            'In compression op.run_update_step: '
            'a_matrix.shape is %s norm is %d', a_matrix.shape,
            np.linalg.norm(a_matrix))
        if self.matrix_compressor.get_spec().is_c_matrix_present:
          logging.info(
              'In compression op.run_update_step: '
              'step_number is %s, begin, end and update_count are: %s %s %s ',
              step_number, self._spec.begin_compression_step,
              self._spec.end_compression_step, self.run_update_count)
          # Some compressors operate on the transposed matrix.
          if getattr(self._spec, 'do_transpose', False):
            [b_matrix, c_matrix
             ] = self.matrix_compressor.static_matrix_compressor(
                 pruned_a_matrix.T)
          else:
            [b_matrix, c_matrix
            ] = self.matrix_compressor.static_matrix_compressor(pruned_a_matrix)
          session.run(tf.assign(self.b_matrix_tfvar, b_matrix))
          session.run(tf.assign(self.c_matrix_tfvar, c_matrix))
        else:
          [b_matrix
          ] = self.matrix_compressor.static_matrix_compressor(pruned_a_matrix)
          session.run(tf.assign(self.b_matrix_tfvar, b_matrix))
      logging.info(
          'In compression op.run_update_step: '
          'step_number is %s, begin, end and update_count are: %s %s %s ',
          step_number, self._spec.begin_compression_step,
          self._spec.end_compression_step, self.run_update_count)

      # Linearly decay alpha toward 0 by alpha_decrement_value per update.
      alpha = session.run(self.alpha)
      self.last_alpha_value = alpha
      if self.last_alpha_value > 0:
        # NOTE(review): make_a_zero is hard-coded False, so the branch below
        # never executes (dead code) — confirm whether it should be driven by
        # a spec option instead.
        make_a_zero = False
        new_alpha = max(alpha - self._spec.alpha_decrement_value, 0)
        if make_a_zero and new_alpha == 0:
          logging.info('Making a_matrix all zero for %s',
                       self.a_matrix_tfvar.op.name)
          a_matrix = np.zeros(shape=self.a_matrix_tfvar.shape)
          session.run(tf.assign(self.a_matrix_tfvar, a_matrix))
        # NOTE(review): alpha is presumably a float; '%d' truncates it in
        # these log messages — confirm whether '%s'/'%f' was intended.
        logging.info('in run_update_step decrementing alpha, alpha value is %d',
                     self.last_alpha_value)

        logging.info(
            'running run_update_step self._global_step is %s new and old alpha are %d %d',
            self._global_step, alpha, new_alpha)
        # Persist the decayed alpha and the step at which it happened.
        session.run(tf.assign(self.alpha, new_alpha))
        self.last_alpha_value = new_alpha
        self._last_update_step = step_number
        session.run(tf.assign(self._last_alpha_update_step, step_number))
    logging.info(
        'In compression op.run_update_step: '
        'step_number is %s, begin, end  and update_count are: %s %s %s ',
        step_number, self._spec.begin_compression_step,
        self._spec.end_compression_step, self.run_update_count)
Example #15
0
 def _assign(self, ref, value):
     """Assign `value` to `ref`, honoring the optimizer's locking setting."""
     locking = self._use_locking
     return tf.assign(ref, value, use_locking=locking)
Example #16
0
def train(agent, replay_buffer, dev_data, objective='mapo'):
  """Training loop: alternately samples trajectories and updates the agent.

  Args:
    agent: policy agent exposing `update`, `update_eps`, `sample_trajs`,
      `global_step` and (when meta-learning) a `pi` network.
    replay_buffer: replay buffer wrapping the training environments.
    dev_data: a replay buffer when FLAGS.meta_learn is set, otherwise a
      dict of validation environments.
    objective: objective name forwarded to the sample generators.
  """
  sgd_steps = 0
  train_env_dict = replay_buffer.env_dict
  # Infinite generator of training samples over all training environments.
  train_sample_gen = SampleGenerator(
      replay_buffer,
      agent,
      objective=objective,
      explore=FLAGS.explore,
      n_samples=FLAGS.n_replay_samples,
      use_top_k_samples=FLAGS.use_top_k_samples,
      min_replay_weight=FLAGS.min_replay_weight)
  train_sample_generator = train_sample_gen.generate_samples(
      batch_size=len(train_env_dict), debug=FLAGS.is_debug)
  if FLAGS.meta_learn:
    # Meta-learning: dev_data is itself a replay buffer and gets its own
    # sample generator for the outer-loop update.
    dev_replay_buffer = dev_data
    dev_env_dict = dev_replay_buffer.env_dict
    dev_sample_gen = SampleGenerator(
        dev_replay_buffer,
        agent,
        objective=objective,
        explore=FLAGS.dev_explore)
    dev_sample_generator = dev_sample_gen.generate_samples(
        batch_size=len(dev_env_dict), debug=FLAGS.is_debug)
  else:
    dev_env_dict = dev_data

  ckpt_dir = osp.join(FLAGS.train_dir, 'model')
  # Warm-start from a pretrained checkpoint only when no training checkpoint
  # exists yet.
  if (tf.train.latest_checkpoint(ckpt_dir) is
      None) and FLAGS.pretrained_ckpt_dir:
    pretrained_ckpt_dir = osp.join(FLAGS.pretrained_ckpt_dir, 'best_model')
    # Store weights before loading the checkpoint
    if FLAGS.pretrained_load_data_only and FLAGS.meta_learn:
      pi_weights = agent.pi.get_weights()
    create_checkpoint_manager(
        agent,
        pretrained_ckpt_dir,
        restore=True,
        include_optimizer=False,
        meta_learn=False)
    # Reset the global step to 0
    tf.assign(agent.global_step, 0)
    if FLAGS.pretrained_load_data_only and FLAGS.meta_learn:
      # Use the pretrained policy only to collect dev trajectories, then
      # restore the original (pre-load) policy weights.
      dev_trajs = agent.sample_trajs(dev_env_dict.values(), greedy=True)
      dev_replay_buffer.save_trajs(dev_trajs)
      agent.pi.set_weights(pi_weights)
      tf.logging.info('Collected data using the pretrained checkpoint')

  ckpt_manager = create_checkpoint_manager(
      agent,
      ckpt_dir,
      restore=True,
      include_optimizer=True,
      meta_learn=FLAGS.meta_learn)
  # A separate manager tracks the best-validation-accuracy snapshot.
  best_ckpt_dir = osp.join(FLAGS.train_dir, 'best_model')
  best_ckpt_manager = create_checkpoint_manager(
      agent, best_ckpt_dir, restore=False, include_optimizer=False)
  # Log summaries for the accuracy results
  summary_writer = contrib_summary.create_file_writer(
      osp.join(FLAGS.train_dir, 'tb_log'), flush_millis=5000)
  max_val_acc = helpers.eval_agent(agent, dev_env_dict)

  with summary_writer.as_default(), \
    contrib_summary.always_record_summaries():
    while agent.global_step.numpy() < FLAGS.num_steps:
      # Periodically checkpoint, evaluate, and track the best model.
      if sgd_steps % FLAGS.save_every_n == 0:
        ckpt_manager.save()
        train_acc = helpers.eval_agent(agent, train_env_dict)
        val_acc = helpers.eval_agent(agent, dev_env_dict)
        contrib_summary.scalar('train_acc', train_acc)
        contrib_summary.scalar('validation_acc', val_acc)
        if val_acc > max_val_acc:
          max_val_acc = val_acc
          tf.logging.info('Best validation accuracy {}'.format(max_val_acc))
          best_ckpt_manager.save()

      # Sample environments and trajectories
      samples, contexts = next(train_sample_generator)
      if FLAGS.meta_learn:
        dev_samples, dev_contexts = next(dev_sample_generator)
        agent.update(samples, contexts, dev_samples, dev_contexts)
      else:
        # Update the policy
        agent.update(samples, contexts)
      # Update the random noise
      agent.update_eps(agent.global_step.numpy(), FLAGS.num_steps)
      sgd_steps += 1
# Grid size (number of spatial bins), number of random potentials to
# generate, and the stride at which potentials are held out for validation.
bins = 128
npots = 200
validnth = 5
# Precomputed sine/cosine tables sin(pi*i*j/bins), cos(pi*i*j/bins) over the
# interior grid points — a Fourier-like basis (usage is outside this view).
sinval = np.sin([[np.pi * i * j / bins for i in range(1, bins)]
                 for j in range(1, bins // 2)])
cosval = np.cos([[np.pi * i * j / bins for i in range(1, bins)]
                 for j in range(1, bins // 2)])
sqrt2 = np.sqrt(2)

# Default/initial state: sqrt(2)*sin(i*pi/bins) sampled on the interior grid
# (presumably the infinite-well ground state — confirm against the model).
defgrdstate = tf.constant(
    [sqrt2 * np.sin(i * np.pi / bins) for i in range(1, bins)])
psi = tf.Variable(defgrdstate)
zerotens = tf.zeros([1])
# Left- and right-shifted copies of psi, zero-padded at the boundaries.
psil = tf.concat([psi[1:], zerotens], 0)
psir = tf.concat([zerotens, psi[:-1]], 0)
# Op that rescales psi to unit RMS amplitude.
renorm = tf.assign(psi, tf.divide(psi,
                                  tf.sqrt(tf.reduce_mean(tf.square(psi)))))
optim = tf.train.GradientDescentOptimizer(0.0625 / bins)
# Op that resets psi back to the default state between runs.
reinit = tf.assign(psi, defgrdstate)
init = tf.global_variables_initializer()

# Accumulators for generated potentials and converged wave functions,
# split into training and validation sets.
potentials = []
valid_potentials = []
wave_functions = []
valid_functions = []

sess = tf.Session()
sess.run(init)
for i in range(npots):
    if i % 10 == 0:
        print(str((100. * i) / npots) + '% complete')
    for j in range(3):
Example #18
0
import time
import tensorflow.compat.v1 as tf

# Configuration of cluster

# Two worker machines and one parameter server (host:port pairs).
worker_hosts = ["9.134.80.230:9501", "9.134.189.246:9501"]
ps_hosts = ["9.134.189.246:9500"]
cluster = tf.train.ClusterSpec({"worker": worker_hosts, "ps": ps_hosts})

server = tf.train.Server(cluster, job_name='worker',
                         task_index=0)  # locate task 0 under the 'worker' job, i.e. machine A
# NOTE(review): replica_device_setter() is called without the cluster (or
# ps_tasks) argument, so variables may not actually be placed on the ps job —
# confirm whether replica_device_setter(cluster=cluster) was intended.
with tf.device(tf.train.replica_device_setter()):
    # Shared scalar variable initialized to 2, plus an op that increments it.
    w = tf.get_variable('w', (1),
                        tf.float32,
                        initializer=tf.constant_initializer(2))
    add = tf.add(w, 1)
    update = tf.assign(w, add)

with tf.Session(server.target) as sess:
    sess.run(tf.global_variables_initializer())
    # Repeatedly read w and apply the +1 update, pausing a second per round.
    for _ in range(100):
        print("==============================")
        print(sess.run(w))
        print(sess.run(update))
        time.sleep(1)
Example #19
0
 def save_internal_states_ops(self, internal_states):
     """Build ops that copy `internal_states` into the stored state vars."""
     if not self.hparams.concat_internal_states:
         # Nothing to persist; return a placeholder op structure.
         return [[tf.no_op()]]
     pairs = zip(self.internal_states[0], internal_states[0])
     return [[tf.assign(dst, src)] for dst, src in pairs]
Example #20
0
 def save_internal_states_ops(self, internal_states):
     """Build assign ops copying each two-element state pair into the vars."""
     result = []
     for stored, new in zip(self.internal_states, internal_states):
         result.append([tf.assign(stored[0], new[0]),
                        tf.assign(stored[1], new[1])])
     return result
def train(train_data, test_data=None):
    """Train a GraphSAGE link-prediction model (or node2vec baseline).

    Builds the model selected by FLAGS.model, trains it on edge minibatches,
    periodically validates, and optionally saves embeddings (with extra
    retraining for unseen nodes in the n2v case).

    Args:
        train_data: tuple (G, features, id_map, context_pairs) — the graph,
            node feature matrix (or None), node-id -> index map, and random
            context pairs (used only when FLAGS.random_context is set).
        test_data: unused here; kept for interface compatibility.
    """
    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]

    if features is not None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1], ))])

    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders()
    minibatch = EdgeMinibatchIterator(G,
                                      id_map,
                                      placeholders,
                                      batch_size=FLAGS.batch_size,
                                      max_degree=FLAGS.max_degree,
                                      num_neg_samples=FLAGS.neg_sample_size,
                                      context_pairs=context_pairs)
    # Adjacency lives in a non-trainable variable so train/val adjacency can
    # be swapped in place via the tf.assign ops created below.
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    if FLAGS.model == 'graphsage_mean':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'gcn':
        # Create model; GCN layers use doubled dims and no concatenation.
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, 2 * FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, 2 * FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="gcn",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   concat=False,
                                   logging=True)

    elif FLAGS.model == 'graphsage_seq':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   identity_dim=FLAGS.identity_dim,
                                   aggregator_type="seq",
                                   model_size=FLAGS.model_size,
                                   logging=True)

    elif FLAGS.model == 'graphsage_maxpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="maxpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'graphsage_meanpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="meanpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)

    elif FLAGS.model == 'n2v':
        model = Node2VecModel(
            placeholders,
            features.shape[0],
            minibatch.deg,
            #2x because graphsage uses concat
            nodevec_dim=2 * FLAGS.dim_1,
            lr=FLAGS.learning_rate)
    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
    config.allow_soft_placement = True

    # Initialize session
    sess = tf.Session(config=config)
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir(), sess.graph)

    # Init variables
    sess.run(tf.global_variables_initializer(),
             feed_dict={adj_info_ph: minibatch.adj})

    # Train model

    # Exponential moving averages of the train/validation MRR.
    train_shadow_mrr = None
    shadow_mrr = None

    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

    # Ops that swap between the training and validation adjacency matrices.
    train_adj_info = tf.assign(adj_info, minibatch.adj)
    val_adj_info = tf.assign(adj_info, minibatch.test_adj)
    for epoch in range(FLAGS.epochs):
        minibatch.shuffle()

        batch_iter = 0
        print('Epoch: %04d' % (epoch + 1))
        epoch_val_costs.append(0)
        while not minibatch.end():
            # Construct feed dictionary
            feed_dict = minibatch.next_minibatch_feed_dict()
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            t = time.time()
            # Training step
            outs = sess.run([
                merged, model.opt_op, model.loss, model.ranks, model.aff_all,
                model.mrr, model.outputs1
            ],
                            feed_dict=feed_dict)
            train_cost = outs[2]
            train_mrr = outs[5]
            if train_shadow_mrr is None:
                train_shadow_mrr = train_mrr  #
            else:
                # EMA with decay 0.99.
                train_shadow_mrr -= (1 - 0.99) * (train_shadow_mrr - train_mrr)

            if batch_iter % FLAGS.validate_iter == 0:
                # Validation: temporarily switch to the validation adjacency.
                sess.run(val_adj_info.op)
                val_cost, ranks, val_mrr, duration = evaluate(
                    sess, model, minibatch, size=FLAGS.validate_batch_size)
                sess.run(train_adj_info.op)
                epoch_val_costs[-1] += val_cost
            # NOTE(review): this EMA update runs every iteration, but val_mrr
            # only refreshes on validation iterations, so non-validation steps
            # re-apply the stale val_mrr — confirm whether it should live
            # inside the validation branch above.
            if shadow_mrr is None:
                shadow_mrr = val_mrr
            else:
                shadow_mrr -= (1 - 0.99) * (shadow_mrr - val_mrr)

            if total_steps % FLAGS.print_every == 0:
                summary_writer.add_summary(outs[0], total_steps)

            # Print results
            avg_time = (avg_time * total_steps + time.time() -
                        t) / (total_steps + 1)

            if total_steps % FLAGS.print_every == 0:
                print(
                    "Iter:",
                    '%04d' % batch_iter,
                    "train_loss=",
                    "{:.5f}".format(train_cost),
                    "train_mrr=",
                    "{:.5f}".format(train_mrr),
                    "train_mrr_ema=",
                    "{:.5f}".format(
                        train_shadow_mrr),  # exponential moving average
                    "val_loss=",
                    "{:.5f}".format(val_cost),
                    "val_mrr=",
                    "{:.5f}".format(val_mrr),
                    "val_mrr_ema=",
                    "{:.5f}".format(shadow_mrr),  # exponential moving average
                    "time=",
                    "{:.5f}".format(avg_time))

            batch_iter += 1
            total_steps += 1

            if total_steps > FLAGS.max_total_steps:
                break

        if total_steps > FLAGS.max_total_steps:
            break

    print("Optimization Finished!")
    if FLAGS.save_embeddings:
        # Embeddings are computed against the validation adjacency.
        sess.run(val_adj_info.op)

        save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size,
                            log_dir())

        if FLAGS.model == "n2v":
            # stopping the gradient for the already trained nodes
            train_ids = tf.constant(
                [[id_map[n]] for n in G.nodes_iter()
                 if not G.node[n]['val'] and not G.node[n]['test']],
                dtype=tf.int32)
            test_ids = tf.constant([[id_map[n]] for n in G.nodes_iter()
                                    if G.node[n]['val'] or G.node[n]['test']],
                                   dtype=tf.int32)
            update_nodes = tf.nn.embedding_lookup(model.context_embeds,
                                                  tf.squeeze(test_ids))
            no_update_nodes = tf.nn.embedding_lookup(model.context_embeds,
                                                     tf.squeeze(train_ids))
            update_nodes = tf.scatter_nd(test_ids, update_nodes,
                                         tf.shape(model.context_embeds))
            no_update_nodes = tf.stop_gradient(
                tf.scatter_nd(train_ids, no_update_nodes,
                              tf.shape(model.context_embeds)))
            model.context_embeds = update_nodes + no_update_nodes
            sess.run(model.context_embeds)

            # run random walks
            from graphsage.utils import run_random_walks
            nodes = [
                n for n in G.nodes_iter()
                if G.node[n]["val"] or G.node[n]["test"]
            ]
            start_time = time.time()
            pairs = run_random_walks(G, nodes, num_walks=50)
            walk_time = time.time() - start_time

            test_minibatch = EdgeMinibatchIterator(
                G,
                id_map,
                placeholders,
                batch_size=FLAGS.batch_size,
                max_degree=FLAGS.max_degree,
                num_neg_samples=FLAGS.neg_sample_size,
                context_pairs=pairs,
                n2v_retrain=True,
                fixed_n2v=True)

            start_time = time.time()
            print("Doing test training for n2v.")
            test_steps = 0
            for epoch in range(FLAGS.n2v_test_epochs):
                test_minibatch.shuffle()
                while not test_minibatch.end():
                    feed_dict = test_minibatch.next_minibatch_feed_dict()
                    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                    outs = sess.run([
                        model.opt_op, model.loss, model.ranks, model.aff_all,
                        model.mrr, model.outputs1
                    ],
                                    feed_dict=feed_dict)
                    if test_steps % FLAGS.print_every == 0:
                        print("Iter:", '%04d' % test_steps, "train_loss=",
                              "{:.5f}".format(outs[1]), "train_mrr=",
                              "{:.5f}".format(outs[-2]))
                    test_steps += 1
            train_time = time.time() - start_time
            save_val_embeddings(sess,
                                model,
                                minibatch,
                                FLAGS.validate_batch_size,
                                log_dir(),
                                mod="-test")
            print("Total time: ", train_time + walk_time)
            print("Walk time: ", walk_time)
            print("Train time: ", train_time)
Example #22
0
    #     subdircount += 1

    tfprvs = tf.placeholder(tf.float32, shape=[4, 256, 448, 3], name="first_frame")
    tfnext = tf.placeholder(tf.float32, shape=[4, 256, 448, 3], name="second_frame")

    l_r = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    lamda = tf.placeholder(tf.int16, shape=[], name="train_lambda")

    recon, mse, bpp = net(tfprvs, tfnext)
    train_loss = tf.cast(lamda, tf.float32) * mse + bpp
    train = tf.train.AdamOptimizer(learning_rate=l_r).minimize(train_loss)
    aux_step1 = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(net.ofcomp.entropy_bottleneck.losses[0])
    aux_step2 = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(net.rescomp.entropy_bottleneck.losses[0])

    tfvideo_batch = tf.get_variable("tfvideo_batch", initializer=tf.constant(0))
    increment_video_batch = tf.assign(tfvideo_batch, tfvideo_batch + 1)
    directory = tf.get_variable("directory", initializer=tf.constant(1))

    increment_directory = tf.assign(directory, directory + 1)
    init_video_batch_updater = tf.assign(tfvideo_batch, 0)
    init_directory_updater = tf.assign(directory, 1)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    starting = args.restore

    with tf.Session() as sess:
        sess.run(init)
        if starting:
    def simulate(self, action):
        """Simulates one step of the learned environment model.

        Args:
            action: batch of actions to apply for this step.

        Returns:
            A `(reward, done)` pair of tensors for the batch; `done` is a
            constant False tensor (the model never signals termination).
        """
        with tf.name_scope("environment/simulate"):
            # Replicate the single action across the model's frame axis.
            actions = tf.concat([tf.expand_dims(action, axis=1)] *
                                self._num_frames,
                                axis=1)
            history = self.history_buffer.get_all_elements()
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                # We only need 1 target frame here, set it.
                hparams_target_frames = self._model.hparams.video_num_target_frames
                self._model.hparams.video_num_target_frames = 1
                model_output = self._model.infer({
                    "inputs":
                    history,
                    "input_action":
                    actions,
                    "reset_internal_states":
                    self._reset_model.read_value()
                })
                # Restore the caller's target-frames setting after inference.
                self._model.hparams.video_num_target_frames = hparams_target_frames

            # Drop the single target-frame axis and cast to the env dtype.
            observ = tf.cast(tf.squeeze(model_output["targets"], axis=1),
                             self.observ_dtype)

            reward = tf.to_float(model_output["target_reward"])
            reward = tf.reshape(reward,
                                shape=(self.batch_size, )) + self._min_reward

            if self._intrinsic_reward_scale:
                # Use the model's uncertainty about its prediction as an intrinsic
                # reward. The uncertainty is measured by the log probability of the
                # predicted pixel value.
                if "targets_logits" not in model_output:
                    raise ValueError(
                        "The use of intrinsic rewards requires access to "
                        "the logits. Ensure that model.infer returns "
                        "'targets_logits'")
                uncertainty_reward = compute_uncertainty_reward(
                    model_output["targets_logits"], model_output["targets"])
                # Cap the intrinsic bonus at 1.
                uncertainty_reward = tf.minimum(
                    1., self._intrinsic_reward_scale * uncertainty_reward)
                uncertainty_reward = tf.Print(uncertainty_reward,
                                              [uncertainty_reward],
                                              message="uncertainty_reward",
                                              first_n=1,
                                              summarize=8)
                reward += uncertainty_reward

            # This environment never terminates episodes itself.
            done = tf.constant(False, tf.bool, shape=(self.batch_size, ))

            with tf.control_dependencies([observ]):
                # Optionally dump the predicted frame for video logging.
                dump_frame_op = tf.cond(
                    self._video_condition,
                    lambda: tf.py_func(
                        self._video_dump_frame,  # pylint: disable=g-long-lambda
                        [observ, reward],
                        []),
                    tf.no_op)
                # Persist the new observation and advance the history buffer,
                # then clear the reset flag before returning.
                with tf.control_dependencies([
                        self._observ.assign(observ),
                        self.history_buffer.move_by_one_element(observ),
                        dump_frame_op
                ]):
                    clear_reset_model_op = tf.assign(self._reset_model,
                                                     tf.constant(0.0))
                    with tf.control_dependencies([clear_reset_model_op]):
                        return tf.identity(reward), tf.identity(done)
Example #24
0
 def set_state(self, state):
     """Return ops that load the posterior mean and log-variance from `state`.

     Args:
         state: sequence whose first element holds the new mean values and
             whose second element holds the new log-variance values.

     Returns:
         A list of two `tf.assign` ops, one per variable.
     """
     mean_op = tf.assign(self.mean_variable, state[0])
     log_var_op = tf.assign(self.log_var, state[1])
     return [mean_op, log_var_op]
Example #25
0
    def _build_params(self):
        """Create and count model parameters.

        Builds (1) the word-embedding matrix with row-wise embedding dropout,
        (2) one fused 4-gate LSTM weight per layer with a DropConnect mask on
        the hidden-to-hidden portion, and (3) non-trainable initial-state
        variables (plus zero-reset ops) for both batch training and
        single-example evaluation. Populates `self.batch_init_states`,
        `self.train_params`, `self.test_init_states`, and `self.eval_params`.
        """
        print('-' * 80)
        print('Building model params')
        with tf.variable_scope(self.name):
            with tf.variable_scope('embedding'):
                initializer = tf.initializers.random_uniform(
                    -self.params.init_range, self.params.init_range)
                w_emb = tf.get_variable(
                    'w', [self.params.vocab_size, self.params.emb_size],
                    initializer=initializer)
                # Noise shape [vocab_size, 1] zeroes whole rows, i.e. drops
                # entire word embeddings rather than individual units.
                dropped_w_emb = tf.layers.dropout(w_emb,
                                                  self.params.drop_e,
                                                  [self.params.vocab_size, 1],
                                                  training=True)

            w_lstm = []
            dropped_w_lstm = []
            with tf.variable_scope('lstm'):
                for i in range(self.params.num_layers):
                    # First layer consumes embeddings; the last layer emits
                    # emb_size so its output can be tied with the embedding.
                    inp_size = self.params.emb_size if i == 0 else self.params.hidden_size
                    hid_size = (self.params.emb_size
                                if i == self.params.num_layers -
                                1 else self.params.hidden_size)
                    init_range = 1.0 / np.sqrt(hid_size)
                    initializer = tf.initializers.random_uniform(
                        -init_range, init_range)
                    with tf.variable_scope('layer_{0}'.format(i)):
                        # One fused weight matrix covering all four LSTM gates.
                        w = tf.get_variable(
                            'w', [inp_size + hid_size, 4 * hid_size],
                            initializer=initializer)
                        # DropConnect: mask only the hidden-to-hidden rows;
                        # the input rows pass through unmasked (all ones).
                        i_mask = tf.ones([inp_size, 4 * hid_size],
                                         dtype=tf.float32)
                        h_mask = _gen_mask([hid_size, 4 * hid_size],
                                           self.params.drop_w)
                        mask = tf.concat([i_mask, h_mask], axis=0)
                        dropped_w = w * mask
                        w_lstm.append(w)
                        dropped_w_lstm.append(dropped_w)

            # Non-trainable c/h state holders plus ops resetting them to zero.
            with tf.variable_scope('init_states'):
                batch_prev_c, batch_prev_h, batch_reset = [], [], []
                test_prev_c, test_prev_h, test_reset = [], [], []
                for i in range(self.params.num_layers):
                    # NOTE(review): inp_size is computed but unused in this loop.
                    inp_size = self.params.emb_size if i == 0 else self.params.hidden_size
                    hid_size = (self.params.emb_size
                                if i == self.params.num_layers -
                                1 else self.params.hidden_size)

                    with tf.variable_scope('layer_{0}'.format(i)):
                        with tf.variable_scope('batch'):
                            # Training states: one row per batch element.
                            init_shape = [self.params.batch_size, hid_size]
                            batch_prev_c.append(
                                tf.get_variable('c',
                                                init_shape,
                                                dtype=tf.float32,
                                                trainable=False))
                            batch_prev_h.append(
                                tf.get_variable('h',
                                                init_shape,
                                                dtype=tf.float32,
                                                trainable=False))
                            zeros = np.zeros(init_shape, dtype=np.float32)
                            batch_reset.append(
                                tf.assign(batch_prev_c[-1], zeros))
                            batch_reset.append(
                                tf.assign(batch_prev_h[-1], zeros))
                        with tf.variable_scope('test'):
                            # Evaluation states: single example (batch of 1).
                            init_shape = [1, hid_size]
                            test_prev_c.append(
                                tf.get_variable('c',
                                                init_shape,
                                                dtype=tf.float32,
                                                trainable=False))
                            test_prev_h.append(
                                tf.get_variable('h',
                                                init_shape,
                                                dtype=tf.float32,
                                                trainable=False))
                            zeros = np.zeros(init_shape, dtype=np.float32)
                            test_reset.append(tf.assign(
                                test_prev_c[-1], zeros))
                            test_reset.append(tf.assign(
                                test_prev_h[-1], zeros))

        # NOTE(review): counts every trainable variable currently in the
        # graph, not only those created above -- confirm this is intended.
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        print('Model has {0} params'.format(num_params))

        self.batch_init_states = {
            'c': batch_prev_c,
            'h': batch_prev_h,
            'reset': batch_reset,
        }
        # Training uses the dropped (masked) weights; the softmax weights are
        # tied to the (undropped) embedding matrix.
        self.train_params = {
            'w_emb': dropped_w_emb,
            'w_lstm': dropped_w_lstm,
            'w_soft': w_emb,
        }
        self.test_init_states = {
            'c': test_prev_c,
            'h': test_prev_h,
            'reset': test_reset,
        }
        # Evaluation uses the raw weights (no dropout/DropConnect).
        self.eval_params = {
            'w_emb': w_emb,
            'w_lstm': w_lstm,
            'w_soft': w_emb,
        }
Example #26
0
 def set_state(self, state):
     """Restore the factorised-posterior state plus the off-diagonal block.

     Args:
         state: sequence whose leading elements feed the base class and whose
             last element holds the off-diagonal variable values.

     Returns:
         A list of assignment ops: the base-class ops followed by the
         off-diagonal assignment.
     """
     ops = list(FactorisedPosterior.set_state(self, state[:-1]))
     off_diag_op = tf.assign(self.off_diag_vars_base, state[-1],
                             validate_shape=False)
     ops.append(off_diag_op)
     return ops
Example #27
0
    def _create_average_ops(self):
        """Build moving average ops.

        Creates one zero-initialized shadow ("average") variable and one
        backup variable per trainable variable, plus a flag marking that
        averaging has started.

        Returns:
            average_op: list of ops, each updating a shadow variable with the
                running arithmetic mean of its trainable variable.
            use_average_op: grouped op that backs up the live weights and then
                swaps the averaged weights in (for evaluation).
            reverse_average_op: grouped op restoring the backed-up weights.
        """
        print('Creating moving average ops')

        with tf.variable_scope('moving_avg_flag'):
            # Stored as a variable (not a Python bool) so the flag survives
            # checkpoint save/restore.
            self.moving_avg_started = tf.get_variable(
                'flag', [],
                tf.int32,
                initializer=tf.initializers.zeros(),
                trainable=False)
            self.start_moving_avg_op = tf.assign(self.moving_avg_started, 1)

        all_vars = tf.trainable_variables()
        average_pairs = []
        var_cnt = 0
        with tf.variable_scope('average'):
            # One zero-initialized shadow variable per trainable variable.
            for v in all_vars:
                avg_v = tf.get_variable(str(var_cnt),
                                        shape=v.shape,
                                        dtype=v.dtype,
                                        initializer=tf.zeros_initializer,
                                        trainable=False)
                var_cnt += 1
                average_pairs.append([v, avg_v])
        backup_pairs = []
        var_cnt = 0
        with tf.variable_scope('backup'):
            # Backup slots used to restore the raw weights after evaluation.
            # NOTE(review): no explicit initializer here; contents are
            # meaningless until use_average_op writes into them -- confirm.
            for v in all_vars:
                backup_v = tf.get_variable(str(var_cnt),
                                           shape=v.shape,
                                           dtype=v.dtype,
                                           trainable=False)
                var_cnt += 1
                backup_pairs.append([v, backup_v])

        with tf.variable_scope('avg_step'):
            # NOTE(review): avg_step also lacks an explicit initializer;
            # presumably it is reset before averaging starts -- confirm.
            avg_step = tf.get_variable('step', [],
                                       dtype=tf.float32,
                                       trainable=False)

        # avg_step is incremented before any average is computed.
        with tf.control_dependencies([tf.assign_add(avg_step, 1.0)]):
            average_op = []
            for v, avg_v in average_pairs:
                # Incremental arithmetic mean:
                # avg_t = avg_{t-1} + (v - avg_{t-1}) / t.
                mu = 1 / avg_step
                new_avg = mu * v + (1 - mu) * avg_v
                with tf.control_dependencies([new_avg]):
                    average_op.append(tf.assign(avg_v, new_avg))

        assert len(average_pairs) == len(all_vars)
        assert len(average_pairs) == len(backup_pairs)
        use_average_op = []
        for i in range(len(average_pairs)):
            v, avg_v = average_pairs[i]
            _, backup_v = backup_pairs[i]
            # Back up the live weight first, then overwrite it with the average.
            with tf.control_dependencies([tf.assign(backup_v, v)]):
                use_average_op.append(tf.assign(v, avg_v))
        use_average_op = tf.group(*use_average_op)

        reverse_average_op = []
        for v, backup_v in backup_pairs:
            reverse_average_op.append(tf.assign(v, backup_v))
        reverse_average_op = tf.group(*reverse_average_op)

        return average_op, use_average_op, reverse_average_op
    def __init__(
        self,
        n_actions,
        n_features,  # number of observation/state features (e.g. length/width/height)
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=300,
        memory_size=500,
        double=True,
        batch_size=32,
        e_greedy_increment=None,
        prioritized=True,
        output_graph=False,
    ):
        """Set up a (double / prioritized-replay) DQN agent.

        Stores the hyperparameters, creates the replay memory (a prioritized
        `Memory` tree or a plain zero-filled numpy array), builds the
        eval/target networks, wires up the target-network sync ops, and
        starts a TensorFlow session.
        """
        self.n_actions = n_actions
        self.n_features = n_features  # number of observation/state features (e.g. length/width/height)
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon_max = e_greedy  # upper bound for epsilon
        self.replace_target_iter = replace_target_iter  # steps between target_net parameter swaps
        self.memory_size = memory_size  # replay memory capacity
        self.batch_size = batch_size  # how many memories to sample per update
        self.epsilon_increment = e_greedy_increment  # per-step epsilon increment
        self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
        # Exploration schedule: with e_greedy_increment=None epsilon starts at
        # epsilon_max (fixed rate); with e_greedy_increment set, epsilon starts
        # at 0 and is expected to grow toward epsilon_max.
        # TODO(xhx): how is exploration mode (re)started later?

        self.double = double
        self.prioritized = prioritized

        # Count of learning steps (decides when to swap target_net params).
        self.learn_step_counter = 0

        #############################prioritized####################################################
        if self.prioritized:
            self.memory = Memory(capacity=memory_size)
        else:
            self.memory = np.zeros(
                (self.memory_size,
                 n_features * 2 + 2))  # all-zero memory rows of [s, a, r, s_]
        #############################prioritized####################################################

        # row size = len(s) + len(s_) + a (0/1/2/3) + r
        # self.memory = np.zeros((self.memory_size, n_features * 2 + 2))  # numpy instead of pandas for speed

        # Build [target_net, evaluate_net].
        self._build_net()

        # Sync ops copying eval_net parameters into target_net.
        # _build_net stores each net's w1/b1/w2/b2 in the collections
        # 'target_net_params' and 'eval_net_params'.
        t_params = tf.get_collection('target_net_params')  # fetch target_net params
        e_params = tf.get_collection('eval_net_params')  # fetch eval_net params
        self.replace_target_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]  # ops that overwrite target_net params with eval_net params

        self.sess = tf.Session()

        # Optionally dump the graph for tensorboard.
        if output_graph:
            # $ tensorboard --logdir=logs
            tf.summary.FileWriter("logs/", self.sess.graph)

        self.sess.run(tf.global_variables_initializer())
        self.cost_his = []  # record every cost so it can be plotted afterwards
Example #29
0
def learn(
        env,
        model_path,
        data_path,
        policy_fn,
        *,
        horizon=150,  # timesteps per actor per update
        rolloutSize=50,
        clip_param=0.2,
        entcoeff=0.02,  # clipping parameter epsilon, entropy coeff
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=32,  # optimization hypers
        gamma=0.99,
        lam=0.95,  # advantage estimation
        max_iters=0,  # time constraint
        adam_epsilon=1e-4,
        schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
        retrain=False):
    """Train a policy on `env` with clipped-surrogate PPO.

    Builds new/old policy networks, the PPO clipped objective, a value-loss
    and entropy bonus, then alternates rollout collection with several Adam
    epochs over the collected data. Rollouts are pickled to
    `data_path + 'rollout_data.pkl'` after each iteration.

    Args:
        env: environment exposing `observation_space` / `action_space`.
        model_path: checkpoint path loaded when `retrain` is true.
        data_path: directory prefix for the pickled rollout data.
        policy_fn: factory `(name, ob_space, ac_space) -> policy` network.
        retrain: when true, restore weights from `model_path` before training.

    Returns:
        The trained policy network `pi`.
    """
    # ---------------- Setup losses and policy ----------------
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space,
                   ac_space)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space)  # Network for old policy
    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return
    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    # KL(old || new) and entropy, averaged over the batch.
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param,
                             1.0 + clip_param) * atarg  #
    pol_surr = -tf.reduce_mean(tf.minimum(
        surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    # Op that copies the current policy's weights into oldpi.
    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(oldpi.get_variables(), pi.get_variables())
        ])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    adam.sync()

    # ---------------- Prepare for rollouts ----------------
    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=5)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=5)  # rolling buffer for episode rewards

    p = []  # for saving the rollouts

    if retrain:  # fixed: was `retrain == True`
        print("Retraining the policy from saved path")
        time.sleep(2)
        U.load_state(model_path)
    max_timesteps = int(horizon * rolloutSize * max_iters)

    while True:
        if max_iters and iters_so_far >= max_iters:
            break
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            # NOTE: the linear schedule requires max_iters > 0, otherwise
            # max_timesteps is 0 and this divides by zero.
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)
        print("Collecting samples for policy optimization !! ")
        # Start rendering rollouts once training is far enough along.
        render = iters_so_far > 70
        rollouts = sample_trajectory(pi,
                                     env,
                                     horizon=horizon,
                                     rolloutSize=rolloutSize,
                                     stochastic=True,
                                     render=render)
        # Persist every rollout collected so far; use a context manager so
        # the file handle is closed promptly (was a leaked `open(...)`).
        p.append({'rollouts': rollouts})
        data_file_name = data_path + 'rollout_data.pkl'
        with open(data_file_name, "wb") as data_file:
            pickle.dump(p, data_file)

        add_vtarg_and_adv(rollouts, gamma, lam)

        ob, ac, atarg, tdlamret = rollouts["ob"], rollouts["ac"], rollouts[
            "adv"], rollouts["tdlamret"]
        atarg = (atarg - atarg.mean()
                 ) / atarg.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    deterministic=pi.recurrent)
        optim_batchsize = optim_batchsize or ob.shape[0]

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values
        logger.log("Optimizing...")
        # Here we do a bunch of optimization epochs over the data
        for _ in range(optim_epochs):
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)

        # Aggregate episode statistics across MPI workers.
        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

    return pi
Example #30
0
 def reset_cell_states(self):
     """Zero every stored cell-state tensor across all cell groups.

     Each state variable is overwritten in place via an assign op executed
     on the object's session.
     """
     for group in self.cell_groups.values():
         for state in group.states.values():
             self.session.run(tf.assign(state, tf.zeros_like(state)))