def testSecondOrderGradientCalculation(self):
    """Checks second-order-gradient bookkeeping in the pruning library.

    Builds a 10-element weight vector, runs the old/old-old weight snapshot
    ops, doubles the weights, then runs the gradient update op and verifies:
      * the tracked "gradient" equals 0.5 * l2_normalize(original weights)
        (0.5 is the configured gradient_decay_rate), and
      * old_weight and old_old_weight snapshots agree (both taken before the
        weights were doubled).
    """
    param_list = [
        "prune_option=second_order_gradient",
        "gradient_decay_rate=0.5",
    ]
    test_spec = ",".join(param_list)
    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
    tf.logging.info(pruning_hparams)

    w = tf.Variable(tf.linspace(1.0, 10.0, 10), name="weights")
    # apply_mask registers w with the pruning machinery; the masked tensor
    # itself is not needed by this test.
    _ = pruning.apply_mask(w, prune_option="second_order_gradient")

    p = pruning.Pruning(pruning_hparams)
    old_weight_update_op = p.old_weight_update_op()
    old_old_weight_update_op = p.old_old_weight_update_op()
    gradient_update_op = p.gradient_update_op()

    with self.cached_session() as session:
        tf.global_variables_initializer().run()
        # Snapshot the weights twice (old and old-old) before mutating them.
        session.run(old_weight_update_op)
        session.run(old_old_weight_update_op)
        # Double the weights so the gradient update sees a weight delta.
        session.run(tf.assign(w, tf.math.scalar_mul(2.0, w)))
        session.run(gradient_update_op)

        old_weights = pruning.get_old_weights()
        old_old_weights = pruning.get_old_old_weights()
        gradients = pruning.get_gradients()

        old_weight = old_weights[0]
        old_old_weight = old_old_weights[0]
        gradient = gradients[0]
        self.assertAllEqual(
            gradient.eval(),
            tf.math.scalar_mul(
                0.5,
                tf.nn.l2_normalize(tf.linspace(1.0, 10.0, 10))).eval())
        self.assertAllEqual(old_weight.eval(), old_old_weight.eval())
def get_checkpoint_init_fn():
    """Returns the checkpoint init_fn if the checkpoint is provided."""
    if not FLAGS.fine_tune_checkpoint:
        return None

    restore_vars = slim.get_variables_to_restore()
    reset_global_step = tf.assign(tf.train.get_or_create_global_step(), 0)
    # When restoring from a floating point model, the min/max values for
    # quantized weights and activations are not present, so we tell slim to
    # skip variables that are missing from the checkpoint
    # (ignore_missing_vars=True).
    restore_fn = slim.assign_from_checkpoint_fn(
        FLAGS.fine_tune_checkpoint,
        restore_vars,
        ignore_missing_vars=True)

    def init_fn(sess):
        restore_fn(sess)
        # After restoring from a floating point model, zero the global step
        # so exponential learning-rate decay starts from reasonable values.
        sess.run(reset_global_step)

    return init_fn
def _setup_graph(self, n_inp, n_out, drop_frac, start_iter=1, end_iter=4,
                 freq_iter=2):
    """Setups a trivial training procedure for sparse training.

    Args:
      n_inp: int, input dimension of the masked layer.
      n_out: int, output dimension of the masked layer.
      drop_frac: drop fraction passed to the SparseSETOptimizer.
      start_iter: first iteration at which mask updates may run.
      end_iter: last iteration at which mask updates may run.
      freq_iter: interval (in iterations) between mask updates.

    Returns:
      Tuple of (sess, train_op, mask, weight, global_step).
    """
    tf.reset_default_graph()
    optim = tf.train.GradientDescentOptimizer(0.1)
    sparse_optim = sparse_optimizers.SparseSETOptimizer(
        optim, start_iter, end_iter, freq_iter, drop_fraction=drop_frac)
    x = tf.random.uniform((1, n_inp))
    y = layers.masked_fully_connected(x, n_out, activation_fn=None)
    # Fix: global_step was created twice in the original; a single
    # get_or_create_global_step() call is sufficient (the second call
    # returned the same variable anyway).
    global_step = tf.train.get_or_create_global_step()
    weight = pruning.get_weights()[0]  # There is one masked layer to be trained.
    mask = pruning.get_masks()[0]
    # Around half of the values of the mask is set to zero with `mask_update`.
    mask_update = tf.assign(
        mask,
        tf.constant(
            np.random.choice([0, 1], size=(n_inp, n_out), p=[1. / 2, 1. / 2]),
            dtype=tf.float32))
    loss = tf.reduce_mean(y)
    train_op = sparse_optim.minimize(loss, global_step)
    # Init
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    sess.run([mask_update])
    return sess, train_op, mask, weight, global_step
def gen_train_op(cost, params, step, iters, flags):
    """Build the generator train op.

    Args:
      cost: scalar tensor, generator loss to minimize.
      step: current training step tensor (used for LR decay).
      iters: total number of iterations (decay horizon).
      params: list of generator variables to optimize.
      flags: config object providing lr_decay, lr_g, beta1, beta2 and
        weight_decay_g.

    Returns:
      The training op (with optional weight decay chained after the Adam
      update).

    Raises:
      ValueError: if flags.lr_decay is not one of 'linear', 'quadratic',
        'none'.
    """
    if flags.lr_decay == 'linear':
        step_lr = (1. - (tf.cast(step, tf.float32) / iters))
    elif flags.lr_decay == 'quadratic':
        step_lr = ((1. - (tf.cast(step, tf.float32) / iters))**2)
    elif flags.lr_decay == 'none':
        step_lr = 1.
    else:
        # Fix: previously an unrecognized lr_decay left step_lr unbound and
        # crashed below with NameError; fail fast with a clear message.
        raise ValueError('Unknown lr_decay: {!r}'.format(flags.lr_decay))

    train_op = tf.train.AdamOptimizer(step_lr * flags.lr_g, flags.beta1,
                                      flags.beta2).minimize(
                                          cost,
                                          var_list=params,
                                          colocate_gradients_with_ops=True)
    if flags.weight_decay_g is not None:
        # Decoupled weight decay applied to 'weights' variables only, scaled
        # by the same LR-decay schedule; the control dependency guarantees the
        # Adam update runs before the decay assignments.
        decay = (step_lr * flags.weight_decay_g)
        with tf.control_dependencies([train_op]):
            weights = [p for p in params if 'weights' in p.name]
            decayed = [w - (decay * w) for w in weights]
            decay_op = tf.group(
                *[tf.assign(w, d) for w, d in zip(weights, decayed)])
        train_op = decay_op
    return train_op
def testAppendGradientsWithLossScaleWithtNan(self):
    """Auto loss scaling: a NaN/Inf gradient skips the update and halves scale.

    NOTE(review): the method name typo ("Witht") is left as-is — renaming
    would change the test's discovery name.
    """
    v = tf.Variable(0)
    training_ops = []
    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
        enable_auto_loss_scale=True,
        loss_scale=tf.Variable(4, dtype=tf.float32),
        loss_scale_normal_steps=tf.Variable(10),
        inc_loss_scale_every_n=10,
        is_chief=True)
    # grad_has_inf_nan=True forces the "bad gradient" path.
    variable_mgr_util.append_gradients_with_loss_scale(
        training_ops,
        get_apply_gradients_ops_func,
        loss_scale_params,
        grad_has_inf_nan=tf.constant(True))

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(training_ops)
        self.assertEqual(sess.run(v), 0)  # Skip updating for v.
        # halve loss_scale and reset local_scale_normal_steps.
        self.assertEqual(sess.run(loss_scale_params.loss_scale), 2)
        self.assertEqual(
            sess.run(loss_scale_params.loss_scale_normal_steps), 0)
def vq_discrete_bottleneck(x, hparams):
    """Simple vector quantized discrete bottleneck.

    Quantizes `x` against a codebook of 2**bottleneck_bits entries and updates
    the codebook with exponential moving averages (EMA) of assignment counts
    and assigned vectors.

    Args:
      x: input tensor; flattened to [-1, hparams.hidden_size] internally.
      hparams: must provide beta, bottleneck_bits, hidden_size, decay,
        epsilon, and the codebook variables means / ema_means / ema_count.

    Returns:
      discrete: one-hot assignments reshaped to x's leading dims plus
        [bottleneck_size].
      loss: commitment loss, hparams.beta * e_loss, gated on the EMA updates.
    """
    tf.logging.info("Using EMA with beta = {}".format(hparams.beta))
    bottleneck_size = 2**hparams.bottleneck_bits
    x_shape = common_layers.shape_list(x)
    x = tf.reshape(x, [-1, hparams.hidden_size])
    x_means_hot, e_loss = vq_nearest_neighbor(x, hparams)
    means, ema_means, ema_count = (hparams.means, hparams.ema_means,
                                   hparams.ema_count)

    # Update the ema variables.
    updated_ema_count = moving_averages.assign_moving_average(
        ema_count,
        tf.reduce_sum(x_means_hot, axis=0),
        hparams.decay,
        zero_debias=False)

    # dw[k] = sum of inputs assigned to codebook entry k.
    dw = tf.matmul(x_means_hot, x, transpose_a=True)
    updated_ema_means = moving_averages.assign_moving_average(
        ema_means, dw, hparams.decay, zero_debias=False)
    n = tf.reduce_sum(updated_ema_count, axis=-1, keepdims=True)
    # Laplace smoothing of the counts so empty codebook entries don't
    # divide by zero below.
    updated_ema_count = ((updated_ema_count + hparams.epsilon) /
                         (n + bottleneck_size * hparams.epsilon) * n)
    # pylint: disable=g-no-augmented-assignment
    updated_ema_means = updated_ema_means / tf.expand_dims(
        updated_ema_count, axis=-1)
    # pylint: enable=g-no-augmented-assignment
    # Gate the codebook write on e_loss so the EMA update runs whenever the
    # loss is computed.
    with tf.control_dependencies([e_loss]):
        update_means = tf.assign(means, updated_ema_means)
        with tf.control_dependencies([update_means]):
            loss = hparams.beta * e_loss

    discrete = tf.reshape(x_means_hot, x_shape[:-1] + [bottleneck_size])
    return discrete, loss
def set_vars(var_to_value_dict: dict) -> None:
    """Set the values of given tf.Variables.

    Equivalent to the following, but more efficient and does not bloat the tf graph:
    tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()]
    """
    assert_tf_initialized()
    assign_ops = []
    feed = {}

    for var, new_value in var_to_value_dict.items():
        assert is_tf_expression(var)
        setter_name = var.name.replace(":0", "/setter:0")
        try:
            # Reuse the setter op created by an earlier call, if any.
            setter = tf.get_default_graph().get_tensor_by_name(setter_name)
        except KeyError:
            # First use of this variable: build assign(var, placeholder) next
            # to the variable, ignoring surrounding control_dependencies.
            with absolute_name_scope(var.name.split(":")[0]):
                with tf.control_dependencies(None):
                    setter = tf.assign(
                        var,
                        tf.placeholder(var.dtype, var.shape, "new_value"),
                        name="setter")
        assign_ops.append(setter)
        # The placeholder is the assign op's second input.
        feed[setter.op.inputs[1]] = new_value

    run(assign_ops, feed)
def _prepare_solve(self, rs, ps):
    """Initializes residuals, search directions and solver bookkeeping.

    With a zero initial guess the residual is simply b; otherwise it is
    b - A@z. The search direction starts equal to the residual in both cases.
    """
    ops = []
    if self._conf['zero_guess']:
        for residual, rhs, direction in zip(rs, self._bs, ps):
            ops.extend([
                tf.assign(residual, rhs),   # initial residual
                tf.assign(direction, rhs),  # initial search direction
            ])
    else:
        for residual, rhs, direction, az in zip(rs, self._bs, ps, self._Azs):
            ops.extend([
                tf.assign(residual, rhs - az),   # initial residual
                tf.assign(direction, rhs - az),  # initial search direction
            ])
    # Reset scalar state: r^T r accumulator and the indefiniteness flag.
    ops.append(tf.assign(self._rTr, tf.zeros(shape=[], dtype=rs[0].dtype)))
    ops.append(tf.assign(self._indefinite, False))
    return tf.group(ops)
# _*_ coding utf-8 _*_
# Author:94342
# Time: 2020/9/1717:09
# File: New03.py
# Engine:PyCharm
import tensorflow.compat.v1 as tf

if __name__ == '__main__':
    # Graph-mode demo: increment a counter variable three times.
    tf.compat.v1.disable_eager_execution()
    v1 = tf.Variable(0, name='counter')
    one = tf.constant(1)
    temp = tf.add(v1, one)
    # `process` assigns v1 <- v1 + 1 each time it is run.
    process = tf.assign(v1, temp)
    # Fix: tf.initialize_all_variables() is deprecated; use
    # tf.global_variables_initializer() (same behavior, current API).
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        print(sess.run(v1))
        for i in range(3):
            sess.run(process)
            print(sess.run(v1))
def _forward(self, x, y, model_params, init_states, is_training=False):
    """Computes the logits.

    Args:
      x: [batch_size, num_steps], input batch.
      y: [batch_size, num_steps], output batch.
      model_params: a `dict` of params to use (w_emb, w_lstm, w_soft).
      init_states: a `dict` of params to use (previous cell/hidden states).
      is_training: if `True`, will apply regularizations.

    Returns:
      reg_loss: loss + regularization terms (equal to loss at eval time).
      loss: scalar, cross-entropy loss.
    """
    w_emb = model_params['w_emb']
    w_lstm = model_params['w_lstm']
    w_soft = model_params['w_soft']
    prev_c = init_states['c']
    prev_h = init_states['h']

    emb = tf.nn.embedding_lookup(w_emb, x)
    if is_training:
        # Input dropout with a mask shared across time steps
        # (noise_shape has 1 in the time dimension).
        emb = tf.layers.dropout(
            emb, self.params.drop_i,
            [self.params.batch_size, 1, self.params.emb_size], training=True)

        # Per-layer recurrent dropout masks; first and last layers get None.
        layer_masks = [None]
        for _ in range(1, self.params.num_layers - 1):
            mask = _gen_mask(
                [self.params.batch_size, self.params.hidden_size],
                self.params.drop_l)
            layer_masks.append(mask)
        layer_masks.append(None)
    else:
        layer_masks = [None] * self.params.num_layers

    out_c, out_h, all_h = _lstm(emb, prev_c, prev_h, w_lstm, layer_masks)
    top_h = all_h[-1]
    if is_training:
        top_h = tf.layers.dropout(
            top_h, self.params.drop_o,
            [self.params.batch_size, 1, self.params.emb_size], training=True)

    # Persist the final LSTM states back into the init-state variables so the
    # next call continues from where this one left off (stateful unrolling).
    carry_on = []
    for var, val in zip(prev_c + prev_h, out_c + out_h):
        carry_on.append(tf.assign(var, val))

    logits = tf.einsum('bnh,vh->bnv', top_h, w_soft)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
    loss = tf.reduce_mean(loss)  # TODO(hyhieu): watch for num_steps

    reg_loss = loss  # loss + regularization_terms, for training only
    if is_training:
        # L2 weight reg
        reg_loss += self.params.weight_decay * tf.add_n(
            [tf.reduce_sum(w**2) for w in tf.trainable_variables()])

        # activation L2 reg
        reg_loss += self.params.alpha * tf.add_n(
            [tf.reduce_mean(h**2) for h in all_h[:-1]])

        # activation slowness L2 reg (penalizes change between adjacent steps)
        reg_loss += self.params.beta * tf.add_n([
            tf.reduce_mean((h[:, 1:, :] - h[:, :-1, :])**2)
            for h in all_h[:-1]
        ])

    # tf.identity under the control dependency forces the state carry-over
    # assigns to run whenever the losses are evaluated.
    with tf.control_dependencies(carry_on):
        loss = tf.identity(loss)
        if is_training:
            reg_loss = tf.identity(reg_loss)

    return reg_loss, loss
def __init__(self, net_params, batch_size, num_classes):
    """
    Defines the TensorFlow model, loss, optimisation and accuracy. Then
    loads the MXNET weights into the model.
    """
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    # Convert incoming shared float arrays (from C++) to numpy in place.
    for key in net_params.keys():
        net_params[key] = _utils.convert_shared_float_array_to_numpy(
            net_params[key])

    _tf.reset_default_graph()

    self.num_classes = num_classes
    self.batch_size = batch_size

    # 28x28 single-channel drawings, NHWC layout.
    self.input = _tf.placeholder(_tf.float32, [None, 28, 28, 1])
    self.one_hot_labels = _tf.placeholder(_tf.int32,
                                          [None, self.num_classes])

    # Weights (zero-initialized; real values are assigned from net_params
    # after variable initialization below).
    weights = {
        'drawing_conv0_weight':
            _tf.Variable(_tf.zeros([3, 3, 1, 16]),
                         name='drawing_conv0_weight'),
        'drawing_conv1_weight':
            _tf.Variable(_tf.zeros([3, 3, 16, 32]),
                         name='drawing_conv1_weight'),
        'drawing_conv2_weight':
            _tf.Variable(_tf.zeros([3, 3, 32, 64]),
                         name='drawing_conv2_weight'),
        'drawing_dense0_weight':
            _tf.Variable(_tf.zeros([576, 128]),
                         name='drawing_dense0_weight'),
        'drawing_dense1_weight':
            _tf.Variable(_tf.zeros([128, self.num_classes]),
                         name='drawing_dense1_weight')
    }

    # Biases
    biases = {
        'drawing_conv0_bias':
            _tf.Variable(_tf.zeros([16]), name='drawing_conv0_bias'),
        'drawing_conv1_bias':
            _tf.Variable(_tf.zeros([32]), name='drawing_conv1_bias'),
        'drawing_conv2_bias':
            _tf.Variable(_tf.zeros([64]), name='drawing_conv2_bias'),
        'drawing_dense0_bias':
            _tf.Variable(_tf.zeros([128]), name='drawing_dense0_bias'),
        'drawing_dense1_bias':
            _tf.Variable(_tf.zeros([self.num_classes]),
                         name='drawing_dense1_bias')
    }

    # Three conv -> relu -> 2x2 max-pool stages.
    conv_1 = _tf.nn.conv2d(self.input, weights["drawing_conv0_weight"],
                           strides=1, padding='SAME')
    conv_1 = _tf.nn.bias_add(conv_1, biases["drawing_conv0_bias"])
    relu_1 = _tf.nn.relu(conv_1)
    pool_1 = _tf.nn.max_pool2d(relu_1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='VALID')

    conv_2 = _tf.nn.conv2d(pool_1, weights["drawing_conv1_weight"],
                           strides=1, padding='SAME')
    conv_2 = _tf.nn.bias_add(conv_2, biases["drawing_conv1_bias"])
    relu_2 = _tf.nn.relu(conv_2)
    pool_2 = _tf.nn.max_pool2d(relu_2, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='VALID')

    conv_3 = _tf.nn.conv2d(pool_2, weights["drawing_conv2_weight"],
                           strides=1, padding='SAME')
    conv_3 = _tf.nn.bias_add(conv_3, biases["drawing_conv2_bias"])
    relu_3 = _tf.nn.relu(conv_3)
    pool_3 = _tf.nn.max_pool2d(relu_3, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='VALID')

    # Flatten the data to a 1-D vector for the fully connected layer
    fc1 = _tf.reshape(pool_3, (-1, 576))

    fc1 = _tf.nn.xw_plus_b(fc1,
                           weights=weights["drawing_dense0_weight"],
                           biases=biases["drawing_dense0_bias"])
    fc1 = _tf.nn.relu(fc1)

    out = _tf.nn.xw_plus_b(fc1,
                           weights=weights["drawing_dense1_weight"],
                           biases=biases["drawing_dense1_bias"])
    softmax_out = _tf.nn.softmax(out)

    self.predictions = softmax_out

    # Loss
    self.cost = _tf.losses.softmax_cross_entropy(
        logits=out, onehot_labels=self.one_hot_labels,
        reduction=_tf.losses.Reduction.NONE)

    # Optimizer
    self.optimizer = _tf.train.AdamOptimizer(
        learning_rate=0.001).minimize(self.cost)

    # Predictions
    # NOTE(review): correct_prediction is computed but not stored or used
    # further in this constructor.
    correct_prediction = _tf.equal(_tf.argmax(self.predictions, 1),
                                   _tf.argmax(self.one_hot_labels, 1))

    self.sess = _tf.Session()
    self.sess.run(_tf.global_variables_initializer())

    # Assign the initialised weights from C++ to tensorflow
    layers = [
        'drawing_conv0_weight', 'drawing_conv0_bias',
        'drawing_conv1_weight', 'drawing_conv1_bias',
        'drawing_conv2_weight', 'drawing_conv2_bias',
        'drawing_dense0_weight', 'drawing_dense0_bias',
        'drawing_dense1_weight', 'drawing_dense1_bias'
    ]
    for key in layers:
        if 'bias' in key:
            # Biases are layout-independent; copy directly.
            self.sess.run(
                _tf.assign(
                    _tf.get_default_graph().get_tensor_by_name(key + ":0"),
                    net_params[key]))
        else:
            if 'drawing_dense0_weight' in key:
                '''
                To make output of CoreML pool3 (NCHW) compatible with
                TF (NHWC).
                Decompose FC weights to NCHW. Transpose to NHWC. Reshape
                back to FC.
                '''
                coreml_128_576 = net_params[key]
                coreml_128_576 = _np.reshape(coreml_128_576,
                                             (128, 64, 3, 3))
                coreml_128_576 = _np.transpose(coreml_128_576, (0, 2, 3, 1))
                coreml_128_576 = _np.reshape(coreml_128_576, (128, 576))
                self.sess.run(
                    _tf.assign(
                        _tf.get_default_graph().get_tensor_by_name(
                            key + ":0"),
                        _np.transpose(coreml_128_576, (1, 0))))
            elif 'dense' in key:
                dense_weights = _utils.convert_dense_coreml_to_tf(
                    net_params[key])
                self.sess.run(
                    _tf.assign(
                        _tf.get_default_graph().get_tensor_by_name(
                            key + ":0"),
                        dense_weights))
            else:
                # TODO: Call _utils.convert_conv2d_coreml_to_tf when
                # #2513 is merged
                # Conv weights: CoreML [out, in, H, W] -> TF [H, W, in, out].
                self.sess.run(
                    _tf.assign(
                        _tf.get_default_graph().get_tensor_by_name(
                            key + ":0"),
                        _np.transpose(net_params[key], (2, 3, 1, 0))))
def _resource_apply_dense(self, grad, handle):
    """Applies one Adafactor update to a dense variable.

    Maintains (optionally factored) second-moment estimates, scales the
    gradient by their inverse square root, applies optional clipping,
    momentum, bfloat16 encoding and simulated quantization, then assigns
    the new value to the variable.

    Args:
      grad: gradient tensor for the variable.
      handle: the variable to update.

    Returns:
      A grouped op performing the variable and slot updates.
    """
    var = handle
    grad = tf.to_float(grad)
    # epsilon1 keeps the second-moment estimate strictly positive.
    grad_squared = tf.square(grad) + self._epsilon1
    grad_squared_mean = tf.reduce_mean(grad_squared)
    decay_rate = self._call_if_callable(self._decay_rate)
    update_scale = self._call_if_callable(self._learning_rate)
    update_scale = tf.convert_to_tensor(update_scale, name="update_scale")
    update_scale = tf.cast(update_scale, grad_squared_mean.dtype.base_dtype)
    old_val = var
    if var.dtype.base_dtype == tf.bfloat16:
        old_val = tf.to_float(self._parameter_encoding.decode(old_val))
    if self._multiply_by_parameter_scale:
        update_scale *= tf.to_float(self._parameter_scale(old_val))
    # HACK: Make things dependent on grad.
    # This confounds the XLA rewriter and keeps it from fusing computations
    # across different variables.  This fusion is a bad for HBM usage, since
    # it causes the gradients to persist in memory.
    decay_rate += grad_squared_mean * 1e-30
    update_scale += grad_squared_mean * 1e-30
    # END HACK
    mixing_rate = 1.0 - decay_rate
    shape = var.get_shape().as_list()
    updates = []
    if self._should_use_factored_second_moment_estimate(shape):
        # Factored estimate: keep per-row and per-column means instead of a
        # full second-moment matrix (saves memory for large matrices).
        grad_squared_row_mean = tf.reduce_mean(grad_squared, -1)
        grad_squared_col_mean = tf.reduce_mean(grad_squared, -2)
        vr = self.get_slot(var, "vr")
        new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
        vc = self.get_slot(var, "vc")
        new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
        vr_update = tf.assign(vr, new_vr, use_locking=self._use_locking)
        vc_update = tf.assign(vc, new_vc, use_locking=self._use_locking)
        updates = [vr_update, vc_update]
        long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
        r_factor = tf.rsqrt(new_vr / long_term_mean)
        c_factor = tf.rsqrt(new_vc)
        x = grad * tf.expand_dims(r_factor, -1) * tf.expand_dims(
            c_factor, -2)
    else:
        # Unfactored estimate: full elementwise second moment.
        v = self.get_slot(var, "v")
        new_v = decay_rate * v + mixing_rate * grad_squared
        v_update = tf.assign(v, new_v, use_locking=self._use_locking)
        updates = [v_update]
        x = grad * tf.rsqrt(new_v)
    if self._clipping_threshold is not None:
        # Scale the update down when its RMS exceeds the threshold.
        clipping_denom = tf.maximum(
            1.0, reduce_rms(x) / self._clipping_threshold)
        x /= clipping_denom
    subtrahend = update_scale * x
    if self._beta1:
        # Momentum: exponential moving average of the update.
        m = self.get_slot(var, "m")
        new_m = self._beta1 * tf.to_float(m) + (1.0 -
                                                self._beta1) * subtrahend
        subtrahend = new_m
        new_m = common_layers.cast_like(new_m, var)
        updates.append(tf.assign(m, new_m, use_locking=self._use_locking))
    new_val = tf.to_float(old_val) - subtrahend
    if var.dtype.base_dtype == tf.bfloat16:
        new_val = self._parameter_encoding.encode(new_val,
                                                  self._quantization_noise)
    if self._simulated_quantize_bits:
        # NOTE(review): this path quantizes `var - subtrahend` (the raw
        # variable, not the decoded old_val) — presumably intentional for
        # the non-bfloat16 case; confirm before changing.
        new_val = quantization.simulated_quantize(
            var - subtrahend, self._simulated_quantize_bits,
            self._quantization_noise)
    new_val = tf.cast(new_val, var.dtype)
    var_update = tf.assign(var, new_val, use_locking=self._use_locking)
    updates = [var_update] + updates
    return tf.group(*updates)
def __init__(self, args):
    """Builds the trajectory VAE graph: encoder, Gaussian-mixture latent
    clusters, source/destination attention, and decoder with sampled softmax.

    Args:
      args: config object; fields used include batch_size, map_size,
        x_latent_size, rnn_size, mem_num, neg_size, learning_rate, eval.
    """
    self.args = args
    dense = tf.layers.dense

    # --- Placeholders -----------------------------------------------------
    inputs = tf.placeholder(shape=(args.batch_size, None),
                            dtype=tf.int32, name='inputs')
    time_inputs = tf.placeholder(shape=(args.batch_size, None),
                                 dtype=tf.int32, name='time_inputs')
    mask = tf.placeholder(shape=(args.batch_size, None),
                          dtype=tf.float32, name='inputs_mask')
    seq_length = tf.placeholder(shape=args.batch_size, dtype=tf.float32,
                                name='seq_length')
    self.s_inputs = s_inputs = tf.placeholder(shape=args.batch_size,
                                              dtype=tf.int32,
                                              name='s_inputs')
    self.d_inputs = d_inputs = tf.placeholder(shape=args.batch_size,
                                              dtype=tf.int32,
                                              name='d_inputs')
    self.input_form = [inputs, time_inputs, mask, seq_length]

    # Decoder is teacher-forced: shift inputs right by one (0 = start token)
    # and targets left by one (0 = end padding).
    decoder_inputs = tf.concat(
        [tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32), inputs],
        axis=1)
    decoder_targets = tf.concat(
        [inputs, tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32)],
        axis=1)
    decoder_mask = tf.concat(
        [mask, tf.zeros(shape=(args.batch_size, 1), dtype=tf.float32)],
        axis=1)

    # --- Embeddings -------------------------------------------------------
    x_size = out_size = args.map_size[0] * args.map_size[1]
    embeddings = tf.Variable(tf.random_uniform(
        [x_size, args.x_latent_size], -1.0, 1.0), dtype=tf.float32)
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(
        embeddings, decoder_inputs)

    # Time-of-week embedding (49 slots); its mean shifts the cluster priors.
    time_embeddings = tf.Variable(tf.random_uniform(
        [49, args.x_latent_size], -1.0, 1.0), dtype=tf.float32)
    encoder_time_inputs_embedded = tf.nn.embedding_lookup(
        time_embeddings, time_inputs)
    time_mean = tf.reduce_mean(encoder_time_inputs_embedded, axis=1)
    mu_c_delta = dense(time_mean, args.rnn_size, activation=None)
    stack_mu_c_delta = tf.stack([mu_c_delta] * args.mem_num, axis=1)
    log_sigma_sq_c_delta = dense(time_mean, args.rnn_size, activation=None)
    stack_log_sigma_sq_c_delta = tf.stack(
        [log_sigma_sq_c_delta] * args.mem_num, axis=1)

    # --- Encoder ----------------------------------------------------------
    with tf.variable_scope("encoder"):
        encoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
        _, encoder_final_state = tf.nn.dynamic_rnn(
            encoder_cell,
            encoder_inputs_embedded,
            sequence_length=seq_length,
            dtype=tf.float32,
        )

    # --- Mixture clusters (mean / log-variance / prior per component) -----
    with tf.variable_scope("clusters"):
        mu_c = tf.get_variable("mu_c", [args.mem_num, args.rnn_size],
                               initializer=tf.random_uniform_initializer(
                                   0.0, 1.0))
        log_sigma_sq_c = tf.get_variable(
            "sigma_sq_c", [args.mem_num, args.rnn_size],
            initializer=tf.constant_initializer(0.0),
            trainable=False)
        log_pi_prior = tf.get_variable(
            "log_pi_prior", args.mem_num,
            initializer=tf.constant_initializer(0.0),
            trainable=False)
        pi_prior = tf.nn.softmax(log_pi_prior)

        # Externally-fed cluster initialization (e.g. from k-means).
        init_mu_c = tf.placeholder(shape=(args.mem_num, args.rnn_size),
                                   dtype=tf.float32, name='init_mu_c')
        init_sigma_c = tf.placeholder(shape=(args.mem_num, args.rnn_size),
                                      dtype=tf.float32, name='init_sigma_c')
        init_pi = tf.placeholder(shape=args.mem_num, dtype=tf.float32,
                                 name='init_pi')
        self.cluster_init = [init_mu_c, init_sigma_c, init_pi]
        self.init_mu_c_op = tf.assign(mu_c, init_mu_c)
        self.init_sigma_c_op = tf.assign(log_sigma_sq_c, init_sigma_c)
        self.init_pi_op = tf.assign(log_pi_prior, init_pi)
        self.mu_c = mu_c
        self.sigma_c = log_sigma_sq_c
        self.pi = pi_prior

        # Broadcast cluster params over the batch and apply time deltas.
        stack_mu_c = tf.stack([mu_c] * args.batch_size, axis=0)
        stack_log_sigma_sq_c = tf.stack(
            [log_sigma_sq_c] * args.batch_size, axis=0)
        stack_mu_c += stack_mu_c_delta
        stack_log_sigma_sq_c += stack_log_sigma_sq_c_delta

    # --- Latent sample (reparameterization trick) -------------------------
    with tf.variable_scope("latent"):
        mu_z = dense(encoder_final_state, args.rnn_size,
                     activation=None)  # shape=(128, 256)
        log_sigma_sq_z = dense(encoder_final_state, args.rnn_size,
                               activation=None)  # shape=(128, 256)
        eps_z = tf.random_normal(shape=tf.shape(log_sigma_sq_z),
                                 mean=0, stddev=1, dtype=tf.float32)
        z = mu_z + tf.sqrt(tf.exp(log_sigma_sq_z)) * eps_z
        stack_mu_z = tf.stack([mu_z] * args.mem_num, axis=1)
        stack_log_sigma_sq_z = tf.stack(
            [log_sigma_sq_z] * args.mem_num, axis=1)
        stack_z = tf.stack([z] * args.mem_num, axis=1)
        self.batch_post_embedded = z

    # --- Source/destination attention over clusters -----------------------
    with tf.variable_scope("sd_attention"):
        s_embeddings = tf.Variable(tf.random_uniform(
            [x_size, args.rnn_size], -1.0, 1.0), dtype=tf.float32)
        d_embeddings = tf.Variable(tf.random_uniform(
            [x_size, args.rnn_size], -1.0, 1.0), dtype=tf.float32)
        s = tf.nn.embedding_lookup(s_embeddings, s_inputs)
        d = tf.nn.embedding_lookup(d_embeddings, d_inputs)
        sd = tf.concat([s, d], axis=1)
        hsd1 = dense(sd, args.rnn_size, activation=tf.nn.relu)
        sd_logits = dense(hsd1, args.mem_num, activation=tf.nn.relu)
        sd_att = tf.nn.softmax(sd_logits)  # for batch_latent_loss

    # --- Posterior cluster responsibilities -------------------------------
    with tf.variable_scope("attention"):
        # Negative Mahalanobis-style distance of z to each cluster.
        att_logits = -tf.reduce_sum(
            tf.square(stack_z - stack_mu_c) / tf.exp(stack_log_sigma_sq_c),
            axis=-1)
        att = tf.nn.softmax(att_logits) + 1e-10
        self.batch_att = att

    def generation(h):
        # Decodes from initial state `h`; returns reconstruction, latent KL,
        # category entropy losses and per-example log-likelihood.
        with tf.variable_scope("generation", reuse=tf.AUTO_REUSE):
            with tf.variable_scope("decoder"):
                decoder_init_state = h
                decoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
                decoder_outputs, _ = tf.nn.dynamic_rnn(
                    decoder_cell,
                    decoder_inputs_embedded,
                    initial_state=decoder_init_state,
                    sequence_length=seq_length,
                    dtype=tf.float32,
                )
            with tf.variable_scope("outputs"):
                out_w = tf.get_variable(
                    "out_w", [out_size, args.rnn_size], tf.float32,
                    tf.random_normal_initializer(stddev=0.02))
                out_b = tf.get_variable(
                    "out_b", [out_size], tf.float32,
                    initializer=tf.constant_initializer(0.0))
                # Sampled softmax keeps the output layer tractable for a
                # vocabulary of map_size[0]*map_size[1] cells.
                batch_rec_loss = tf.reduce_mean(
                    decoder_mask * tf.reshape(
                        tf.nn.sampled_softmax_loss(
                            weights=out_w,
                            biases=out_b,
                            labels=tf.reshape(decoder_targets,
                                              [-1, 1]),  # shape=(None, 1)
                            inputs=tf.reshape(
                                decoder_outputs,
                                [-1, args.rnn_size]),  # shape=(None, 256)
                            num_sampled=args.neg_size,
                            num_classes=out_size),
                        [args.batch_size, -1]),
                    axis=-1)
                target_out_w = tf.nn.embedding_lookup(
                    out_w, decoder_targets)
                target_out_b = tf.nn.embedding_lookup(
                    out_b, decoder_targets)
                batch_likelihood = tf.reduce_mean(
                    decoder_mask * tf.log_sigmoid(
                        tf.reduce_sum(decoder_outputs * target_out_w, -1) +
                        target_out_b),
                    axis=-1,
                    name="batch_likelihood")
                # KL between the posterior q(z|x) and the mixture prior,
                # weighted by the cluster responsibilities `att`.
                batch_latent_loss = 0.5 * tf.reduce_sum(
                    att * tf.reduce_mean(
                        stack_log_sigma_sq_c +
                        tf.exp(stack_log_sigma_sq_z) /
                        tf.exp(stack_log_sigma_sq_c) +
                        tf.square(stack_mu_z - stack_mu_c) /
                        tf.exp(stack_log_sigma_sq_c),
                        axis=-1),
                    axis=-1) - 0.5 * tf.reduce_mean(
                        1 + log_sigma_sq_z, axis=-1)
                # Negative entropy of the mean responsibilities
                # (encourages balanced cluster usage).
                batch_cate_loss = tf.reduce_mean(
                    tf.reduce_mean(att, axis=0) *
                    tf.log(tf.reduce_mean(att, axis=0)))
        return (batch_rec_loss, batch_latent_loss, batch_cate_loss,
                batch_likelihood)

    if args.eval:
        # Eval mode: decode from the hard-assigned cluster mean selected by
        # the source/destination attention.
        sd_z = tf.matmul(
            tf.one_hot(tf.argmax(sd_att, axis=-1), depth=args.mem_num,
                       axis=-1), mu_c)
        # sd_z = tf.matmul(tf.one_hot(tf.argmax(sd_att, axis=-1), depth=args.mem_num, axis=-1), mu_c+tf.reduce_mean(stack_mu_c_delta, 0))
        results = generation(sd_z)
        self.batch_likelihood = results[-1]
    else:
        results = generation(z)
        self.batch_likelihood = results[-1]

    self.rec_loss = rec_loss = tf.reduce_mean(results[0])
    self.latent_loss = latent_loss = tf.reduce_mean(results[1])
    self.cate_loss = cate_loss = results[2]
    self.sd_loss = sd_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=att,
                                                   logits=sd_logits))
    self.loss = loss = rec_loss + latent_loss + 0.1 * cate_loss
    self.pretrain_loss = pretrain_loss = rec_loss

    # VAE variables are everything except the sd_attention tower and the
    # (non-trainable anyway) cluster scope; each loss trains its own subset.
    all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    sd_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='sd_attention')
    cluster_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope='clusters')
    vae_vars = list(set(all_vars) - set(sd_vars) - set(cluster_vars))

    self.pretrain_op = tf.train.AdamOptimizer(
        args.learning_rate).minimize(pretrain_loss, var_list=vae_vars)
    self.train_op = tf.train.AdamOptimizer(
        args.learning_rate).minimize(loss, var_list=vae_vars)
    self.sd_train_op = tf.train.AdamOptimizer(
        args.learning_rate).minimize(sd_loss, var_list=sd_vars)

    saver = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES), max_to_keep=100)
    self.save, self.restore = saver.save, saver.restore
def run_update_step(self, session, step_number=None):
    """Returns the combine update tf OP.

    Runs one update of the compression state: on the first step inside the
    compression interval it computes the initial low-rank decomposition
    (b/c matrices) of the pruned a_matrix; on every step inside the interval
    it decrements alpha toward 0 and persists the last-update step in TF.

    Args:
      session: tf.Session used for all reads/writes of the TF state.
      step_number: global step; if None it is read from self._global_step
        (or defaults to 1 when interval checking is disabled).
    """
    logging.info('running run_update_step self._global_step is %s name is %s',
                 self._global_step, self.a_matrix_tfvar.op.name)
    # TODO(wanxin): Resolve tensor infetchable issue and update mask here.
    if step_number is None:
        if self._spec.run_update_interval_check != 0:
            logging.info(
                'running run_update_step step_num is null self.globalstep is %s',
                self._global_step)
            step_number = session.run(self._global_step)
            logging.info('running run_update_step step_num is %s', step_number)
        else:
            step_number = 1

    logging.info(
        'In compression op.run_update_step: '
        'step_number is %s, begin, end and update_count are: %s %s %s ',
        step_number, self._spec.begin_compression_step,
        self._spec.end_compression_step, self.run_update_count)
    # Inside the compression interval? (end step of -1 means "no end".)
    if (step_number >= self._spec.begin_compression_step and
            (step_number < self._spec.end_compression_step or
             self._spec.end_compression_step == -1)):
        logging.info(
            'In compression op.run_update_step:'
            'step_number is %s, begin, end and update_count are: %s %s %s ',
            step_number, self._spec.begin_compression_step,
            self._spec.end_compression_step, self.run_update_count)
        self.run_update_count += 1
        logging.info('inside compression interval')

        # Need to persist these python state variables in TF as if a task gets
        # aborted things get out of sync.
        self._last_update_step = session.run(self._last_alpha_update_step)
        logging.info(
            'In compression op.run_update_step: '
            'step_number is %s, begin, end, update_count, last_alpha_update'
            ' are: %s %s %s %s', step_number,
            self._spec.begin_compression_step,
            self._spec.end_compression_step, self.run_update_count,
            self._last_update_step)
        # -1 marks "never updated": perform the initial decomposition.
        if self._last_update_step == -1:
            logging.info(
                'In compression op.run_update_step: step_number is %s, '
                'begin, end, update_count are: %s %s %s ', step_number,
                self._spec.begin_compression_step,
                self._spec.end_compression_step, self.run_update_count)
            print('inside compression interval: initial decomposition step')
            a_matrix = session.run(self.a_matrix_tfvar)
            pruned_a_matrix = session.run(
                tf.multiply(self.a_matrix_tfvar, self.mask))
            logging.info(
                'In compression op.run_update_step: '
                'a_matrix.shape is %s norm is %d', a_matrix.shape,
                np.linalg.norm(a_matrix))
            if self.matrix_compressor.get_spec().is_c_matrix_present:
                logging.info(
                    'In compression op.run_update_step: '
                    'step_number is %s, begin, end and update_count are: %s %s %s ',
                    step_number, self._spec.begin_compression_step,
                    self._spec.end_compression_step, self.run_update_count)
                # Factor the (optionally transposed) pruned matrix into b, c.
                if getattr(self._spec, 'do_transpose', False):
                    [b_matrix, c_matrix
                    ] = self.matrix_compressor.static_matrix_compressor(
                        pruned_a_matrix.T)
                else:
                    [b_matrix, c_matrix
                    ] = self.matrix_compressor.static_matrix_compressor(
                        pruned_a_matrix)
                session.run(tf.assign(self.b_matrix_tfvar, b_matrix))
                session.run(tf.assign(self.c_matrix_tfvar, c_matrix))
            else:
                # Single-factor compressor: only a b matrix is produced.
                [b_matrix
                ] = self.matrix_compressor.static_matrix_compressor(
                    pruned_a_matrix)
                session.run(tf.assign(self.b_matrix_tfvar, b_matrix))
        logging.info(
            'In compression op.run_update_step: '
            'step_number is %s, begin, end and update_count are: %s %s %s ',
            step_number, self._spec.begin_compression_step,
            self._spec.end_compression_step, self.run_update_count)

        # Decrement alpha (the blend between original and compressed matrix)
        # toward zero, clamped at 0.
        alpha = session.run(self.alpha)
        self.last_alpha_value = alpha
        if self.last_alpha_value > 0:
            make_a_zero = False
            new_alpha = max(alpha - self._spec.alpha_decrement_value, 0)
            # NOTE(review): make_a_zero is hard-coded False, so this branch is
            # currently dead code kept as a debugging switch.
            if make_a_zero and new_alpha == 0:
                logging.info('Making a_matrix all zero for %s',
                             self.a_matrix_tfvar.op.name)
                a_matrix = np.zeros(shape=self.a_matrix_tfvar.shape)
                session.run(tf.assign(self.a_matrix_tfvar, a_matrix))
            logging.info(
                'in run_update_step decrementing alpha, alpha value is %d',
                self.last_alpha_value)
            logging.info(
                'running run_update_step self._global_step is %s new and old alpha are %d %d',
                self._global_step, alpha, new_alpha)
            session.run(tf.assign(self.alpha, new_alpha))
            self.last_alpha_value = new_alpha

        # Persist the last-update step in TF so restarts stay in sync.
        self._last_update_step = step_number
        session.run(tf.assign(self._last_alpha_update_step, step_number))
    logging.info(
        'In compression op.run_update_step: '
        'step_number is %s, begin, end and update_count are: %s %s %s ',
        step_number, self._spec.begin_compression_step,
        self._spec.end_compression_step, self.run_update_count)
def _assign(self, ref, value):
    """Assigns `value` to `ref`, honoring the optimizer's locking setting."""
    locking = self._use_locking
    return tf.assign(ref, value, use_locking=locking)
def train(agent, replay_buffer, dev_data, objective='mapo'):
    """Training Loop.

    Iterates sample generation and policy updates until agent.global_step
    reaches FLAGS.num_steps, periodically checkpointing and tracking the
    best validation accuracy.

    Args:
      agent: the policy agent; must expose pi, global_step, update(),
        update_eps(), sample_trajs().
      replay_buffer: training replay buffer (provides env_dict).
      dev_data: a replay buffer when FLAGS.meta_learn, else an env dict.
      objective: training objective name, e.g. 'mapo'.
    """
    sgd_steps = 0
    train_env_dict = replay_buffer.env_dict
    train_sample_gen = SampleGenerator(
        replay_buffer,
        agent,
        objective=objective,
        explore=FLAGS.explore,
        n_samples=FLAGS.n_replay_samples,
        use_top_k_samples=FLAGS.use_top_k_samples,
        min_replay_weight=FLAGS.min_replay_weight)
    train_sample_generator = train_sample_gen.generate_samples(
        batch_size=len(train_env_dict), debug=FLAGS.is_debug)
    if FLAGS.meta_learn:
        # Meta-learning also draws samples from a dev replay buffer.
        dev_replay_buffer = dev_data
        dev_env_dict = dev_replay_buffer.env_dict
        dev_sample_gen = SampleGenerator(
            dev_replay_buffer,
            agent,
            objective=objective,
            explore=FLAGS.dev_explore)
        dev_sample_generator = dev_sample_gen.generate_samples(
            batch_size=len(dev_env_dict), debug=FLAGS.is_debug)
    else:
        dev_env_dict = dev_data

    ckpt_dir = osp.join(FLAGS.train_dir, 'model')
    # No checkpoint yet for this run: optionally warm-start from a
    # pretrained checkpoint.
    if (tf.train.latest_checkpoint(ckpt_dir) is
            None) and FLAGS.pretrained_ckpt_dir:
        pretrained_ckpt_dir = osp.join(FLAGS.pretrained_ckpt_dir,
                                       'best_model')
        # Store weights before loading the checkpoint
        if FLAGS.pretrained_load_data_only and FLAGS.meta_learn:
            pi_weights = agent.pi.get_weights()
        create_checkpoint_manager(
            agent,
            pretrained_ckpt_dir,
            restore=True,
            include_optimizer=False,
            meta_learn=False)
        # Reset the global step to 0
        tf.assign(agent.global_step, 0)
        if FLAGS.pretrained_load_data_only and FLAGS.meta_learn:
            # Use the pretrained policy only to collect dev trajectories,
            # then restore the original (pre-load) weights.
            dev_trajs = agent.sample_trajs(dev_env_dict.values(), greedy=True)
            dev_replay_buffer.save_trajs(dev_trajs)
            agent.pi.set_weights(pi_weights)
        tf.logging.info('Collected data using the pretrained checkpoint')

    ckpt_manager = create_checkpoint_manager(
        agent,
        ckpt_dir,
        restore=True,
        include_optimizer=True,
        meta_learn=FLAGS.meta_learn)
    best_ckpt_dir = osp.join(FLAGS.train_dir, 'best_model')
    best_ckpt_manager = create_checkpoint_manager(
        agent, best_ckpt_dir, restore=False, include_optimizer=False)
    # Log summaries for the accuracy results
    summary_writer = contrib_summary.create_file_writer(
        osp.join(FLAGS.train_dir, 'tb_log'), flush_millis=5000)
    max_val_acc = helpers.eval_agent(agent, dev_env_dict)
    with summary_writer.as_default(), \
            contrib_summary.always_record_summaries():
        while agent.global_step.numpy() < FLAGS.num_steps:
            # Periodic checkpoint + evaluation; keep the best-on-dev model.
            if sgd_steps % FLAGS.save_every_n == 0:
                ckpt_manager.save()
                train_acc = helpers.eval_agent(agent, train_env_dict)
                val_acc = helpers.eval_agent(agent, dev_env_dict)
                contrib_summary.scalar('train_acc', train_acc)
                contrib_summary.scalar('validation_acc', val_acc)
                if val_acc > max_val_acc:
                    max_val_acc = val_acc
                    tf.logging.info(
                        'Best validation accuracy {}'.format(max_val_acc))
                    best_ckpt_manager.save()
            # Sample environments and trajectories
            samples, contexts = next(train_sample_generator)
            if FLAGS.meta_learn:
                dev_samples, dev_contexts = next(dev_sample_generator)
                agent.update(samples, contexts, dev_samples, dev_contexts)
            else:
                # Update the policy
                agent.update(samples, contexts)
            # Update the random noise
            agent.update_eps(agent.global_step.numpy(), FLAGS.num_steps)
            sgd_steps += 1
bins = 128 npots = 200 validnth = 5 sinval = np.sin([[np.pi * i * j / bins for i in range(1, bins)] for j in range(1, bins // 2)]) cosval = np.cos([[np.pi * i * j / bins for i in range(1, bins)] for j in range(1, bins // 2)]) sqrt2 = np.sqrt(2) defgrdstate = tf.constant( [sqrt2 * np.sin(i * np.pi / bins) for i in range(1, bins)]) psi = tf.Variable(defgrdstate) zerotens = tf.zeros([1]) psil = tf.concat([psi[1:], zerotens], 0) psir = tf.concat([zerotens, psi[:-1]], 0) renorm = tf.assign(psi, tf.divide(psi, tf.sqrt(tf.reduce_mean(tf.square(psi))))) optim = tf.train.GradientDescentOptimizer(0.0625 / bins) reinit = tf.assign(psi, defgrdstate) init = tf.global_variables_initializer() potentials = [] valid_potentials = [] wave_functions = [] valid_functions = [] sess = tf.Session() sess.run(init) for i in range(npots): if i % 10 == 0: print(str((100. * i) / npots) + '% complete') for j in range(3):
import time import tensorflow.compat.v1 as tf # Configuration of cluster worker_hosts = ["9.134.80.230:9501", "9.134.189.246:9501"] ps_hosts = ["9.134.189.246:9500"] cluster = tf.train.ClusterSpec({"worker": worker_hosts, "ps": ps_hosts}) server = tf.train.Server(cluster, job_name='worker', task_index=0) #找到‘worker’名字下的,task0,也就是机器A with tf.device(tf.train.replica_device_setter()): w = tf.get_variable('w', (1), tf.float32, initializer=tf.constant_initializer(2)) add = tf.add(w, 1) update = tf.assign(w, add) with tf.Session(server.target) as sess: sess.run(tf.global_variables_initializer()) for _ in range(100): print("==============================") print(sess.run(w)) print(sess.run(update)) time.sleep(1)
def save_internal_states_ops(self, internal_states): if not self.hparams.concat_internal_states: return [[tf.no_op()]] ops = [[tf.assign(x, y)] for x, y in zip(self.internal_states[0], internal_states[0])] return ops
def save_internal_states_ops(self, internal_states): ops = [[tf.assign(x[0], y[0]), tf.assign(x[1], y[1])] for x, y in zip(self.internal_states, internal_states)] return ops
def train(train_data, test_data=None):
    """Trains an unsupervised GraphSAGE (or node2vec) model.

    Args:
      train_data: tuple of (graph, features, id_map, context_pairs) as
        produced by the data loader.
      test_data: unused here -- presumably consumed by a caller; TODO confirm.
    """
    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]
    if not features is None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1], ))])
    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders()
    minibatch = EdgeMinibatchIterator(G,
                                      id_map,
                                      placeholders,
                                      batch_size=FLAGS.batch_size,
                                      max_degree=FLAGS.max_degree,
                                      num_neg_samples=FLAGS.neg_sample_size,
                                      context_pairs=context_pairs)
    # Adjacency matrix is fed once through a placeholder and held in a
    # non-trainable variable so it can be swapped between train/test graphs.
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")
    # Select the model variant from FLAGS.model.
    if FLAGS.model == 'graphsage_mean':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'gcn':
        # Create model. GCN aggregation doubles the hidden dims and disables
        # concatenation.
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, 2 * FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, 2 * FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="gcn",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   concat=False,
                                   logging=True)
    elif FLAGS.model == 'graphsage_seq':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   identity_dim=FLAGS.identity_dim,
                                   aggregator_type="seq",
                                   model_size=FLAGS.model_size,
                                   logging=True)
    elif FLAGS.model == 'graphsage_maxpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="maxpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'graphsage_meanpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="meanpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'n2v':
        model = Node2VecModel(
            placeholders,
            features.shape[0],
            minibatch.deg,
            #2x because graphsage uses concat
            nodevec_dim=2 * FLAGS.dim_1,
            lr=FLAGS.learning_rate)
    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
    config.allow_soft_placement = True

    # Initialize session
    sess = tf.Session(config=config)
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir(), sess.graph)

    # Init variables
    sess.run(tf.global_variables_initializer(),
             feed_dict={adj_info_ph: minibatch.adj})

    # Train model
    train_shadow_mrr = None  # exponential moving average of train MRR
    shadow_mrr = None  # exponential moving average of validation MRR

    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

    # Ops swapping the adjacency variable between the train and test graphs.
    train_adj_info = tf.assign(adj_info, minibatch.adj)
    val_adj_info = tf.assign(adj_info, minibatch.test_adj)
    for epoch in range(FLAGS.epochs):
        minibatch.shuffle()

        iter = 0
        print('Epoch: %04d' % (epoch + 1))
        epoch_val_costs.append(0)
        while not minibatch.end():
            # Construct feed dictionary
            feed_dict = minibatch.next_minibatch_feed_dict()
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            t = time.time()
            # Training step
            outs = sess.run([
                merged, model.opt_op, model.loss, model.ranks, model.aff_all,
                model.mrr, model.outputs1
            ],
                            feed_dict=feed_dict)
            train_cost = outs[2]
            train_mrr = outs[5]
            if train_shadow_mrr is None:
                train_shadow_mrr = train_mrr  #
            else:
                train_shadow_mrr -= (1 - 0.99) * (train_shadow_mrr - train_mrr)

            if iter % FLAGS.validate_iter == 0:
                # Validation: temporarily switch to the test adjacency, then
                # switch back.
                sess.run(val_adj_info.op)
                val_cost, ranks, val_mrr, duration = evaluate(
                    sess, model, minibatch, size=FLAGS.validate_batch_size)
                sess.run(train_adj_info.op)
                epoch_val_costs[-1] += val_cost
            if shadow_mrr is None:
                shadow_mrr = val_mrr
            else:
                shadow_mrr -= (1 - 0.99) * (shadow_mrr - val_mrr)

            if total_steps % FLAGS.print_every == 0:
                summary_writer.add_summary(outs[0], total_steps)

            # Print results
            avg_time = (avg_time * total_steps + time.time() -
                        t) / (total_steps + 1)

            if total_steps % FLAGS.print_every == 0:
                print(
                    "Iter:",
                    '%04d' % iter,
                    "train_loss=",
                    "{:.5f}".format(train_cost),
                    "train_mrr=",
                    "{:.5f}".format(train_mrr),
                    "train_mrr_ema=",
                    "{:.5f}".format(
                        train_shadow_mrr),  # exponential moving average
                    "val_loss=",
                    "{:.5f}".format(val_cost),
                    "val_mrr=",
                    "{:.5f}".format(val_mrr),
                    "val_mrr_ema=",
                    "{:.5f}".format(shadow_mrr),  # exponential moving average
                    "time=",
                    "{:.5f}".format(avg_time))

            iter += 1
            total_steps += 1

            if total_steps > FLAGS.max_total_steps:
                break

        if total_steps > FLAGS.max_total_steps:
            break

    print("Optimization Finished!")
    if FLAGS.save_embeddings:
        sess.run(val_adj_info.op)
        save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size,
                            log_dir())

        if FLAGS.model == "n2v":
            # stopping the gradient for the already trained nodes
            train_ids = tf.constant(
                [[id_map[n]] for n in G.nodes_iter()
                 if not G.node[n]['val'] and not G.node[n]['test']],
                dtype=tf.int32)
            test_ids = tf.constant([[id_map[n]] for n in G.nodes_iter()
                                    if G.node[n]['val'] or G.node[n]['test']],
                                   dtype=tf.int32)
            update_nodes = tf.nn.embedding_lookup(model.context_embeds,
                                                  tf.squeeze(test_ids))
            no_update_nodes = tf.nn.embedding_lookup(model.context_embeds,
                                                     tf.squeeze(train_ids))
            # Scatter the two groups back into a full embedding tensor; the
            # already-trained (train) rows are frozen via stop_gradient.
            update_nodes = tf.scatter_nd(test_ids, update_nodes,
                                         tf.shape(model.context_embeds))
            no_update_nodes = tf.stop_gradient(
                tf.scatter_nd(train_ids, no_update_nodes,
                              tf.shape(model.context_embeds)))
            model.context_embeds = update_nodes + no_update_nodes
            sess.run(model.context_embeds)

            # run random walks
            from graphsage.utils import run_random_walks
            nodes = [
                n for n in G.nodes_iter()
                if G.node[n]["val"] or G.node[n]["test"]
            ]
            start_time = time.time()
            pairs = run_random_walks(G, nodes, num_walks=50)
            walk_time = time.time() - start_time

            test_minibatch = EdgeMinibatchIterator(
                G,
                id_map,
                placeholders,
                batch_size=FLAGS.batch_size,
                max_degree=FLAGS.max_degree,
                num_neg_samples=FLAGS.neg_sample_size,
                context_pairs=pairs,
                n2v_retrain=True,
                fixed_n2v=True)

            start_time = time.time()
            print("Doing test training for n2v.")
            test_steps = 0
            for epoch in range(FLAGS.n2v_test_epochs):
                test_minibatch.shuffle()
                while not test_minibatch.end():
                    feed_dict = test_minibatch.next_minibatch_feed_dict()
                    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                    outs = sess.run([
                        model.opt_op, model.loss, model.ranks, model.aff_all,
                        model.mrr, model.outputs1
                    ],
                                    feed_dict=feed_dict)
                    if test_steps % FLAGS.print_every == 0:
                        print("Iter:", '%04d' % test_steps, "train_loss=",
                              "{:.5f}".format(outs[1]), "train_mrr=",
                              "{:.5f}".format(outs[-2]))
                    test_steps += 1
            train_time = time.time() - start_time
            save_val_embeddings(sess,
                                model,
                                minibatch,
                                FLAGS.validate_batch_size,
                                log_dir(),
                                mod="-test")
            print("Total time: ", train_time + walk_time)
            print("Walk time: ", walk_time)
            print("Train time: ", train_time)
# subdircount += 1 tfprvs = tf.placeholder(tf.float32, shape=[4, 256, 448, 3], name="first_frame") tfnext = tf.placeholder(tf.float32, shape=[4, 256, 448, 3], name="second_frame") l_r = tf.placeholder(tf.float32, shape=[], name='learning_rate') lamda = tf.placeholder(tf.int16, shape=[], name="train_lambda") recon, mse, bpp = net(tfprvs, tfnext) train_loss = tf.cast(lamda, tf.float32) * mse + bpp train = tf.train.AdamOptimizer(learning_rate=l_r).minimize(train_loss) aux_step1 = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(net.ofcomp.entropy_bottleneck.losses[0]) aux_step2 = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(net.rescomp.entropy_bottleneck.losses[0]) tfvideo_batch = tf.get_variable("tfvideo_batch", initializer=tf.constant(0)) increment_video_batch = tf.assign(tfvideo_batch, tfvideo_batch + 1) directory = tf.get_variable("directory", initializer=tf.constant(1)) increment_directory = tf.assign(directory, directory + 1) init_video_batch_updater = tf.assign(tfvideo_batch, 0) init_directory_updater = tf.assign(directory, 1) init = tf.global_variables_initializer() saver = tf.train.Saver() starting = args.restore with tf.Session() as sess: sess.run(init) if starting:
def simulate(self, action):
  """Simulates one environment step with the learned world model.

  Args:
    action: action tensor, tiled across the model's input frames.

  Returns:
    A (reward, done) pair of tensors; `done` is always False here.
  """
  with tf.name_scope("environment/simulate"):
    actions = tf.concat([tf.expand_dims(action, axis=1)] * self._num_frames,
                        axis=1)
    history = self.history_buffer.get_all_elements()
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
      # We only need 1 target frame here, set it.
      hparams_target_frames = self._model.hparams.video_num_target_frames
      self._model.hparams.video_num_target_frames = 1
      model_output = self._model.infer({
          "inputs": history,
          "input_action": actions,
          "reset_internal_states": self._reset_model.read_value()
      })
      # Restore the hparam we temporarily overrode above.
      self._model.hparams.video_num_target_frames = hparams_target_frames
    observ = tf.cast(tf.squeeze(model_output["targets"], axis=1),
                     self.observ_dtype)

    reward = tf.to_float(model_output["target_reward"])
    reward = tf.reshape(reward,
                        shape=(self.batch_size, )) + self._min_reward

    if self._intrinsic_reward_scale:
      # Use the model's uncertainty about its prediction as an intrinsic
      # reward. The uncertainty is measured by the log probability of the
      # predicted pixel value.
      if "targets_logits" not in model_output:
        raise ValueError("The use of intrinsic rewards requires access to "
                         "the logits. Ensure that model.infer returns "
                         "'targets_logits'")
      uncertainty_reward = compute_uncertainty_reward(
          model_output["targets_logits"], model_output["targets"])
      uncertainty_reward = tf.minimum(
          1., self._intrinsic_reward_scale * uncertainty_reward)
      uncertainty_reward = tf.Print(uncertainty_reward, [uncertainty_reward],
                                    message="uncertainty_reward",
                                    first_n=1,
                                    summarize=8)
      reward += uncertainty_reward

    done = tf.constant(False, tf.bool, shape=(self.batch_size, ))

    # Optionally dump the predicted frame; gated behind observ so the frame
    # is computed before the py_func runs.
    with tf.control_dependencies([observ]):
      dump_frame_op = tf.cond(
          self._video_condition,
          lambda: tf.py_func(
              self._video_dump_frame,  # pylint: disable=g-long-lambda
              [observ, reward],
              []),
          tf.no_op)
    # Commit the new observation to state, advance the history buffer, and
    # clear the reset flag before returning.
    with tf.control_dependencies([
        self._observ.assign(observ),
        self.history_buffer.move_by_one_element(observ), dump_frame_op
    ]):
      clear_reset_model_op = tf.assign(self._reset_model, tf.constant(0.0))
      with tf.control_dependencies([clear_reset_model_op]):
        return tf.identity(reward), tf.identity(done)
def set_state(self, state): return [ tf.assign(self.mean_variable, state[0]), tf.assign(self.log_var, state[1]) ]
def _build_params(self):
  """Create and count model parameters.

  Builds the embedding matrix, per-layer LSTM weights (with dropout masks
  applied for the training copies), and per-layer init-state variables with
  their reset ops. Populates self.batch_init_states, self.train_params,
  self.test_init_states and self.eval_params.
  """
  print('-' * 80)
  print('Building model params')
  with tf.variable_scope(self.name):
    with tf.variable_scope('embedding'):
      initializer = tf.initializers.random_uniform(-self.params.init_range,
                                                   self.params.init_range)
      w_emb = tf.get_variable(
          'w', [self.params.vocab_size, self.params.emb_size],
          initializer=initializer)
      # Embedding dropout zeroes whole rows (noise_shape [vocab_size, 1]).
      dropped_w_emb = tf.layers.dropout(w_emb,
                                        self.params.drop_e,
                                        [self.params.vocab_size, 1],
                                        training=True)

    w_lstm = []
    dropped_w_lstm = []
    with tf.variable_scope('lstm'):
      for i in range(self.params.num_layers):
        # First layer reads embeddings; last layer emits embedding-sized
        # outputs (tied to the softmax); middle layers use hidden_size.
        inp_size = self.params.emb_size if i == 0 else self.params.hidden_size
        hid_size = (self.params.emb_size if i == self.params.num_layers - 1
                    else self.params.hidden_size)
        init_range = 1.0 / np.sqrt(hid_size)
        initializer = tf.initializers.random_uniform(-init_range, init_range)
        with tf.variable_scope('layer_{0}'.format(i)):
          w = tf.get_variable('w', [inp_size + hid_size, 4 * hid_size],
                              initializer=initializer)
          # Weight-drop mask: only the recurrent (hidden) rows are dropped;
          # input rows pass through unmasked.
          i_mask = tf.ones([inp_size, 4 * hid_size], dtype=tf.float32)
          h_mask = _gen_mask([hid_size, 4 * hid_size], self.params.drop_w)
          mask = tf.concat([i_mask, h_mask], axis=0)
          dropped_w = w * mask
          w_lstm.append(w)
          dropped_w_lstm.append(dropped_w)

    with tf.variable_scope('init_states'):
      batch_prev_c, batch_prev_h, batch_reset = [], [], []
      test_prev_c, test_prev_h, test_reset = [], [], []
      for i in range(self.params.num_layers):
        inp_size = self.params.emb_size if i == 0 else self.params.hidden_size
        hid_size = (self.params.emb_size if i == self.params.num_layers - 1
                    else self.params.hidden_size)
        with tf.variable_scope('layer_{0}'.format(i)):
          # Non-trainable carry-over state for training batches.
          with tf.variable_scope('batch'):
            init_shape = [self.params.batch_size, hid_size]
            batch_prev_c.append(
                tf.get_variable('c',
                                init_shape,
                                dtype=tf.float32,
                                trainable=False))
            batch_prev_h.append(
                tf.get_variable('h',
                                init_shape,
                                dtype=tf.float32,
                                trainable=False))
            zeros = np.zeros(init_shape, dtype=np.float32)
            batch_reset.append(tf.assign(batch_prev_c[-1], zeros))
            batch_reset.append(tf.assign(batch_prev_h[-1], zeros))
          # Separate batch-size-1 state for evaluation/test.
          with tf.variable_scope('test'):
            init_shape = [1, hid_size]
            test_prev_c.append(
                tf.get_variable('c',
                                init_shape,
                                dtype=tf.float32,
                                trainable=False))
            test_prev_h.append(
                tf.get_variable('h',
                                init_shape,
                                dtype=tf.float32,
                                trainable=False))
            zeros = np.zeros(init_shape, dtype=np.float32)
            test_reset.append(tf.assign(test_prev_c[-1], zeros))
            test_reset.append(tf.assign(test_prev_h[-1], zeros))

  num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
  print('Model has {0} params'.format(num_params))

  self.batch_init_states = {
      'c': batch_prev_c,
      'h': batch_prev_h,
      'reset': batch_reset,
  }
  # Training uses the dropped (masked) weights; w_soft ties the softmax to
  # the embedding matrix.
  self.train_params = {
      'w_emb': dropped_w_emb,
      'w_lstm': dropped_w_lstm,
      'w_soft': w_emb,
  }
  self.test_init_states = {
      'c': test_prev_c,
      'h': test_prev_h,
      'reset': test_reset,
  }
  self.eval_params = {
      'w_emb': w_emb,
      'w_lstm': w_lstm,
      'w_soft': w_emb,
  }
def set_state(self, state): ops = list(FactorisedPosterior.set_state(self, state[:-1])) ops += [tf.assign(self.off_diag_vars_base, state[-1], validate_shape=False)] return ops
def _create_average_ops(self): """Build moving average ops.""" print('Creating moving average ops') with tf.variable_scope('moving_avg_flag'): self.moving_avg_started = tf.get_variable( 'flag', [], tf.int32, initializer=tf.initializers.zeros(), trainable=False) self.start_moving_avg_op = tf.assign(self.moving_avg_started, 1) all_vars = tf.trainable_variables() average_pairs = [] var_cnt = 0 with tf.variable_scope('average'): for v in all_vars: avg_v = tf.get_variable(str(var_cnt), shape=v.shape, dtype=v.dtype, initializer=tf.zeros_initializer, trainable=False) var_cnt += 1 average_pairs.append([v, avg_v]) backup_pairs = [] var_cnt = 0 with tf.variable_scope('backup'): for v in all_vars: backup_v = tf.get_variable(str(var_cnt), shape=v.shape, dtype=v.dtype, trainable=False) var_cnt += 1 backup_pairs.append([v, backup_v]) with tf.variable_scope('avg_step'): avg_step = tf.get_variable('step', [], dtype=tf.float32, trainable=False) with tf.control_dependencies([tf.assign_add(avg_step, 1.0)]): average_op = [] for v, avg_v in average_pairs: mu = 1 / avg_step new_avg = mu * v + (1 - mu) * avg_v with tf.control_dependencies([new_avg]): average_op.append(tf.assign(avg_v, new_avg)) assert len(average_pairs) == len(all_vars) assert len(average_pairs) == len(backup_pairs) use_average_op = [] for i in range(len(average_pairs)): v, avg_v = average_pairs[i] _, backup_v = backup_pairs[i] with tf.control_dependencies([tf.assign(backup_v, v)]): use_average_op.append(tf.assign(v, avg_v)) use_average_op = tf.group(*use_average_op) reverse_average_op = [] for v, backup_v in backup_pairs: reverse_average_op.append(tf.assign(v, backup_v)) reverse_average_op = tf.group(*reverse_average_op) return average_op, use_average_op, reverse_average_op
def __init__(
        self,
        n_actions,
        n_features,  # number of observation/state features (e.g. width/height)
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=300,
        memory_size=500,
        double=True,
        batch_size=32,
        e_greedy_increment=None,
        prioritized=True,
        output_graph=False,
):
    """(Double/prioritized) DQN agent: stores hyperparameters, builds the
    eval/target networks, the target-sync ops, and the TF session."""
    self.n_actions = n_actions
    self.n_features = n_features  # number of observation/state features
    self.lr = learning_rate
    self.gamma = reward_decay
    self.epsilon_max = e_greedy  # maximum value of epsilon
    self.replace_target_iter = replace_target_iter  # steps between target_net swaps
    self.memory_size = memory_size  # replay memory capacity
    self.batch_size = batch_size  # minibatch size drawn from memory per update
    self.epsilon_increment = e_greedy_increment  # per-step increment of epsilon
    self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
    # Whether exploration annealing is enabled: with e_greedy_increment=None,
    # epsilon starts at epsilon_max (no annealing); otherwise it starts at 0
    # and is gradually increased.
    # TODO(xhx): how is exploration mode enabled later?
    self.double = double
    self.prioritized = prioritized
    # Counts learn() calls (used to decide when to swap target_net params).
    self.learn_step_counter = 0

    #############################prioritized####################################
    if self.prioritized:
        self.memory = Memory(capacity=memory_size)
    else:
        self.memory = np.zeros(
            (self.memory_size,
             n_features * 2 + 2))  # zero-initialized rows [s, a, r, s_]
    #############################prioritized####################################
    # row size = len(s) + len(s_) + a + r
    # self.memory = np.zeros((self.memory_size, n_features * 2 + 2))
    # Differs from the video tutorial: numpy is used directly here because
    # pandas operations are comparatively slow.

    # Build [target_net, evaluate_net].
    self._build_net()

    # Ops replacing the target net parameters with the eval net parameters.
    # TODO(xhx): these four lines for parameter replacement are unclear.
    # In _build_net, each net's w1, b1, w2, b2 are added to the collections
    # 'target_net_params' and 'eval_net_params'.
    t_params = tf.get_collection('target_net_params')  # target_net params
    e_params = tf.get_collection('eval_net_params')  # eval_net params
    self.replace_target_op = [
        tf.assign(t, e) for t, e in zip(t_params, e_params)
    ]  # assign eval params onto target params

    self.sess = tf.Session()

    # Write the graph for tensorboard.
    if output_graph:
        # $ tensorboard --logdir=logs
        tf.summary.FileWriter("logs/", self.sess.graph)

    self.sess.run(tf.global_variables_initializer())
    self.cost_his = []  # records all cost values for plotting afterwards
def learn(
        env,
        model_path,
        data_path,
        policy_fn,
        *,
        horizon=150,  # timesteps per actor per update
        rolloutSize=50,
        clip_param=0.2,
        entcoeff=0.02,  # clipping parameter epsilon, entropy coeff
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=32,  # optimization hypers
        gamma=0.99,
        lam=0.95,  # advantage estimation
        max_iters=0,  # time constraint
        adam_epsilon=1e-4,
        schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
        retrain=False):
    """Trains a policy on `env` with clipped-surrogate PPO.

    Builds the new/old policy networks and the L^CLIP + value + entropy loss,
    then alternates trajectory collection (saved to `data_path`) with
    minibatch Adam epochs until `max_iters` iterations. Returns the trained
    policy `pi`. When `retrain` is true, weights are first loaded from
    `model_path`.
    """
    # Setup losses and policy
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space,
                   ac_space)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space)  # Network for old policy
    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    # KL(old || new) and entropy terms.
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param,
                             1.0 + clip_param) * atarg  #
    pol_surr = -tf.reduce_mean(tf.minimum(
        surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    # Ops copying the current policy weights into the "old" policy network.
    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv) for (oldv, newv) in zipsame(
                oldpi.get_variables(), pi.get_variables())
        ])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    adam.sync()

    # Prepare for rollouts
    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=5)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=5)  # rolling buffer for episode rewards
    p = []  # for saving the rollouts

    if retrain == True:
        print("Retraining the policy from saved path")
        time.sleep(2)
        U.load_state(model_path)
    max_timesteps = int(horizon * rolloutSize * max_iters)

    while True:
        if max_iters and iters_so_far >= max_iters:
            break
        # Learning-rate multiplier schedule.
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)
        print("Collecting samples for policy optimization !! ")
        # Rendering kicks in only once training is presumably stable.
        if iters_so_far > 70:
            render = True
        else:
            render = False
        rollouts = sample_trajectory(pi,
                                     env,
                                     horizon=horizon,
                                     rolloutSize=rolloutSize,
                                     stochastic=True,
                                     render=render)
        # Save rollouts
        data = {'rollouts': rollouts}
        p.append(data)
        del data
        data_file_name = data_path + 'rollout_data.pkl'
        pickle.dump(p, open(data_file_name, "wb"))

        add_vtarg_and_adv(rollouts, gamma, lam)

        ob, ac, atarg, tdlamret = rollouts["ob"], rollouts["ac"], rollouts[
            "adv"], rollouts["tdlamret"]
        atarg = (atarg - atarg.mean()
                 ) / atarg.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    deterministic=pi.recurrent)
        optim_batchsize = optim_batchsize or ob.shape[0]

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values
        logger.log("Optimizing...")
        # Here we do a bunch of optimization epochs over the data
        for _ in range(optim_epochs):
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)

        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        # Gather episode stats across all MPI workers.
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        # Only the rank-0 worker writes the tabular log.
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

    return pi
def reset_cell_states(self): for cell_group_key in self.cell_groups: for state_key in self.cell_groups[cell_group_key].states: state = self.cell_groups[cell_group_key].states[state_key] self.session.run(tf.assign(state, tf.zeros_like(state)))