def discretize_range(discretize_fn, levels, low, high, thermometer=False):
  """Get range of discretized values for in the interval (low, high).

    For example, assume discretize_fn uniformly discretizes the values
    between 0 and 1 into 10 bins, each represented by either a one-hot encoding
    or a thermometer encoding. Then discretize_range(discretize_fn, 10, .3, .7)
    would return [0., 0., 0., 1., 1., 1., 1., 0., 0., 0.]. Note that its output
    is independent of the encoding used.

  Args:
    discretize_fn: Discretization function used to discretize input.
    levels: Number of levels to discretize the input into.
    low: Minimum value in the interval.
    high: Maximum value in the interval.
    thermometer: If True, then the discretize_fn returns thermometer codes,
        else it returns one hot codes. (Default: False).

  Returns:
    Mask of 1's over the interval.
  """
  low = tf.clip_by_value(low, 0., 1.)
  high = tf.clip_by_value(high, 0., 1.)
  out = 0.
  for alpha in np.linspace(0., 1., levels):
    q = discretize_fn(alpha * low + (1. - alpha) * high, levels, thermometer)

    # Convert into one hot encoding if q is in thermometer encoding
    if thermometer:
      q = discretization_utils.thermometer_to_one_hot(q, levels, flattened=True)
    out += q
  return tf.to_float(tf.greater(out, 0.))
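A minimal NumPy sketch of the same mask-building idea, using a hypothetical uniform one-hot discretize_fn (which bins the boundary values land in depends on the real discretize_fn's binning convention):

import numpy as np

def uniform_one_hot(value, levels, thermometer=False):
    # hypothetical stand-in: left-closed uniform bins over [0, 1]
    idx = min(int(value * levels), levels - 1)
    code = np.zeros(levels)
    code[idx] = 1.
    return code

levels, low, high = 10, 0.3, 0.7
out = sum(uniform_one_hot(a * low + (1. - a) * high, levels)
          for a in np.linspace(0., 1., levels))
mask = (out > 0).astype(np.float32)  # 1's over every bin the interval touches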
def clip_weights_with_threshold(max_threshold):
    global weights
    for op in weights:
        # conv and fully connected weights get the same symmetric clipping
        if 'conv' in op or 'fulcon' in op:
            weights[op] = tf.clip_by_value(weights[op], -max_threshold, max_threshold)
Example No. 3
    def _create_loss_and_optimizer(self, inputs, x_reconstr_mean, z_log_sigma_sq, z_mean):
        # The loss is composed of two terms:
        # 1.) The reconstruction loss (the negative log probability
        #     of the input under the reconstructed Bernoulli distribution
        #     induced by the decoder in the data space).
        #     This can be interpreted as the number of "nats" required
        #     for reconstructing the input when the activation in latent
        #     is given.
        # Clipping to [1e-9, 1] avoids evaluating log(0.0)
        self.reconstr_loss = \
            -tf.reduce_sum(inputs * tf.log(tf.clip_by_value(x_reconstr_mean, 1e-9, 1.0))
                           + (1 - inputs) * tf.log(tf.clip_by_value(1 - x_reconstr_mean, 1e-9, 1.0)),
                           1)
        # 2.) The latent loss, which is defined as the Kullback-Leibler divergence
        #     between the distribution in latent space induced by the encoder on
        #     the data and some prior. This acts as a kind of regularizer.
        #     This can be interpreted as the number of "nats" required
        #     for transmitting the latent space distribution given
        #     the prior.
        self.latent_loss = -0.5 * tf.reduce_sum(1 + z_log_sigma_sq
                                                - tf.square(z_mean)
                                                - tf.exp(z_log_sigma_sq), 1)
        loss = tf.reduce_mean(self.reconstr_loss + self.latent_loss)   # average over batch

        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)
        return loss, optimizer
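For reference, the latent term above is the closed-form KL divergence between the diagonal Gaussian posterior and a unit Gaussian prior:

# KL( N(mu, sigma^2) || N(0, I) ) = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
# with mu = z_mean and log(sigma^2) = z_log_sigma_sq, which matches the
# latent_loss expression above term by term.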
Example No. 4
def tf_bivariate_normal(y, mu, sigma, rho, n_mixtures, batch_size):
    mu = tf.verify_tensor_all_finite(mu, "Mu not finite!")
    y = tf.verify_tensor_all_finite(y, "Y not finite!")
    delta = tf.sub(tf.tile(tf.expand_dims(y, 1), [1, n_mixtures, 1]), mu)
    delta = tf.verify_tensor_all_finite(delta, "Delta not finite!")
    sigma = tf.verify_tensor_all_finite(sigma, "Sigma not finite!")
    s = tf.reduce_prod(sigma, 2)
    s = tf.verify_tensor_all_finite(s, "S not finite!")
    # -1 <= rho <= 1
    z = tf.reduce_sum(tf.square(tf.div(delta, sigma + epsilon) + epsilon), 2) - \
        2 * tf.div(tf.mul(rho, tf.reduce_prod(delta, 2)), s + epsilon)
    
    z = tf.verify_tensor_all_finite(z, "Z not finite!")
    # 0 < negRho <= 1
    rho = tf.verify_tensor_all_finite(rho, "rho in bivariate normal not finite!")
    negRho = tf.clip_by_value(1 - tf.square(rho), epsilon, 1.0)
    negRho = tf.verify_tensor_all_finite(negRho, "negRho in bivariate normal not finite!")
    # Note that if negRho goes near zero, or z gets really large, this explodes.
    
    result = tf.clip_by_value(tf.exp(tf.div(-z, 2 * negRho)), 1.0e-8, 1.0e8)
    result = tf.verify_tensor_all_finite(result, "Result in bivariate normal not finite!")
    denom = 2 * np.pi * tf.mul(s, tf.sqrt(negRho))
    denom = tf.verify_tensor_all_finite(denom, "Denom in bivariate normal not finite!")
    result = tf.clip_by_value(tf.div(result, denom + epsilon), epsilon, 1.0)
    result = tf.verify_tensor_all_finite(result, "Result2 in bivariate normal not finite!")
    return result, delta
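For reference, a sketch of the standard bivariate normal density the code mirrors (epsilon terms and clipping aside):

# z = (dx/sx)^2 + (dy/sy)^2 - 2*rho*dx*dy/(sx*sy),  with dx = y1 - mu1, dy = y2 - mu2
# N(y | mu, sigma, rho) = exp(-z / (2*(1 - rho^2))) / (2*pi*sx*sy*sqrt(1 - rho^2))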
Example No. 5
File: losses.py Project: imito/odin
def bayes_crossentropy(y_true, y_pred, nb_classes=None, reduction=tf.reduce_mean,
                       name=None):
  with tf.name_scope(name, "bayes_crossentropy", [y_true, y_pred]):
    y_pred_shape = y_pred.shape
    if y_pred_shape.ndims == 1 or y_pred_shape[-1].value == 1:
      if y_pred_shape.ndims == 1:
        y_pred = tf.expand_dims(y_pred, -1)
      y_pred0 = 1. - y_pred
      y_pred = tf.concat([y_pred0, y_pred], axis=-1)
    # get number of classes
    if y_true.shape.ndims == 1:
      if nb_classes is None:
        raise Exception('y_pred and y_true must be one_hot encoded, '
                        'otherwise you have to provide nb_classes.')
      y_true = tf.one_hot(y_true, depth=nb_classes)
    elif nb_classes is None:
      nb_classes = y_true.shape[1].value
    # avoid numerical instability by clipping with EPS
    y_pred = tf.clip_by_value(y_pred, EPS, 1.0 - EPS)
    # ====== check distribution ====== #
    distribution = tf.reduce_sum(y_true, axis=0)
    # probability distribution of each class
    prob_distribution = dimshuffle(distribution / tf.reduce_sum(distribution),
                                   ('x', 0))
    # we need to clip the prior probability distribution also
    prob_distribution = tf.clip_by_value(prob_distribution, EPS, 1.0 - EPS)
    # ====== init confusion info loss ====== #
    # weighted by y_true
    loss = y_true * tf.log(y_pred)
    loss = - 1 / nb_classes * tf.reduce_sum(loss / prob_distribution, axis=1)
    return reduction(loss)
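A quick sanity check on the prior weighting (assuming a perfectly balanced batch):

# If every class appears equally often, prob_distribution == 1/nb_classes, so
#   loss = -(1/nb_classes) * sum(y_true * log(y_pred) * nb_classes)
#        = -sum(y_true * log(y_pred)),
# i.e. the loss reduces to plain categorical cross-entropy.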
Example No. 6
def translate(U, theta, out_height, out_width):
    num_batch = tf.shape(U)[0]
    height, width, num_ch = U.get_shape()[1:]
    height = height.value
    width = width.value
    num_ch = num_ch.value
    hwc = height*width*num_ch

    nind = tf.range(num_batch)
    x = repeat(tf.range(height), width)
    y = tf.tile(tf.range(width), tf.pack([height]))
    cind = tf.range(num_ch)

    nind = tf.expand_dims(repeat(nind, hwc), 1)
    x = tf.tile(tf.expand_dims(repeat(x, num_ch), 1), tf.pack([num_batch,1]))
    y = tf.tile(tf.expand_dims(repeat(y, num_ch), 1), tf.pack([num_batch,1]))
    cind = tf.tile(tf.expand_dims(cind, 1), tf.pack([num_batch*height*width,1]))

    dx, dy = tf.split(1, 2, theta)
    dx = tf.cast(tf.clip_by_value(dx, 0, out_height-height), 'int32')
    dx = tf.reshape(tf.tile(dx, tf.pack([1,hwc])), [-1,1])
    dy = tf.cast(tf.clip_by_value(dy, 0, out_width-width), 'int32')
    dy = tf.reshape(tf.tile(dy, tf.pack([1,hwc])), [-1,1])
    x = x + dx
    y = y + dy

    tind = tf.concat(1, [nind, x, y, cind])
    val = tf.reshape(U, [-1])
    T = tf.sparse_to_dense(tind,
            tf.pack([num_batch, out_height, out_width, num_ch]),
            val)
    T.set_shape([None, out_height, out_width, num_ch])
    return T
Example No. 7
    def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
        # work out what the mean and variance is of the indicated latent function.
        oh_on = tf.cast(tf.one_hot(tf.reshape(Y, (-1,)), self.num_classes, 1.0, 0.0), float_type)
        mu_selected = tf.reduce_sum(oh_on * mu, 1)
        var_selected = tf.reduce_sum(oh_on * var, 1)

        # generate Gauss Hermite grid
        X = tf.reshape(mu_selected, (-1, 1)) + gh_x * tf.reshape(
            tf.sqrt(tf.clip_by_value(2.0 * var_selected, 1e-10, np.inf)), (-1, 1)
        )

        # compute the CDF of the Gaussian between the latent functions and the grid (including the selected function)
        dist = (tf.expand_dims(X, 1) - tf.expand_dims(mu, 2)) / tf.expand_dims(
            tf.sqrt(tf.clip_by_value(var, 1e-10, np.inf)), 2
        )
        cdfs = 0.5 * (1.0 + tf.erf(dist / np.sqrt(2.0)))

        cdfs = cdfs * (1 - 2e-4) + 1e-4

        # blank out all the distances on the selected latent function
        oh_off = tf.cast(tf.one_hot(tf.reshape(Y, (-1,)), self.num_classes, 0.0, 1.0), float_type)
        cdfs = cdfs * tf.expand_dims(oh_off, 2) + tf.expand_dims(oh_on, 2)

        # take the product over the latent functions, and the sum over the GH grid.
        return tf.matmul(tf.reduce_prod(cdfs, reduction_indices=[1]), tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1)))
Example No. 8
    def _loss_x_entropy(self, x, z, noise=None):
        with tf.name_scope("xentropy_loss"):
            z_clipped = tf.clip_by_value(z, FLAGS.zero_bound, FLAGS.one_bound)
            z_minus_1_clipped = tf.clip_by_value((1.0 - z), FLAGS.zero_bound, FLAGS.one_bound)
            x_clipped = tf.clip_by_value(x, FLAGS.zero_bound, FLAGS.one_bound)
            x_minus_1_clipped = tf.clip_by_value((1.0 - x), FLAGS.zero_bound, FLAGS.one_bound)
            
            # cross_entropy = x * log(z) + (1 - x) * log(1 - z)
            
            cross_entropy = tf.add(tf.mul(tf.log(z_clipped), x_clipped),
                                   tf.mul(tf.log(z_minus_1_clipped), x_minus_1_clipped), name='X-Entr')

            if noise:
                with tf.name_scope("Given_Emphasis"):
                    a, b = self._get_emph_params
                    corrupted = tf.select(noise, cross_entropy, tf.zeros_like(cross_entropy), name='Corrupted_Emphasis')
                    
                    # OR -- tf.select(tf.logical_not(noisy_points), cross_entropy, tf.zeros_like(cross_entropy), name='Uncorrupted_Emphasis')
                    uncorrupted = tf.select(noise, tf.zeros_like(cross_entropy), cross_entropy, name='Uncorrupted_Emphasis')
                    
                    loss = a * (-1 * tf.reduce_sum(corrupted, 1)) + b * (-1 * tf.reduce_sum(uncorrupted, 1))
            else:
                # Sum the cost for each example
                loss = -1 * tf.reduce_sum(cross_entropy, 1)
        
            # Reduce mean to find the overall cost of the loss
            cross_entropy_mean = tf.reduce_mean(loss, name='xentropy_mean')
    
            return cross_entropy_mean
Example No. 9
	def build_decoder(self, input_var):
		# Build the decoder
		if len(self.p_layers) > 0:
			self._decoder = Sequential('vae_decoder')
			self._decoder += FullyConnected(self.latent_dims, self.p_layers[0], coder_act_fn, name='fc_1')
			for i in xrange(1, len(self.p_layers)):
				self._decoder += FullyConnected(self.p_layers[i-1], self.p_layers[i], coder_act_fn, name='fc_%d'%(i+1))

			self.decoder = self._decoder(input_var)

			self._dec_mean = FullyConnected(self.p_layers[-1], self.input_dims, dec_mean_act_fn, name='dec_mean')
			self.dec_mean = self._dec_mean(self.decoder)
			self._dec_log_std_sq = FullyConnected(self.p_layers[-1], self.input_dims, mean_std_act_fn, name='dec_std')
			self.dec_log_std_sq = tf.clip_by_value(
				self._dec_log_std_sq(self.decoder),
				-self.sigma_clip,
				self.sigma_clip
			)

		else:
			self.decoder = input_var

			self._dec_mean = FullyConnected(self.latent_dims, self.input_dims, dec_mean_act_fn, name='dec_mean')
			self.dec_mean = self._dec_mean(self.decoder)
			self._dec_log_std_sq = FullyConnected(self.latent_dims, self.input_dims, mean_std_act_fn, name='dec_std')
			self.dec_log_std_sq = tf.clip_by_value(
				self._dec_log_std_sq(self.decoder),
				-self.sigma_clip,
				self.sigma_clip
			)
def focal_loss(prediction_tensor, target_tensor, weights=None, alpha=0.25, gamma=2):
    r"""Compute focal loss for predictions.

        Multi-labels Focal loss formula:
            FL = -alpha * (z-p)^gamma * log(p) - (1-alpha) * p^gamma * log(1-p)
                 where alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.

    Args:
     prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
     target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
     weights: A float tensor of shape [batch_size, num_anchors]
     alpha: A scalar tensor for focal loss alpha hyper-parameter
     gamma: A scalar tensor for focal loss gamma hyper-parameter
    Returns:
        loss: A (scalar) tensor representing the value of the loss function
    """
    sigmoid_p = tf.nn.sigmoid(prediction_tensor)
    zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)
    pos_p_sub = array_ops.where(target_tensor >= sigmoid_p, target_tensor - sigmoid_p, zeros)
    neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
    per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                          - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
    return tf.reduce_mean(per_entry_cross_ent)
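A hedged usage sketch (TF1-style session; the shapes and values below are illustrative only):

logits = tf.constant([[[2.0, -1.0, 0.5],
                       [0.3,  0.1, -0.2]]])   # [batch=1, anchors=2, classes=3]
targets = tf.constant([[[1., 0., 0.],
                        [0., 1., 0.]]])       # one-hot targets
loss = focal_loss(logits, targets)
with tf.Session() as sess:
    print(sess.run(loss))  # scalar; well-classified entries contribute little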
Example No. 11
    def _create_cost_function_node(self, model_output, ref_input, regterm=None):

        """ Create the cost function node.
        :param model_output: model output node
        :param ref_input: reference input placeholder node
        :param regterm: regularization term
        :return: None. The computed cost is stored in self.cost.
        """

        with tf.name_scope("cost"):
            if self.loss_func == 'cross_entropy':
                cost = - tf.reduce_mean(ref_input * tf.log(tf.clip_by_value(model_output, 1e-10, float('inf'))) +
                                        (1 - ref_input) * tf.log(tf.clip_by_value(1 - model_output, 1e-10, float('inf'))))

            elif self.loss_func == 'softmax_cross_entropy':
                softmax = tf.nn.softmax(model_output)
                cost = - tf.reduce_mean(ref_input * tf.log(softmax) + (1 - ref_input) * tf.log(1 - softmax))

            elif self.loss_func == 'mean_squared':
                cost = tf.sqrt(tf.reduce_mean(tf.square(ref_input - model_output)))

            else:
                cost = None

        if cost is not None:
            self.cost = cost + regterm if regterm is not None else cost
            _ = tf.scalar_summary(self.loss_func, self.cost)
        else:
            self.cost = None
Example No. 12
def batchnorm(x, gamma, beta, r_mean, r_var):
  mean, var = tf.nn.moments(x,[0])
  update_mean = tf.assign(r_mean,0.9 * r_mean + 0.1 * mean)
  update_var = tf.assign(r_var,0.9 * r_var + 0.1 * var)
  with tf.control_dependencies([update_mean,update_var]):
    return tf.nn.batch_normalization(x,tf.clip_by_value(r_mean,1e-10,100),tf.clip_by_value(r_var,1e-10,100),
                                     offset=beta,scale=gamma,variance_epsilon=1e-5)
Example No. 13
    def cross_entropy(u, label_u, alpha=0.5, normed=False):

        label_ip = tf.cast(
            tf.matmul(label_u, tf.transpose(label_u)), tf.float32)
        s = tf.clip_by_value(label_ip, 0.0, 1.0)

        # compute balance param
        # s_t \in {-1, 1}
        s_t = tf.multiply(tf.add(s, tf.constant(-0.5)), tf.constant(2.0))
        sum_1 = tf.reduce_sum(s)
        sum_all = tf.reduce_sum(tf.abs(s_t))
        balance_param = tf.add(tf.abs(tf.add(s, tf.constant(-1.0))),
                               tf.multiply(tf.div(sum_all, sum_1), s))

        if normed:
            # ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1)
            ip_1 = tf.matmul(u, tf.transpose(u))

            def reduce_shaper(t):
                return tf.reshape(tf.reduce_sum(t, 1), [tf.shape(t)[0], 1])
            mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)),
                                      reduce_shaper(tf.square(u)), transpose_b=True))
            ip = tf.div(ip_1, mod_1)
        else:
            ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1)
        ones = tf.ones([tf.shape(u)[0], tf.shape(u)[0]])
        return tf.reduce_mean(tf.multiply(tf.log(ones + tf.exp(alpha * ip)) - s * alpha * ip, balance_param))
Example No. 14
def test():
    saver.restore(sess, FLAGS.save_dir+'/model.ckpt')
    batch_x = test_x[0:100]


    fig = plt.figure('original')
    plt.gray()
    plt.axis('off')
    plt.imshow(batchmat_to_tileimg(batch_x, (height, width), (10, 10)))
    fig.savefig(FLAGS.save_dir+'/original.png')

    fa, sa = sess.run([tf.clip_by_value(x_att0, 0, 1),
        tf.clip_by_value(x_att1, 0, 1)], {x:batch_x})
    fig = plt.figure('first att')
    plt.gray()
    plt.axis('off')
    plt.imshow(batchmat_to_tileimg(fa, (N, N), (10, 10)))
    fig.savefig(FLAGS.save_dir+'/first_attention.png')

    fig = plt.figure('second att')
    plt.gray()
    plt.axis('off')
    plt.imshow(batchmat_to_tileimg(sa, (N, N), (10, 10)))
    fig.savefig(FLAGS.save_dir+'/second_attention.png')

    fr, sr = sess.run([tf.clip_by_value(p0, 0, 1),
        tf.clip_by_value(p1, 0, 1)], {x:batch_x})
    fig = plt.figure('first recon')
    plt.gray()
    plt.axis('off')
    plt.imshow(batchmat_to_tileimg(fr, (height, width), (10, 10)))
    fig.savefig(FLAGS.save_dir+'/first_recon.png')

    fig = plt.figure('second recon')
    plt.gray()
    plt.axis('off')
    plt.imshow(batchmat_to_tileimg(sr, (height, width), (10, 10)))
    fig.savefig(FLAGS.save_dir+'/second_recon.png')


    fig = plt.figure('reconstructed')
    plt.gray()
    plt.axis('off')
    p_recon = sess.run(p, {x:batch_x})
    plt.imshow(batchmat_to_tileimg(p_recon, (height, width), (10, 10)))
    fig.savefig(FLAGS.save_dir+'/reconstructed.png')

    p_gen = sess.run(p, {z0_c:np.random.normal(size=(100, n_lat_c)),
                        z0_t:np.random.normal(size=(100, n_lat_t)),
                        z1_c:np.random.normal(size=(100, n_lat_c)),
                        z1_t:np.random.normal(size=(100, n_lat_t))})
    I_gen = batchmat_to_tileimg(p_gen, (height, width), (10, 10))
    fig = plt.figure('generated')
    plt.gray()
    plt.axis('off')
    plt.imshow(I_gen)
    fig.savefig(FLAGS.save_dir+'/generated.png')

    plt.show()
Example No. 15
  def build_model(self, reuse, dev, ntype):
    with tf.variable_scope(self.name) and tf.device(dev):
      if reuse:
        tf.get_variable_scope().reuse_variables()
        assert tf.get_variable_scope().reuse

      # Set inputs of networks
      self.minimap = tf.placeholder(tf.float32, [None, U.minimap_channel(), self.msize, self.msize], name='minimap')
      self.screen = tf.placeholder(tf.float32, [None, U.screen_channel(), self.ssize, self.ssize], name='screen')
      self.info = tf.placeholder(tf.float32, [None, self.isize], name='info')

      # Build networks
      net = build_net(self.minimap, self.screen, self.info, self.msize, self.ssize, len(actions.FUNCTIONS), ntype)
      self.spatial_action, self.non_spatial_action, self.value = net

      # Set targets and masks
      self.valid_spatial_action = tf.placeholder(tf.float32, [None], name='valid_spatial_action')
      self.spatial_action_selected = tf.placeholder(tf.float32, [None, self.ssize**2], name='spatial_action_selected')
      self.valid_non_spatial_action = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='valid_non_spatial_action')
      self.non_spatial_action_selected = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='non_spatial_action_selected')
      self.value_target = tf.placeholder(tf.float32, [None], name='value_target')

      # Compute log probability
      spatial_action_prob = tf.reduce_sum(self.spatial_action * self.spatial_action_selected, axis=1)
      spatial_action_log_prob = tf.log(tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
      non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.non_spatial_action_selected, axis=1)
      valid_non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.valid_non_spatial_action, axis=1)
      valid_non_spatial_action_prob = tf.clip_by_value(valid_non_spatial_action_prob, 1e-10, 1.)
      non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
      non_spatial_action_log_prob = tf.log(tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
      self.summary.append(tf.summary.histogram('spatial_action_prob', spatial_action_prob))
      self.summary.append(tf.summary.histogram('non_spatial_action_prob', non_spatial_action_prob))

      # Compute losses, more details in https://arxiv.org/abs/1602.01783
      # Policy loss and value loss
      action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
      advantage = tf.stop_gradient(self.value_target - self.value)
      policy_loss = - tf.reduce_mean(action_log_prob * advantage)
      value_loss = - tf.reduce_mean(self.value * advantage)
      self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
      self.summary.append(tf.summary.scalar('value_loss', value_loss))

      # TODO: policy penalty
      loss = policy_loss + value_loss

      # Build the optimizer
      self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
      opt = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-10)
      grads = opt.compute_gradients(loss)
      clipped_grad = []
      for grad, var in grads:
        self.summary.append(tf.summary.histogram(var.op.name, var))
        self.summary.append(tf.summary.histogram(var.op.name+'/grad', grad))
        grad = tf.clip_by_norm(grad, 10.0)
        clipped_grad.append([grad, var])
      self.train_op = opt.apply_gradients(clipped_grad)
      self.summary_op = tf.summary.merge(self.summary)

      self.saver = tf.train.Saver(max_to_keep=100)
Example No. 16
    def _forward(self, x, gpu):
        hps = self.hps

        x = tf.to_float(x)
        x = tf.clip_by_value((x + 0.5) / 256.0, 0.0, 1.0) - 0.5

        # Input images are repeated k times on the input.
        # This is used for Importance Sampling loss (k is number of samples).
        data_size = hps.batch_size * hps.k
        x = repeat(x, hps.k)

        orig_x = x
        h_size = hps.h_size

        with arg_scope([conv2d, deconv2d], init=(self.mode == "init")):
            layers = []
            for i in range(hps.depth):
                layers.append([])
                for j in range(hps.num_blocks):
                    downsample = (i > 0) and (j == 0)
                    layers[-1].append(IAFLayer(hps, self.mode, downsample))

            h = conv2d("x_enc", x, h_size, [5, 5], [2, 2])  # -> [16, 16]
            for i, layer in enumerate(layers):
                for j, sub_layer in enumerate(layer):
                    with tf.variable_scope("IAF_%d_%d" % (i, j)):
                        h = sub_layer.up(h)

            # top->down
            self.h_top = h_top = tf.get_variable("h_top", [h_size], initializer=tf.zeros_initializer)
            h_top = tf.reshape(h_top, [1, -1, 1, 1])
            h = tf.tile(h_top, [data_size, 1, hps.image_size / 2 ** len(layers), hps.image_size / 2 ** len(layers)])
            kl_cost = kl_obj = 0.0

            for i, layer in reversed(list(enumerate(layers))):
                for j, sub_layer in reversed(list(enumerate(layer))):
                    with tf.variable_scope("IAF_%d_%d" % (i, j)):
                        h, cur_obj, cur_cost = sub_layer.down(h)
                        kl_obj += cur_obj
                        kl_cost += cur_cost

                        if self.mode == "train" and gpu == hps.num_gpus - 1:
                            tf.scalar_summary("model/kl_obj_%02d_%02d" % (i, j), tf.reduce_mean(cur_obj))
                            tf.scalar_summary("model/kl_cost_%02d_%02d" % (i, j), tf.reduce_mean(cur_cost))

            x = tf.nn.elu(h)
            x = deconv2d("x_dec", x, 3, [5, 5])
            x = tf.clip_by_value(x, -0.5 + 1 / 512., 0.5 - 1 / 512.)

        log_pxz = discretized_logistic(x, self.dec_log_stdv, sample=orig_x)
        obj = tf.reduce_sum(kl_obj - log_pxz)

        if self.mode == "train" and gpu == hps.num_gpus - 1:
            tf.scalar_summary("model/log_pxz", -tf.reduce_mean(log_pxz))
            tf.scalar_summary("model/kl_obj", tf.reduce_mean(kl_obj))
            tf.scalar_summary("model/kl_cost", tf.reduce_mean(kl_cost))

        loss = tf.reduce_sum(compute_lowerbound(log_pxz, kl_cost, hps.k))
        return x, obj, loss
Example No. 17
 def scale(self, x):
   """Scale x from -0.5 - 0.5 to 0 - 255."""
   x = tf.where(tf.is_nan(x), tf.ones_like(x), x)
   x = tf.where(tf.is_inf(x), tf.ones_like(x), x)
   x = tf.clip_by_value(x, -0.5, 0.5)
   x += 0.5
   x = x * 2**self.hparams.n_bits_x
   return tf.cast(tf.clip_by_value(x, 0, 255), dtype=tf.uint8)
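Worked example (assuming n_bits_x = 8):

# x = 0.25 -> clip to [-0.5, 0.5] -> 0.25
#          -> + 0.5               -> 0.75
#          -> * 2**8              -> 192.0
#          -> clip to [0, 255] and cast -> 192 (uint8)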
Example No. 18
def _interpolate2d(imgs, x, y):
    n_batch = tf.shape(imgs)[0]
    xlen = tf.shape(imgs)[1]
    ylen = tf.shape(imgs)[2]
    n_channel = tf.shape(imgs)[3]

    x = tf.to_float(x)
    y = tf.to_float(y)
    xlen_f = tf.to_float(xlen)
    ylen_f = tf.to_float(ylen)
    zero = tf.zeros([], dtype='int32')
    max_x = tf.cast(xlen - 1, 'int32')
    max_y = tf.cast(ylen - 1, 'int32')

    # scale indices from [-1, 1] to [0, xlen - 1] and [0, ylen - 1]
    x = (x + 1.) * (xlen_f - 1.) * 0.5
    y = (y + 1.) * (ylen_f - 1.) * 0.5

    # do sampling
    x0 = tf.cast(tf.floor(x), 'int32')
    x1 = x0 + 1
    y0 = tf.cast(tf.floor(y), 'int32')
    y1 = y0 + 1

    x0 = tf.clip_by_value(x0, zero, max_x)
    x1 = tf.clip_by_value(x1, zero, max_x)
    y0 = tf.clip_by_value(y0, zero, max_y)
    y1 = tf.clip_by_value(y1, zero, max_y)
    base = _repeat(tf.range(n_batch) * xlen * ylen, ylen * xlen)
    base_x0 = base + x0 * ylen
    base_x1 = base + x1 * ylen
    index00 = base_x0 + y0
    index01 = base_x0 + y1
    index10 = base_x1 + y0
    index11 = base_x1 + y1

    # use indices to lookup pixels in the flat image and restore
    # n_channel dim
    imgs_flat = tf.reshape(imgs, [-1, n_channel])
    imgs_flat = tf.to_float(imgs_flat)
    I00 = tf.gather(imgs_flat, index00)
    I01 = tf.gather(imgs_flat, index01)
    I10 = tf.gather(imgs_flat, index10)
    I11 = tf.gather(imgs_flat, index11)

    # and finally calculate interpolated values
    dx = x - tf.to_float(x0)
    dy = y - tf.to_float(y0)
    w00 = tf.expand_dims((1. - dx) * (1. - dy), 1)
    w01 = tf.expand_dims((1. - dx) * dy, 1)
    w10 = tf.expand_dims(dx * (1. - dy), 1)
    w11 = tf.expand_dims(dx * dy, 1)
    output = tf.add_n([w00*I00, w01*I01, w10*I10, w11*I11])

    # reshape
    output = tf.reshape(output, [n_batch, xlen, ylen, n_channel])

    return output
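Worked example of the bilinear weights (assumed fractional offsets):

# For a sample point with dx = 0.25, dy = 0.75 inside its pixel cell:
#   w00 = (1 - 0.25) * (1 - 0.75) = 0.1875   (weight on I00)
#   w01 = (1 - 0.25) * 0.75       = 0.5625   (I01)
#   w10 = 0.25 * (1 - 0.75)       = 0.0625   (I10)
#   w11 = 0.25 * 0.75             = 0.1875   (I11)
# The weights sum to 1, so the output is a convex blend of the four neighbors.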
    def __init__(self, scope, globalAC=None):

        self.scope = scope
        if scope == GLOBAL_NET_SCOPE:
            # the global network only does inference
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
                self._build_net()
                self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False)
                self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False)

                normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space

                with tf.name_scope('choose_a'):  # use local params to choose action
                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND)

        else:
            # worker networks compute gradients locally and update the global network
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
                self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A')
                self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget')

                self._build_net()

                td = tf.subtract(self.v_target, self.v, name='TD_error')
                with tf.name_scope('c_loss'):
                    self.c_loss = tf.reduce_mean(tf.square(td))

                with tf.name_scope('wrap_a_out'):
                    self.test = self.sigma[0]
                    self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5

                normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma) # for continuous action space

                with tf.name_scope('a_loss'):
                    log_prob = normal_dist.log_prob(self.a_his)
                    exp_v = log_prob * td
                    entropy = normal_dist.entropy()  # encourage exploration
                    self.exp_v = ENTROPY_BETA * entropy + exp_v
                    self.a_loss = tf.reduce_mean(-self.exp_v)

                with tf.name_scope('choose_a'):  # use local params to choose action
                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND)

                with tf.name_scope('local_grad'):
                    self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False)
                    self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False)
                    self.a_grads = tf.gradients(self.a_loss, self.a_params)
                    self.c_grads = tf.gradients(self.c_loss, self.c_params)

            with tf.name_scope('sync'):
                with tf.name_scope('pull'):
                    self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)]
                    self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)]
                with tf.name_scope('push'):
                    self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params))
                    self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params))
Example No. 20
def BatchRenorm(x, rmax, dmax, decay=0.9, epsilon=1e-5,
                use_scale=True, use_bias=True):
    """
    Batch Renormalization layer, as described in the paper:
    `Batch Renormalization: Towards Reducing Minibatch Dependence in Batch-Normalized Models
    <https://arxiv.org/abs/1702.03275>`_.

    Args:
        x (tf.Tensor): a NHWC or NC tensor.
        rmax, dmax (tf.Tensor): a scalar tensor, the maximum allowed corrections.
        decay (float): decay rate of moving average.
        epsilon (float): epsilon to avoid divide-by-zero.
        use_scale, use_bias (bool): whether to use the extra affine transformation or not.

    Returns:
        tf.Tensor: a tensor named ``output`` with the same shape of x.

    Variable Names:

    * ``beta``: the bias term.
    * ``gamma``: the scale term. Input will be transformed by ``x * gamma + beta``.
    * ``mean/EMA``: the moving average of mean.
    * ``variance/EMA``: the moving average of variance.
    """

    shape = x.get_shape().as_list()
    assert len(shape) in [2, 4]
    n_out = shape[-1]
    if len(shape) == 2:
        x = tf.reshape(x, [-1, 1, 1, n_out])
    beta, gamma, moving_mean, moving_var = get_bn_variables(
        n_out, use_scale, use_bias, tf.constant_initializer(1.0))

    ctx = get_current_tower_context()
    use_local_stat = ctx.is_training
    # for BatchRenorm, use_local_stat should always be is_training, unless a
    # different usage comes out in the future.

    if use_local_stat:
        xn, batch_mean, batch_var = tf.nn.fused_batch_norm(x, gamma, beta,
                                                           epsilon=epsilon, is_training=True)
        inv_sigma = tf.rsqrt(moving_var, 'inv_sigma')
        r = tf.stop_gradient(tf.clip_by_value(
            tf.sqrt(batch_var) * inv_sigma, 1.0 / rmax, rmax))
        d = tf.stop_gradient(tf.clip_by_value(
            (batch_mean - moving_mean) * inv_sigma,
            -dmax, dmax))
        xn = xn * r + d
    else:
        xn = tf.nn.batch_normalization(
            x, moving_mean, moving_var, beta, gamma, epsilon)

    if len(shape) == 2:
        xn = tf.squeeze(xn, [1, 2])
    if ctx.is_main_training_tower:
        return update_bn_ema(xn, batch_mean, batch_var, moving_mean, moving_var, decay)
    else:
        return tf.identity(xn, name='output')
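For intuition, a worked example of the r/d corrections (statistics assumed):

# batch_var = 4.0, moving_var = 1.0, rmax = 1.5:
#   r = clip(sqrt(4.0) * rsqrt(1.0), 1/1.5, 1.5) = clip(2.0, 0.667, 1.5) = 1.5
# batch_mean = 0.3, moving_mean = 0.0, dmax = 0.2:
#   d = clip((0.3 - 0.0) * rsqrt(1.0), -0.2, 0.2) = 0.2
# The output is then corrected as xn * 1.5 + 0.2 (both under stop_gradient),
# nudging the batch statistics toward the moving averages.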
Example No. 21
  def _get_coordinatewise_learning_rate(self, grad, var):
    # Compute the learning rate using a moving average for the diagonal of BB^T
    avg_first = self.get_slot(var, 'first_moment')
    avg_second = self.get_slot(var, 'second_moment')
    decay_tensor = tf.cast(self._decay_tensor, var.dtype)
    batch_size = tf.cast(self._batch_size_tensor, var.dtype)

    # Create an estimator for the moving average of gradient mean and variance
    # via Welford's algorithm
    if isinstance(grad, tf.Tensor):
      delta = grad - avg_first
      first_moment_update = avg_first.assign_add(
          delta * tf.where(self._counter < 1,
                           tf.cast(1, var.dtype),
                           1. - decay_tensor))

      with tf.control_dependencies([first_moment_update]):
        second_moment_update = avg_second.assign_add(
            tf.cast(self._counter < 1, var.dtype) *
            -(1. - decay_tensor) * (
                avg_second - decay_tensor  * tf.square(delta)))
      diag_preconditioner = control_flow_ops.with_dependencies(
          [second_moment_update],
          tf.clip_by_value(avg_second, 1e-12, 1e12))
    elif isinstance(grad, tf.IndexedSlices):
      delta = grad.values - tf.gather_nd(avg_first, grad.indices)
      first_moment_update = tf.scatter_add(
          avg_first,
          grad.indices,
          delta * tf.where(self._counter < 1,
                           tf.cast(1., var.dtype),
                           1. - decay_tensor))

      with tf.control_dependencies([first_moment_update]):
        avg_second = tf.scatter_add(
            avg_second,
            grad.indices,
            tf.cast(self._counter < 1, var.dtype) *
            -(1. - decay_tensor) * (
                tf.gather_nd(avg_second, grad.indices) - decay_tensor *
                tf.square(delta)))
        avg_second = tf.gather_nd(avg_second, grad.indices)
        # TODO(b/70783772): Needs dtype specific clipping.
        diag_preconditioner = tf.clip_by_value(avg_second, 1e-12, 1e12)
    else:
      raise tf.errors.InvalidArgumentError(
          None, None, 'grad must be of type Tensor or IndexedSlices')

    diag_preconditioner *= batch_size

    if self._use_single_learning_rate:
      diag_preconditioner = tf.reduce_mean(diag_preconditioner)

    # From Theorem 2 Corollary 1 of Mandt et al. 2017
    return 2. * batch_size / (
        tf.cast(self._total_num_examples, var.dtype.base_dtype) *
        diag_preconditioner)
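Worked arithmetic for the returned rate (values assumed):

# batch_size = 32, total_num_examples = 50000, and a diag_preconditioner of
# 0.01 (after the batch_size multiply and optional mean-reduction above):
#   lr = 2 * 32 / (50000 * 0.01) = 64 / 500 = 0.128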
    def __init__(self, env, env_name, _optimizer='adam'):
        """
        :param env:
        Output of this Discriminator is reward for learning agent. Not the cost.
        Because the discriminator predicts P(expert|s,a) = 1 - P(agent|s,a).
        """
        self._optimizer = _optimizer
        env_header = env_name.split('-')[0]
        # CartPole-v1, Acrobot-v1, Pendulum-v0, HalfCheetah-v2, Hopper-v2, Walker2d-v2, Humanoid-v2
        if env_header == 'CartPole' or env_header == 'Acrobot' or env_header == 'Pendulum' or env_header == 'MountainCar':  # classic control Gym
            action_space_count = env.action_space.n
        else: #Mujoco
            action_space_count = env.action_space.shape[0]

        with tf.variable_scope('discriminator'):
            self.scope = tf.get_variable_scope().name
            self.expert_s = tf.placeholder(dtype=tf.float32, shape=[None] + list(env.observation_space.shape))
            self.expert_a = tf.placeholder(dtype=tf.int32, shape=[None])
            expert_a_one_hot = tf.one_hot(self.expert_a, depth=action_space_count)
            # add noise to stabilise training
            expert_a_one_hot += tf.random_normal(tf.shape(expert_a_one_hot), mean=0.2, stddev=0.1, dtype=tf.float32)/1.2
            expert_s_a = tf.concat([self.expert_s, expert_a_one_hot], axis=1)

            self.agent_s = tf.placeholder(dtype=tf.float32, shape=[None] + list(env.observation_space.shape))
            self.agent_a = tf.placeholder(dtype=tf.int32, shape=[None])
            agent_a_one_hot = tf.one_hot(self.agent_a, depth=action_space_count)
            # add noise to stabilise training
            agent_a_one_hot += tf.random_normal(tf.shape(agent_a_one_hot), mean=0.2, stddev=0.1, dtype=tf.float32)/1.2
            agent_s_a = tf.concat([self.agent_s, agent_a_one_hot], axis=1)

            with tf.variable_scope('network') as network_scope:
                prob_1 = self.construct_network(input=expert_s_a)
                network_scope.reuse_variables()  # share parameter
                prob_2 = self.construct_network(input=agent_s_a)

            with tf.variable_scope('loss'):
                loss_expert = tf.reduce_mean(tf.log(tf.clip_by_value(prob_1, 0.01, 1)))
                loss_agent = tf.reduce_mean(tf.log(tf.clip_by_value(1 - prob_2, 0.01, 1)))
                loss = loss_expert + loss_agent
                loss = -loss
                tf.summary.scalar('discriminator', loss)

            # optimizer: adagrad, rmsprop, adadelta, adam, cocob
            if self._optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)  # initial_accumulator_value=0.1
            elif self._optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00025)  # decay=0.9, momentum=0.0, epsilon=1e-10, use_locking=False, centered=False
            elif self._optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.5)  # learning_rate=0.001, rho=0.95, epsilon=1e-08, use_locking=False
            elif self._optimizer == 'cocob':
                optimizer = cocob.COCOB()
            else:  # adam
                optimizer = tf.train.AdamOptimizer()  # lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False
            self.train_op = optimizer.minimize(loss)

            self.rewards = tf.log(tf.clip_by_value(prob_2, 1e-10, 1))  # log(P(expert|s,a)) larger is better for agent
Example No. 23
 def get_reconstruction_cost(self):
     """Compute the cross-entropy of the original input and the reconstruction"""
     activation_h = self.propup(self.input)
     activation_v = self.propdown(activation_h)
     # Clip to avoid NaN from log(0)
     activation_v_clip = tf.clip_by_value(activation_v, clip_value_min=1e-30, clip_value_max=1.0)
     reduce_activation_v_clip = tf.clip_by_value(1.0 - activation_v, clip_value_min=1e-30, clip_value_max=1.0)
     cross_entropy = -tf.reduce_mean(tf.reduce_sum(self.input*(tf.log(activation_v_clip)) + 
                                 (1.0 - self.input)*(tf.log(reduce_activation_v_clip)), axis=1))
     return cross_entropy   
Example No. 24
def conv_cross_entropy(hypo, actual_value):
    """Calculate Cross Entropy
    Args:
        hypo         -- TensorFlow variable of the hypothesis
        actual_value -- TensorFlow variable of the expected value
    Returns:
        TensorFlow variable of the Cross Entropy
    """
    return -tf.reduce_mean(
        actual_value * tf.log(tf.clip_by_value(hypo, 1e-10, 1.0)) +
        (1-actual_value) * tf.log(tf.clip_by_value(1-hypo, 1e-10, 1.0)))
    def __init__(self, config, is_training=True):
        self.batch_size = tf.Variable(0, dtype=tf.int32, trainable=False)

        num_step = config.num_step
        embed_dim = config.embed_dim
        self.input_data_s1 = tf.placeholder(tf.float64, [None, num_step, embed_dim])
        self.input_data_s2 = tf.placeholder(tf.float64, [None, num_step, embed_dim])
        self.target = tf.placeholder(tf.float64, [None])
        self.mask_s1 = tf.placeholder(tf.float64, [None, num_step])
        self.mask_s2 = tf.placeholder(tf.float64, [None, num_step])

        self.hidden_neural_size = config.hidden_neural_size
        self.new_batch_size = tf.placeholder(tf.int32, shape=[], name="new_batch_size")
        self._batch_size_update = tf.assign(self.batch_size, self.new_batch_size)

        with tf.name_scope('lstm_output_layer'):
            self.cell_outputs1 = self.singleRNN(x=self.input_data_s1, scope='side1', cell='lstm', reuse=None)
            self.cell_outputs2 = self.singleRNN(x=self.input_data_s2, scope='side1', cell='lstm', reuse=True)

        with tf.name_scope('Sentence_Layer'):
            # self.sent1 = tf.reduce_sum(self.cell_outputs1 * self.mask_s1[:, :, None], axis=1)
            # self.sent2 = tf.reduce_sum(self.cell_outputs2 * self.mask_s2[:, :, None], axis=1)
            # self.mask_s1_sum=tf.reduce_sum(self.mask_s1,axis=0)
            # self.mask_s2_sum=tf.reduce_sum(self.mask_s2,axis=0)
            # self.mask_s1_sum1 = tf.reduce_sum(self.mask_s1, axis=1)
            # self.mask_s2_sum1 = tf.reduce_sum(self.mask_s2, axis=1)
            self.sent1 = tf.reduce_sum(self.cell_outputs1 * self.mask_s1[:, :, None], axis=1)
            self.sent2 = tf.reduce_sum(self.cell_outputs2 * self.mask_s2[:, :, None], axis=1)

        with tf.name_scope('loss'):
            diff = tf.abs(tf.subtract(self.sent1, self.sent2), name='err_l1')
            diff = tf.reduce_sum(diff, axis=1)
            self.sim = tf.clip_by_value(tf.exp(-1.0 * diff), 1e-7, 1.0 - 1e-7)
            self.loss = tf.square(tf.subtract(self.sim, tf.clip_by_value((self.target - 1.0) / 4.0, 1e-7, 1.0 - 1e-7)))

        with tf.name_scope('cost'):
            self.cost = tf.reduce_mean(self.loss)
            self.truecost = tf.reduce_mean(tf.square(tf.subtract(self.sim * 4.0 + 1.0, self.target)))

        if not is_training:
            return

        self.global_step = tf.Variable(0, name="global_step", trainable=False, dtype=tf.float64)
        self.lr = tf.Variable(0.0, trainable=False, dtype=tf.float64)

        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=self.lr, epsilon=1e-6)

        with tf.name_scope('train'):
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        self.new_lr = tf.placeholder(tf.float64, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)
Example No. 26
def mean_squared_logarithmic_error(y_true, y_pred):
  """Mean squared logarithmic error loss.

  Args:
    y_true: tf.Tensor.
    y_pred: tf.Tensor.
      Tensors of same shape and type.
  """
  first_log = tf.log(tf.clip_by_value(y_pred, 1e-8, np.inf) + 1.0)
  second_log = tf.log(tf.clip_by_value(y_true, 1e-8, np.inf) + 1.0)
  return tf.reduce_mean(tf.square(first_log - second_log))
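Quick worked check (values assumed):

# y_true = [e - 1], y_pred = [e**2 - 1]:
#   log(y_pred + 1) - log(y_true + 1) = 2 - 1 = 1, so the loss is 1.0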
Example No. 27
 def build(self, y_hat, y, mask=None):
     if mask is None:
         self.loss = tf.reduce_mean(
             -tf.reduce_sum(y * tf.log(tf.clip_by_value(y_hat, 1e-10, 1.0)),
                            reduction_indices=[1]))
     else:
         self.loss = tf.reduce_mean(
             -tf.reduce_sum(
                 mask * y * tf.log(tf.clip_by_value(y_hat, 1e-10, 1.0)),
                 reduction_indices=[1]))
     return self.loss
Example No. 28
  def critic_loss(self, states, actions, rewards, discounts,
                  next_states):
    """Computes a loss for training the critic network.

    The loss is the mean squared error between the Q value predictions of the
    critic and Q values estimated using TD-lambda.

    Args:
      states: A [batch_size, num_state_dims] tensor representing a batch
        of states.
      actions: A [batch_size, num_action_dims] tensor representing a batch
        of actions.
      rewards: A [batch_size, ...] tensor representing a batch of rewards,
        broadcastable to the critic net output.
      discounts: A [batch_size, ...] tensor representing a batch of discounts,
        broadcastable to the critic net output.
      next_states: A [batch_size, num_state_dims] tensor representing a batch
        of next states.
    Returns:
      A rank-0 tensor representing the critic loss.
    Raises:
      ValueError: If any of the inputs do not have the expected dimensions, or
        if their batch_sizes do not match.
    """
    self._validate_states(states)
    self._validate_actions(actions)
    self._validate_states(next_states)

    target_q_values = self.target_value_net(next_states, for_critic_loss=True)
    td_targets = target_q_values * discounts + rewards
    if self._target_q_clipping is not None:
      td_targets = tf.clip_by_value(td_targets, self._target_q_clipping[0],
                                    self._target_q_clipping[1])
    q_values = self.critic_net(states, actions, for_critic_loss=True)
    td_errors = td_targets - q_values
    if self._debug_summaries:
      gen_debug_td_error_summaries(
          target_q_values, q_values, td_targets, td_errors)

    loss = self._td_errors_loss(td_targets, q_values)

    if self._residual_phi > 0.0:  # compute residual gradient loss
      residual_q_values = self.value_net(next_states, for_critic_loss=True)
      residual_td_targets = residual_q_values * discounts + rewards
      if self._target_q_clipping is not None:
        residual_td_targets = tf.clip_by_value(residual_td_targets,
                                               self._target_q_clipping[0],
                                               self._target_q_clipping[1])
      residual_td_errors = residual_td_targets - q_values
      residual_loss = self._td_errors_loss(
          residual_td_targets, residual_q_values)
      loss = (loss * (1.0 - self._residual_phi) +
              residual_loss * self._residual_phi)
    return loss
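Worked example of the clipped TD target (values assumed):

# target_q = 12.0, discount = 0.99, reward = 1.0, _target_q_clipping = (-10, 10):
#   td_target = 12.0 * 0.99 + 1.0 = 12.88  ->  clipped to 10.0
#   td_error  = 10.0 - q_value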
Example No. 29
def mean_squared_logarithmic_error(y_true, y_pred):
    """
    Parameters
    ----------
    y_true : tf.Tensor
    y_pred : tf.Tensor
        Tensors of same shape and type.
    """
    first_log = tf.log(tf.clip_by_value(y_pred, 1e-8, np.inf) + 1.0)
    second_log = tf.log(tf.clip_by_value(y_true, 1e-8, np.inf) + 1.0)
    return tf.reduce_mean(tf.square(first_log - second_log))
Example No. 30
    def __init__(self,
                 p_values,
                 low_action,
                 high_action,
                 stochastic,
                 eps,
                 theta=0.15,
                 sigma=0.2,
                 use_gaussian_noise=False,
                 act_noise=0.1,
                 is_target=False,
                 target_noise=0.2,
                 noise_clip=0.5,
                 parameter_noise=False):

        # shape is [None, dim_action]
        deterministic_actions = (
            (high_action - low_action) * p_values + low_action)

        if use_gaussian_noise:
            if is_target:
                normal_sample = tf.random_normal(
                    tf.shape(deterministic_actions), stddev=target_noise)
                normal_sample = tf.clip_by_value(normal_sample, -noise_clip,
                                                 noise_clip)
                stochastic_actions = tf.clip_by_value(
                    deterministic_actions + normal_sample, low_action,
                    high_action)
            else:
                normal_sample = tf.random_normal(
                    tf.shape(deterministic_actions), stddev=act_noise)
                stochastic_actions = tf.clip_by_value(
                    deterministic_actions + normal_sample, low_action,
                    high_action)
        else:
            exploration_sample = tf.get_variable(
                name="ornstein_uhlenbeck",
                dtype=tf.float32,
                initializer=low_action.size * [.0],
                trainable=False)
            normal_sample = tf.random_normal(
                shape=[low_action.size], mean=0.0, stddev=1.0)
            exploration_value = tf.assign_add(
                exploration_sample,
                theta * (.0 - exploration_sample) + sigma * normal_sample)
            stochastic_actions = tf.clip_by_value(
                deterministic_actions +
                eps * (high_action - low_action) * exploration_value,
                low_action, high_action)

        self.actions = tf.cond(
            tf.logical_and(stochastic, not parameter_noise),
            lambda: stochastic_actions, lambda: deterministic_actions)
Example No. 31
def lerp_clip(a, b, t):
    with tf.name_scope('LerpClip'):
        return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0)
Example No. 32
 def window_poly6(r_sqr):
     return tf.clip_by_value((1 - r_sqr)**3, 0, 1)
Example No. 33
def quantize_grad(op, grad):
    return tf.clip_by_value(tf.identity(grad), -1, 1)
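A hedged sketch of how such a clipped straight-through gradient is typically wired up in TF1 (the op name, mapping, and placeholder below are assumptions, not from the source project):

@tf.RegisterGradient("QuantizeGrad")
def _quantize_grad(op, grad):
    return tf.clip_by_value(tf.identity(grad), -1, 1)

x = tf.placeholder(tf.float32, [None])
g = tf.get_default_graph()
with g.gradient_override_map({"Round": "QuantizeGrad"}):
    y = tf.round(x)  # forward: round/quantize; backward: clipped identity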
Example No. 34
    def buildActorNetwork(self,d=128,dv=16,dout=128,nv=8):
        init_w = tf.random_normal_initializer(0., 0.01)
        init_b = tf.constant_initializer(0.01)
        with tf.variable_scope('update_Actor_network' + self.name):
            # enc
            f_dim = 128
            encode_layer1 = tf.layers.Dense(512, activation=tf.nn.relu,
                                            kernel_initializer=init_w, bias_initializer=init_b, name='encoder_l1',
                                            trainable=True)
            encode_layer2 = tf.layers.Dense(f_dim, activation=tf.nn.relu,
                                            kernel_initializer=init_w, bias_initializer=init_b, name='encoder_l2',
                                            trainable=True)

            for i in range(self.agent_num):
                e1 = encode_layer1(self.state_holder[:, i * self.state_dim:(i + 1) * self.state_dim])
                feature = encode_layer2(e1)
                if i == 0:
                    self.feature_a = feature
                else:
                    self.feature_a = tf.concat([self.feature_a, feature], axis=1)

            self.feature_a = tf.reshape(self.feature_a, [-1, f_dim, self.agent_num])  # changed
            # relation1

            r1_l1_v = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_v',
                                      trainable=True)
            r1_l1_q = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_q',
                                      trainable=True)
            r1_l1_k = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_k',
                                      trainable=True)
            r1_out = tf.layers.Dense(dout, activation=tf.nn.relu,
                                     kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_out',
                                     trainable=True)
            for i in range(self.agent_num):
                v1 = tf.matmul(self.feature_a, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                q1 = tf.matmul(self.feature_a, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                k1 = tf.matmul(self.feature_a, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))

                v1 = tf.transpose(v1, [0, 2, 1])
                q1 = tf.transpose(q1, [0, 2, 1])
                k1 = tf.transpose(k1, [0, 2, 1])

                v2 = r1_l1_v(v1)
                q2 = r1_l1_q(q1)
                k2 = r1_l1_k(k1)

                v = tf.reshape(v2, shape=[-1, self.neighbors, nv, dv])
                q = tf.reshape(q2, shape=[-1, self.neighbors, nv, dv])
                k = tf.reshape(k2, shape=[-1, self.neighbors, nv, dv])

                v = tf.transpose(v, [0, 2, 1, 3])
                k = tf.transpose(k, [0, 2, 3, 1])
                q = tf.transpose(q, [0, 2, 1, 3])

                att = tf.matmul(q, k) / np.sqrt(dv)
                att = tf.nn.softmax(att, axis=-1)

                out = tf.matmul(att, v)
                out = tf.transpose(out, [0, 2, 1, 3])

                out = tf.reshape(out, shape=[-1, self.neighbors, dv * nv])

                T = tf.matmul(self.vecholder, out)

                out = r1_out(T)

                if i == 0:
                    self.relation_1_a = out
                else:
                    self.relation_1_a = tf.concat([self.relation_1_a, out], axis=1)

            self.relation_1_a = tf.reshape(self.relation_1_a, [-1, dv * nv, self.agent_num])  # changed

            # relation 2
            r2_l1_v = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_v',
                                      trainable=True)
            r2_l1_q = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_q',
                                      trainable=True)
            r2_l1_k = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_k',
                                      trainable=True)
            r2_out = tf.layers.Dense(dout, activation=tf.nn.relu,
                                     kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_out',
                                     trainable=True)
            for i in range(self.agent_num):
                v1 = tf.matmul(self.relation_1_a, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                q1 = tf.matmul(self.relation_1_a, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                k1 = tf.matmul(self.relation_1_a, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))

                v1 = tf.transpose(v1, [0, 2, 1])
                q1 = tf.transpose(q1, [0, 2, 1])
                k1 = tf.transpose(k1, [0, 2, 1])

                v2 = r2_l1_v(v1)
                q2 = r2_l1_q(q1)
                k2 = r2_l1_k(k1)

                v = tf.reshape(v2, shape=[-1, self.neighbors, nv, dv])
                q = tf.reshape(q2, shape=[-1, self.neighbors, nv, dv])
                k = tf.reshape(k2, shape=[-1, self.neighbors, nv, dv])

                v = tf.transpose(v, [0, 2, 1, 3])
                k = tf.transpose(k, [0, 2, 3, 1])
                q = tf.transpose(q, [0, 2, 1, 3])

                att = tf.matmul(q, k) / np.sqrt(dv)
                att = tf.nn.softmax(att, axis=-1)

                out = tf.matmul(att, v)
                out = tf.transpose(out, [0, 2, 1, 3])

                out = tf.reshape(out, shape=[-1, self.neighbors, dv * nv])

                T = tf.matmul(self.vecholder, out)

                out = r2_out(T)

                if i == 0:
                    self.relation_2_a = out
                else:
                    self.relation_2_a = tf.concat([self.relation_2_a, out], axis=1)

            self.action_mean = tf.layers.Dense(1, activation=None,
                                              kernel_initializer=init_w, bias_initializer=init_b, name='mean',
                                              trainable=True)
            self.action_sigma = tf.layers.Dense(1, activation=None,
                                              kernel_initializer=init_w, bias_initializer=init_b, name='sigma',
                                              trainable=True)                               
            self.pi = []
            self.action = []
            for i in range(self.agent_num):
                h = tf.concat([self.feature_a[:, :, i], self.relation_1_a[:, i, :],
                               self.relation_2_a[:, i, :]], axis=1)
                dis = tf.distributions.Normal(loc=self.action_mean(h), scale=self.action_sigma(h))
       
                self.pi.append(dis)
                self.action.append(tf.squeeze(dis.sample([1])))


        with tf.variable_scope('target_Actor_network' + self.name):
            # enc
            f_dim = 128
            encode_layer1 = tf.layers.Dense(512, activation=tf.nn.relu,
                                            kernel_initializer=init_w, bias_initializer=init_b, name='encoder_l1',
                                            trainable=True)
            encode_layer2 = tf.layers.Dense(f_dim, activation=tf.nn.relu,
                                            kernel_initializer=init_w, bias_initializer=init_b, name='encoder_l2',
                                            trainable=True)

            for i in range(self.agent_num):
                e1 = encode_layer1(self.state_holder[:, i * self.state_dim:(i + 1) * self.state_dim])
                feature = encode_layer2(e1)
                if i == 0:
                    self.feature_a_old = feature
                else:
                    self.feature_a_old = tf.concat([self.feature_a_old, feature], axis=1)
            self.feature_a_old = tf.reshape(self.feature_a_old, [-1, 128, self.agent_num])  # reshape to [batch, f_dim, agent_num]
            # relation1

            d = 128
            dv = 16
            dout = 128
            nv = 8

            r1_l1_v = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_v',
                                      trainable=True)
            r1_l1_q = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_q',
                                      trainable=True)
            r1_l1_k = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_k',
                                      trainable=True)
            r1_out = tf.layers.Dense(dout, activation=tf.nn.relu,
                                     kernel_initializer=init_w, bias_initializer=init_b, name='relation_l1_out',
                                     trainable=True)
            for i in range(self.agent_num):
                v1 = tf.matmul(self.feature_a_old, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                q1 = tf.matmul(self.feature_a_old, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                k1 = tf.matmul(self.feature_a_old, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))

                v1 = tf.transpose(v1, [0, 2, 1])
                q1 = tf.transpose(q1, [0, 2, 1])
                k1 = tf.transpose(k1, [0, 2, 1])

                v2 = r1_l1_v(v1)
                q2 = r1_l1_q(q1)
                k2 = r1_l1_k(k1)

                v = tf.reshape(v2, shape=[-1, self.neighbors, nv, dv])
                q = tf.reshape(q2, shape=[-1, self.neighbors, nv, dv])
                k = tf.reshape(k2, shape=[-1, self.neighbors, nv, dv])

                v = tf.transpose(v, [0, 2, 1, 3])
                k = tf.transpose(k, [0, 2, 3, 1])
                q = tf.transpose(q, [0, 2, 1, 3])

                att = tf.matmul(q, k) / np.sqrt(dv)
                att = tf.nn.softmax(att, axis=-1)

                out = tf.matmul(att, v)
                out = tf.transpose(out, [0, 2, 1, 3])

                out = tf.reshape(out, shape=[-1, self.neighbors, dv * nv])

                T = tf.matmul(self.vecholder, out)

                out = r1_out(T)

                if i == 0:
                    self.relation_1_a_old = out
                else:
                    self.relation_1_a_old = tf.concat([self.relation_1_a_old, out], axis=1)
            self.relation_1_a_old = tf.reshape(self.relation_1_a_old, [-1, dv * nv, self.agent_num])  # reshape to [batch, dv * nv, agent_num]
            # relation 2
            r2_l1_v = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_v',
                                      trainable=True)
            r2_l1_q = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_q',
                                      trainable=True)
            r2_l1_k = tf.layers.Dense(dv * nv, activation=tf.nn.relu,
                                      kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_k',
                                      trainable=True)
            r2_out = tf.layers.Dense(dout, activation=tf.nn.relu,
                                     kernel_initializer=init_w, bias_initializer=init_b, name='relation_l2_out',
                                     trainable=True)
            for i in range(self.agent_num):
                v1 = tf.matmul(self.relation_1_a_old, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                q1 = tf.matmul(self.relation_1_a_old, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))
                k1 = tf.matmul(self.relation_1_a_old, tf.transpose(self.adj[:, i, :, :], [0, 2, 1]))

                v1 = tf.transpose(v1, [0, 2, 1])
                q1 = tf.transpose(q1, [0, 2, 1])
                k1 = tf.transpose(k1, [0, 2, 1])

                v2 = r2_l1_v(v1)
                q2 = r2_l1_q(q1)
                k2 = r2_l1_k(k1)

                v = tf.reshape(v2, shape=[-1, self.neighbors, nv, dv])
                q = tf.reshape(q2, shape=[-1, self.neighbors, nv, dv])
                k = tf.reshape(k2, shape=[-1, self.neighbors, nv, dv])

                v = tf.transpose(v, [0, 2, 1, 3])
                k = tf.transpose(k, [0, 2, 3, 1])
                q = tf.transpose(q, [0, 2, 1, 3])

                att = tf.matmul(q, k) / np.sqrt(dv)
                att = tf.nn.softmax(att, axis=-1)

                out = tf.matmul(att, v)
                out = tf.transpose(out, [0, 2, 1, 3])

                out = tf.reshape(out, shape=[-1, self.neighbors, dv * nv])

                T = tf.matmul(self.vecholder, out)

                out = r2_out(T)

                if i == 0:
                    self.relation_2_a_old = out
                else:
                    self.relation_2_a_old = tf.concat([self.relation_2_a_old, out], axis=1)

            self.action_mean_old = tf.layers.Dense(1, activation=None,
                                                   kernel_initializer=init_w, bias_initializer=init_b, name='mean_old',
                                                   trainable=False)
            self.action_sigma_old = tf.layers.Dense(1, activation=None,
                                                    kernel_initializer=init_w, bias_initializer=init_b, name='sigma_old',
                                                    trainable=False)
            self.pi_old = []
            self.action_old = []
            for i in range(self.agent_num):
                h = tf.concat([self.feature_a_old[:, :, i], self.relation_1_a_old[:, i, :],
                               self.relation_2_a_old[:, i, :]], axis=1)
                dis = tf.distributions.Normal(loc=self.action_mean_old(h), scale=self.action_sigma_old(h))
        
                self.pi_old.append(dis)
                self.action_old.append(tf.squeeze(dis.sample([1])))

        self.p_e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='update_Actor_network' + self.name)
        self.p_t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_Actor_network' + self.name)

        # Training setup: PPO clipped-surrogate loss, one optimizer per agent
        self.p_trainOp = []
        for i in range(self.agent_num):
            ratio = tf.exp(
                tf.reshape(self.pi[i].log_prob(self.action_holder[:, i]), [-1, 1]) - tf.reshape(
                    tf.clip_by_value(self.pi_old[i].log_prob(self.action_holder[:, i]),
                                     -20, 20), [-1, 1]))
 
            self.surrogate = ratio * self.advantage[:, i]
            self.clip_surrogate = tf.clip_by_value(ratio, 1. - self.epsilon_holder,
                                                   1 + self.epsilon_holder) * self.advantage[:, i]
            self.p_loss = -tf.reduce_mean(tf.minimum(self.surrogate, self.clip_surrogate))

            grads, _ = tf.clip_by_global_norm(tf.gradients(self.p_loss, self.p_e_params), 5.)
            grads_and_vars = list(zip(grads, self.p_e_params))
            self.p_trainOp.append(
                tf.train.AdamOptimizer(learning_rate=0.0001).apply_gradients(grads_and_vars, name="apply_gradients"))
        self.Actor_network_update = [tf.assign(tar, eva) for tar, eva in zip(self.p_t_params, self.p_e_params)]
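# A minimal NumPy sketch of the clipped-surrogate objective used above
# (illustrative only; names and values here are hypothetical):
import numpy as np

def ppo_clipped_loss(logp_new, logp_old, advantage, epsilon=0.2):
    ratio = np.exp(logp_new - logp_old)
    surrogate = ratio * advantage
    clipped = np.clip(ratio, 1. - epsilon, 1. + epsilon) * advantage
    # Minimizing the negative of the element-wise minimum is PPO's pessimistic bound
    return -np.minimum(surrogate, clipped).mean()

# With advantage = 1, a ratio of 2 is clipped to 1 + epsilon = 1.2:
assert abs(ppo_clipped_loss(np.log(2.), 0., np.array([1.])) + 1.2) < 1e-9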
Ejemplo n.º 35
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        assert isinstance(ob_space, gym.spaces.Box)

        # Add the variable to track layers
        self.num_hid_layers = num_hid_layers
        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        with tf.variable_scope('vf'):
            obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std,
                                   -5.0, 5.0)
            last_out = obz
            for i in range(num_hid_layers):
                last_out = tf.nn.tanh(
                    tf.layers.dense(
                        last_out,
                        hid_size,
                        name="fc%i" % (i + 1),
                        kernel_initializer=U.normc_initializer(1.0)))
            self.vpred = tf.layers.dense(
                last_out,
                1,
                name='final',
                kernel_initializer=U.normc_initializer(0.1))[:, 0]

        with tf.variable_scope('pol'):
            last_out = obz
            for i in range(num_hid_layers):
                last_out = tf.nn.tanh(
                    tf.layers.dense(
                        last_out,
                        hid_size,
                        name='fc%i' % (i + 1),
                        kernel_initializer=U.normc_initializer(1.0)))
            if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
                mean = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0] // 2,
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))
                logstd = tf.get_variable(
                    name="logstd",
                    shape=[1, pdtype.param_shape()[0] // 2],
                    initializer=tf.zeros_initializer())
                pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            else:
                pdparam = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0],
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))

        pdparam = tf.clip_by_value(pdparam, -5.0, 5.0)
        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function(
            [stochastic, ob],
            [ac, self.vpred, tf.exp(self.pd.logp(ac))])
Ejemplo n.º 36
    masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(
            training_logits,
            targets,
            masks)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)


# Batch and pad the source and target sequences

# In[26]:


"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
def pad_sentence_batch(sentence_batch, pad_int):
    """Pad sentences with <PAD> so that each sentence of a batch has the same length"""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [pad_int] * (max_sentence - len(sentence)) for sentence in sentence_batch]
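# A quick sanity check of the helper above (hypothetical toy batch):
# pad_sentence_batch([[5, 6, 7], [8]], pad_int=0) -> [[5, 6, 7], [8, 0, 0]]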
def _clip_and_normalize(word_probs, epsilon):
    '''
    word_probs: 1D tensor of [vsize]
    '''
    word_probs = tf.clip_by_value(word_probs, epsilon, 1.0 - epsilon)
    return word_probs / tf.reduce_sum(word_probs, axis=-1, keep_dims=True) # scale preds so that the class probas of each sample sum to 1
Ejemplo n.º 38
Archivo: data.py Proyecto: bbeatrix/ns
def get_dataset(args,
                dataset,
                split,
                batch_size,
                limit,
                augment=False,
                normal_class=-1,
                outliers=False,
                add_obs_noise=False,
                add_iso_noise=False):

    if dataset == 'emnist-letters':
        dataset = 'emnist/letters'
    elif dataset == 'imagenet':
        dataset = 'downsampled_imagenet/32x32'
        if split == tfds.Split.TEST:
            split = tfds.Split.VALIDATION

    if dataset == 'uniform-noise':

        def random_uniform_generator():
            while True:
                yield {
                    'image': np.random.randint(0, high=255, size=(28, 28, 1))
                }

        ds = tf.data.Dataset.from_generator(
            random_uniform_generator,
            output_types={'image': tf.int32},
            output_shapes={'image': (28, 28, 1)})
    else:
        ds = tfds.load(name=dataset, split=split)

    if split == tfds.Split.TRAIN:
        ds = ds.shuffle(100000)

    if normal_class != -1:
        if outliers:
            ds = ds.filter(lambda x: tf.not_equal(x['label'], normal_class))
        else:
            ds = ds.filter(lambda x: tf.equal(x['label'], normal_class))

    ds = ds.take((limit // batch_size) * batch_size) \
        .map(lambda x: x['image']) \
        .map(lambda x: tf.cast(x, tf.float32))

    if add_obs_noise:
        if dataset == 'downsampled_imagenet/32x32':
            ds = ds.map(lambda x: x + tf.random.uniform([32, 32, 3]))
        else:
            ds = ds.map(lambda x: x + tf.random.uniform(x.shape))

    image_width = ds.output_shapes[0].value
    image_height = ds.output_shapes[1].value
    image_channels = ds.output_shapes[2].value

    if image_width != args.shape[0] or image_height != args.shape[1]:
        print('Resize (crop/pad) images to target shape.')
        ds = ds.map(lambda x: tf.image.resize_image_with_crop_or_pad(
            x, args.shape[0], args.shape[1]))
    if image_channels != 3 and args.color:
        print('Transform grayscale images to rgb.')
        ds = ds.map(lambda x: tf.image.grayscale_to_rgb(x))
    elif image_channels != 1 and not args.color:
        print('Transform rgb images to grayscale.')
        ds = ds.map(lambda x: tf.image.rgb_to_grayscale(x))

    ds = ds.map(lambda x: x / 255.)

    if add_iso_noise:
        if split == tfds.Split.TRAIN:
            print("Adding iso noise to train of {}.".format(dataset))
            ds = ds.map(lambda x: x + tf.random.normal(x.shape, stddev=.25))
            ds = ds.map(lambda x: tf.clip_by_value(x, 0, 1))

    if augment:
        ds = ds.map(lambda x: augment_transforms(x)) \
               .map(lambda x: tf.clip_by_value(x, -1, 1))
    ds = ds.map(lambda x: tf.transpose(x, [2, 0, 1])) \
        .batch(batch_size) \
        .repeat() \
        .prefetch(2)

    iterator = ds.make_initializable_iterator()
    iterator_init_op = iterator.initializer
    get_next = iterator.get_next()
    return ds, iterator, iterator_init_op, get_next
Ejemplo n.º 39
def distort(rgb, bitmap):
    rgb = tf.image.random_brightness(rgb, 0.1)
    rgb = tf.image.random_contrast(rgb, 0.9, 1.1)
    #    rgb = tf.image.per_image_standardization(rgb)  # works great, but how to have it done for predict?
    rgb = tf.clip_by_value(rgb, clip_value_min=-1.0, clip_value_max=1.0)
    return rgb, bitmap
Ejemplo n.º 40
def _project_perturbation(perturbation, epsilon, input_image, image_bounds):
    """Project `perturbation` onto L-infinity ball of radius `epsilon`."""
    clipped_perturbation = tf.clip_by_value(perturbation, -epsilon, epsilon)
    new_image = tf.clip_by_value(input_image + clipped_perturbation,
                                 image_bounds[0], image_bounds[1])
    return new_image - input_image
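# A sketch of how such a projection is typically used inside a PGD-style
# attack loop (num_steps, step_size, gradient_fn, and x are hypothetical):
#
#   perturbation = tf.zeros_like(x)
#   for _ in range(num_steps):
#       perturbation += step_size * tf.sign(gradient_fn(x + perturbation))
#       perturbation = _project_perturbation(perturbation, epsilon, x,
#                                            image_bounds=(0., 1.))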
Ejemplo n.º 41
def choose_action(self, obs_list):
    action = super().choose_action(obs_list)
    action = tf.clip_by_value(
        action + tf.random.normal(tf.shape(action), stddev=self.noise),
        -1., 1.)
    return action
Ejemplo n.º 42
    def _build_model(self):
        # input points
        self.x = tf.placeholder(tf.float32, shape=[self.batch_size, int(np.prod(self.x_dims))], name="X")
        x = tf.tile(self.x, multiples=[self.n_samples, 1])
        self.lr = tf.placeholder(tf.float32, shape=(), name="lr")

        self.p_z = dbns.Normal(loc=tf.zeros(shape=[self.batch_size * self.n_samples, self.z_dim]),
                               scale=tf.ones(shape=[self.batch_size * self.n_samples, self.z_dim]))
        # self.p_h1 = dbns.Normal(loc=tf.zeros(shape=[self.batch_size * self.n_samples, 100]),
        #                         scale=tf.ones(shape=[self.batch_size * self.n_samples, 100]))
        # self.p_h2 = dbns.Normal(loc=tf.zeros(shape=[self.batch_size * self.n_samples, 50]),
        #                         scale=tf.ones(shape=[self.batch_size * self.n_samples, 50]))
        # self.p_h1_ = dbns.Normal(loc=tf.zeros(shape=[self.batch_size * self.n_samples, 100]),
        #                          scale=tf.ones(shape=[self.batch_size * self.n_samples, 100]))

        # encoder
        z_params = self.encoder(x)
        z_mu = z_params[:, self.z_dim:]
        z_sigma = tf.exp(z_params[:, :self.z_dim])
        self.q_z = dbns.Normal(loc=z_mu, scale=z_sigma)
        # params_q_h1_x = self.encoder(x, scope="q_h1_x", hidden_dim=200, z_dim=100)
        # h1_mu = params_q_h1_x[:, 100:]
        # h1_sigma = tf.exp(params_q_h1_x[:, :100])
        # self.q_h1_x = dbns.Normal(loc=h1_mu, scale=h1_sigma)
        # h1 = h1_mu + tf.multiply(h1_sigma, self.p_h1.sample())
        # params_q_h2_h1 = self.encoder(h1, scope="q_h2_h1", hidden_dim=100, z_dim=50)
        # h2_mu = params_q_h2_h1[:, 50:]
        # h2_sigma = tf.exp(params_q_h2_h1[:, :50])
        # self.q_h2_h1 = dbns.Normal(loc=h2_mu, scale=h2_sigma)
        # h2 = h2_mu + tf.multiply(h2_sigma, self.p_h2.sample())

        z = z_mu + tf.multiply(z_sigma, self.p_z.sample())

        # params_p_h1_h2 = self.encoder(h2, scope="p_h1_h2", hidden_dim=100, z_dim=100)
        # h1_mu_ = params_p_h1_h2[:, 100:]
        # h1_sigma_ = tf.exp(params_p_h1_h2[:, :100])
        # self.p_h1_h2 = dbns.Normal(loc=h1_mu_, scale=h1_sigma_)
        # h1_ = h1_mu_ + tf.multiply(h1_sigma_, self.p_h1_.sample())
        # x_hat = self.decoder(h1_, hidden_dim=200)
        # x_hat = self.decoder(h1, hidden_dim=200)

        x_hat = self.decoder(z)
        self.out_dbn = dbns.Bernoulli(logits=x_hat)

        log_lik = tf.reduce_sum(x * tf.log(1e-8 + x_hat) + (1 - x) * tf.log(1e-8 + 1 - x_hat), 1)
        neg_kld = tf.reduce_sum(self.p_z.log_prob(z) - self.q_z.log_prob(z), 1)
        # log_lik = (tf.reduce_sum(x * tf.log(1e-8 + x_hat) + (1 - x) * tf.log(1e-8 + 1 - x_hat), 1) +
        #            tf.reduce_sum(self.p_h1_h2.log_prob(h1), 1))
        # neg_kld = (tf.reduce_sum(self.p_h1_h2.log_prob(h1_) - self.q_h1_x.log_prob(h1), 1) +
        #            tf.reduce_sum(self.p_h1.log_prob(h1) - self.q_h1_x.log_prob(h1), 1) +
        #            tf.reduce_sum(self.p_h2.log_prob(h2) - self.q_h2_h1.log_prob(h2), 1))

        # log_lik = (tf.reduce_sum(x * tf.log(1e-8 + x_hat) + (1 - x) * tf.log(1e-8 + 1 - x_hat), 1) +
        #            tf.reduce_sum(self.p_h1_h2.log_prob(h1), 1) + tf.reduce_sum(self.p_h2.log_prob(h2), 1))
        # neg_kld = tf.reduce_sum(self.q_h1_x.log_prob(h1), 1) + tf.reduce_sum(self.q_h2_h1.log_prob(h2), 1)

        # calculate importance weights using logsumexp and exp-normalize tricks
        log_iws = (tf.reshape(log_lik, [self.batch_size, self.n_samples]) -
                   tf.reshape(neg_kld, [self.batch_size, self.n_samples]))
        max_log_iws = tf.reduce_max(log_iws, axis=1, keepdims=True)
        log_iws -= max_log_iws
        self.elbo = tf.reduce_mean(max_log_iws + tf.log(1e-8 + tf.reduce_mean(
            tf.exp(log_iws), axis=1, keepdims=True)))
        self.loss = -self.elbo

        # compute gradients
        log_norm_const = tf.log(tf.clip_by_value(tf.reduce_sum(tf.exp(log_iws), 1, keepdims=True), 1e-9, np.inf))
        log_norm_iws = tf.reshape(log_iws - log_norm_const, shape=[-1])
        norm_iws = tf.stop_gradient(tf.exp(log_norm_iws))
        trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        grads = tf.gradients(-tf.reshape(log_iws, [-1]) * norm_iws, trainable_vars)
        grads_and_vars = zip(grads, trainable_vars)

        # for now, hardcoding the Adam optimizer parameters used in the paper
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.9, beta2=0.999, epsilon=0.0001)
        optimizer.apply_gradients(grads_and_vars)
        self.train_op = optimizer.minimize(self.loss)

        # for sampling
        self.z = self.encoder(self.x, trainable=False, reuse=True)
        self.z_pl = tf.placeholder(tf.float32, shape=[None, self.z_dim])
        self.sample = self.decoder(self.z_pl, trainable=False, reuse=True)

        # tensorboard summaries
        x_img = tf.reshape(x, [-1] + self.x_dims)
        tf.summary.image('data', x_img)
        sample_img = tf.reshape(x_hat, [-1] + self.x_dims)
        tf.summary.image('samples', sample_img)
        tf.summary.scalar('log_lik', tf.reduce_mean(log_lik))
        tf.summary.scalar('neg_kld', tf.reduce_mean(neg_kld))
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('elbo', self.elbo)
        self.merged = tf.summary.merge_all()
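# The max-subtraction above is the standard log-sum-exp trick. A minimal
# NumPy sketch of why it is needed (values are illustrative):
import numpy as np

log_iws = np.array([-1000., -1001., -1002.])
# Naive: np.log(np.mean(np.exp(log_iws))) underflows to log(0) = -inf.
m = log_iws.max()
stable = m + np.log(np.mean(np.exp(log_iws - m)))  # approx -1000.69, finite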
Ejemplo n.º 43
        def _interpolate(im, x, y, out_size):
            with tf.variable_scope('_interpolate'):
                # constants
                num_batch = tf.shape(im)[0]
                height = tf.shape(im)[1]
                width = tf.shape(im)[2]
                channels = tf.shape(im)[3]

                x = tf.cast(x, 'float32')
                y = tf.cast(y, 'float32')
                height_f = tf.cast(height, 'float32')
                width_f = tf.cast(width, 'float32')
                out_height = out_size[0]
                out_width = out_size[1]
                zero = tf.zeros([], dtype='int32')
                max_y = tf.cast(tf.shape(im)[1] - 1, 'int32')
                max_x = tf.cast(tf.shape(im)[2] - 1, 'int32')

                # scale indices from [-1, 1] to [0, width/height]
                x = (x + 1.0)*(width_f) / 2.0
                y = (y + 1.0)*(height_f) / 2.0

                # do sampling
                x0 = tf.cast(tf.floor(x), 'int32')
                x1 = x0 + 1
                y0 = tf.cast(tf.floor(y), 'int32')
                y1 = y0 + 1

                x0 = tf.clip_by_value(x0, zero, max_x)
                x1 = tf.clip_by_value(x1, zero, max_x)
                y0 = tf.clip_by_value(y0, zero, max_y)
                y1 = tf.clip_by_value(y1, zero, max_y)
                dim2 = width
                dim1 = width*height
                base = _repeat(tf.range(num_batch)*dim1, out_height*out_width)
                base_y0 = base + y0*dim2
                base_y1 = base + y1*dim2
                idx_a = base_y0 + x0
                idx_b = base_y1 + x0
                idx_c = base_y0 + x1
                idx_d = base_y1 + x1

                # use indices to lookup pixels in the flat image and restore
                # channels dim
                im_flat = tf.reshape(im, tf.stack([-1, channels]))  # tf.pack was renamed tf.stack in TF 1.0
                im_flat = tf.cast(im_flat, 'float32')
                Ia = tf.gather(im_flat, idx_a)
                Ib = tf.gather(im_flat, idx_b)
                Ic = tf.gather(im_flat, idx_c)
                Id = tf.gather(im_flat, idx_d)

                # and finally calculate interpolated values
                x0_f = tf.cast(x0, 'float32')
                x1_f = tf.cast(x1, 'float32')
                y0_f = tf.cast(y0, 'float32')
                y1_f = tf.cast(y1, 'float32')
                wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1)
                wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1)
                wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1)
                wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1)
                output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id])
                return output
Ejemplo n.º 44
def hard_sigmoid(self, x):
    return tf.clip_by_value((x + 1.) / 2, 0, 1)
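# Sanity check (illustrative): hard_sigmoid maps -1 -> 0, 0 -> 0.5, 1 -> 1,
# and the clip saturates it outside [-1, 1], e.g. hard_sigmoid(3.) == 1.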
Ejemplo n.º 45
def _normalize_clip_observation(x, clip_range=[-5.0, 5.0]):
    rms = RunningMeanStd(shape=x.shape[1:])
    norm_x = tf.clip_by_value((x - rms.mean) / rms.std, min(clip_range), max(clip_range))
    return norm_x, rms
Ejemplo n.º 46
def call(self, inputs):
    means = tf.math.reduce_mean(inputs, axis=TIME_AXIS, keepdims=True)
    variances = tf.math.reduce_mean(tf.math.square(inputs - means), axis=TIME_AXIS)
    means = tf.squeeze(means, TIME_AXIS)
    stddevs = tf.math.sqrt(tf.clip_by_value(variances, 0, variances.dtype.max))
    return tf.concat((means, stddevs), axis=TIME_AXIS)
Ejemplo n.º 47
def add_image_summaries(images: tf.Tensor,
                        labels: tf.Tensor,
                        preds: tf.Tensor,
                        locs: tf.Tensor,
                        k: int = 1) -> tf.Tensor:
    '''Adds image summaries for the k best and k worst images in each batch.
    Each image is overlayed with (lat, lon), label, and prediction.

    Args
    - images: tf.Tensor, shape [batch_size, H, W, C], type float32
        - C must be either 3 (RGB order), or 1 (grayscale)
        - already standardized (relative to entire dataset) with mean 0, std 1
    - labels: tf.Tensor, shape [batch_size]
    - preds: tf.Tensor, shape [batch_size]
    - locs: tf.Tensor, shape [batch_size, 2], each row is [lat, lon]
    - k: int, number of best and worst images to show per batch

    Returns: tf.summary, merged summaries
    '''
    # For float tensors, tf.summary.image automatically scales min/max to 0/255.
    # Set +/- 3 std. dev. to 0/255.
    # We want to display images with our own scaling -> cast to tf.uint8
    images = tf.clip_by_value((images / 6.0 + 0.5) * 255,
                              clip_value_min=0,
                              clip_value_max=255)
    images = tf.cast(images, tf.uint8)

    def write_on_imgs(imgs: np.ndarray, locs: np.ndarray, labels: np.ndarray,
                      preds: np.ndarray) -> np.ndarray:
        '''Writes white text w/ black background onto images.

        Args
        - imgs: np.array, shape [num_imgs, H, W, C], type uint8
            C must be either 1 or 3
        - locs: np.array, shape [num_imgs, 2]
        - labels: np.array, shape [num_imgs]
        - preds: np.array, shape [num_imgs]

        Returns
        - new_imgs: np.array, shape [num_imgs, H, W, C]
        '''
        C = imgs.shape[3]
        new_imgs = np.empty_like(imgs)
        for i, img in enumerate(imgs):
            if C == 1:
                img = img[:, :, 0]  # remove C dim. new shape: [H, W]
            img = PIL.Image.fromarray(img)
            # write white text on black background
            draw = PIL.ImageDraw.Draw(img)
            text = 'loc: ({:.6f}, {:.6f})\nlabel: {:.4f}, pred: {:.4f}'.format(
                locs[i][0], locs[i][1], labels[i], preds[i])
            size = draw.textsize(text)  # (w, h) of text
            draw.rectangle(xy=[(0, 0), size], fill='black')
            draw.text(xy=(0, 0), text=text, fill='white')
            if C == 1:
                new_imgs[i, :, :, 0] = np.asarray(img)
            else:
                new_imgs[i] = np.asarray(img)
        return new_imgs

    diff = tf.abs(preds - labels)
    _, worst_indices = tf.nn.top_k(diff, k=k)
    _, best_indices = tf.nn.top_k(-1 * diff, k=k)
    worst_inputs = [
        tf.gather(x, worst_indices) for x in [images, locs, labels, preds]
    ]
    worst_img_sum = tf.summary.image('worst_images_in_batch',
                                     tf.py_func(func=write_on_imgs,
                                                inp=worst_inputs,
                                                Tout=tf.uint8,
                                                stateful=False,
                                                name='write_on_worst_imgs'),
                                     max_outputs=k)
    best_inputs = [
        tf.gather(x, best_indices) for x in [images, locs, labels, preds]
    ]
    best_img_sum = tf.summary.image('best_images_in_batch',
                                    tf.py_func(func=write_on_imgs,
                                               inp=best_inputs,
                                               Tout=tf.uint8,
                                               stateful=False,
                                               name='write_on_best_imgs'),
                                    max_outputs=k)

    return tf.summary.merge([worst_img_sum, best_img_sum])
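# Note on the scaling above: for inputs standardized to mean 0 and std 1,
# (x / 6.0 + 0.5) * 255 maps x = -3 to 0 and x = +3 to 255, so +/- 3 standard
# deviations span the full uint8 range before clipping.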
Ejemplo n.º 48
# Generate the learning rate with the tf.train.exponential_decay function
"""
Because staircase is True, the learning rate is multiplied by 0.96 after every 100 training steps.

It implements the following:
decayed_learning_rate = learning_rate * decay_rate^(global_step / decay_steps)


Args:
    learning_rate:  The initial learning rate, set in advance.
    global_step: The current training step, used to compute the decay.
    decay_steps: The number of steps between decays.
    decay_rate: The decay factor.
    staircase: Defaults to False; when True, the learning rate becomes a staircase function.
    name: String.  Optional name of the operation.  Defaults to 'ExponentialDecay'

"""
learning_rate = tf.train.exponential_decay(learning_rate=0.1,
                                           global_step=global_step,
                                           decay_steps=100,
                                           decay_rate=0.96,
                                           staircase=True)

# Define the loss function and the backpropagation algorithm
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# Use the exponentially decaying learning rate; passing global_step to minimize() automatically updates global_step, so the learning rate is updated accordingly
tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(
    cross_entropy, global_step=global_step)
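# A worked example under the settings above: at global_step = 250 with
# staircase=True, there have been 250 // 100 = 2 full decays, so
# decayed_learning_rate = 0.1 * 0.96 ** 2 = 0.09216.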
Ejemplo n.º 49
import numpy as np
import tensorflow as tf
import vgg16
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as image
from tqdm import tqdm

sess = tf.Session()

opt_img = tf.Variable(tf.truncated_normal([1, 224, 224, 3],
                                          dtype=tf.float32,
                                          stddev=1e-1),
                      name='opt_img')

tmp_img = tf.clip_by_value(opt_img, 0.0, 255.0)
vgg = vgg16.vgg16(tmp_img, 'vgg16_weights.npz', sess)

style_img = imread('style.png', mode='RGB')
style_img = imresize(style_img, (224, 224))
style_img = np.reshape(style_img, [1, 224, 224, 3])

content_img = imread('content.png', mode='RGB')
content_img = imresize(content_img, (224, 224))
content_img = np.reshape(content_img, [1, 224, 224, 3])

layers = [
    'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
    'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3'
]
Ejemplo n.º 50
    def _setup_model(self, rank, memory_size, alpha, obs_space, action_space,
                     full_state_space, noise_target_action, **kwargs):

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.sess = tf_util.single_threaded_session(graph=self.graph)
            if self.use_prioritiy:
                from algorithm.priority_memory import PrioritizedMemory
                self.memory = PrioritizedMemory(capacity=memory_size,
                                                alpha=alpha)
            else:
                from algorithm.memory import Memory
                self.memory = Memory(limit=memory_size,
                                     action_shape=action_space.shape,
                                     observation_shape=obs_space.shape,
                                     full_state_shape=full_state_space.shape)
            # Define placeholders
            self.observe_Input = tf.placeholder(tf.float32,
                                                [None] + list(obs_space.shape),
                                                name='observe_Input')
            self.observe_Input_ = tf.placeholder(tf.float32, [None] +
                                                 list(obs_space.shape),
                                                 name='observe_Input_')
            self.f_s = tf.placeholder(tf.float32,
                                      [None] + list(full_state_space.shape),
                                      name='full_state_Input')
            self.f_s_ = tf.placeholder(tf.float32,
                                       [None] + list(full_state_space.shape),
                                       name='full_state_Input_')
            self.R = tf.placeholder(tf.float32, [None, 1], 'r')
            self.terminals1 = tf.placeholder(tf.float32,
                                             shape=(None, 1),
                                             name='terminals1')
            self.ISWeights = tf.placeholder(tf.float32, [None, 1],
                                            name='IS_weights')
            self.n_step_steps = tf.placeholder(tf.float32,
                                               shape=(None, 1),
                                               name='n_step_reached')
            self.q_demo = tf.placeholder(tf.float32, [None, 1],
                                         name='Q_of_actions_from_memory')
            self.come_from_demo = tf.placeholder(tf.float32, [None, 1],
                                                 name='Demo_index')
            self.action_memory = tf.placeholder(tf.float32, [None] +
                                                list(action_space.shape),
                                                name='actions_from_memory')

            with tf.variable_scope('obs_rms'):
                self.obs_rms = RunningMeanStd(shape=obs_space.shape)

            with tf.variable_scope('state_rms'):
                self.state_rms = RunningMeanStd(shape=full_state_space.shape)

            with tf.name_scope('obs_preprocess'):
                self.normalized_observe_Input = tf.clip_by_value(
                    normalize(self.observe_Input, self.obs_rms), -5., 5.)
                self.normalized_observe_Input_ = tf.clip_by_value(
                    normalize(self.observe_Input_, self.obs_rms), -5., 5.)

            with tf.name_scope('state_preprocess'):
                self.normalized_f_s0 = normalize(self.f_s, self.state_rms)
                self.normalized_f_s1 = normalize(self.f_s_, self.state_rms)

            with tf.variable_scope('Actor'):
                self.action, f_s_predict = self.build_actor(
                    self.normalized_observe_Input,
                    scope='eval',
                    trainable=True,
                    full_state_dim=full_state_space.shape[0])
                self.action_, _ = self.build_actor(
                    self.normalized_observe_Input_,
                    scope='target',
                    trainable=False,
                    full_state_dim=full_state_space.shape[0])

                # Target policy smoothing, by adding clipped noise to target actions
                if noise_target_action:
                    epsilon = tf.random_normal(tf.shape(self.action_),
                                               stddev=0.007)
                    epsilon = tf.clip_by_value(epsilon, -0.01, 0.01)
                    a2 = self.action_ + epsilon
                    noised_action_ = tf.clip_by_value(a2, -1, 1)
                else:
                    noised_action_ = self.action_

            with tf.variable_scope('Critic'):
                # Clip all Q values to prevent overestimation.
                self.q_1 = tf.clip_by_value(
                    self.build_critic(self.normalized_f_s0,
                                      self.action,
                                      scope='eval_1',
                                      trainable=True), self.Q_value_range[0],
                    self.Q_value_range[1])

                q_1_ = self.build_critic(self.normalized_f_s1,
                                         noised_action_,
                                         scope='target_1',
                                         trainable=False)

                if self.use_TD3:
                    q_2 = tf.clip_by_value(
                        self.build_critic(self.normalized_f_s0,
                                          self.action,
                                          scope='eval_2',
                                          trainable=True),
                        self.Q_value_range[0], self.Q_value_range[1])

                    q_2_ = self.build_critic(self.normalized_f_s1,
                                             noised_action_,
                                             scope='target_2',
                                             trainable=False)

            # Collect network parameters to make them easier to manage.
            self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope='Actor/eval')
            self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope='Actor/target')
            self.ce1_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                scope='Critic/eval_1')
            self.ct1_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                scope='Critic/target_1')

            if self.use_TD3:
                self.ce2_params = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval_2')
                self.ct2_params = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target_2')

            with tf.variable_scope('Soft_Update'):
                self.soft_replace_a = [
                    tf.assign(t, (1 - TAU) * t + TAU * e)
                    for t, e in zip(self.at_params, self.ae_params)
                ]
                self.soft_replace_c = [
                    tf.assign(t, (1 - TAU) * t + TAU * e)
                    for t, e in zip(self.ct1_params, self.ce1_params)
                ]
                if self.use_TD3:
                    self.soft_replace_c += [
                        tf.assign(t, (1 - TAU) * t + TAU * e)
                        for t, e in zip(self.ct2_params, self.ce2_params)
                    ]

            # The critic loss is (one-step TD error + n-step TD error + an L2 penalty on the online critic)
            # TD3: there are four critics in total, so two sets of critic losses are computed.
            with tf.variable_scope('Critic_Loss'):
                if self.use_TD3:
                    min_q_ = tf.minimum(q_1_, q_2_)
                else:
                    min_q_ = q_1_

                self.q_target = self.R + (1. -
                                          self.terminals1) * GAMMA * min_q_
                if self.use_n_step:
                    self.n_step_target_q = self.R + (
                        1. - self.terminals1) * tf.pow(
                            GAMMA, self.n_step_steps) * min_q_
                    clipped_n_step_target_q = tf.clip_by_value(
                        self.n_step_target_q, self.Q_value_range[0],
                        self.Q_value_range[1])

                clipped_q_target = tf.clip_by_value(self.q_target,
                                                    self.Q_value_range[0],
                                                    self.Q_value_range[1])

                self.td_error_1 = tf.abs(clipped_q_target - self.q_1)
                if self.use_TD3:
                    self.td_error_2 = tf.abs(clipped_q_target - q_2)

                if self.use_n_step:
                    self.nstep_td_error_1 = tf.abs(clipped_n_step_target_q -
                                                   self.q_1)
                    if self.use_TD3:
                        self.nstep_td_error_2 = tf.abs(clipped_n_step_target_q -
                                                       q_2)

                L2_regular_1 = tf.contrib.layers.apply_regularization(
                    tf.contrib.layers.l2_regularizer(0.001),
                    weights_list=self.ce1_params)
                if self.use_TD3:
                    L2_regular_2 = tf.contrib.layers.apply_regularization(
                        tf.contrib.layers.l2_regularizer(0.001),
                        weights_list=self.ce2_params)

                one_step_loss_1 = tf.reduce_mean(
                    tf.multiply(self.ISWeights, tf.square(
                        self.td_error_1))) * self.lambda_1_step
                if self.use_TD3:
                    one_step_loss_2 = tf.reduce_mean(
                        tf.multiply(self.ISWeights, tf.square(
                            self.td_error_2))) * self.lambda_1_step

                if self.use_n_step:
                    n_step_td_losses_1 = tf.reduce_mean(
                        tf.multiply(
                            self.ISWeights, tf.square(
                                self.nstep_td_error_1))) * self.lambda_n_step
                    c_loss_1 = one_step_loss_1 + n_step_td_losses_1 + L2_regular_1

                    if self.use_TD3:
                        n_step_td_losses_2 = tf.reduce_mean(
                            tf.multiply(self.ISWeights,
                                        tf.square(self.nstep_td_error_2))
                        ) * self.lambda_n_step
                        c_loss_2 = one_step_loss_2 + n_step_td_losses_2 + L2_regular_2
                else:
                    c_loss_1 = one_step_loss_1 + L2_regular_1

                    if self.use_TD3:
                        c_loss_2 = one_step_loss_2 + L2_regular_2

            # The actor loss maximizes q(s, a) while minimizing the behavior-cloning error.
            # (The cloning error applies only to demo transitions whose demo action scores a higher q_1(s, a) than the actor's action.)
            with tf.variable_scope('Actor_Loss'):
                Is_worse_than_demo = self.q_1 < self.q_demo
                Is_worse_than_demo = tf.cast(Is_worse_than_demo, tf.float32)
                worse_than_demo = tf.cast(tf.reduce_sum(Is_worse_than_demo),
                                          tf.int8)

                # The action error here is a sum of squares; a mean squared error (reduce_mean) works too.
                # The actions are all small numbers anyway.
                action_diffs = Is_worse_than_demo * tf.reduce_sum(
                    self.come_from_demo *
                    tf.square(self.action - self.action_memory),
                    1,
                    keepdims=True)

                L_BC = self.LAMBDA_BC * tf.reduce_sum(action_diffs)
                auxiliary_predict_loss = self.LAMBDA_predict * tf.reduce_mean(
                    tf.square(f_s_predict - self.f_s))
                a_loss = -tf.reduce_mean(
                    self.q_1) + L_BC + auxiliary_predict_loss

            # Setting optimizer for Actor and Critic
            with tf.variable_scope('Critic_Optimizer'):
                if self.use_TD3:
                    self.critic_grads_1 = tf_util.flatgrad(
                        loss=c_loss_1, var_list=self.ce1_params)
                    self.critic_grads_2 = tf_util.flatgrad(
                        loss=c_loss_2, var_list=self.ce2_params)

                    self.critic_optimizer_1 = MpiAdam(var_list=self.ce1_params,
                                                      beta1=0.9,
                                                      beta2=0.999,
                                                      epsilon=1e-08)
                    self.critic_optimizer_2 = MpiAdam(var_list=self.ce2_params,
                                                      beta1=0.9,
                                                      beta2=0.999,
                                                      epsilon=1e-08)
                else:
                    self.critic_grads = tf_util.flatgrad(
                        loss=c_loss_1, var_list=self.ce1_params)
                    self.critic_optimizer = MpiAdam(var_list=self.ce1_params,
                                                    beta1=0.9,
                                                    beta2=0.999,
                                                    epsilon=1e-08)
            with tf.variable_scope('Actor_Optimizer'):
                self.actor_grads = tf_util.flatgrad(a_loss, self.ae_params)
                self.actor_optimizer = MpiAdam(var_list=self.ae_params,
                                               beta1=0.9,
                                               beta2=0.999,
                                               epsilon=1e-08)
            with self.sess.as_default():
                self._initialize(self.sess)

            # Save the model
            var_list = tf.global_variables()
            print("var_list:")
            for v in var_list:
                print(v)
            self.saver = tf.train.Saver(var_list=var_list, max_to_keep=1)
            self.writer = tf.summary.FileWriter(
                "logs/" + self.experiment_name + "/DDPG_" + str(rank),
                self.graph)
            # TensorBoard summary
            self.a_summary = tf.summary.merge([
                tf.summary.scalar('a_loss', a_loss, family='actor'),
                tf.summary.scalar('L_BC', L_BC, family='actor'),
                tf.summary.scalar('worse_than_demo',
                                  worse_than_demo,
                                  family='actor'),
                tf.summary.scalar('auxiliary_predict_loss',
                                  auxiliary_predict_loss,
                                  family='actor')
            ])

            if self.use_TD3:
                self.c_summary = tf.summary.merge([
                    tf.summary.scalar('c_loss_1', c_loss_1, family='critic'),
                    tf.summary.scalar('c_loss_2', c_loss_2, family='critic')
                ])
            else:
                self.c_summary = tf.summary.merge(
                    [tf.summary.scalar('c_loss_1', c_loss_1, family='critic')])

            # episode summary
            self.episode_cumulate_reward = tf.placeholder(
                tf.float32, name='episode_cumulate_reward')
            self.episoed_length = tf.placeholder(
                tf.int16, name='episode_length')
            self.success_or_not = tf.placeholder(
                tf.int8, name='success_or_not')

            self.eval_episode_cumulate_reward = tf.placeholder(
                tf.float32, name='eval_episode_cumulate_reward')
            self.eval_episoed_length = tf.placeholder(
                tf.int16, name='eval_episode_length')
            self.eval_success_or_not = tf.placeholder(
                tf.int8, name='eval_success_or_not')

            self.episode_summary = tf.summary.merge([
                tf.summary.scalar('episode_cumulate_reward',
                                  self.episode_cumulate_reward,
                                  family='episode_result'),
                tf.summary.scalar('episode_length',
                                  self.episoed_length,
                                  family='episode_result'),
                tf.summary.scalar('success_or_not',
                                  self.success_or_not,
                                  family='episode_result'),
            ])

            self.eval_episode_summary = tf.summary.merge([
                tf.summary.scalar('eval_episode_cumulate_reward',
                                  self.eval_episode_cumulate_reward,
                                  family='Eval_episode_result'),
                tf.summary.scalar('eval_episode_length',
                                  self.eval_episoed_length,
                                  family='Eval_episode_result'),
                tf.summary.scalar('eval_success_or_not',
                                  self.eval_success_or_not,
                                  family='Eval_episode_result'),
            ])
Ejemplo n.º 51
    def __init__(self,
                 session,
                 action_size,
                 width,
                 height,
                 states_size,
                 optimizer=tf.train.AdamOptimizer(1e-4),
                 eta=0.5,
                 beta=0.01):
        self.layers = {}
        self.action_size = action_size
        self.optimizer = optimizer
        self.session = session

        self.width = width
        self.height = height
        self.states_size = states_size

        # beta is the entropy regularization strength; a bigger beta puts more emphasis on exploration
        self.beta = beta
        # eta weights the value loss relative to the policy loss in the total objective
        self.eta = eta

        with tf.device('/cpu:0'):
            with tf.variable_scope('network'):
                self.action = tf.placeholder('int32', [None], name='action')
                self.target_value = tf.placeholder('float32', [None],
                                                   name='target_value')

                self.state, self.policy, self.value = self.build_model(
                    self.width, self.height, self.states_size)

                self.weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                 scope='network')
                self.advantages = tf.placeholder('float32', [None],
                                                 name='advantages')

            with tf.variable_scope('optimizer'):
                # Compute the one hot vectors for each action given.
                action_one_hot = tf.one_hot(self.action, self.action_size, 1.0,
                                            0.0)

                # There are some issues when taking the log of the policy when it is exactly 1 or 0
                min_policy = 1e-8
                max_policy = 1 - min_policy
                # log_policy is the log probability of each action under the current policy
                self.log_policy = tf.log(
                    tf.clip_by_value(self.policy, min_policy, max_policy))

                # log_pi_for_action is the log probability, under the current policy,
                # of the action that was actually taken
                self.log_pi_for_action = tf.reduce_sum(tf.multiply(
                    self.log_policy, action_one_hot),
                                                       axis=1)

                # We want gradient ascent on the expected discounted reward; TensorFlow
                # minimizes the loss, so we use the negative log probability of the chosen
                # action weighted by the advantage estimate
                self.policy_loss = -tf.reduce_mean(
                    self.log_pi_for_action * self.advantages)

                # The value loss is just the squared difference between the current state's value and the target value
                self.value_loss = tf.reduce_mean(
                    tf.square(self.target_value - self.value))

                # The entropy improves exploration by discouraging premature convergence to
                # suboptimal deterministic policies; to penalize low entropy (a distribution
                # concentrated on one action) we subtract the entropy from the loss
                self.entropy = tf.reduce_sum(
                    tf.multiply(self.policy, -self.log_policy))

                # We try to minimize the loss such that the best actions are chosen more often
                self.loss = self.eta * self.value_loss + self.policy_loss - self.entropy * self.beta

                # Create a list of tuples of gradients and their respective weights
                grads = tf.gradients(self.loss, self.weights)
                # clip by global norm reduces the chances of gradients exploding
                grads, _ = tf.clip_by_global_norm(grads, 40.0)
                grads_vars = list(zip(grads, self.weights))

                # Create an operator to apply the gradients using the optimizer.
                self.train_op = optimizer.apply_gradients(grads_vars)
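# Why the policy is clipped before the log (a minimal NumPy sketch;
# values are illustrative):
import numpy as np

policy = np.array([0.0, 1.0, 0.5])
# np.log(policy) would give [-inf, 0., -0.693], and the -inf poisons gradients.
safe_log = np.log(np.clip(policy, 1e-8, 1. - 1e-8))  # finite everywhere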
Ejemplo n.º 52
def surrogate(self):
    r = self.network.mvn.prob(self.action_pl) / self.network_old.mvn.prob(self.action_pl)
    surr1 = r * self.adv_pl
    surr2 = tf.clip_by_value(r, 1.0 - self.epsilon, 1.0 + self.epsilon) * self.adv_pl
    return -tf.reduce_mean(tf.minimum(surr1, surr2))
Ejemplo n.º 53
def _get_target_action(self, vector_input):
    with tf.device(self.device):
        target_mu = self.actor_target_net(vector_input, None)
    return target_mu, tf.clip_by_value(target_mu + self.action_noise(), -1, 1)
Ejemplo n.º 54
def affinity_loss(labels,
                  probs,
                  num_classes,
                  kld_margin):
  """Affinity Field (AFF) loss.

  This function computes AFF loss. There are several components in the
  function:
  1) extracts edges from the ground-truth labels.
  2) extracts ignored pixels and their paired pixels (the neighboring
     pixels on the eight corners).
  3) extracts neighboring pixels on the eight corners from a 3x3 patch.
  4) computes KL-Divergence between center pixels and their neighboring
     pixels from the eight corners.

  Args:
    labels: A tensor of size [batch_size, height_in, width_in], indicating 
      semantic segmentation ground-truth labels.
    probs: A tensor of size [batch_size, height_in, width_in, num_classes],
      indicating segmentation predictions.
    num_classes: A number indicating the total number of valid classes.
    kld_margin: A number indicating the margin for KL-Divergence at edge.

  Returns:
    Two 1-D tensors value indicating the loss at edge and non-edge.
  """
  # Compute ignore map (e.g, label of 255 and their paired pixels).
  labels = tf.squeeze(labels, axis=-1) # NxHxW
  ignore = nnx.ignores_from_label(labels, num_classes, 1) # NxHxWx8
  not_ignore = tf.logical_not(ignore)
  not_ignore = tf.expand_dims(not_ignore, axis=3) # NxHxWx1x8

  # Compute edge map.
  one_hot_lab = tf.one_hot(labels, depth=num_classes)
  edge = nnx.edges_from_label(one_hot_lab, 1, 255) # NxHxWxCx8

  # Remove ignored pixels from the edge/non-edge.
  edge = tf.logical_and(edge, not_ignore)
  not_edge = tf.logical_and(tf.logical_not(edge), not_ignore)

  edge_indices = tf.where(tf.reshape(edge, [-1]))
  not_edge_indices = tf.where(tf.reshape(not_edge, [-1]))

  # Extract eight corner from the center in a patch as paired pixels.
  probs_paired = nnx.eightcorner_activation(probs, 1)  # NxHxWxCx8
  probs = tf.expand_dims(probs, axis=-1) # NxHxWxCx1
  bot_epsilon = tf.constant(1e-4, name='bot_epsilon')
  top_epsilon = tf.constant(1.0, name='top_epsilon')
  neg_probs = tf.clip_by_value(
      1 - probs, bot_epsilon, top_epsilon)
  probs = tf.clip_by_value(
      probs, bot_epsilon, top_epsilon)
  neg_probs_paired = tf.clip_by_value(
      1 - probs_paired, bot_epsilon, top_epsilon)
  probs_paired = tf.clip_by_value(
      probs_paired, bot_epsilon, top_epsilon)

  # Compute KL-Divergence.
  kldiv = probs_paired*tf.log(probs_paired/probs)
  kldiv += neg_probs_paired*tf.log(neg_probs_paired/neg_probs)
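  # Note: the two terms above together form, per class, the KL divergence between
  # Bernoulli distributions with parameters probs_paired and probs:
  #   KL = p' * log(p' / p) + (1 - p') * log((1 - p') / (1 - p)).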
  not_edge_loss = kldiv
  edge_loss = tf.maximum(0.0, kld_margin-kldiv)

  not_edge_loss = tf.reshape(not_edge_loss, [-1])
  not_edge_loss = tf.gather(not_edge_loss, not_edge_indices)
  edge_loss = tf.reshape(edge_loss, [-1])
  edge_loss = tf.gather(edge_loss, edge_indices)

  return edge_loss, not_edge_loss
Ejemplo n.º 55
def generate_noisy_image(image, noise_ratio):
    noise_image = VGG_MEAN_PIXELS + np.random.uniform(
        -20, 20, image.shape).astype(np.float32)
    return tf.clip_by_value(
        noise_image * noise_ratio + image * (1 - noise_ratio), 0.0, 255.0)
Ejemplo n.º 56
    def train(self, lr=0.0002, epoch=100, schedule=10, resume=True, freeze_encoder=False, sample_steps=50,
              checkpoint_steps=500, clamp=0.001, d_iters=3):
        g_vars, d_vars = self.retrieve_trainable_vars(freeze_encoder=freeze_encoder)
        input_handle, loss_handle, _, summary_handle = self.retrieve_handles()

        if not self.sess:
            raise Exception("no session registered")

        learning_rate = tf.placeholder(tf.float32, name="learning_rate")

        d_optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss_handle.d_loss, var_list=d_vars)
        g_optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss_handle.g_loss, var_list=g_vars)

        # WGAN-style weight clamping: keep discriminator weights in
        # [-clamp, clamp] to (crudely) enforce a Lipschitz constraint.
        cap_d_vars_ops = [val.assign(tf.clip_by_value(val, -clamp, clamp)) for val in d_vars]

        tf.global_variables_initializer().run()

        real_data = input_handle.real_data

        # filter by one type of labels
        data_provider = TrainDataProvider(self.data_dir)
        total_batches = data_provider.compute_total_batch_num(self.batch_size)
        val_batch_iter = data_provider.get_val(size=self.batch_size)

        saver = tf.train.Saver(max_to_keep=3)
        summary_writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)

        if resume:
            _, model_dir = self.get_model_id_and_dir()
            self.restore_model(saver, model_dir)

        current_lr = lr
        counter = 0
        start_time = time.time()

        for ei in range(epoch):
            train_batch_iter = data_provider.get_train_iter(self.batch_size)

            if (ei + 1) % schedule == 0:
                update_lr = current_lr / 2.0
                # minimum learning rate guarantee
                update_lr = max(update_lr, 0.0002)
                print("decay learning rate from %.5f to %.5f" % (current_lr, update_lr))
                current_lr = update_lr

            for bid, batch in enumerate(train_batch_iter):
                counter += 1
                batch_images = batch
                # Optimize D
                self.sess.run(cap_d_vars_ops)

                _, batch_d_loss, d_loss_real, d_loss_fake, d_summary = self.sess.run(
                    [d_optimizer, loss_handle.d_loss, loss_handle.d_loss_real,
                     loss_handle.d_loss_fake, summary_handle.d_merged],
                    feed_dict={real_data: batch_images,
                               learning_rate: current_lr})
                # Optimize G
                _, batch_g_loss = self.sess.run([g_optimizer, loss_handle.g_loss],
                                                feed_dict={
                                                    real_data: batch_images,
                                                    learning_rate: current_lr
                                                })
                # Run the generator update a second time (the "magic move"
                # from https://github.com/carpedm20/DCGAN-tensorflow) and
                # collect all the losses along the way.
                _, batch_g_loss, \
                const_loss, l1_loss, tv_loss, g_summary = self.sess.run([g_optimizer,
                                                                         loss_handle.g_loss,
                                                                         loss_handle.const_loss,
                                                                         loss_handle.l1_loss,
                                                                         loss_handle.tv_loss,
                                                                         summary_handle.g_merged],
                                                                        feed_dict={
                                                                            real_data: batch_images,
                                                                            learning_rate: current_lr
                                                                        })
                passed = time.time() - start_time
                log_format = "Epoch: [%2d], [%4d/%4d] time: %4.4f, d_loss: %.5f, g_loss: %.5f, " + \
                             "const_loss: %.5f, l1_loss: %.5f, tv_loss: %.5f, d_loss_real: %.7f, d_loss_fake: %.7f"
                print(log_format % (ei, bid, total_batches, passed, batch_d_loss, batch_g_loss,
                                     const_loss, l1_loss, tv_loss, d_loss_real, d_loss_fake))
                summary_writer.add_summary(d_summary, counter)
                summary_writer.add_summary(g_summary, counter)

                if counter % sample_steps == 0:
                    # sample the current model states with val data
                    self.validate_model(val_batch_iter, ei, counter)

                if counter % checkpoint_steps == 0:
                    print("Checkpoint: save checkpoint step %d" % counter)
                    self.checkpoint(saver, counter)

        # validate the models
        # print("val.examples len:{}".format(len(data_provider.val.examples)))
        # accuracy = 0.0
        # iters = int(len(data_provider.val.examples) / self.batch_size)
        # for it in range(iters):
        #     val_batch_iter = data_provider.get_val(size=self.batch_size)
        #     accuracy += self.validate_last_model(val_batch_iter)
        #     break
        # accuracy /= iters
        # print("Avg accuracy: %.5f" % accuracy)

        # save the last checkpoint
        print("Checkpoint: last checkpoint step %d" % counter)
        self.checkpoint(saver, counter)
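# Note: the weight clamping in train() is the original WGAN recipe. A
# standalone sketch (names are illustrative): after every critic update,
# project the critic weights back into [-clamp, clamp].
clamp = 0.01
clip_ops = [w.assign(tf.clip_by_value(w, -clamp, clamp)) for w in d_vars]
# sess.run(clip_ops) is then executed once per critic step, as in train().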
Ejemplo n.º 57
def adaptive_affinity_loss(labels,
                           one_hot_lab,
                           probs,
                           size,
                           num_classes,
                           kld_margin,
                           w_edge,
                           w_not_edge):
  """Adaptive affinity field (AAF) loss.

  This function computes AAF loss. There are five components in the function:
  1) extracts edges from the ground-truth labels.
  2) extracts ignored pixels and their paired pixels (usually the eight corner
     pixels).
  3) extracts eight corner pixels/predictions from the center in a
     (2*size+1)x(2*size+1) patch.
  4) computes KL-Divergence between center pixels and their paired pixels
     (the eight corners).
  5) imposes adaptive weightings on the loss.

  Args:
    labels: A tensor of size [batch_size, height_in, width_in], indicating 
      semantic segmentation ground-truth labels.
    one_hot_lab: A tensor of size [batch_size, height_in, width_in, num_classes]
      which is the ground-truth labels in the form of one-hot vector.
    probs: A tensor of size [batch_size, height_in, width_in, num_classes],
      indicating segmentation predictions.
    size: A number indicating the half size of a patch.
    num_classes: A number indicating the total number of valid classes.
    kld_margin: A number indicating the margin for KL-Divergence at edge.
    w_edge: A number indicating the weighting for KL-Divergence at edge.
    w_not_edge: A number indicating the weighting for KL-Divergence at non-edge.

  Returns:
    Two 1-D tensors indicating the losses at edge and non-edge positions.
  """
  # Compute the ignore map (e.g., labels of 255 and their paired pixels).
  labels = tf.squeeze(labels, axis=-1) # NxHxW
  ignore = nnx.ignores_from_label(labels, num_classes, size) # NxHxWx8
  not_ignore = tf.logical_not(ignore)
  not_ignore = tf.expand_dims(not_ignore, axis=3) # NxHxWx1x8

  # Compute edge map.
  edge = nnx.edges_from_label(one_hot_lab, size, 255) # NxHxWxCx8

  # Remove ignored pixels from the edge/non-edge.
  edge = tf.logical_and(edge, not_ignore)
  not_edge = tf.logical_and(tf.logical_not(edge), not_ignore)

  edge_indices = tf.where(tf.reshape(edge, [-1]))
  not_edge_indices = tf.where(tf.reshape(not_edge, [-1]))

  # Extract eight corner from the center in a patch as paired pixels.
  probs_paired = nnx.eightcorner_activation(probs, size)  # NxHxWxCx8
  probs = tf.expand_dims(probs, axis=-1) # NxHxWxCx1
  bot_epsilon = tf.constant(1e-4, name='bot_epsilon')
  top_epsilon = tf.constant(1.0, name='top_epsilon')

  neg_probs = tf.clip_by_value(
      1 - probs, bot_epsilon, top_epsilon)
  neg_probs_paired = tf.clip_by_value(
      1 - probs_paired, bot_epsilon, top_epsilon)
  probs = tf.clip_by_value(
      probs, bot_epsilon, top_epsilon)
  probs_paired = tf.clip_by_value(
      probs_paired, bot_epsilon, top_epsilon)

  # Compute the binary KL-Divergence per class between each center pixel
  # and its paired pixel.
  kldiv = probs_paired*tf.log(probs_paired/probs)
  kldiv += neg_probs_paired*tf.log(neg_probs_paired/neg_probs)
  # Edge pairs should differ by at least kld_margin (hinge loss);
  # non-edge pairs should agree (minimize KL).
  edge_loss = tf.maximum(0.0, kld_margin-kldiv)
  not_edge_loss = kldiv

  # Impose weights on edge/non-edge losses.
  one_hot_lab = tf.expand_dims(one_hot_lab, axis=-1)
  w_edge = tf.reduce_sum(w_edge*one_hot_lab, axis=3, keep_dims=True) # NxHxWx1x1
  w_not_edge = tf.reduce_sum(w_not_edge*one_hot_lab, axis=3, keep_dims=True) # NxHxWx1x1

  edge_loss *= w_edge
  not_edge_loss *= w_not_edge

  not_edge_loss = tf.reshape(not_edge_loss, [-1])
  not_edge_loss = tf.gather(not_edge_loss, not_edge_indices)
  edge_loss = tf.reshape(edge_loss, [-1])
  edge_loss = tf.gather(edge_loss, edge_indices)

  return edge_loss, not_edge_loss
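# Hypothetical call (hyper-parameter values are illustrative): the AAF terms
# are typically added to the usual per-pixel cross-entropy segmentation loss.
edge_loss, not_edge_loss = adaptive_affinity_loss(
    labels, one_hot_lab, probs, size=1, num_classes=21,
    kld_margin=3.0, w_edge=w_edge, w_not_edge=w_not_edge)
aaf_loss = tf.reduce_mean(edge_loss) + tf.reduce_mean(not_edge_loss)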
Ejemplo n.º 58
from __future__ import division, print_function

import tensorflow as tf
import numpy as np
from painter.wct.vgg_normalised import vgg_from_t7
from keras import backend as K
from keras.models import Model
from keras.layers import Input, UpSampling2D, Lambda
from painter.wct.ops import pad_reflect, Conv2DReflect, torch_decay, wct_tf, wct_style_swap, adain
from collections import namedtuple

### Helpers ###

mse = tf.losses.mean_squared_error

clip = lambda x: tf.clip_by_value(x, 0, 1)  # keep image tensors in the valid [0, 1] range

EncoderDecoder = namedtuple(
    'EncoderDecoder', 'content_input content_encoder_model content_encoded \
                             style_encoded \
                             decoder_input decoder_model decoded decoded_encoded \
                             pixel_loss feature_loss tv_loss total_loss \
                             train_op learning_rate global_step \
                             summary_op')

### WCT Model Graph ###


class WCTModel(object):
    '''Model graph for Universal Style Transfer via Feature Transforms from https://arxiv.org/abs/1705.08086'''
    def __init__(self,
Ejemplo n.º 59
    def _step(self) -> Dict[str, tf.Tensor]:
        """Do a step of SGD and update the priorities."""

        # Pull out the data needed for updates/priorities.
        inputs = next(self._iterator)
        transitions: types.Transition = inputs.data
        keys, probs = inputs.info[:2]

        with tf.GradientTape() as tape:
            # Evaluate our networks.
            q_tm1 = self._network(transitions.observation)
            q_t_value = self._target_network(transitions.next_observation)
            q_t_selector = self._network(transitions.next_observation)

            # The rewards and discounts have to have the same type as network values.
            r_t = tf.cast(transitions.reward, q_tm1.dtype)
            r_t = tf.clip_by_value(r_t, -1., 1.)  # standard DQN reward clipping
            d_t = tf.cast(transitions.discount, q_tm1.dtype) * tf.cast(
                self._discount, q_tm1.dtype)

            # Compute the loss.
            _, extra = trfl.double_qlearning(q_tm1, transitions.action, r_t,
                                             d_t, q_t_value, q_t_selector)
            loss = losses.huber(extra.td_error, self._huber_loss_parameter)

            # Get the importance weights.
            importance_weights = 1. / probs  # [B]
            importance_weights **= self._importance_sampling_exponent
            importance_weights /= tf.reduce_max(importance_weights)

            # Reweight.
            loss *= tf.cast(importance_weights, loss.dtype)  # [B]
            loss = tf.reduce_mean(loss, axis=[0])  # []

        # Do a step of SGD.
        gradients = tape.gradient(loss, self._network.trainable_variables)
        gradients, _ = tf.clip_by_global_norm(gradients,
                                              self._max_gradient_norm)
        self._optimizer.apply(gradients, self._network.trainable_variables)

        # Update the priorities in the replay buffer.
        if self._replay_client:
            priorities = tf.cast(tf.abs(extra.td_error), tf.float64)
            self._replay_client.update_priorities(
                table=adders.DEFAULT_PRIORITY_TABLE,
                keys=keys,
                priorities=priorities)

        # Periodically update the target network.
        if tf.math.mod(self._num_steps, self._target_update_period) == 0:
            for src, dest in zip(self._network.variables,
                                 self._target_network.variables):
                dest.assign(src)
        self._num_steps.assign_add(1)

        # Report loss & statistics for logging.
        fetches = {
            'loss': loss,
        }

        return fetches
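# Note: the reweighting above is the prioritized-replay importance-sampling
# correction, w proportional to (1/p)**beta, normalized by its maximum. A
# standalone sketch with made-up sampling probabilities:
probs = tf.constant([0.5, 0.3, 0.2])  # sampling probabilities from the buffer
beta = 0.6                            # importance-sampling exponent
weights = (1.0 / probs) ** beta
weights /= tf.reduce_max(weights)     # scale so the largest weight is 1
# `weights` would then multiply the per-sample TD losses before averaging.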
Ejemplo n.º 60
def loss(self, i, x):
    # Mean of the squared error to self.a, with each element's contribution
    # clipped to at most 10 (note: clipped elements receive zero gradient).
    return tf.reduce_mean(tf.clip_by_value(tf.square(x - self.a), 0, 10))