Beispiel #1
0
    def __init__(self, state_dim, action_dim, name="critic"):
        """
        Initialize the critic: online/target networks, loss, gradients and ops.

        The base-class constructor is called first; afterwards this method
        unpacks ``self.network`` and ``self.target_network`` (each a 3-tuple),
        adds the ``target_q`` placeholder and the squared-error losses, and
        stores the ops returned by the ``create_*`` / ``compute_*`` helpers.

        Args
            state_dim: dimension of input space, if is length one, we assume it is low dimension.
            action_dim: dimension of action space.
            name: name forwarded to the base-class constructor.
        """
        super(CriticNetwork, self).__init__(state_dim, action_dim, name=name)

        # Op returned by the helper; presumably the target-network update
        # mentioned in the docstring - confirm against create_update_op().
        self.update_op = self.create_update_op()

        # online critic: self.network holds a 3-tuple at this point and is
        # re-bound here to its first element (used as the prediction below).
        self.network, self.state, self.action = self.network

        # target critic: same unpacking for the target copy
        self.target_network, self.target_state, self.target_action = self.target_network

        # for the critic network we need one more input variable, y, to compute the loss
        # this input variable is fed by: r + gamma * target(s_t+1, action(s_t+1))
        # NOTE(review): shape=None leaves y unconstrained, so it broadcasts
        # against self.network - confirm the fed shape matches the network output.
        self.y = tf.placeholder(tf.float32, shape=None, name="target_q")
        # Scalar mean of the element-wise squared error.
        self.mean_loss = tf.reduce_mean(
            tf.squared_difference(self.y, self.network))
        # Element-wise (unreduced) squared error.
        self.loss = tf.squared_difference(self.y, self.network)

        # get gradients
        self.gradients = self.compute_gradient()

        # get action gradients
        self.action_gradient = self.compute_action_gradient()

        self.train = self.create_train_op()
Beispiel #2
0
    def embedding_lookup(self, x, means):
        """Quantize `x` against `means` and build the two squared-error losses.

        Args:
            x: Batch of encoder continuous latent states sliced/projected into
                shape [-1, num_blocks, block_dim].
            means: Embedding means.

        Returns:
            A 4-tuple ``(x_means_hot, x_means, q_loss, e_loss)``:
            the nearest-neighbour assignment as returned by
            ``self.nearest_neighbor``, the selected means, the mean squared
            error whose gradient reaches only the selected means (``x`` is
            stopped), and the mean squared error whose gradient reaches only
            ``x`` (the selected means are stopped).
        """
        hparams = self.hparams
        assignment = self.nearest_neighbor(x, means)

        # View the assignment as [-1, num_blocks, block_v_size], then move the
        # block axis to the front for the batched matmul with the means.
        assignment_blocks = tf.reshape(
            assignment, [-1, hparams.num_blocks, hparams.block_v_size])
        block_major = tf.transpose(assignment_blocks, perm=[1, 0, 2])
        selected = tf.matmul(block_major, means)
        # Swap the first two axes back.
        selected = tf.transpose(selected, [1, 0, 2])

        frozen_x = tf.stop_gradient(x)
        q_loss = tf.reduce_mean(tf.squared_difference(frozen_x, selected))

        frozen_selected = tf.stop_gradient(selected)
        e_loss = tf.reduce_mean(tf.squared_difference(x, frozen_selected))

        return assignment, selected, q_loss, e_loss
Beispiel #3
0
def mean_squared_error(output, target, is_mean=False):
    """Return the TensorFlow expression of mean-square-error of two distributions.

    The squared difference is first reduced over every non-batch axis of each
    sample (sum by default, mean when ``is_mean`` is True) and the per-sample
    values are then averaged over the batch axis.

    Parameters
    ----------
    output : tensor of static rank >= 2, e.g. [batch_size, n_feature] or [batch_size, w, h, c].
    target : tensor with the same layout as ``output``.
    is_mean : boolean, if True, use ``tf.reduce_mean`` to compute the loss of one data, otherwise, use ``tf.reduce_sum`` (default).

    Raises
    ------
    ValueError
        If the static rank of ``output`` is unknown or smaller than 2.
        (Previously any rank other than 2 or 4 failed with an
        UnboundLocalError on the return statement.)

    References
    ------------
    - `Wiki Mean Squared Error <https://en.wikipedia.org/wiki/Mean_squared_error>`_
    """
    ndims = output.get_shape().ndims
    if ndims is None or ndims < 2:
        raise ValueError(
            "mean_squared_error expects a tensor of known rank >= 2, "
            "got rank %s" % ndims)

    with tf.name_scope("mean_squared_error_loss"):
        # Every axis except the batch axis: [1] for 2D, [1, 2, 3] for 4D.
        sample_axes = list(range(1, ndims))
        reduce_per_sample = tf.reduce_mean if is_mean else tf.reduce_sum
        squared_error = tf.squared_difference(output, target)
        mse = tf.reduce_mean(reduce_per_sample(squared_error, sample_axes))
        return mse
    def _build_net(self):
        """Build the evaluation and target networks with per-head losses and train ops.

        Creates the two state placeholders, four Q-target placeholders (one per
        entry of ``self._n_actions``), an ``eval_net`` and a ``target_net``
        built by the same helpers with the same sizes and initializers, one
        mean-squared-error loss per evaluation head and one Adam train op per
        loss. Scalar summaries are added when ``self._output_tensorboard`` is
        truthy.
        """

        def head_loss(q_target, q_eval):
            # Mean squared error between a Q-target placeholder and its head.
            return tf.reduce_mean(tf.squared_difference(q_target, q_eval))

        def adam_step(loss):
            # Every head is minimized by its own Adam optimizer instance.
            return tf.train.AdamOptimizer(self._lr).minimize(loss)

        # Sizes handed to build_sub_network; the last one also goes to
        # build_output_net.
        layer_sizes = (180, 360, 720, 910, 720, 288, 72, 36, 18)

        # ------------------ build evaluate_net ------------------
        self._s = tf.placeholder(
            tf.float32, [None, self._n_features], name='s')  # input
        self._q_target11 = tf.placeholder(
            tf.float32, [None, self._n_actions[0]], name='Q_target_11')
        self._q_target12 = tf.placeholder(
            tf.float32, [None, self._n_actions[1]], name='Q_target_12')
        self._q_target21 = tf.placeholder(
            tf.float32, [None, self._n_actions[2]], name='Q_target_21')
        self._q_target22 = tf.placeholder(
            tf.float32, [None, self._n_actions[3]], name='Q_target_22')

        with tf.variable_scope('eval_net'):
            # Collections the evaluation-net variables are stored in.
            eval_collections = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            w_init = tf.random_normal_initializer(0., 0.3)
            b_init = tf.constant_initializer(0.1)

            h11, h12, h21, h22 = self.build_sub_network(
                w_init, b_init, eval_collections, *layer_sizes)

            (self._q_eval11, self._q_eval12,
             self._q_eval21, self._q_eval22) = self.build_output_net(
                 w_init, b_init, eval_collections,
                 h11, h12, h21, h22, layer_sizes[-1])

        with tf.variable_scope('loss'):
            self._loss11 = head_loss(self._q_target11, self._q_eval11)
            self._loss12 = head_loss(self._q_target12, self._q_eval12)
            self._loss21 = head_loss(self._q_target21, self._q_eval21)
            self._loss22 = head_loss(self._q_target22, self._q_eval22)
            if self._output_tensorboard:
                tf.summary.scalar('loss11', self._loss11)
                tf.summary.scalar('loss12', self._loss12)
                tf.summary.scalar('loss21', self._loss21)
                tf.summary.scalar('loss22', self._loss22)

        with tf.variable_scope('train'):
            self._train_op11 = adam_step(self._loss11)
            self._train_op12 = adam_step(self._loss12)
            self._train_op21 = adam_step(self._loss21)
            self._train_op22 = adam_step(self._loss22)

        # ------------------ build target_net ------------------
        self._s_ = tf.placeholder(
            tf.float32, [None, self._n_features], name='s_')  # input
        with tf.variable_scope('target_net'):
            # Collections the target-net variables are stored in.
            target_collections = ['target_net_params',
                                  tf.GraphKeys.GLOBAL_VARIABLES]

            h11, h12, h21, h22 = self.build_sub_network(
                w_init, b_init, target_collections, *layer_sizes)

            (self._q_next11, self._q_next12,
             self._q_next21, self._q_next22) = self.build_output_net(
                 w_init, b_init, target_collections,
                 h11, h12, h21, h22, layer_sizes[-1])
    def _build_net(self):
        """Build the eval and target Q-networks, the loss and the train op.

        Both networks share one architecture (a ReLU hidden layer followed by
        a linear layer with ``self.n_actions`` outputs). The eval net reads
        the current observation ``self.s``; the target net reads the next
        observation ``self.s_``. When ``self.prioritized`` is set, the loss is
        scaled by the ``ISWeights`` placeholder and ``self.abs_errors`` is
        exposed as the per-sample absolute error.
        """
        def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
            """Map states `s` to per-action Q-values with a two-layer dense net."""
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
                b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
                l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
            with tf.variable_scope('Q'):
                w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
                b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
                out = tf.matmul(l1, w2) + b2

            return out

        # -------------- build the eval network (the one being trained) --------------
        self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # receives the observation
        # receives the q_target values, which are computed outside the graph
        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')

        if self.prioritized:
            # importance-sampling weights, one per sample
            self.ISWeights = tf.placeholder(tf.float32, [None, 1], name='IS_weights')

        with tf.variable_scope('eval_net'):
            # c_names (collection names) are used when updating the target_net
            # parameters; the initializers define the starting values of W and b.
            c_names, n_l1, w_initializer, b_initializer = \
                ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 10, \
                tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers

            self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer, b_initializer)

        with tf.variable_scope('loss'):  # error between target and prediction
            if self.prioritized:
                self.abs_errors = tf.reduce_sum(tf.abs(self.q_target - self.q_eval), axis=1)  # for updating Sumtree
                # Scale each sample's squared error by its importance-sampling
                # weight so the loss reflects the sampling probability.
                self.loss = tf.reduce_mean(self.ISWeights * tf.squared_difference(self.q_target, self.q_eval))
            else:
                self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
        with tf.variable_scope('train'):  # gradient descent step
            self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)

        # ---------------- build the target network, which provides the target Q ----------------
        self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # receives the next observation
        with tf.variable_scope('target_net'):
            # c_names (collection names) are used when updating the target_net parameters
            c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            # Fix: the target net must be driven by the next observation `s_`;
            # it was wired to `s`, which left the `s_` placeholder unused.
            self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer, b_initializer)
Beispiel #6
0
def GMM_M_Step(X, Gama, ClusterNo, name='GMM_Statistics', **kwargs):
    """Estimate per-cluster Gaussian parameters and the mean negative log-probability.

    ``X`` is split into three single-channel tensors along axis 3 and
    ``Gama`` is split into ``ClusterNo`` slices along axis 3, so both are
    rank-4 tensors whose last axes hold 3 channels and ``ClusterNo``
    responsibilities respectively. The reshapes to ``[1, ClusterNo]`` assume
    a batch of one - TODO confirm with callers.

    Args:
        X: input tensor, last axis of size 3.
        Gama: responsibilities, last axis of size ``ClusterNo``.
        ClusterNo: number of mixture components.
        name: unused; kept for interface compatibility.
        **kwargs: unused; kept for interface compatibility.

    Returns:
        Tuple ``(Log_Likelihood, Mu, Std)``: the mean of ``-log(p)`` over all
        positions (i.e. a negative log-likelihood despite the name), the list
        of per-cluster means and the list of per-cluster standard deviations.
    """
    D, h, s = tf.split(X, [1,1,1], axis=3)

    # Responsibility-weighted channel values, one slice per cluster.
    WXd = tf.multiply(Gama, tf.tile(D ,[1,1,1,ClusterNo]))
    WXa = tf.multiply(Gama, tf.tile(h ,[1,1,1,ClusterNo]))
    WXb = tf.multiply(Gama, tf.tile(s ,[1,1,1,ClusterNo]))

    # Total responsibility per cluster; epsilon guards the divisions below.
    S = tf.reduce_sum(tf.reduce_sum(Gama, axis=1), axis=1)
    S = tf.add(S, tensorflow.keras.backend.epsilon())
    S = tf.reshape(S,[1, ClusterNo])

    # Weighted means per channel and cluster.
    M_d = tf.div(tf.reduce_sum(tf.reduce_sum(WXd, axis=1), axis=1) , S)
    M_a = tf.div(tf.reduce_sum(tf.reduce_sum(WXa, axis=1), axis=1) , S)
    M_b = tf.div(tf.reduce_sum(tf.reduce_sum(WXb, axis=1), axis=1) , S)

    Mu = tf.split(tf.concat([M_d, M_a, M_b],axis=0), ClusterNo, 1)

    # Squared deviation of every position from each cluster mean.
    Norm_d = tf.squared_difference(D, tf.reshape(M_d,[1, ClusterNo]))
    Norm_h = tf.squared_difference(h, tf.reshape(M_a,[1, ClusterNo]))
    Norm_s = tf.squared_difference(s, tf.reshape(M_b,[1, ClusterNo]))

    WSd = tf.multiply(Gama, Norm_d)
    WSh = tf.multiply(Gama, Norm_h)
    WSs = tf.multiply(Gama, Norm_s)

    # Weighted standard deviations per channel and cluster.
    S_d = tf.sqrt(tf.div(tf.reduce_sum(tf.reduce_sum(WSd, axis=1), axis=1) , S))
    S_h = tf.sqrt(tf.div(tf.reduce_sum(tf.reduce_sum(WSh, axis=1), axis=1) , S))
    S_s = tf.sqrt(tf.div(tf.reduce_sum(tf.reduce_sum(WSs, axis=1), axis=1) , S))

    Std = tf.split(tf.concat([S_d, S_h, S_s],axis=0), ClusterNo, 1)

    # Fix: keep one distribution per cluster. The loop used to rebind `dist`
    # on every pass, so all clusters were scored with the last Gaussian.
    dist = [
        tfp.distributions.MultivariateNormalDiag(
            tf.reshape(Mu[k], [1, 3]), tf.reshape(Std[k], [1, 3]))
        for k in range(ClusterNo)
    ]

    PI = tf.split(Gama, ClusterNo, axis=3)
    ds = tf.expand_dims(dataset_tf(X), -2)
    samples = ds[:, 0, :]  # loop-invariant, so sliced once
    Prob0 = [
        tf.multiply(tf.squeeze(dist[k].prob(samples)), tf.squeeze(PI[k]))
        for k in range(ClusterNo)
    ]

    # Mixture probability per position, offset by epsilon and capped at 1
    # so the log below stays finite and non-positive.
    Prob = tf.convert_to_tensor(Prob0, dtype=tf.float32)
    Prob = tf.minimum(tf.add(tf.reduce_sum(Prob, axis=0), tensorflow.keras.backend.epsilon()), tf.constant(1.0, tf.float32))
    Log_Prob = tf.negative(tf.log(Prob))
    Log_Likelihood = tf.reduce_mean(Log_Prob)

    return Log_Likelihood, Mu, Std
Beispiel #7
0
def tfmodel(x, y):
    """Build a linear model ``W * x + b`` and its element-wise squared error.

    Both parameters are fresh ``tf.Variable``s initialised to 5.0.

    Returns:
        Tuple ``(pred, cost)`` where ``cost`` is the unreduced squared
        difference between ``pred`` and ``y``.
    """
    weight = tf.Variable(5.)
    bias = tf.Variable(5.)

    prediction = weight * x + bias
    return prediction, tf.squared_difference(prediction, y)
Beispiel #8
0
def discriminator_loss(type, real, fake):
    """Sum the discriminator loss over all scales.

    Args:
        type: ``'lsgan'`` for least-squares terms or ``'gan'`` for sigmoid
            cross-entropy terms. Any other value contributes 0 per scale.
        real: sequence of discriminator outputs on real samples, one per scale.
        fake: sequence of discriminator outputs on fake samples, indexed in
            step with ``real``.

    Returns:
        The sum over scales of ``real_loss + fake_loss`` (0 when ``real`` is
        empty).
    """
    total = 0
    real_term = 0
    fake_term = 0

    for scale in range(len(real)):
        if type == 'lsgan':
            # Real outputs are pulled towards 1, fake outputs towards 0.
            real_term = tf.reduce_mean(tf.squared_difference(real[scale], 1.0))
            fake_term = tf.reduce_mean(tf.square(fake[scale]))
        elif type == 'gan':
            real_logits = real[scale]
            real_term = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(real_logits), logits=real_logits))
            fake_logits = fake[scale]
            fake_term = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.zeros_like(fake_logits), logits=fake_logits))

        total = total + (real_term + fake_term)

    return total
Beispiel #9
0
 def _build_model(self):
     """Build a stacked-LSTM sequence regressor and, when training, its MSE objective.

     The graph maps ``x_holder`` of shape [batch_size, num_steps, 1] through
     ``num_layers`` LSTM cells, a sigmoid hidden layer and a linear output
     layer to ``self.preds`` of the same shape. When ``self.is_training`` is
     set, an Adam optimizer minimizes the mean squared error against
     ``y_holder``.
     """
     self.x_holder = tf.placeholder(
         tf.float32, [self.batch_size, self.num_steps, 1], name="x")
     self.y_holder = tf.placeholder(
         tf.float32, [self.batch_size, self.num_steps, 1], name="y")

     # Recurrent stack, started from an all-zero state.
     lstm_cells = []
     for _ in range(self.num_layers):
         lstm_cells.append(tf.nn.rnn_cell.LSTMCell(self.rnn_size))
     stacked_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells, state_is_tuple=True)
     self.init_state = stacked_cell.zero_state(self.batch_size, tf.float32)

     rnn_outputs, self.final_state = tf.nn.dynamic_rnn(
         cell=stacked_cell,
         inputs=self.x_holder,
         initial_state=self.init_state)

     # Hidden layer applied to every time step independently.
     hidden_w = tf.get_variable(
         'w1', shape=[self.rnn_size, self.hidden_size], dtype=tf.float32,
         initializer=tf.truncated_normal_initializer(stddev=0.2))
     hidden_b = tf.get_variable(
         'b1', shape=[self.hidden_size], dtype=tf.float32,
         initializer=tf.constant_initializer())
     flat_outputs = tf.reshape(rnn_outputs, [-1, self.rnn_size])
     hidden = tf.nn.sigmoid(tf.matmul(flat_outputs, hidden_w) + hidden_b)

     # Linear read-out to a single value per time step.
     out_w = tf.get_variable(
         'w2', shape=[self.hidden_size, 1], dtype=tf.float32,
         initializer=tf.truncated_normal_initializer(stddev=0.2))
     out_b = tf.get_variable(
         'b2', shape=[1], dtype=tf.float32,
         initializer=tf.constant_initializer())
     flat_preds = tf.matmul(hidden, out_w) + out_b
     self.preds = tf.reshape(flat_preds, [self.batch_size, self.num_steps, 1])

     if not self.is_training:
         return

     self.optimizer = tf.train.AdamOptimizer()
     self.loss = tf.reduce_mean(
         tf.squared_difference(self.preds, self.y_holder))
     self.train_op = self.optimizer.minimize(self.loss)
Beispiel #10
0
    def __init__(self):
        """Build the main/target Q-networks, the training op and a session.

        Reads the module-level ``state_dim``, ``action_num``, ``gamma`` and
        ``lr``. The loss only looks at element 0 of the reward, action and
        next-Q placeholders, i.e. it trains on the first sample of whatever
        is fed - NOTE(review): confirm callers feed one transition at a time.
        """

        self.states_ph = tf.placeholder(tf.float32, (None, state_dim))
        self.actions_ph = tf.placeholder(tf.int32, (None, ))
        self.rewards_ph = tf.placeholder(tf.float32, (None, 1))
        self.next_states_ph = tf.placeholder(tf.float32, (None, state_dim))

        # The done flag is applied to next_q_value outside the network, so the
        # (already processed) next Q-values are fed back in through this placeholder.
        self.next_q_values_ph = tf.placeholder(tf.float32, (None, action_num))

        # -------- network definition -------- #

        with tf.variable_scope('main'):  # q=f(s)
            layer = tf.layers.dense(self.states_ph, 20, tf.nn.relu)
            self.q_values = tf.layers.dense(layer, action_num, None)

        with tf.variable_scope('target'):  # target_net computes next_q_value
            layer = tf.layers.dense(self.next_states_ph, 20, tf.nn.relu)
            self.next_q_values = tf.layers.dense(layer, action_num, None)

        # Collect the network variables before the optimizer is created so
        # Adam's slot variables (whose names also contain 'main') cannot end
        # up in these lists.
        main_vars = [var for var in tf.global_variables() if 'main' in var.name]
        target_vars = [var for var in tf.global_variables() if 'target' in var.name]

        # -------- training update definition -------- # q_target=r+gamma*max_a(q(s',a'))

        q_target = tf.stop_gradient(self.rewards_ph[0] + gamma * tf.reduce_max(self.next_q_values_ph[0], axis=0))

        loss = tf.reduce_mean(tf.squared_difference(q_target, self.q_values[0][self.actions_ph[0]]))
        self.optimizer = tf.train.AdamOptimizer(lr).minimize(loss)

        # Fix: tf.assign(ref, value) writes `value` into `ref`. The target
        # update must copy main -> target; the arguments were swapped, which
        # overwrote the trained main network with the target's weights.
        self.target_update = [tf.assign(target_var, main_var) for main_var, target_var in zip(main_vars, target_vars)]

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
Beispiel #11
0
    def __init__(self, state_size, learning_rate, name='critic'):
        """Build a one-hidden-layer state-value network and its Adam train op.

        Args:
            state_size: width of the state placeholder.
            learning_rate: stored on the instance, then replaced by a
                placeholder of the same attribute name inside the scope, so
                the effective learning rate is whatever is fed at run time.
            name: variable scope that wraps everything created here.
        """
        self.state_size = state_size
        self.learning_rate = learning_rate

        with tf.variable_scope(name):
            # Inputs.
            self.state = tf.placeholder(
                tf.float32, [None, self.state_size], name="state")
            self.R_t = tf.placeholder(tf.float32, name="total_rewards")
            self.learning_rate = tf.placeholder(
                tf.float32, name="learning_rate")

            # Parameters: state_size -> 12 -> 1.
            self.W1 = tf.get_variable(
                "W1",
                [self.state_size, 12],
                initializer=tensorflow.initializers.variance_scaling(seed=0))
            self.b1 = tf.get_variable(
                "b1", [12], initializer=tf.zeros_initializer())
            self.W2 = tf.get_variable(
                "W2",
                [12, 1],
                initializer=tensorflow.initializers.variance_scaling(seed=0))
            self.b2 = tf.get_variable(
                "b2", [1], initializer=tf.zeros_initializer())

            # Forward pass.
            hidden_pre = tf.matmul(self.state, self.W1)
            self.Z1 = tf.add(hidden_pre, self.b1)
            self.A1 = tf.nn.relu(self.Z1)
            value_pre = tf.matmul(self.A1, self.W2)
            self.output = tf.add(value_pre, self.b2)

            # Unreduced squared error against the fed return.
            squeezed_value = tf.squeeze(self.output)
            self.square_loss = tf.squared_difference(squeezed_value, self.R_t)

            # Every trainable variable currently in the graph is handed to
            # the optimizer, not only the ones created in this scope.
            all_trainables = tf.trainable_variables()
            adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            self.optimizer = adam.minimize(
                self.square_loss, var_list=all_trainables)
Beispiel #12
0
  def training_losses(self, denoise_fn, x_start, t, noise=None):
    """
    Compute the per-example training loss for noised inputs.

    `x_start` is noised to timestep `t` via `self.q_sample` (Gaussian noise
    is drawn when `noise` is None). Depending on `self.loss_type` the result
    is either the variational-bound terms ('kl') or the flattened mean
    squared error between the model output and the target picked by
    `self.model_mean_type` ('mse'). Any other loss type raises
    NotImplementedError.

    Returns a tensor with the same shape as `t`.
    """
    # One timestep per batch element.
    assert t.shape == [x_start.shape[0]]

    # Add noise to data
    if noise is None:
      noise = tf.random_normal(shape=x_start.shape, dtype=x_start.dtype)
    assert noise.shape == x_start.shape and noise.dtype == x_start.dtype
    x_t = self.q_sample(x_start=x_start, t=t, noise=noise)

    # Calculate the loss
    loss_type = self.loss_type
    if loss_type == 'mse':  # unweighted MSE
      assert self.model_var_type != 'learned'
      # All candidates are built eagerly; the mean type selects one.
      candidates = {
        'xprev': self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t)[0],
        'xstart': x_start,
        'eps': noise
      }
      target = candidates[self.model_mean_type]
      model_output = denoise_fn(x_t, t)
      assert model_output.shape == target.shape == x_start.shape
      squared_error = tf.squared_difference(target, model_output)
      losses = nn.meanflat(squared_error)
    elif loss_type == 'kl':  # the variational bound
      losses = self._vb_terms_bpd(
        denoise_fn=denoise_fn,
        x_start=x_start,
        x_t=x_t,
        t=t,
        clip_denoised=False,
        return_pred_xstart=False)
    else:
      raise NotImplementedError(loss_type)

    assert losses.shape == t.shape
    return losses
Beispiel #13
0
    def log_prob_fn(params):
      """Joint log-probability of the kernel parameters and the observations.

      `params` is split along its last axis into `rho` (`num_features`
      entries), `alpha` (1 entry) and `sigma` (1 entry). The result is the
      sum of the three prior log-probabilities and the log-probability of
      `y` under a multivariate normal whose covariance is built from `x`.
      `x`, `y` and `num_features` come from the enclosing scope.
      """
      rho, alpha, sigma = tf.split(params, [num_features, 1, 1], -1)

      one = tf.ones(num_features)
      def indep(d):
        # Treat the last batch dimension of `d` as part of the event.
        return tfd.Independent(d, 1)
      # Priors: InverseGamma(5, 5) per rho entry, HalfNormal(1) for alpha and sigma.
      p_rho = indep(tfd.InverseGamma(5. * one, 5. * one))
      p_alpha = indep(tfd.HalfNormal([1.]))
      p_sigma = indep(tfd.HalfNormal([1.]))

      rho_shape = tf.shape(rho)
      alpha_shape = tf.shape(alpha)

      # Pairwise squared differences between rows of x, per feature.
      # assumes x is (num_points, num_features) - TODO confirm
      x1 = tf.expand_dims(x, -2)
      x2 = tf.expand_dims(x, -3)
      exp = -0.5 * tf.squared_difference(x1, x2)
      # Divide by rho**2, reshaped with two singleton axes after the leading
      # axis so it broadcasts over the pair axes.
      exp /= tf.reshape(tf.square(rho), tf.concat([rho_shape[:1], [1, 1], rho_shape[1:]], 0))
      # Sum over the feature axis (kept as a size-1 axis for the next add).
      exp = tf.reduce_sum(exp, -1, keep_dims=True)
      # Adding 2*log(alpha) in log space multiplies the kernel by alpha**2.
      exp += 2. * tf.reshape(tf.log(alpha), tf.concat([alpha_shape[:1], [1, 1], alpha_shape[1:]], 0))
      exp = tf.exp(exp[Ellipsis, 0])
      # Add sigma**2 + 1e-6 on the diagonal (one entry per row of x).
      exp += tf.matrix_diag(tf.tile(tf.square(sigma), [1, int(x.shape[0])]) + 1e-6)
      exp = tf.check_numerics(exp, "exp 2 has NaNs")
      # Debug aid: print the first covariance matrix whenever `exp` is evaluated.
      with tf.control_dependencies([tf.print(exp[0], summarize=99999)]):
        exp = tf.identity(exp)

      # No `loc` is passed, so the distribution's default mean is used.
      p_y = tfd.MultivariateNormalFullCovariance(
          covariance_matrix=exp)

      log_prob = (
          p_rho.log_prob(rho) + p_alpha.log_prob(alpha) +
          p_sigma.log_prob(sigma) + p_y.log_prob(y))

      return log_prob
Beispiel #14
0
    def _build_net(self):
        """Build the eval/target Q-networks, the TD loss and the RMSProp train op.

        The eval net scores the current state ``s``; the target net scores the
        next state ``s_``. The TD target ``r + gamma * max_a' Q_target(s_, a')``
        is wrapped in ``stop_gradient`` and compared with the eval Q-value of
        the action actually taken.
        """
        # ------------------ all inputs ------------------------
        tf.compat.v1.disable_eager_execution()
        self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input State
        self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input Next State
        self.r = tf.placeholder(tf.float32, [None, ], name='r')  # input Reward
        self.a = tf.placeholder(tf.int32, [None, ], name='a')  # input Action

        w_init = tf.random_normal_initializer(0., 0.3)
        b_init = tf.constant_initializer(0.1)

        def dense(inputs, units, layer_name, activation=None):
            # Fully connected layer sharing the initializers above.
            return tf.layers.dense(inputs, units, activation,
                                   kernel_initializer=w_init,
                                   bias_initializer=b_init,
                                   name=layer_name)

        # ------------------ build evaluate_net ------------------
        with tf.variable_scope('eval_net'):
            eval_hidden = dense(self.s, 20, 'e1', tf.nn.relu)
            self.q_eval = dense(eval_hidden, self.n_actions, 'q')

        # ------------------ build target_net ------------------
        with tf.variable_scope('target_net'):
            target_hidden = dense(self.s_, 20, 't1', tf.nn.relu)
            self.q_next = dense(target_hidden, self.n_actions, 't2')

        with tf.variable_scope('q_target'):
            best_next_q = tf.reduce_max(self.q_next, axis=1, name='Qmax_s_')
            td_target = self.r + self.gamma * best_next_q    # shape=(None, )
            self.q_target = tf.stop_gradient(td_target)
        with tf.variable_scope('q_eval'):
            # Pair every row index with its chosen action to gather Q(s, a).
            row_ids = tf.range(tf.shape(self.a)[0], dtype=tf.int32)
            row_action_pairs = tf.stack([row_ids, self.a], axis=1)
            self.q_eval_wrt_a = tf.gather_nd(
                params=self.q_eval, indices=row_action_pairs)    # shape=(None, )
        with tf.variable_scope('loss'):
            td_error_sq = tf.squared_difference(
                self.q_target, self.q_eval_wrt_a, name='TD_error')
            self.loss = tf.reduce_mean(td_error_sq)
        with tf.variable_scope('train'):
            rmsprop = tf.train.RMSPropOptimizer(self.lr)
            self._train_op = rmsprop.minimize(self.loss)
Beispiel #15
0
    def tower_loss(self, x, y_, z_):
        """Build the combined policy/value/regularization loss for one tower.

        Args:
            x: network input passed to ``self.construct_net``.
            y_: policy labels for the softmax cross-entropy.
            z_: value targets for the squared-error term.

        Returns:
            Tuple ``(loss, policy_loss, mse_loss, reg_term, y_conv)`` where
            ``y_conv`` holds the policy logits (cast to float32 when the
            model runs in another dtype).
        """
        policy_logits, value_out = self.construct_net(x)

        # Cast the nn result back to fp32 to avoid loss overflow/underflow
        if self.model_dtype != tf.float32:
            policy_logits = tf.cast(policy_logits, tf.float32)
            value_out = tf.cast(value_out, tf.float32)

        # Calculate loss on policy head
        policy_xent = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=policy_logits)
        policy_loss = tf.reduce_mean(policy_xent)

        # Loss on value head
        value_sq_err = tf.squared_difference(z_, value_out)
        mse_loss = tf.reduce_mean(value_sq_err)

        # Regularizer: scaled sum of the L2 losses of the WEIGHTS collection.
        l2_terms = []
        for weight in tf.get_collection(tf.GraphKeys.WEIGHTS):
            l2_terms.append(tf.cast(tf.nn.l2_loss(weight), tf.float32))
        reg_term = self.l2_scale * tf.add_n(l2_terms)

        # For training from a (smaller) dataset of strong players, you will
        # want to reduce the factor in front of mse_loss here.
        loss = 1.0 * policy_loss + 1.0 * mse_loss + reg_term

        return loss, policy_loss, mse_loss, reg_term, policy_logits
Beispiel #16
0
    def neuralNetwork(self):
        """Build the main and old Q-networks plus the main network's loss and train op.

        Both networks use the same two-layer layout (ReLU hidden layer of 10
        units, linear output of ``self.actions`` units) and the same
        initializers. The main net reads ``self.s``; the old net reads
        ``self.ns``.
        """

        def two_layer_net(inputs, collections, hidden_units, w_init, b_init):
            # Dense ReLU layer followed by a linear layer, created under the
            # 'layer1' / 'layer2' scopes of the caller's variable scope.
            with tf.variable_scope('layer1'):
                w1 = tf.get_variable('w1', [self.features, hidden_units],
                                     initializer=w_init,
                                     collections=collections)
                b1 = tf.get_variable('b1', [1, hidden_units],
                                     initializer=b_init,
                                     collections=collections)
                hidden = tf.nn.relu(tf.matmul(inputs, w1) + b1)

            with tf.variable_scope('layer2'):
                w2 = tf.get_variable('w2', [hidden_units, self.actions],
                                     initializer=w_init,
                                     collections=collections)
                b2 = tf.get_variable('b2', [1, self.actions],
                                     initializer=b_init,
                                     collections=collections)
                return tf.matmul(hidden, w2) + b2

        # Build the main network.
        self.s = tf.placeholder(
            tf.float32, [None, self.features], name='state')
        self.t = tf.placeholder(
            tf.float32, [None, self.actions], name='Qtarget')
        with tf.variable_scope('mainnet'):
            # Parameter collections, layer width and initializers.
            main_collections = ['main net parameters',
                                tf.GraphKeys.GLOBAL_VARIABLES]
            hidden_units = 10
            w_init = tf.random_normal_initializer(0.0, 0.3)
            b_init = tf.constant_initializer(0.1)

            self.Qpredict = two_layer_net(
                self.s, main_collections, hidden_units, w_init, b_init)

        with tf.variable_scope('loss'):
            squared_error = tf.squared_difference(self.t, self.Qpredict)
            self.loss = tf.reduce_mean(squared_error)
        with tf.variable_scope('train'):
            rmsprop = tf.train.RMSPropOptimizer(self.lr)
            self.train = rmsprop.minimize(self.loss)

        # Build the old network.
        self.ns = tf.placeholder(
            tf.float32, [None, self.features], name='newState')
        with tf.variable_scope('oldnet'):
            old_collections = ['old net parameters',
                               tf.GraphKeys.GLOBAL_VARIABLES]

            self.QnextStatePredict = two_layer_net(
                self.ns, old_collections, hidden_units, w_init, b_init)
Beispiel #17
0
 def loss_som_s(self):
     """Return the standard-SOM initialization loss and log it as a summary.

     The loss is the mean squared difference between ``self.sample_z_e``
     (gradient stopped, with an axis inserted at position 1) and
     ``self.z_q_neighbors``.
     """
     frozen_encoding = tf.stop_gradient(self.sample_z_e)
     encoding_per_neighbor = tf.expand_dims(frozen_encoding, axis=1)
     squared_dist = tf.squared_difference(
         encoding_per_neighbor, self.z_q_neighbors)
     loss_som = tf.reduce_mean(squared_dist)
     tf.summary.scalar("loss_som_s", loss_som)
     return loss_som
Beispiel #18
0
    def _get_lr_tensor(self):
        """Get lr minimizing the surrogate.

        Computes ``(1 - sqrt(mu))**2 / h_min`` from ``self._mu`` and
        ``self._h_min``.

        Returns:
          The lr_t.
        """
        sqrt_mu = tf.sqrt(self._mu)
        numerator = tf.squared_difference(1.0, sqrt_mu)
        return numerator / self._h_min
Beispiel #19
0
 def loss_som_old(self):
     """Return the SOM loss and log it as a summary.

     The loss is the mean squared difference between ``self.z_e_sample``
     (gradient stopped, with an axis inserted at position 1) and
     ``self.z_q_neighbors``.
     """
     frozen_encoding = tf.stop_gradient(self.z_e_sample)
     encoding_per_neighbor = tf.expand_dims(frozen_encoding, axis=1)
     squared_dist = tf.squared_difference(
         encoding_per_neighbor, self.z_q_neighbors)
     loss_som = tf.reduce_mean(squared_dist)
     tf.summary.scalar("loss_som_old", loss_som)
     return loss_som
 def compute_noise_and_variance(wx, center, vote_conf, masses):
     """Return the squared deviation from `center` and the resulting variance.

     ``noise`` is the element-wise squared difference between ``wx`` and
     ``center``. ``variance`` is ``min_var`` (taken from the enclosing scope)
     plus the ``vote_conf``-weighted sum of ``noise`` over axes 1, -1 and -2
     (kept as size-1 axes), divided by ``masses``.
     """
     noise = tf.squared_difference(wx, center)
     weighted_noise_sum = tf.reduce_sum(
         vote_conf * noise,
         axis=[1, -1, -2],
         keepdims=True,
         name='variance_calculation')
     variance = min_var + weighted_noise_sum / masses
     return noise, variance
Beispiel #21
0
def normalized_mean_square_error(output, target):
    """Return the TensorFlow expression of normalized mean-square-error of two distributions.

    For every sample the L2 norm of ``output - target`` is divided by the L2
    norm of ``target`` (both taken over all non-batch axes); the ratios are
    then averaged over the batch axis. A sample whose ``target`` is all zeros
    divides by zero - this is not guarded here.

    Parameters
    ----------
    output : tensor of static rank >= 2, e.g. [batch_size, n_feature] or [batch_size, w, h, c].
    target : tensor with the same layout as ``output``.

    Raises
    ------
    ValueError
        If the static rank of ``output`` is unknown or smaller than 2.
        (Previously any rank other than 2 or 4 failed with an
        UnboundLocalError when ``nmse_a`` was read.)
    """
    ndims = output.get_shape().ndims
    if ndims is None or ndims < 2:
        raise ValueError(
            "normalized_mean_square_error expects a tensor of known rank "
            ">= 2, got rank %s" % ndims)

    # The scope name is kept as-is so existing graph op names do not change.
    with tf.name_scope("mean_squared_error_loss"):
        # Every axis except the batch axis: [1] for 2D, [1, 2, 3] for 4D.
        sample_axes = list(range(1, ndims))
        nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=sample_axes))
        nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=sample_axes))
        nmse = tf.reduce_mean(nmse_a / nmse_b)
    return nmse
Beispiel #22
0
    def memAutoEnc(self, new_memory, info, control, name="", reuse=None):
        """Build the memory auto-encoder loss.

        The encoder input (``info`` when ``cfg.autoEncMemInputs == "INFO"``,
        otherwise ``new_memory``) is projected from ``self.memory_dim`` to
        ``self.control_dim``. ``cfg.autoEncMemLoss`` then selects the loss:

        * ``"CONT"``: mean squared error between ``control`` and the projection.
        * ``"PROB"``: softmax cross-entropy between the last question
          attention and logits computed over the question words.
        * anything else: mean squared error between ``control`` and the
          attention-weighted summary of the question words.

        Returns:
            The scalar loss tensor.
        """
        with tf.variable_scope("memAutoEnc" + name, reuse=reuse):
            # inputs to auto encoder
            if cfg.autoEncMemInputs == "INFO":
                encoder_input = info
            else:
                encoder_input = new_memory
            encoded = ops.linear(encoder_input,
                                 self.memory_dim,
                                 self.control_dim,
                                 act=cfg.autoEncMemAct,
                                 name="aeMem")

            loss_mode = cfg.autoEncMemLoss

            # reconstruct control
            if loss_mode == "CONT":
                return tf.reduce_mean(tf.squared_difference(control, encoded))

            word_interactions, interaction_dim = ops.mul(
                self.question_contextual_word_embeddings,
                encoded,
                self.control_dim,
                concat={"x": cfg.autoEncMemCnct},
                mulBias=cfg.mulBias,
                name="aeMem")

            word_logits = ops.linear(word_interactions,
                                     interaction_dim,
                                     1,
                                     dropout=0.,
                                     name="logits")
            word_logits = self.expMask(word_logits, self.question_lengths)

            # reconstruct word attentions
            if loss_mode == "PROB":
                attention_xent = tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.attentions["question"][-1],
                    logits=word_logits)
                return tf.reduce_mean(attention_xent)

            # reconstruct control through words attentions
            attention = tf.nn.softmax(word_logits)
            summary = ops.att2Smry(
                attention, self.question_contextual_word_embeddings)
            return tf.reduce_mean(tf.squared_difference(control, summary))
Beispiel #23
0
 def z_dist_flat(self):
     """Return squared distances between encodings and embeddings, flattened per sample.

     ``self.sample_z_e`` gets two axes inserted at position 1 and
     ``self.embeddings`` one axis at position 0 so they broadcast; the
     squared differences are summed over the last axis and reshaped to
     ``[-1, som_dim[0] * som_dim[1]]``.
     """
     encodings = tf.expand_dims(self.sample_z_e, 1)
     encodings = tf.expand_dims(encodings, 1)
     embedding_grid = tf.expand_dims(self.embeddings, 0)

     squared_diff = tf.squared_difference(encodings, embedding_grid)
     dist_per_node = tf.reduce_sum(squared_diff, axis=-1)

     num_nodes = self.som_dim[0] * self.som_dim[1]
     return tf.reshape(dist_per_node, [-1, num_nodes])
Beispiel #24
0
def build_model(x,
                lmbda,
                mode='training',
                layers=None,
                msssim_loss=False):
  """Build the compression model graph and its rate/distortion terms.

  Args:
    x: Input tensor. The pixel count used for the rate is the product of all
      but its last dimension.
    lmbda: Weight applied to the distortion term in the loss.
    mode: 'training' runs both bottlenecks with ``training=True``; any other
      value runs them with ``training=False``. When equal to "testing" the
      compress ops are created as well.
    layers: Optional 5-element sequence ordered as (analysis transform,
      hyper analysis transform, entropy bottleneck, hyper synthesis
      transform, synthesis transform). When None, fresh layers with 192
      filters are created.
    msssim_loss: If true, the distortion is ``mean(1 - MS-SSIM)``; otherwise
      it is the scaled mean squared error.

  Returns:
    Tuple ``(loss, bpp, mse, msssim, x_tilde_hat, y_tilde_hat, z_tilde_hat,
    y, z, string, side_string, layers)``. ``string`` and ``side_string`` are
    None unless ``mode == "testing"``.
  """
  is_training = (mode == 'training')
  num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

  if layers is None:
    # Construct the default layers in a fixed order, then pack them in the
    # order the unpacking below (and callers reusing `layers`) expect.
    num_filters = 192
    default_analysis = AnalysisTransform(num_filters)
    default_synthesis = SynthesisTransform(num_filters)
    default_hyper_analysis = HyperAnalysisTransform(num_filters)
    default_hyper_synthesis = HyperSynthesisTransform(num_filters)
    default_bottleneck = tfc.EntropyBottleneck()
    layers = (default_analysis, default_hyper_analysis,
              default_bottleneck, default_hyper_synthesis,
              default_synthesis)

  (analysis_transform, hyper_analysis_transform, entropy_bottleneck,
   hyper_synthesis_transform, synthesis_transform) = layers

  # Forward pass: x -> y -> z, then back through the bottlenecks.
  y = analysis_transform(x)
  z = hyper_analysis_transform(y)
  z_tilde_hat, z_likelihoods = entropy_bottleneck(z, training=is_training)
  mean, sigma = hyper_synthesis_transform(z_tilde_hat)

  # Scale table: SCALES_LEVELS values log-spaced in [SCALES_MIN, SCALES_MAX].
  log_scales = np.linspace(
      np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)
  scale_table = np.exp(log_scales)
  conditional_bottleneck = tfc.GaussianConditional(
      sigma, scale_table, mean=mean)
  y_tilde_hat, y_likelihoods = conditional_bottleneck(y, training=is_training)
  x_tilde_hat = synthesis_transform(y_tilde_hat)

  # Compress ops are only created in testing mode.
  string = None
  side_string = None
  if mode == "testing":
    side_string = entropy_bottleneck.compress(z_tilde_hat)
    string = conditional_bottleneck.compress(y_tilde_hat)

  # Rate: negative total log2-likelihood divided by the pixel count.
  total_log_likelihood = (tf.reduce_sum(tf.log(y_likelihoods)) +
                          tf.reduce_sum(tf.log(z_likelihoods)))
  bpp = total_log_likelihood / (-np.log(2) * num_pixels)

  # MSE scaled by 255^2 (assumes x is in [0, 1] -- TODO confirm).
  mse = tf.reduce_mean(tf.squared_difference(x, x_tilde_hat)) * 255 ** 2

  msssim = tf.reduce_mean(1 - tf.image.ssim_multiscale(x_tilde_hat, x, 1))

  if msssim_loss:
    distortion = msssim
  else:
    distortion = mse

  loss = lmbda * distortion + bpp

  return (loss, bpp, mse, msssim, x_tilde_hat, y_tilde_hat, z_tilde_hat,
          y, z, string, side_string, layers)
Beispiel #25
0
 def generator_loss(self, D, fake_y, use_lsgan=True):
     """Return the generator loss for fooling D into rating G(x) as real.

     Args:
         D: Callable discriminator, applied once to ``fake_y``.
         fake_y: Generated samples.
         use_lsgan: If true, use the mean squared difference between
             ``D(fake_y)`` and ``REAL_LABEL``; otherwise use the heuristic
             non-saturating log loss, halved.

     Returns:
         The loss tensor (a mean over all discriminator outputs).
     """
     d_fake = D(fake_y)
     if not use_lsgan:
         # heuristic, non-saturating loss
         return -tf.reduce_mean(ops.safe_log(d_fake)) / 2
     # least-squares form: mean squared error against the "real" label
     return tf.reduce_mean(tf.squared_difference(d_fake, REAL_LABEL))
 def z_dist_flat_ng(self):
     """Return flattened squared distances with gradients to ``z_e`` blocked.

     Same computation as the distance between the latent encodings and the
     embeddings, except that ``self.z_e`` is wrapped in ``tf.stop_gradient``
     so no gradient flows back into the latent encodings. The result is
     summed over the last axis and reshaped to
     ``[-1, som_dim[0] * som_dim[1]]``.
     """
     frozen_z_e = tf.stop_gradient(self.z_e)
     encodings = tf.expand_dims(tf.expand_dims(frozen_z_e, 1), 1)
     centroids = tf.expand_dims(self.embeddings, 0)
     # original author's shape note for the summed tensor: 1,32,8,8 (unverified)
     summed_sq = tf.reduce_sum(
         tf.squared_difference(encodings, centroids), axis=-1)
     n_nodes = self.som_dim[0] * self.som_dim[1]
     # original author's shape note for the result: 1,32,64 (unverified)
     return tf.reshape(summed_sq, [-1, n_nodes])
Beispiel #27
0
def load_test_model_graph(checkpoint_dir):
    """Build the test-mode graph and restore it from the latest checkpoint.

    Both bottlenecks are called with ``training=False``. A new ``tf.Session``
    is created and the most recent checkpoint found in ``checkpoint_dir`` is
    restored into it.

    Args:
        checkpoint_dir: Directory searched with ``tf.train.latest_checkpoint``.

    Returns:
        Tuple ``(sess, x, orig_x, [string, side_string], eval_bpp, x_hat,
        mse, psnr, msssim, num_pixels, y, z)`` where ``x`` and ``orig_x`` are
        float32 placeholders of shape ``[1, None, None, 3]`` and ``x_hat`` is
        the reconstruction clipped to [0, 1], scaled by 255 and rounded.
    """
    num_filters = 192

    # Input placeholders: the image to code and the reference it is scored
    # against.
    inputs = tf.placeholder(tf.float32, [1, None, None, 3])
    reference = tf.placeholder(tf.float32, [1, None, None, 3])

    # Instantiate the model layers.
    analysis_transform = AnalysisTransform(num_filters)
    synthesis_transform = SynthesisTransform(num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    latents = analysis_transform(inputs)
    latents_shape = tf.shape(latents)
    hyper_latents = hyper_analysis_transform(abs(latents))
    hyper_hat, hyper_likelihoods = entropy_bottleneck(
        hyper_latents, training=False)
    sigma = hyper_synthesis_transform(hyper_hat)
    # Crop sigma on axes 1 and 2 to the extent of the latents.
    sigma = sigma[:, :latents_shape[1], :latents_shape[2], :]
    log_scales = np.linspace(
        np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)
    scale_table = np.exp(log_scales)
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    side_string = entropy_bottleneck.compress(hyper_latents)
    string = conditional_bottleneck.compress(latents)

    # Transform the quantized image back.
    latents_hat, latents_likelihoods = conditional_bottleneck(
        latents, training=False)
    reconstruction = synthesis_transform(latents_hat)

    # Rate: negative total log2-likelihood divided by the pixel count.
    num_pixels = tf.cast(
        tf.reduce_prod(tf.shape(inputs)[:-1]), dtype=tf.float32)
    total_log_likelihood = (tf.reduce_sum(tf.log(latents_likelihoods)) +
                            tf.reduce_sum(tf.log(hyper_likelihoods)))
    eval_bpp = total_log_likelihood / (-np.log(2) * num_pixels)

    # Reconstruction metrics, computed with both images in the 0..255 range.
    reference_255 = reference * 255
    reconstruction_255 = tf.round(
        tf.clip_by_value(reconstruction, 0, 1) * 255)
    mse = tf.reduce_mean(
        tf.squared_difference(reference_255, reconstruction_255))
    psnr = tf.squeeze(tf.image.psnr(reconstruction_255, reference_255, 255))
    msssim = tf.squeeze(
        tf.image.ssim_multiscale(reconstruction_255, reference_255, 255))

    # Open a session and restore the latest checkpoint into it.
    sess = tf.Session()
    latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)

    compressed = [string, side_string]
    return (sess, inputs, reference, compressed, eval_bpp,
            reconstruction_255, mse, psnr, msssim, num_pixels, latents,
            hyper_latents)
Beispiel #28
0
    def _build_net(self):
        """Build the dueling evaluation/target networks, loss and train op.

        Creates placeholders ``self.s``, ``self.q_target``, ``self.ISWeights``
        and ``self.s_``; the evaluation output ``self.q_eval`` (variables in
        the 'eval_net_params' collection); the target output ``self.q_next``
        (variables in 'target_net_params'); ``self.abs_errors``, ``self.loss``
        and ``self._train_op``. ``self.V`` and ``self.A`` are assigned on every
        network build, so after this method they refer to the target network.
        """
        def build_layers(s, c_names, w_initializer, b_initializer):
            """Stack ReLU layers sized by ``self.hidden`` and return Q."""
            layer_in, in_units = s, self.n_features
            for idx, out_units in enumerate(self.hidden):
                with tf.variable_scope('l%i' % idx):
                    w = tf.get_variable('w', [in_units, out_units],
                                        initializer=w_initializer,
                                        collections=c_names)
                    b = tf.get_variable('b', [1, out_units],
                                        initializer=b_initializer,
                                        collections=c_names)
                    layer_in = tf.nn.relu(tf.matmul(layer_in, w) + b)
                # The next layer's input width is this layer's output width.
                in_units = out_units

            # State-value stream: one output per sample.
            with tf.variable_scope('Value'):
                w = tf.get_variable('w', [self.hidden[-1], 1],
                                    initializer=w_initializer,
                                    collections=c_names)
                b = tf.get_variable('b', [1, 1],
                                    initializer=b_initializer,
                                    collections=c_names)
                self.V = tf.matmul(layer_in, w) + b

            # Advantage stream: one output per action.
            with tf.variable_scope('Advantage'):
                w = tf.get_variable('w', [self.hidden[-1], self.n_actions],
                                    initializer=w_initializer,
                                    collections=c_names)
                b = tf.get_variable('b', [1, self.n_actions],
                                    initializer=b_initializer,
                                    collections=c_names)
                self.A = tf.matmul(layer_in, w) + b

            # Q = V(s) + (A(s, a) - mean over actions of A(s, a))
            with tf.variable_scope('Q'):
                mean_advantage = tf.reduce_mean(self.A, axis=1, keep_dims=True)
                return self.V + (self.A - mean_advantage)

        # Layer configuration shared by both networks.
        w_initializer = tf.random_normal_initializer(0., 0.01)
        b_initializer = tf.constant_initializer(0.01)

        # ------------------ build evaluate_net ------------------
        self.s = tf.placeholder(
            tf.float32, [None, self.n_features], name='s')  # input
        self.q_target = tf.placeholder(
            tf.float32, [None, self.n_actions],
            name='Q_target')  # for calculating loss
        self.ISWeights = tf.placeholder(
            tf.float32, [None, 1], name='IS_weights')
        with tf.variable_scope('eval_net'):
            eval_collections = ['eval_net_params',
                                tf.GraphKeys.GLOBAL_VARIABLES]
            self.q_eval = build_layers(
                self.s, eval_collections, w_initializer, b_initializer)

        with tf.variable_scope('loss'):
            # Absolute summed error per sample, for updating Sumtree.
            q_error = self.q_target - self.q_eval
            self.abs_errors = tf.abs(tf.reduce_sum(q_error, axis=1))
            # Squared error weighted per sample by ISWeights.
            weighted_sq_error = self.ISWeights * tf.squared_difference(
                self.q_target, self.q_eval)
            self.loss = tf.reduce_mean(weighted_sq_error)

        with tf.variable_scope('train'):
            optimizer = tf.train.AdamOptimizer(self.lr)
            self._train_op = optimizer.minimize(self.loss)

        # ------------------ build target_net ------------------
        self.s_ = tf.placeholder(
            tf.float32, [None, self.n_features], name='s_')  # input
        with tf.variable_scope('target_net'):
            target_collections = ['target_net_params',
                                  tf.GraphKeys.GLOBAL_VARIABLES]
            self.q_next = build_layers(
                self.s_, target_collections, w_initializer, b_initializer)
Beispiel #29
0
    def _build_net(self):
        """Reset the default graph and build the eval/target Q-networks.

        Each network is two layers: a 50-unit ReLU layer followed by a linear
        layer with ``self.n_actions`` outputs. Creates placeholders
        ``self.s``, ``self.q_target`` and ``self.s_``; the outputs
        ``self.q_eval`` (variables in 'eval_net_params') and ``self.q_next``
        (variables in 'target_net_params'); and ``self.loss`` with a 'loss'
        scalar summary and ``self._train_op``.
        """
        tf.reset_default_graph()

        # Layer configuration shared by both networks.
        n_l1 = 50
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)

        def two_layer_net(inputs, c_names):
            """Build layers 'l1' and 'l2' in the current scope; return output.

            ``c_names`` are the collections the variables are stored in.
            """
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                     initializer=w_initializer,
                                     collections=c_names)
                b1 = tf.get_variable('b1', [1, n_l1],
                                     initializer=b_initializer,
                                     collections=c_names)
                hidden = tf.nn.relu(tf.matmul(inputs, w1) + b1)

            with tf.variable_scope('l2'):
                w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                     initializer=w_initializer,
                                     collections=c_names)
                b2 = tf.get_variable('b2', [1, self.n_actions],
                                     initializer=b_initializer,
                                     collections=c_names)
                return tf.matmul(hidden, w2) + b2

        # ------------------ build evaluate_net ------------------
        self.s = tf.placeholder(
            tf.float32, [None, self.n_features], name='s')  # input
        self.q_target = tf.placeholder(
            tf.float32, [None, self.n_actions],
            name='Q_target')  # for calculating loss
        with tf.variable_scope('eval_net'):
            self.q_eval = two_layer_net(
                self.s, ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES])

        with tf.variable_scope('loss'):
            squared_error = tf.squared_difference(self.q_target, self.q_eval)
            self.loss = tf.reduce_mean(squared_error)
            tf.summary.scalar('loss', self.loss)
        with tf.variable_scope('train'):
            optimizer = tf.train.AdamOptimizer(self.lr)
            self._train_op = optimizer.minimize(self.loss)

        # ------------------ build target_net ------------------
        self.s_ = tf.placeholder(
            tf.float32, [None, self.n_features], name='s_')  # input
        with tf.variable_scope('target_net'):
            self.q_next = two_layer_net(
                self.s_, ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES])
Beispiel #30
0
def lossfn(real_input, fake_input, compress, hparams, lsgan, name):
    """Return the averaged discriminator and generator GAN loss.

    The discriminator is applied to ``real_input`` and, with ``reuse=True``,
    to ``fake_input`` inside variable scope ``name``.

    Args:
        real_input: Input passed to the discriminator as the real sample.
        fake_input: Input passed to the discriminator as the fake sample.
        compress: Forwarded unchanged to ``discriminator``.
        hparams: Forwarded unchanged to ``discriminator``.
        lsgan: If true, use least-squares losses with a real target of 0.9;
            otherwise use log (cross-entropy) losses stabilised by 1e-12.
        name: Variable scope in which everything is built.

    Returns:
        ``(dloss + gloss) / 2`` as a tensor.
    """
    eps = 1e-12
    with tf.variable_scope(name):
        real_score = discriminator(
            real_input, compress, hparams, "discriminator")
        fake_score = discriminator(
            fake_input, compress, hparams, "discriminator", reuse=True)

        if lsgan:
            # Discriminator: push real scores to 0.9 and fake scores to 0.
            real_term = tf.reduce_mean(tf.squared_difference(real_score, 0.9))
            fake_term = tf.reduce_mean(tf.square(fake_score))
            dloss = real_term + fake_term
            # Generator: push fake scores to 0.9.
            gloss = tf.reduce_mean(tf.squared_difference(fake_score, 0.9))
        else:  # cross_entropy
            real_term = -tf.reduce_mean(tf.log(real_score + eps))
            fake_term = tf.reduce_mean(tf.log1p(eps - fake_score))
            dloss = real_term - fake_term
            gloss = -tf.reduce_mean(tf.log(fake_score + eps))

        # Both variants average the two losses the same way.
        return (dloss + gloss) / 2