Example #1
    def _build_net(self):
        with tf.name_scope('inputs'):
            self.obs = tf.placeholder(tf.float32, [None, self.n_features],
                                      name="observations")
            self.actions = tf.placeholder(tf.int32, [
                None,
            ],
                                          name="actions_num")
            self.dis_return = tf.placeholder(tf.float32, [None], name="return")
            # self.prediction = tf.placeholder(tf.float32, [None, ], name="actions_value")

        with tf.name_scope('Actor'):
            self.w_u = tf.Variable(tf.random_uniform(
                [self.n_features, self.n_actions]),
                                   dtype=tf.float32,
                                   name="w_u")
            self.action = tf.matmul(self.obs, self.w_u)

        with tf.name_scope('Critic'):
            self.w_v = tf.Variable(tf.random_uniform([self.n_features, 1]),
                                   dtype=tf.float32,
                                   name="w_v")
            self.prediction = tf.matmul(self.obs, self.w_v)
        # # fc1
        # layer = tf.layers.dense(
        #     inputs=self.tf_obs,
        #     units=self.n_features,
        #     activation=None,  # tanh activation
        #     kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.),
        #     bias_initializer=tf.constant_initializer(0.),
        #     name='fc1'
        # )
        # # fc2
        # all_act = tf.layers.dense(
        #     inputs=layer,
        #     units=self.n_actions,
        #     activation=None,
        #     kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.),
        #     bias_initializer=tf.constant_initializer(0.),
        #     name='fc2'
        # )
        self.all_act_prob = tf.nn.softmax(self.action, name='act_prob')

        with tf.name_scope('loss'):
            # to maximize the total reward (log_p * R) we minimize -(log_p * R), since TF optimizers only provide minimize(loss)
            self.neg_log_prob = tf.reduce_sum(
                -tf.log(self.all_act_prob) *
                tf.one_hot(self.actions, self.n_actions),
                axis=1)  # this is negative log of chosen action
            delta = self.dis_return - tf.squeeze(self.prediction, axis=1)  # squeeze so both operands have shape [None]
            loss_v = tf.reduce_mean(tf.square(delta))
            loss_u = tf.reduce_mean(self.neg_log_prob * delta)  # reward-guided loss for the actor

        with tf.name_scope('update'):
            # tf.gradients returns a list, so take element [0]; assign_sub makes
            # both updates plain gradient-descent steps on their respective losses
            self.w_u = tf.assign_sub(
                self.w_u,
                self.lr_actor * tf.gradients(loss_u, self.w_u)[0])
            self.w_v = tf.assign_sub(
                self.w_v, self.lr_critic * tf.gradients(loss_v, self.w_v)[0])
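
A minimal driver sketch (not from the original source) showing how the graph above could be run; the agent object, its placeholders, and a live tf.Session are assumed, and run_update is a hypothetical helper name.

def run_update(sess, agent, obs_batch, action_batch, return_batch):
    # Running the rebound assign ops (agent.w_u / agent.w_v) applies one
    # gradient step each to the actor and critic weights for the fed batch.
    sess.run([agent.w_u, agent.w_v],
             feed_dict={agent.obs: obs_batch,             # [batch, n_features]
                        agent.actions: action_batch,      # [batch] integer actions
                        agent.dis_return: return_batch})  # [batch] discounted returns
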
Example #2
def test_gradient():

    x = tf.constant(2.0)
    z = tf.constant(1.0)

    with tf.GradientTape() as t:
        k = x - 1
        t.watch(k)           # k is not a Variable, so it must be watched explicitly
        y = func_y(k, z)

    grad = t.gradient(y, k)  # dy/dk evaluated at k = x - 1
    return grad
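
The snippet above leaves func_y undefined; one hypothetical definition (purely illustrative, not from the original source) that makes the test self-contained:

import tensorflow as tf

def func_y(k, z):
    # hypothetical test function: y = k^2 * z, so dy/dk = 2*k*z
    return tf.square(k) * z

# with x = 2.0 and z = 1.0 above, k = 1.0 and t.gradient(y, k) evaluates to 2.0
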
def CG(b, x0, TOLERANCE=1.0e-10, MAX_ITERATIONS=100):
    """
    A function to solve [A]{x} = {b} linear equation system with the
    conjugate gradient method.
    More at: http://en.wikipedia.org/wiki/Conjugate_gradient_method
    ========== Parameters ==========
    A : array
        A real symmetric positive definite matrix.
        In our case this would be the Hessian. We want to avoid forming it
        explicitly, so it is never used directly; instead H*d is computed with
        finite differences by calling finite_differences(x, d), where x is the
        current point and d is the direction of movement.
    b : vector
        The right-hand side (RHS) vector of the system.
        In our case it is the gradient at a specific point. Because gradients
        must be evaluated at more than just that one point, the gradient is
        exposed as a function that can be called wherever it is needed.
    x0 : vector
        The starting guess for the solution. Anything will do.
    MAX_ITERATIONS : integer
        Maximum number of iterations. Iteration will stop after MAX_ITERATIONS
        steps even if the specified tolerance has not been achieved.
    TOLERANCE : float
        Tolerance to achieve. The algorithm will terminate when either
        the relative or the absolute residual is below TOLERANCE.
    """

    #   Initializations
    x = x0
    d = -gradient(x)                     # initial search direction: steepest descent
    r0 = b - finite_differences(x, d)    # initial residual


    #   Start iterations
    for i in range(MAX_ITERATIONS):
        a = float(np.dot(d.T, r0) / np.dot(d.T, finite_differences(x, d)))
        x = x - a * gradient(x)

        ri = r0 - np.dot(finite_differences(x, d), d)

        # print(i, np.linalg.norm(ri))
        # Checks stopping condition
        if np.linalg.norm(ri) < TOLERANCE:
            return x

        # Otherwise go on to find a new direction
        beta = float(np.dot(gradient(x).T, finite_differences(x, d)))
        d = -gradient(x) + beta * d
        r0 = ri
    return x
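
A minimal usage sketch (not part of the original source): the module-level gradient() and finite_differences() helpers that CG relies on are stubbed here with an explicit matrix purely to show how the pieces plug together; whether the iteration converges depends on the problem.

import numpy as np

# Hypothetical quadratic test problem f(x) = 0.5*x^T A x - b_vec^T x,
# so gradient(x) = A @ x - b_vec and the Hessian-vector product H*d is A @ d.
A = np.array([[4.0, 1.0],
              [1.0, 3.0]])
b_vec = np.array([1.0, 2.0])

def gradient(x):
    return A.dot(x) - b_vec

def finite_differences(x, d):
    # stand-in for the finite-difference Hessian-vector product used by CG
    return A.dot(d)

x_approx = CG(b_vec, x0=np.zeros(2))
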
Example #4
def mle_loss(B, W, mu, tau):
    """
    Calculate the negative log-likelihood used as the maximum-likelihood loss
    Parameters:
    --------
    B: N by 1 vector. dtype: tf.float32
    W: N by N matrix (?). dtype: tf.float32
    mu: a scalar. dtype: tf.float32
    tau: a scalar, same as sigma in gaussian distribution. dtype: tf.float32
    """
    J = tf.gradients(B, [W])[0]
    _B = tf.math.square(B[1:] - B[:-1] - mu) - tf.math.log(tau)
    # TODO: multiply by Jacobian matrix, unclear
    log_prob = _B * tf.linalg.det(J)

    neg_log_likelihood = tf.reduce_sum(log_prob)

    return neg_log_likelihood
Example #5
def wasserstein_loss(real_scores, fake_scores, real, generated, discriminator,
                     gp_weight=10.0):
    # `real`, `generated`, `discriminator` and `gp_weight` were free names in the
    # original snippet; they are taken as arguments here so the function is self-contained
    batch_size = real_scores.shape[0]

    avg_real_scores = tf.math.reduce_mean(real_scores)
    avg_fake_scores = tf.math.reduce_mean(fake_scores)

    gen_loss = -avg_fake_scores

    # gradient penalty on points interpolated between real and generated samples
    alpha = tf.random.uniform([batch_size, 1, 1, 1])
    interpolated = (alpha * generated) + ((1 - alpha) * real)
    critic_interpolated = discriminator(interpolated)
    # graph-mode gradient; under eager execution use tf.GradientTape instead
    critic_gradient = tf.gradients(critic_interpolated, interpolated)[0]
    norm_critic_gradient = tf.math.sqrt(
        tf.reduce_sum(tf.math.square(critic_gradient), [1, 2, 3]))
    norm_critic_center = norm_critic_gradient - 1
    gradient_penalty = tf.reduce_mean(tf.math.square(norm_critic_center))

    discrim_loss = -avg_real_scores + avg_fake_scores + (gp_weight *
                                                         gradient_penalty)

    return gen_loss, discrim_loss
Example #6
def wgangp_loss(logits_real, logits_fake, batch_size, x, G_sample):
    """Compute the WGAN-GP loss.
    
    Inputs:
    - logits_real: Tensor, shape [batch_size, 1], output of discriminator
        Log probability that the image is real for each real image
    - logits_fake: Tensor, shape[batch_size, 1], output of discriminator
        Log probability that the image is real for each fake image
    - batch_size: The number of examples in this batch
    - x: the input (real) images for this batch
    - G_sample: the generated (fake) images for this batch [batch_size,784]
    
    Returns:
    - D_loss: discriminator loss scalar
    - G_loss: generator loss scalar
    """
    # TODO: compute D_loss and G_loss
    D_loss = tf.reduce_mean(logits_fake-logits_real)
    G_loss = -tf.reduce_mean(logits_fake)

    # lambda from the paper
    lam = 10
    
    # random sample of batch_size (tf.random_uniform)
    eps = tf.random_uniform([batch_size,1])
    x_hat = eps * x + (1-eps)* G_sample

    # Gradients of Gradients is kind of tricky!
    with tf.variable_scope('',reuse=True) as scope:
        D_x_hat = discriminator(x_hat)
        grad_D_x_hat = tf.gradients(D_x_hat, x_hat)[0]

    grad_norm = tf.norm(grad_D_x_hat,axis = 1)
    grad_pen = lam * tf.reduce_mean((grad_norm-1)**2)

    D_loss += grad_pen

    return D_loss, G_loss
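
For comparison, a minimal sketch of the same gradient-penalty term written against the TF2 eager API with tf.GradientTape; the discriminator callable and the [batch_size, 784] shapes are assumptions carried over from the snippet above.

import tensorflow as tf

def gradient_penalty_tf2(discriminator, x, G_sample, batch_size, lam=10.0):
    # sketch only: x and G_sample are assumed to be [batch_size, 784] float tensors
    eps = tf.random.uniform([batch_size, 1])
    x_hat = eps * x + (1 - eps) * G_sample

    with tf.GradientTape() as tape:
        tape.watch(x_hat)                  # x_hat is not a Variable, so watch it
        D_x_hat = discriminator(x_hat)

    grad = tape.gradient(D_x_hat, x_hat)   # d D(x_hat) / d x_hat
    grad_norm = tf.norm(grad, axis=1)
    return lam * tf.reduce_mean((grad_norm - 1.0) ** 2)
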
Example #7
def network_mnist(images, labels,
                  mode):  #features=images,labels,mode=TEST or TRAIN
    # Input Layer
    input_layer = tf.reshape(images["x"], [-1, 28, 28, 1])

    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(inputs=input_layer,
                             filters=32,
                             kernel_size=[5, 5],
                             padding="same",
                             activation=tf.nn.relu)

    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(inputs=pool1,
                             filters=64,
                             kernel_size=[5, 5],
                             padding="same",
                             activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense Layer
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat,
                            units=128,
                            activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense,
                                rate=0.4,
                                training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units=10)

    # Returns logits and the representer (gradient of the dense layer w.r.t. the input)
    return logits, tf.gradients(dense, input_layer)[0]
Example #8
 def eval_deriv(self, bhat, b):
     grads = tf.gradients(self.eval(bhat, b), bhat)
     return grads[0]
Example #9
	def __init__(self, scope, globalAC):

		# in __init__ we define the variables needed to build the loss, the train ops
		# and the other important parts of the tensor graph ...

		if scope == GLOBAL_NET_SCOPE: # let us make a global net

			with tf.variable_scope(scope):

				# give me some placeholders, come on !
				self.s = tf.placeholder(tf.float32, [None, N_S],'S')

				# the network will return para according to self.s
				# para of action net and critic net
				self.a_para, self.c_para = self._build_net(scope)[-2:]

		else: # let us make a local worker network
			with tf.variable_scope(scope):

				# give me some placeholder to give the net

				# this is the input of net
				self.s = tf.placeholder(tf.float32, [None, N_S],'S')

				# this is the action from memory
				self.a_memory = tf.placeholder(tf.float32, [None, A_S],'A')

				# this is the value target of q_value
				self.v_target = tf.placeholder(tf.float32, [None, 1],'v_target')

				# the network will return para according to self.s
				# para of action net and critic net
				# mu and sigma are the outputs of action_net for the chosen action
				# mu and sigma are the parameters of a normal distribution
				# self.v is the value of this state
				mu, sigma, self.v, self.a_para, self.c_para = self._build_net(scope)

				# we need self.v_target and self.v to get c_loss
				td = tf.subtract(self.v_target, self.v, name='td_error')
				# this is the loss for q-learning, used by the train operation of critic_net

				with tf.variable_scope('c_loss'):
					self.c_loss = tf.reduce_mean(tf.square(td))


				with tf.variable_scope('get_action_distribution'):
					mu = mu*A_BOUND[1]
					sigma += 1e-4
					normal_dist = tf.distributions.Normal(mu, sigma)


				with tf.variable_scope('a_loss'):
					# we need the action from memory to get a_loss
					log_prob = normal_dist.log_prob(self.a_memory)

					error = log_prob*td

					entropy = normal_dist.entropy() # encourage exploration

					error = ENTROPY_BETA * entropy + error

					self.a_loss = tf.reduce_mean(-error)  # minimize the negative of the objective

				with tf.variable_scope('chosen_action'):
					# use the action_net of local net to choose action
					self.a = tf.clip_by_value(
											tf.squeeze(
														normal_dist.sample(1),
														axis = 0
														),
											A_BOUND[0],
											A_BOUND[1]
											)

				with tf.variable_scope('local_gradient'):
					# get the gradient of local net
					# to train local network and update global network
					self.a_grad = tf.gradients(self.a_loss, self.a_para)
					self.c_grad = tf.gradients(self.c_loss, self.c_para)

				with tf.variable_scope('sync'):
					# todo
					pass


				with tf.variable_scope('pull'):
					# pull the para of global action_net to the local action_net
					self.pull_a_para_op = [local_para.assign(global_para) for local_para, global_para in zip(self.a_para, globalAC.a_para)]

					# pull the para of global critic_net to the local critic_net
					self.pull_c_para_op = [local_para.assign(global_para) for local_para, global_para in zip(self.c_para, globalAC.c_para)]


				with tf.variable_scope('push'):
					# push the gradients of training to the global net
					# use the gradients caculated from local net to train global net

					self.update_gradient_action_op = optimizer_action.apply_gradients(zip(self.a_grad, globalAC.a_para))
					self.update_gradient_critic_op = optimizer_critic.apply_gradients(zip(self.c_grad, globalAC.c_para))



	def _build_net(self, scope):
		# to define a network structure for action_net ,critic_net in global and local network
		w_init = tf.random_normal_initializer(0.0, 0.1)

		with tf.variable_scope('actor'):
			# we will get some normal_distributions of action, number of distributions is N_A
			output_a = tf.layers.dense(
										self.s,
										20,
										tf.nn.relu6,
										kernel_initializer = w_init,
										name = 'output_a'
										)

			mu = tf.layers.dense(  # get the mu of a normal distribution of action, dim of mu is N_A
									output_a,
									N_A,
									tf.nn.tanh,
									kernel_initializer = w_init,
									name = 'mu'
								)

			sigma = tf.layers.dense( # get the sigma of a normal distribution of action, dim of sigma is N_A
									output_a,
									N_A,
									tf.nn.softplus,
									kernel_initializer = w_init,
									name = 'sigma'
								)

		with tf.variable_scope('critic'):
			output_c = tf.layers.dense(
										self.s,
										20,
										tf.nn.relu6,
										kernel_initializer = w_init,
										name = 'output_c'
										)

			v = tf.layers.dense(  # we get the value of the state self.s
								output_c,
								1,
								kernel_initializer = w_init,
								name = 'v'
								)

		a_para = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = scope+'/actor')
		c_para = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = scope+'/critic')

		return mu, sigma, v, a_para, c_para

 
	def update_global(self, feed_dict): # push the gradients to the global net to train
		# to train the global net using the gradients calculated from the local net

		SESS.run([self.update_gradient_action_op, self.update_gradient_critic_op], feed_dict)
		# some data is from placeholder

	def pull_global(self): #pull the new para from global net to local net
		SESS.run([self.pull_a_para_op, self.pull_c_para_op])

	def choose_action(self, s):
		# we need the current state to compute an action
		s = s[np.newaxis, :]

		return SESS.run(self.a, {self.s:s})[0]
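
A rough sketch (not from the original source) of how a worker could drive the class above; the environment API, discount factor, update interval and the global SESS session are assumptions that mirror the usual A3C training loop.

import numpy as np

def work(env, local_net, gamma=0.9, update_every=10, max_steps=200):
    # hypothetical worker loop built around choose_action / update_global / pull_global
    buffer_s, buffer_a, buffer_r = [], [], []
    s = env.reset()
    for step in range(1, max_steps + 1):
        a = local_net.choose_action(s)
        s_, r, done, _ = env.step(a)
        buffer_s.append(s); buffer_a.append(a); buffer_r.append(r)

        if step % update_every == 0 or done:
            # bootstrap the value target from the critic, then discount backwards
            v_s_ = 0.0 if done else SESS.run(local_net.v,
                                             {local_net.s: s_[np.newaxis, :]})[0, 0]
            targets = []
            for r_t in buffer_r[::-1]:
                v_s_ = r_t + gamma * v_s_
                targets.append(v_s_)
            targets.reverse()
            feed = {local_net.s: np.vstack(buffer_s),
                    local_net.a_memory: np.vstack(buffer_a),
                    local_net.v_target: np.vstack(targets)}
            local_net.update_global(feed)  # push local gradients to the global net
            local_net.pull_global()        # pull the refreshed global parameters
            buffer_s, buffer_a, buffer_r = [], [], []
        s = s_
        if done:
            break
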
Example #10
            tf.ones([self.batch_size, self.num_steps], dtype=tf.float32),
        )
        self.cost = tf.reduce_sum(loss)
        
        # self.relu_out = tf.nn.relu(tf.reshape(logits, [-1, coord_size]))
        # #self.softmax_out = tf.nn.softmax(tf.reshape(logits, [-1, coord_size]))
        # self.predict = tf.cast(self.relu_out, tf.int32)
        correct_prediction = tf.equal(self.dense, tf.reshape(self.input_obj.targets, [-1]))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        if not is_training:
            return
        
        self.learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self.new_lr = tf.placeholder(tf.float32, shape=[])
        self.lr_update = tf.assign(self.learning_rate, self.new_lr)

    def assign_lr(self, session, lr_value):
        session.run(self.lr_update, feed_dict={self.new_lr: lr_value})

def train(train_data, num_epochs, num_layer, batch_size, model_save_name,
         learning_rate=1.0, max_lr_epoch=10, lr_decay=0.93):
Example #11
score = tf.matmul(layer1, W2)
probability = tf.nn.sigmoid(score)

tvars = tf.trainable_variables()

input_y = tf.placeholder(name='input_y', shape=[None, 1], dtype=tf.float32)
advantages = tf.placeholder(name='reward_signal', dtype=tf.float32)
W1grad = tf.placeholder(name='batch_grad1', dtype=tf.float32)
W2grad = tf.placeholder(name='batch_grad2', dtype=tf.float32)
batchGrad = [W1grad, W2grad]

loglik = tf.log(input_y*(input_y-probability) + (1-input_y)*(input_y+probability))
adam = tf.train.AdamOptimizer(learning_rate=lr)
loss = -tf.reduce_mean(loglik * advantages)

newGrads = tf.gradients(loss, tvars)
updateGrads = adam.apply_gradients(zip(batchGrad, tvars))

""" Model Network """
h_model = 256

input_data = tf.placeholder(name='input_data', shape=[None, 5], dtype=tf.float32)
with tf.variable_scope('rnnlm'):
    # weights and biases
    softmax_w = tf.get_variable('softmax_w', shape=[h_model, 50])
    softmax_b = tf.get_variable('softmax_b', shape=[50])

previous_state = tf.placeholder(name='previous_state', shape=[None, 5], dtype=tf.float32)

W1M = tf.get_variable(name='W1M', shape=[5, h_model],
                      initializer=tf.contrib.layers.xavier_initializer())
Example #12
    def trainModel(self, on_policy=False, target=False):
        # sampling
        if on_policy:
            mini_batch = [self.memory[-1]]
        else:
            mini_batch = random.sample(self.memory, self.batch_size)

        states = [x[0] for x in mini_batch]
        actions = [[x[1]] for x in mini_batch]
        rewards = [x[2] for x in mini_batch]
        #next_states = [x[3] for x in mini_batch]
        mus = [x[3] for x in mini_batch]
        dones = [int(x[4]) for x in mini_batch]

        states = np.asarray(states)
        actions = np.asarray(actions)
        mus = np.asarray(mus)

        states = tf.convert_to_tensor(states, dtype=tf.float32)
        actions = tf.convert_to_tensor(actions, dtype=tf.int32)
        mus = tf.convert_to_tensor(mus, dtype=tf.float32)

        q = self.critic(states)
        pi = self.actor(states)
        pi_avg = self.polyak(states)

        #q_a = get_by_index(q, actions) # get_by_index might not work
        # might need to implement tf.gather another way

        #pi_a = get_by_index(pi, actions)

        a_index = tf.stack([tf.range(tf.shape(actions)[0]), actions[:, 0]],
                           axis=-1)

        q_a = tf.gather_nd(q, a_index)
        pi_a = tf.gather_nd(pi, a_index)

        v = tf.reduce_sum(q * pi, axis=-1)  # might need ,axis = -1

        rho = pi / (mus + 1e-6)  # importance weights; mus holds the behaviour-policy probabilities
        rho_a = tf.gather_nd(rho, a_index)
        rho_bar = tf.minimum(1.0, rho_a)

        print(v, '\n ############################################## \n', dones)
        q_ret = v[-1] * dones[-1]

        q_rets = []

        for i in reversed(range(len(rewards))):
            q_ret = rewards[i] + self.gamma * q_ret
            q_rets.append(q_ret)
            q_ret = (rho_bar[i] * (q_ret - q_a[i])) + v[i]
            # (edit1?) need correction for when new sequence is beginning ??

        q_rets.reverse()
        #q_ret = tf.reshape(tf.stack(values=q_rets, axis=1), [-1])	# (edit1) in reference to seq_to_batch
        # OpenAI baseline a2c.utils
        print(q_ret)
        print('#############################################')
        print(q_ret.shape)
        q_ret = tf.expand_dims(tf.convert_to_tensor(q_ret, dtype=tf.float32),
                               axis=1)

        # adv = q_ret - v
        loss_f = -rho_bar * tf.log(pi_a + 1e-6) * (q_ret - v)
        #loss_f = tf.reduce_mean(loss_f)
        loss_bc = -tf.maximum((1 - c / rho), 0.0) * pi * tf.log(pi) * (
            q - v)  # note that tf.____ functions might need to be
        # tf.math.____
        # might need to reshape either q or v
        #loss_bc = tf.reduce_mean(loss_bc)
        loss_q = tf.reduce_mean(tf.square(tf.stop_gradient(q_ret) - q_a) * 0.5)

        # (edit1) in reference t

        g = tf.gradients(-(loss_f + loss_bc), pi)
        k = pi_avg / (pi + 1e-6)

        #k_dot_g = tf.reduce_sum(k*g, axis=-1)
        grad_pi = tf.maximum(0.0,
                             (tf.reduce_sum(k * g, axis=-1) - self.delta) /
                             (tf.reduce_sum(tf.square(k), axis=-1) + 1e-6))
        grad_pi = tf.gradients(grad_pi, self.actor.trainable_variables)
        grad_v = tf.gradients(loss_q, self.critic.trainable_variables)

        trainer_pi = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        trainer_v = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

        # apply_gradients expects (gradient, variable) pairs
        trainer_pi.apply_gradients(zip(grad_pi, self.actor.trainable_variables))
        trainer_v.apply_gradients(zip(grad_v, self.critic.trainable_variables))

        self.update_polyak()
Example #13
    def __init__(self, scope, globalAC):

        # in __init__ we define the variables needed to build the loss, the train ops
        # and the other important parts of the tensor graph ...

        if scope == GLOBAL_NET_SCOPE:  # let us make a global net

            with tf.variable_scope(scope):

                # give me some placeholders, come on !
                self.s = tf.placeholder(tf.float32, [None, N_S], 'S')

                # the network will return para according to self.s
                # para of action net and critic net
                self.a_para, self.c_para = self._build_net(scope)[-2:]

        else:  # let us make a local worker network
            with tf.variable_scope(scope):

                # give me some placeholder to give the net

                # this is the input of net
                self.s = tf.placeholder(tf.float32, [None, N_S], 'S')

                # this is the action from memory
                self.a_memory = tf.placeholder(tf.float32, [None, A_S], 'A')

                # this is the value target of q_value
                self.v_target = tf.placeholder(tf.float32, [None, 1],
                                               'v_target')

                # the network will return para according to self.s
                # para of action net and critic net
                # mu and sigma are the outputs of action_net for the chosen action
                # mu and sigma are the parameters of a normal distribution
                # self.v is the value of this state
                mu, sigma, self.v, self.a_para, self.c_para = self._build_net(
                    scope)

                # we need self.v_target and self.v to get c_loss
                td = tf.subtract(self.v_target, self.v, name='td_error')
                # this is the loss for q-learning, used by the train operation of critic_net

                with tf.variable_scope('c_loss'):
                    self.c_loss = tf.reduce_mean(tf.square(td))

                with tf.variable_scope('get_action_distribution'):
                    mu = mu * A_BOUND[1]
                    sigma += 1e-4
                    normal_dist = tf.distributions.Normal(mu, sigma)

                with tf.variable_scope('a_loss'):
                    # we need the action from memory to get a_loss
                    log_prob = normal_dist.log_prob(self.a_memory)

                    error = log_prob * td

                    entropy = normal_dist.entropy()  # encourage exploration

                    error = ENTROPY_BETA * entropy + error

                    self.a_loss = tf.reduce_mean(-error)  # minimize the negative of the objective

                with tf.variable_scope('chosen_action'):
                    # use the action_net of local net to choose action
                    self.a = tf.clip_by_value(
                        tf.squeeze(normal_dist.sample(1), axis=0), A_BOUND[0],
                        A_BOUND[1])

                with tf.variable_scope('local_gradient'):
                    # get the gradient of local net
                    # to train local network and update global network
                    self.a_grad = tf.gradients(self.a_loss, self.a_para)
                    self.c_grad = tf.gradients(self.c_loss, self.c_para)

                with tf.variable_scope('sync'):
                    # todo
                    pass

                with tf.variable_scope('pull'):
                    # pull the para of global action_net to the local action_net
                    self.pull_a_para_op = [
                        local_para.assign(global_para)
                        for local_para, global_para in zip(
                            self.a_para, globalAC.a_para)
                    ]

                    # pull the para of global critic_net to the local critic_net
                    self.pull_c_para_op = [
                        local_para.assign(global_para)
                        for local_para, global_para in zip(
                            self.c_para, globalAC.c_para)
                    ]

                with tf.variable_scope('push'):
                    # push the gradients of training to the global net
                    # use the gradients caculated from local net to train global net

                    self.update_gradient_action_op = optimizer_action.apply_gradients(
                        zip(self.a_grad, globalAC.a_para))
                    self.update_gradient_critic_op = optimizer_critic.apply_gradients(
                        zip(self.c_grad, globalAC.c_para))
Example #14
 def backward(self, input, *args, **kwargs):
     if self.identity: return input
     if self.mas:
         return input * tf.cast(self.mask, tf.float32)
     else:
         return tf.gradients(self, self.input, input)[0]
Example #15
 def backward(self, input):
     return tf.gradients(self, self.input, input)[0]
Example #16
logits = tf.matmul(h_fc1,W_fc2) + b_fc2

#var = [noise]
var = [x_noise]

with tf.name_scope("cross_entropy"):
  #cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits,labels = y))

  cost = tf.reduce_sum(tf.multiply(logits,y))
  print(cost)
  tf.summary.scalar('cross entropy',cost)

with tf.name_scope("train"):

  #k = tf.Variable()
  grad = tf.gradients(cost, x_noise)[0]
  mean, var = tf.nn.moments(grad, axes=[0])
  learning_rate = tf.reciprocal(tf.sqrt(var))
  train_step = tf.train.AdamOptimizer(learning_rate).minimize(-cost,var_list=var)



with tf.name_scope("accuracy"):
  correct_prediction = tf.equal(tf.argmax(logits,1),tf.argmax(y,1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
  tf.summary.scalar('accuracy',accuracy)

#with tf.name_scope("test_accuracy"):
#  correct_prediction = tf.equal(tf.argmax(logits,1),tf.argmax(y,1))
#  test_accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
#  tf.summary.scalar('test_accuracy',test_accuracy)
Example #17
def mle_gradient(loss, W, mu, tau, P0):
    return tf.gradients(loss, [W, mu, tau, P0])