def main():
    tf.set_random_seed(10)
    with tf.Session() as sess:
        rnn_cell = tf.nn.rnn_cell.LSTMCell(10)

        # defining initial state
        initial_state = rnn_cell.zero_state(4, dtype=tf.float32)

        inputs = tf.Variable(tf.random_uniform(shape=(4, 30, 100)), name='input')
        inputs = tf.identity(inputs, "input_node")

        # 'state' is a tensor of shape [batch_size, cell_state_size]
        outputs, state = tf.nn.dynamic_rnn(rnn_cell, inputs,
                                           initial_state=initial_state,
                                           dtype=tf.float32)

        y1 = tf.identity(outputs, 'outputs')
        y2 = tf.identity(state, 'state')
        t1 = tf.ones([4, 30, 10])
        t2 = tf.ones([4, 10])
        loss = tf.reduce_sum((y1 - t1) * (y1 - t1)) + tf.reduce_sum((y2 - t2) * (y2 - t2))
        tf.identity(loss, name="lstm_loss")
        # tf.summary.FileWriter('/tmp/log', tf.get_default_graph())

        net_outputs = map(lambda x: tf.get_default_graph().get_tensor_by_name(x),
                          argv[2].split(','))
        run_model(net_outputs, argv[1], None, argv[3] == 'True')
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    num_inducing = len(self.feature)
    err = self.Y - self.mean_function(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kus = self.feature.Kuf(self.kern, Xnew)
    sigma = tf.sqrt(self.likelihood.variance)
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
            - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
            - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
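# The algebra in _build_predict above follows the sparse GP regression
# posterior referenced by the docstring. In matrix form, with
# L = chol(Kuu), A = L^{-1} Kuf / sigma, B = A A^T + I and LB = chol(B):
#   mean(Xnew) = tmp2^T c,  where tmp1 = L^{-1} Kus, tmp2 = LB^{-1} tmp1
#                           and   c = LB^{-1} (A err) / sigma
#   var(Xnew)  = K(Xnew) - tmp1^T tmp1 + tmp2^T tmp2
# The -tmp1^T tmp1 term subtracts the Nystrom approximation of the prior
# covariance, and +tmp2^T tmp2 adds back the posterior uncertainty carried
# by the inducing points.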
def _compute_average_correct(input_layer, labels, per_example_weights, topk=1):
    """Returns the numerator and denominator of classifier accuracy."""
    dtype = tf.float32
    if topk == 1:
        true_labels = tf.argmax(input_layer, 1)
        predictions = tf.argmax(labels, 1)
        in_topk = tf.equal(true_labels, predictions)
    else:
        _, true_labels = tf.nn.top_k(labels, k=1)
        true_labels = tf.reshape(true_labels, [-1])
        in_topk = tf.nn.in_top_k(tf.cast(input_layer, dtype), true_labels, k=topk)
    correct_predictions = tf.cast(in_topk, dtype)

    # If individual examples are weighted, then we want to normalize by that.
    if per_example_weights is not None:
        per_example_weights = tf.convert_to_tensor(per_example_weights,
                                                   name='per_example_weights')
        if ((input_layer.get_shape() and
             not per_example_weights.get_shape().is_compatible_with(
                 [input_layer.get_shape().dims[0]])) or
                per_example_weights.get_shape().ndims != 1):
            raise ValueError(
                'per_example_weights must be a vector of the same length as '
                'labels: was %s but expected (%s,)' %
                (per_example_weights.get_shape(), input_layer.get_shape()[0]))
        float_weights = tf.cast(per_example_weights, dtype)
        # TODO(eiderman): This should use an op that doesn't support broadcasting.
        correct_predictions *= float_weights
        num_examples = tf.reduce_sum(float_weights)
    else:
        # shape only holds ints, but we want to always return the same type
        # for num_examples to make everything compatible.
        num_examples = tf.cast(tf.gather(tf.shape(input_layer), 0), dtype)
    return tf.reduce_sum(correct_predictions), num_examples
def make_losses(self, pred_relevant, targets_preprocessed, objective_indices, objective_coeffs):
    # make a loss function and compute some summary numbers
    per_target_loss = my_ops.mse_ignore_nans(pred_relevant, targets_preprocessed,
                                             reduction_indices=0)
    loss = tf.reduce_sum(per_target_loss)

    # compute objective value, just for logging purposes
    # TODO add multiplication by the objective_coeffs (somehow not trivial)
    obj = tf.reduce_sum(self.postprocess_predictions(targets_preprocessed), 1)
    #obj = tf.sum(self.postprocess_predictions(targets_preprocessed[:,objective_indices]) * objective_coeffs[None,:], axis=1)
    obj_nonan = tf.where(tf.is_nan(obj), tf.zeros_like(obj), obj)
    num_valid_targets = tf.reduce_sum(1 - tf.cast(tf.is_nan(obj), tf.float32))
    mean_obj = tf.reduce_sum(obj_nonan) / num_valid_targets

    # summaries
    obj_sum = tf.summary.scalar("objective_todo", mean_obj)
    #TODO
    per_target_loss_sums = []
    #per_target_loss_sums = [tf.summary.scalar(name, loss) for name,loss in zip(self.target_names,per_target_loss)]
    loss_sum = tf.summary.scalar("full loss", loss)
    #self.per_target_loss = tf.get_variable('avg_targets', [self.target_dim], initializer=tf.constant_initializer(value=0.))

    full_loss = loss
    errs_to_print = [loss]
    short_summary = [loss_sum]
    detailed_summary = per_target_loss_sums + [obj_sum]

    return full_loss, errs_to_print, short_summary, detailed_summary
def routing(input, b_IJ):
    ''' The routing algorithm.

    Args:
        input: A Tensor with [batch_size, num_caps_l=1152, 1, length(u_i)=8, 1]
               shape, num_caps_l meaning the number of capsules in layer l.
    Returns:
        A Tensor of shape [batch_size, num_caps_l_plus_1, length(v_j)=16, 1]
        representing the vector output `v_j` in layer l+1.
    Notes:
        u_i represents the vector output of capsule i in layer l, and
        v_j the vector output of capsule j in layer l+1.
    '''
    # W: [num_caps_j, num_caps_i, len_u_i, len_v_j]
    W = tf.get_variable('Weight', shape=(1, 1152, 10, 8, 16), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=cfg.stddev))

    # Eq.2, calc u_hat
    # do tiling for input and W before matmul
    # input => [batch_size, 1152, 10, 8, 1]
    # W => [batch_size, 1152, 10, 8, 16]
    input = tf.tile(input, [1, 1, 10, 1, 1])
    W = tf.tile(W, [cfg.batch_size, 1, 1, 1, 1])
    assert input.get_shape() == [cfg.batch_size, 1152, 10, 8, 1]

    # in last 2 dims:
    # [8, 16].T x [8, 1] => [16, 1] => [batch_size, 1152, 10, 16, 1]
    u_hat = tf.matmul(W, input, transpose_a=True)
    assert u_hat.get_shape() == [cfg.batch_size, 1152, 10, 16, 1]

    # line 3, for r iterations do
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # line 4:
            # => [1, 1152, 10, 1, 1]
            c_IJ = tf.nn.softmax(b_IJ, dim=2)
            c_IJ = tf.tile(c_IJ, [cfg.batch_size, 1, 1, 1, 1])
            assert c_IJ.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]

            # line 5:
            # weighting u_hat with c_IJ, element-wise in the last two dims
            # => [batch_size, 1152, 10, 16, 1]
            s_J = tf.multiply(c_IJ, u_hat)
            # then sum in the second dim, resulting in [batch_size, 1, 10, 16, 1]
            s_J = tf.reduce_sum(s_J, axis=1, keep_dims=True)
            assert s_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 6:
            # squash using Eq.1
            v_J = squash(s_J)
            assert v_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 7:
            # tile v_J from [batch_size, 1, 10, 16, 1] to [batch_size, 1152, 10, 16, 1],
            # then matmul in the last two dims: [16, 1].T x [16, 1] => [1, 1],
            # reduce sum in the batch_size dim, resulting in [1, 1152, 10, 1, 1]
            v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])
            u_produce_v = tf.matmul(u_hat, v_J_tiled, transpose_a=True)
            assert u_produce_v.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]
            b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)

    return v_J
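# routing() above calls squash(), which is defined elsewhere in its source
# module. A minimal sketch consistent with Eq. 1 of the capsule paper,
# v_j = (||s_j||^2 / (1 + ||s_j||^2)) * (s_j / ||s_j||); the epsilon is an
# assumed guard against division by zero:
def squash(vector, epsilon=1e-9):
    # squared norm over the capsule dimension (second-to-last axis here)
    sq_norm = tf.reduce_sum(tf.square(vector), axis=-2, keep_dims=True)
    scale = sq_norm / (1. + sq_norm) / tf.sqrt(sq_norm + epsilon)
    return scale * vector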
def _compute_precision_recall(input_layer, labels, threshold, per_example_weights):
    """Returns the numerator of both, the denominator of precision and recall."""
    # To apply per_example_weights, we need to collapse each row to a scalar, but
    # we really want the sum.
    labels.get_shape().assert_is_compatible_with(input_layer.get_shape())
    relevant = tf.to_float(tf.greater(labels, 0))
    retrieved = tf.to_float(tf.greater(input_layer, threshold))
    selected = relevant * retrieved

    if per_example_weights is not None:
        per_example_weights = tf.convert_to_tensor(per_example_weights,
                                                   name='per_example_weights')
        if selected.get_shape().dims:
            per_example_weights.get_shape().assert_is_compatible_with(
                [selected.get_shape().dims[0]])
        else:
            per_example_weights.get_shape().assert_is_compatible_with([None])
        per_example_weights = tf.to_float(tf.greater(per_example_weights, 0))
        selected = functions.reduce_batch_sum(selected) * per_example_weights
        relevant = functions.reduce_batch_sum(relevant) * per_example_weights
        retrieved = functions.reduce_batch_sum(retrieved) * per_example_weights

    sum_relevant = tf.reduce_sum(relevant)
    sum_retrieved = tf.reduce_sum(retrieved)
    selected = tf.reduce_sum(selected)
    return selected, sum_retrieved, sum_relevant
def sq_dist(boxlist1, boxlist2, scope=None):
    """Computes the pairwise squared distances between box corners.

    This op treats each box as if it were a point in a 4d Euclidean space and
    computes pairwise squared distances.

    Mathematically, we are given two matrices of box coordinates X and Y,
    where X(i,:) is the i'th row of X, containing the 4 numbers defining the
    corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
    boxlist2. We compute

      Z(i,j) = ||X(i,:) - Y(j,:)||^2
             = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:)

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes
      scope: name scope.

    Returns:
      a tensor with shape [N, M] representing pairwise distances
    """
    with tf.name_scope(scope, 'SqDist'):
        sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
        sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
        innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
                              transpose_a=False, transpose_b=True)
        return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
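# A quick NumPy sanity check (illustrative only) of the expansion sq_dist
# relies on, ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y, on made-up corners:
def _sq_dist_numpy_check():
    import numpy as np
    x = np.random.rand(3, 4)
    y = np.random.rand(5, 4)
    direct = ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)
    expanded = (x ** 2).sum(1)[:, None] + (y ** 2).sum(1)[None, :] - 2 * x.dot(y.T)
    assert np.allclose(direct, expanded)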
def testPartialShapes(self):
    np.random.seed(1618)

    # Input shape is unknown.
    reduction_axes = [1, 2]
    c_unknown = tf.placeholder(tf.float32)
    s_unknown = tf.reduce_sum(c_unknown, reduction_axes)
    self.assertEqual(tensor_shape.unknown_shape(), s_unknown.get_shape())
    np_input = np.random.randn(3, 3, 3)
    self._compareAll(np_input, reduction_axes, {c_unknown: np_input})

    # Input shape only has known rank.
    c_known_rank = tf.placeholder(tf.float32)
    c_known_rank.set_shape(tensor_shape.unknown_shape(ndims=3))
    s_known_rank = tf.reduce_sum(c_known_rank, reduction_axes, keep_dims=True)
    self.assertEqual(3, s_known_rank.get_shape().ndims)
    np_input = np.random.randn(3, 3, 3)
    self._compareAll(np_input, reduction_axes, {c_known_rank: np_input})

    # Reduction indices are unknown.
    unknown_indices = tf.placeholder(tf.int32)
    c_unknown_indices = tf.constant([[10.0], [20.0]])
    s_unknown_indices = tf.reduce_sum(c_unknown_indices, unknown_indices,
                                      keep_dims=False)
    self.assertEqual(tensor_shape.unknown_shape(),
                     s_unknown_indices.get_shape())
    s_unknown_indices_keep = tf.reduce_sum(c_unknown_indices, unknown_indices,
                                           keep_dims=True)
    self.assertEqual(2, s_unknown_indices_keep.get_shape().ndims)
def testGlobalPool1d(self):
    x1 = np.random.rand(5, 4, 11)
    no_mask = np.ones((5, 4))
    full_mask = np.zeros((5, 4))

    x1_ = tf.Variable(x1, dtype=tf.float32)
    no_mask_ = tf.Variable(no_mask, dtype=tf.float32)
    full_mask_ = tf.Variable(full_mask, dtype=tf.float32)

    none_mask_max = common_layers.global_pool_1d(x1_)
    no_mask_max = common_layers.global_pool_1d(x1_, mask=no_mask_)
    result1 = tf.reduce_sum(none_mask_max - no_mask_max)

    full_mask_max = common_layers.global_pool_1d(x1_, mask=full_mask_)
    result2 = tf.reduce_sum(full_mask_max)

    none_mask_avr = common_layers.global_pool_1d(x1_, "AVR")
    no_mask_avr = common_layers.global_pool_1d(x1_, "AVR", no_mask_)
    result3 = tf.reduce_sum(none_mask_avr - no_mask_avr)

    full_mask_avr = common_layers.global_pool_1d(x1_, "AVR", full_mask_)
    result4 = tf.reduce_sum(full_mask_avr)

    self.evaluate(tf.global_variables_initializer())
    actual = self.evaluate([result1, result2, result3, result4])
    self.assertAllEqual(actual[:3], [0.0, 0.0, 0.0])
def entropy(self, n, p):
    # Note that given n and p where p is a probability vector of
    # length k, the entropy requires a sum over all
    # possible configurations of a k-vector which sums to n. It's
    # expensive.
    # http://stackoverflow.com/questions/36435754/generating-a-numpy-array-with-all-combinations-of-numbers-that-sum-to-less-than
    sess = tf.Session()
    n = sess.run(tf.cast(tf.squeeze(n), dtype=tf.int32))
    sess.close()
    p = tf.cast(tf.squeeze(p), dtype=tf.float32)
    if isinstance(n, np.int32):
        k = get_dims(p)[0]
        max_range = np.zeros(k, dtype=np.int32) + n
        x = np.array([i for i in product(*(range(i+1) for i in max_range))
                      if sum(i) == n])
        logpmf = self.logpmf(x, n, p)
        return tf.reduce_sum(tf.mul(tf.exp(logpmf), logpmf))
    else:
        out = []
        for j in range(n.shape[0]):
            k = get_dims(p)[0]
            max_range = np.zeros(k, dtype=np.int32) + n[j]
            x = np.array([i for i in product(*(range(i+1) for i in max_range))
                          if sum(i) == n[j]])
            logpmf = self.logpmf(x, n[j], p[j, :])
            out += [tf.reduce_sum(tf.mul(tf.exp(logpmf), logpmf))]
        return tf.pack(out)
def soft_triplet_loss(anchor, positive, negative, extra=True, scope="soft_triplet_loss"):
    r"""Loss for triplet networks as described in the paper:
    `Deep Metric Learning using Triplet Network
    <https://arxiv.org/abs/1412.6622>`_ by Hoffer et al.

    It is a softmax loss using :math:`(anchor-positive)^2` and
    :math:`(anchor-negative)^2` as logits.

    Args:
        anchor (tf.Tensor): anchor feature vectors of shape [Batch, N].
        positive (tf.Tensor): features of positive match of the same shape.
        negative (tf.Tensor): features of negative match of the same shape.
        extra (bool): also return distances for pos and neg.

    Returns:
        tf.Tensor: triplet-loss as scalar (and optionally average_pos_dist,
        average_neg_dist)
    """
    eps = 1e-10
    with tf.name_scope(scope):
        d_pos = tf.sqrt(tf.reduce_sum(tf.square(anchor - positive), 1) + eps)
        d_neg = tf.sqrt(tf.reduce_sum(tf.square(anchor - negative), 1) + eps)

        logits = tf.stack([d_pos, d_neg], axis=1)
        ones = tf.ones_like(tf.squeeze(d_pos), dtype="int32")

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ones))

        if extra:
            pos_dist = tf.reduce_mean(d_pos, name='pos-dist')
            neg_dist = tf.reduce_mean(d_neg, name='neg-dist')
            return loss, pos_dist, neg_dist
        else:
            return loss
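# A minimal usage sketch with made-up random features (TF1 graph mode);
# shapes follow the docstring, [Batch, N] for all three inputs:
def _soft_triplet_loss_demo():
    anchor = tf.random_normal([32, 128])
    positive = anchor + 0.1 * tf.random_normal([32, 128])
    negative = tf.random_normal([32, 128])
    loss, pos_dist, neg_dist = soft_triplet_loss(anchor, positive, negative)
    with tf.Session() as sess:
        print(sess.run([loss, pos_dist, neg_dist]))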
def __init__(self, nA,
             learning_rate, decay, grad_clip, entropy_beta,
             state_shape=[84, 84, 4],
             master=None, device_name='/gpu:0', scope_name='master'):
    with tf.device(device_name):
        self.state = tf.placeholder(tf.float32, [None] + state_shape)
        block, self.scope = ActorCritic._build_shared_block(self.state, scope_name)
        self.policy, self.log_softmax_policy = ActorCritic._build_policy(block, nA, scope_name)
        self.value = ActorCritic._build_value(block, scope_name)

        self.train_vars = sorted(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                   self.scope.name),
                                 key=lambda v: v.name)
        if master is not None:
            self.sync_op = self._sync_op(master)
            self.action = tf.placeholder(tf.int32, [None, ])
            self.target_value = tf.placeholder(tf.float32, [None, ])

            advantage = self.target_value - self.value
            entropy = tf.reduce_sum(-1. * self.policy * self.log_softmax_policy, axis=1)
            log_p_s_a = tf.reduce_sum(self.log_softmax_policy * tf.one_hot(self.action, nA), axis=1)

            self.policy_loss = tf.reduce_mean(tf.stop_gradient(advantage) * log_p_s_a)
            self.entropy_loss = tf.reduce_mean(entropy)
            self.value_loss = tf.reduce_mean(advantage ** 2)

            loss = -self.policy_loss - entropy_beta * self.entropy_loss + self.value_loss
            self.gradients = tf.gradients(loss, self.train_vars)
            clipped_gs = [tf.clip_by_average_norm(g, grad_clip) for g in self.gradients]
            self.train_op = master.optimizer.apply_gradients(zip(clipped_gs, master.train_vars))
        else:
            #self.optimizer = tf.train.AdamOptimizer(learning_rate, beta1=BETA)
            self.optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=decay, use_locking=True)
def multilinear_square_product(emb, tuples, l2=0):
    """Compute the square-product of real vectors at selected embeddings.

    This is the sum over all dimensions of the square of summed embedding
    vectors.

    :param emb: embedding matrix of size [n_emb, rank] containing float numbers
    :param tuples: tuple matrix of size [n_t, arity] containing integers
    :param l2: optional l2 regularization strength that is added to the score.
        If it is different from 0, the function returns a pair (pred, l2norm)
        where pred is the sample prediction, but l2norm is the l2 norm of the
        selected embeddings
    :return: the multilinear square product between selected embeddings
        S[i] = sum_k ( sum_j E[I[i,k],j] )^2

    >>> emb = [[12., 0, 0], [0, 1, 0], [-1, 1, 1]]
    >>> idx = tf.Variable([[1, 0, 0], [1, 1, 0]])
    >>> g = multilinear_square_product(emb, idx)
    >>> print(tf_eval(g))
    [ 577. 148.]
    """
    emb_sel = tf.gather(emb, tuples)
    pred = tf.reduce_sum(tf.square(tf.reduce_sum(emb_sel, 1)), 1)

    if l2 == 0:  # unregularized prediction ==> returns only the predictions
        return pred
    else:  # l2 regularization of the selected embeddings
        reg = l2 * tf.reduce_sum(tf.square(emb_sel))
        return pred, reg
def _compute_log_moment(self, sigma, q, moment_order):
    """Compute high moment of privacy loss.

    Args:
      sigma: the noise sigma, in the multiples of the sensitivity.
      q: the sampling ratio.
      moment_order: the order of moment.
    Returns:
      log E[exp(moment_order * X)]
    """
    assert moment_order <= self._max_moment_order, (
        "The order of %d is out of the upper bound %d."
        % (moment_order, self._max_moment_order))
    binomial_table = tf.slice(self._binomial_table, [moment_order, 0],
                              [1, moment_order + 1])
    # qs = [1 q q^2 ... q^L] = exp([0 1 2 ... L] * log(q))
    qs = tf.exp(tf.constant([i * 1.0 for i in range(moment_order + 1)],
                            dtype=tf.float64) * tf.cast(tf.log(q), dtype=tf.float64))
    moments0 = self._differential_moments(sigma, 0.0, moment_order)
    term0 = tf.reduce_sum(binomial_table * qs * moments0)
    moments1 = self._differential_moments(sigma, 1.0, moment_order)
    term1 = tf.reduce_sum(binomial_table * qs * moments1)
    return tf.squeeze(tf.log(tf.cast(q * term0 + (1.0 - q) * term1, tf.float64)))
def _create_loss_optimizer(self):
    # The loss is composed of two terms:
    # 1.) The reconstruction loss (the negative log probability
    #     of the input under the reconstructed Bernoulli distribution
    #     induced by the decoder in the data space).
    #     This can be interpreted as the number of "nats" required
    #     for reconstructing the input when the activation in latent
    #     space is given.
    # Adding 1e-10 to avoid evaluation of log(0.0)
    reconstr_loss = \
        -tf.reduce_sum(self.x * tf.log(1e-10 + self.x_reconstr_mean)
                       + (1 - self.x) * tf.log(1e-10 + 1 - self.x_reconstr_mean), 1)
    # 2.) The latent loss, which is defined as the Kullback-Leibler divergence
    #     between the distribution in latent space induced by the encoder on
    #     the data and some prior. This acts as a kind of regularizer.
    #     This can be interpreted as the number of "nats" required
    #     for transmitting the latent space distribution given the prior.
    latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq
                                       - tf.square(self.z_mean)
                                       - tf.exp(self.z_log_sigma_sq), 1)
    self.cost = tf.reduce_mean(reconstr_loss + latent_loss)  # average over batch
    # Use ADAM optimizer
    self.optimizer = \
        tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)
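# The latent_loss above is the closed-form KL divergence between the encoder's
# diagonal Gaussian q(z|x) = N(mu, sigma^2) and the standard normal prior:
#   KL = -0.5 * sum_j (1 + log sigma_j^2 - mu_j^2 - sigma_j^2)
# A small NumPy check with made-up values; the KL vanishes exactly when
# mu = 0 and log sigma^2 = 0, i.e. when q(z|x) equals the prior:
def _vae_kl_sanity_check():
    import numpy as np
    kl = lambda mu, log_var: -0.5 * np.sum(1 + log_var - mu ** 2 - np.exp(log_var))
    assert np.isclose(kl(np.zeros(2), np.zeros(2)), 0.0)
    assert kl(np.array([0.5, -1.0]), np.array([0.0, 0.2])) > 0.0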
def multilinear(emb, tuples, l2=0):
    """Compute the dot product of real vectors at selected embeddings.

    Note that this model is called Canonical Parafac (CP), and corresponds to
    the "distmult" model in some scientific publications on relational
    database factorization.

    :param emb: embedding matrix of size [n_emb, rank] containing float numbers
    :param tuples: tuple matrix of size [n_t, arity] containing integers
    :param l2: optional l2 regularization strength that is added to the score.
        If it is different from 0, the function returns a pair (pred, l2norm)
        where pred is the sample prediction, but l2norm is the l2 norm of the
        selected embeddings
    :return: the multilinear dot product between selected embeddings
        S[i] = sum_j prod_k E[I[i,k],j]

    >>> emb = [[1., 1, 0, 3], [0, 1, 0, 1], [-1, 1, 1, 5]]
    >>> idx = tf.Variable([[0, 1], [1, 0], [0, 2], [2, 0], [1, 2], [2, 1]])
    >>> g = multilinear(emb, idx)
    >>> print(tf_eval(g))
    [ 4. 4. 15. 15. 6. 6.]
    """
    emb_sel = tf.gather(emb, tuples)
    pred = tf.reduce_sum(tf.reduce_prod(emb_sel, 1), 1)

    if l2 == 0:  # unregularized prediction ==> returns only the predictions
        return pred
    else:  # l2 regularization of the selected embeddings
        reg = l2 * tf.reduce_sum(tf.square(emb_sel))
        return pred, reg
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        A 0-D tensor, 1-D tensor of length n, or 2-D tensor of shape M x n
        where each row represents the mean of a n-dimensional Gaussian.
    scale_one : tf.Tensor
        A tensor of same shape as ``loc_one``, representing the
        standard deviation.
    loc_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        mean of another Gaussian.
    scale_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        standard deviation of another Gaussian.

    Returns
    -------
    tf.Tensor
        For 0-D or 1-D tensor inputs, outputs the 0-D tensor
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        For 2-D tensor inputs, outputs the 1-D tensor
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(loc_one, msg=''),
                    tf.verify_tensor_all_finite(loc_two, msg=''),
                    tf.assert_positive(scale_one),
                    tf.assert_positive(scale_two)]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)
    loc_one = tf.cast(loc_one, tf.float32)
    scale_one = tf.cast(scale_one, tf.float32)

    if loc_two == 0.0 and scale_two == 1.0:
        # With default arguments, we can avoid some intermediate computation.
        out = tf.square(scale_one) + tf.square(loc_one) - \
            1.0 - 2.0 * tf.log(scale_one)
    else:
        loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
        scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)
        loc_two = tf.cast(loc_two, tf.float32)
        scale_two = tf.cast(scale_two, tf.float32)
        out = tf.square(scale_one / scale_two) + \
            tf.square((loc_two - loc_one) / scale_two) - \
            1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    if len(out.get_shape()) <= 1:  # scalar or vector
        return 0.5 * tf.reduce_sum(out)
    else:  # matrix
        return 0.5 * tf.reduce_sum(out, 1)
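# A worked 1-D instance of the general branch above (numbers made up): for
# N(1, 2^2) against the standard normal, the formula reduces to
#   KL = 0.5 * (sigma^2 + mu^2 - 1 - 2 log sigma)
#      = 0.5 * (4 + 1 - 1 - 2 log 2) ~ 1.3069
def _kl_normal_example():
    import numpy as np
    mu, sigma = 1.0, 2.0
    kl = 0.5 * (sigma ** 2 + mu ** 2 - 1.0 - 2.0 * np.log(sigma))
    assert np.isclose(kl, 1.3069, atol=1e-4)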
def __init__(self, session, np_matrix, rank, learning_rate=0.1):
    matrix = tf.constant(np_matrix, dtype=tf.float32)
    scale = 2 * np.sqrt(np_matrix.mean() / rank)
    initializer = tf.random_uniform_initializer(maxval=scale)

    with tf.device('/job:ps/task:0'):
        self.matrix_W = tf.get_variable(
            "W", (np_matrix.shape[0], rank), initializer=initializer
        )
    with tf.device("/job:ps/task:1"):
        self.matrix_H = tf.get_variable(
            "H", (rank, np_matrix.shape[1]), initializer=initializer
        )

    matrix_WH = tf.matmul(self.matrix_W, self.matrix_H)
    f_norm = tf.reduce_sum(tf.pow(matrix - matrix_WH, 2))

    nn_w = tf.reduce_sum(tf.abs(self.matrix_W) - self.matrix_W)
    nn_h = tf.reduce_sum(tf.abs(self.matrix_H) - self.matrix_H)
    constraint = INFINITY * (nn_w + nn_h)
    self.loss = f_norm + constraint
    self.constraint = constraint

    self.session = session
    self.optimizer = tf.train.GradientDescentOptimizer(
        learning_rate
    ).minimize(self.loss)
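# Note: INFINITY comes from the enclosing module; any sufficiently large
# constant (e.g. an assumed 1e10) works here, since |w| - w equals -2w > 0
# exactly for negative entries and 0 otherwise, so the constraint term acts
# as a heavy penalty pushing W and H toward non-negativity during descent.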
def e_step(o_mean, o_stdv, o_activations, votes):
    """The E-Step in EM Routing.

    :param o_mean: (24, 6, 6, 1, 32, 16)
    :param o_stdv: (24, 6, 6, 1, 32, 16)
    :param o_activations: (24, 6, 6, 1, 32, 1)
    :param votes: (24, 6, 6, 288, 32, 16)

    :return: rr
    """
    o_p_unit0 = -tf.reduce_sum(
        tf.square(votes - o_mean) / (2 * tf.square(o_stdv)), axis=-1, keep_dims=True
    )
    o_p_unit2 = -tf.reduce_sum(
        tf.log(o_stdv + epsilon), axis=-1, keep_dims=True
    )

    # o_p is the probability density of the h-th component of the vote from i to j
    # (24, 6, 6, 1, 32, 16)
    o_p = o_p_unit0 + o_p_unit2

    # rr: (24, 6, 6, 288, 32, 1)
    zz = tf.log(o_activations + epsilon) + o_p
    rr = tf.nn.softmax(
        zz, dim=len(zz.get_shape().as_list()) - 2
    )
    return rr
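# Note on the density above: o_p_unit0 + o_p_unit2 is log N(vote; mean, stdv)
# up to the additive constant -0.5 * h * log(2*pi). That constant is the same
# for every capsule along the softmax axis, so it cancels inside the softmax
# and can safely be dropped, which is presumably why the code omits it.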
def top_1_and_5(predictions, labels):
    #test_size = FLAGS.test_size  #tf.shape(predictions)[0]
    in_top1 = tf.to_float(tf.nn.in_top_k(predictions, labels, k=1))
    in_top5 = tf.to_float(tf.nn.in_top_k(predictions, labels, k=5))
    num_correct_1 = tf.reduce_sum(in_top1, name="top1")
    num_correct_5 = tf.reduce_sum(in_top5, name="top5")
    return num_correct_1, num_correct_5
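# A minimal usage sketch (scores and labels made up): `predictions` is a
# [batch, num_classes] float tensor and `labels` a [batch] int tensor.
# tf.nn.in_top_k marks each row whose true class is among the k highest
# scores, e.g. for scores [[.1, .7, .2], [.6, .3, .1]] and labels [1, 2],
# top-1 correctness is [1., 0.], so num_correct_1 evaluates to 1.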
def log_likelihood_sym(self, x_var, dist_info_vars):
    means = dist_info_vars["mean"]
    log_stds = dist_info_vars["log_std"]
    zs = (x_var - means) / tf.exp(log_stds)
    return - tf.reduce_sum(log_stds, reduction_indices=-1) - \
        0.5 * tf.reduce_sum(tf.square(zs), reduction_indices=-1) - \
        0.5 * self.dim * np.log(2 * np.pi)
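# A cross-check of the expression above against scipy (made-up numbers): the
# diagonal-Gaussian density factorizes, so
#   -sum(log_std) - 0.5 * sum(z^2) - 0.5 * dim * log(2*pi)
# should equal scipy.stats.norm.logpdf summed over dimensions:
def _diag_gaussian_loglik_check():
    import numpy as np
    from scipy import stats
    x = np.array([0.3, -1.2])
    mean = np.array([0.0, 0.5])
    log_std = np.array([0.1, -0.3])
    z = (x - mean) / np.exp(log_std)
    ll = -log_std.sum() - 0.5 * (z ** 2).sum() - 0.5 * len(x) * np.log(2 * np.pi)
    assert np.isclose(ll, stats.norm.logpdf(x, mean, np.exp(log_std)).sum())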
def get(self, rewards, pads, values, final_values,
        log_probs, prev_log_probs, target_log_probs,
        entropies, logits):
    seq_length = tf.shape(rewards)[0]

    not_pad = tf.reshape(1 - pads, [seq_length, -1, self.num_samples])
    rewards = not_pad * tf.reshape(rewards, [seq_length, -1, self.num_samples])
    log_probs = not_pad * tf.reshape(sum(log_probs), [seq_length, -1, self.num_samples])

    total_rewards = tf.reduce_sum(rewards, 0)
    total_log_probs = tf.reduce_sum(log_probs, 0)

    rewards_and_bonus = (total_rewards +
                         self.bonus_weight *
                         self.get_bonus(total_rewards, total_log_probs))

    baseline = tf.reduce_mean(rewards_and_bonus, 1, keep_dims=True)

    loss = -tf.stop_gradient(rewards_and_bonus - baseline) * total_log_probs
    loss = tf.reduce_mean(loss)
    raw_loss = loss  # TODO

    gradient_ops = self.training_ops(
        loss, learning_rate=self.learning_rate)

    tf.summary.histogram('log_probs', total_log_probs)
    tf.summary.histogram('rewards', total_rewards)
    tf.summary.scalar('avg_rewards',
                      tf.reduce_mean(total_rewards))
    tf.summary.scalar('loss', loss)

    return loss, raw_loss, baseline, gradient_ops, tf.summary.merge_all()
def cord_cls_loss(detectors_mask,
                  matching_true_boxes,
                  num_classes,
                  pred_class_prob,
                  pred_boxes,
                  loc_scale,
                  ):
    """
    :param detectors_mask: [batch, 13, 13, 3, 1]
    :param matching_true_boxes: [batch, 13, 13, 3, 5]
           [σ(tx), σ(ty), tw, th, cls]
    :param num_classes: 20
    :param pred_class_prob: [batch, 13, 13, 3, 20]
    :param pred_boxes: [batch, 13, 13, 3, 4]
    :param loc_scale: [batch, 13, 13, 3, 1]
    :return:
        mean_loss: float, mean localization loss across minibatch
    """
    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = tf.cast(matching_true_boxes[..., 4], tf.int32)  # [batch, 13, 13, 3]
    matching_classes = tf.one_hot(matching_classes, num_classes)  # [batch, 13, 13, 3, 20]
    classification_loss = (detectors_mask *
                           tf.square(matching_classes - pred_class_prob))  # [batch, 13, 13, 3, 20]

    # Coordinate loss for matching detection boxes. [σ(tx), σ(ty), tw, th]
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (detectors_mask * loc_scale *
                        tf.square(matching_boxes - pred_boxes))

    classification_loss_sum = tf.reduce_sum(classification_loss)
    coordinates_loss_sum = tf.reduce_sum(coordinates_loss)

    return classification_loss_sum + coordinates_loss_sum
def char_accuracy(predictions, targets, rej_char, streaming=False):
    """Computes character level accuracy.

    Both predictions and targets should have the same shape
    [batch_size x seq_length].

    Args:
      predictions: predicted character ids.
      targets: ground truth character ids.
      rej_char: the character id used to mark an empty element (end of sequence).
      streaming: if True, uses the streaming mean from the slim.metrics module.

    Returns:
      an update op for execution and a value tensor whose value on evaluation
      returns the total character accuracy.
    """
    with tf.variable_scope('CharAccuracy'):
        predictions.get_shape().assert_is_compatible_with(targets.get_shape())

        targets = tf.to_int32(targets)
        const_rej_char = tf.constant(rej_char, shape=targets.get_shape())
        weights = tf.to_float(tf.not_equal(targets, const_rej_char))
        correct_chars = tf.to_float(tf.equal(predictions, targets))
        accuracy_per_example = tf.div(
            tf.reduce_sum(tf.multiply(correct_chars, weights), 1),
            tf.reduce_sum(weights, 1))
        if streaming:
            return tf.contrib.metrics.streaming_mean(accuracy_per_example)
        else:
            return tf.reduce_mean(accuracy_per_example)
def _log_joint(self, z_sample):
    """Utility function to calculate model's log joint density,
    log p(x, z), for inputs z (and fixed data x).

    Args:
      z_sample: dict.
        Latent variable keys to samples.
    """
    self.scope_iter += 1
    scope = 'inference_' + str(id(self)) + '/' + str(self.scope_iter)
    # Form dictionary in order to replace conditioning on prior or
    # observed variable with conditioning on a specific value.
    dict_swap = z_sample.copy()
    for x, qx in six.iteritems(self.data):
        if isinstance(x, RandomVariable):
            if isinstance(qx, RandomVariable):
                qx_copy = copy(qx, scope=scope)
                dict_swap[x] = qx_copy.value()
            else:
                dict_swap[x] = qx

    log_joint = 0.0
    for z in six.iterkeys(self.latent_vars):
        z_copy = copy(z, dict_swap, scope=scope)
        log_joint += tf.reduce_sum(z_copy.log_prob(dict_swap[z]))

    for x in six.iterkeys(self.data):
        if isinstance(x, RandomVariable):
            x_copy = copy(x, dict_swap, scope=scope)
            log_joint += tf.reduce_sum(x_copy.log_prob(dict_swap[x]))

    return log_joint
def triplet_loss(y_true, y_pred, alpha=0.2):
    """
    Implementation of the triplet loss as defined by formula

    Arguments:
    y_true -- true labels, required when you define a loss in Keras,
              you don't need it in this function.
    y_pred -- python list containing three objects:
            anchor -- the encodings for the anchor images, of shape (None, 128)
            positive -- the encodings for the positive images, of shape (None, 128)
            negative -- the encodings for the negative images, of shape (None, 128)

    Returns:
    loss -- real number, value of the loss
    """
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    ### START CODE HERE ### (≈ 4 lines)
    # Step 1: Compute the (encoding) distance between the anchor and the positive,
    # summing over axis=-1
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)
    # Step 2: Compute the (encoding) distance between the anchor and the negative,
    # summing over axis=-1
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), axis=-1)
    # Step 3: subtract the two previous distances and add alpha.
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
    # Step 4: Take the maximum of basic_loss and 0.0. Sum over the training examples.
    loss = tf.reduce_sum(tf.maximum(basic_loss, 0.))
    ### END CODE HERE ###

    return loss
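# A minimal smoke test with made-up random encodings (TF1 graph mode);
# y_true is unused by the function, per its docstring:
def _triplet_loss_demo():
    y_pred = (tf.random_normal([3, 128], seed=1),
              tf.random_normal([3, 128], seed=2),
              tf.random_normal([3, 128], seed=3))
    loss = triplet_loss(None, y_pred)
    with tf.Session() as sess:
        print("loss =", sess.run(loss))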
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    size = config.hidden_size
    self.max_len = max_len = config.max_len
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, config.max_len])
    self._targets = tf.placeholder(tf.int32, [batch_size])

    embedding = tf.get_variable("embedding", [vocab_size, size])
    inputs = tf.nn.embedding_lookup(embedding, self._input_data)
    output = tf.reduce_sum(inputs, 1)

    softmax_w = tf.get_variable("softmax_w", [size, 2])
    softmax_b = tf.get_variable("softmax_b", [2])
    logits = tf.matmul(output, softmax_w) + softmax_b
    prediction = tf.nn.softmax(logits)
    self._prediction = prediction

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, self._targets)
    self._cost = cost = tf.reduce_sum(loss) / batch_size

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def loss(self, top_out, targets):
    predictions = top_out
    with tf.name_scope("log_poisson"):
        weights = self.targets_weights_fn(targets)
        lp_loss = tf.nn.log_poisson_loss(targets, predictions)
        return tf.reduce_sum(lp_loss * weights), tf.reduce_sum(weights)
def build_loss(self, ohem=False):
    # classification loss
    rpn_cls_score = tf.reshape(self.get_output('rpn_cls_score_reshape'), [-1, 2])  # shape (HxWxA, 2)
    rpn_label = tf.reshape(self.get_output('rpn-data')[0], [-1])  # shape (HxWxA)
    # ignore_label(-1)
    fg_keep = tf.equal(rpn_label, 1)
    rpn_keep = tf.where(tf.not_equal(rpn_label, -1))
    rpn_cls_score = tf.gather(rpn_cls_score, rpn_keep)  # shape (N, 2)
    rpn_label = tf.gather(rpn_label, rpn_keep)
    rpn_cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=rpn_label,
                                                                         logits=rpn_cls_score)

    # box loss
    rpn_bbox_pred = self.get_output('rpn_bbox_pred')  # shape (1, H, W, Ax4)
    rpn_bbox_targets = self.get_output('rpn-data')[1]
    rpn_bbox_inside_weights = self.get_output('rpn-data')[2]
    rpn_bbox_outside_weights = self.get_output('rpn-data')[3]
    rpn_bbox_pred = tf.gather(tf.reshape(rpn_bbox_pred, [-1, 4]), rpn_keep)  # shape (N, 4)
    rpn_bbox_targets = tf.gather(tf.reshape(rpn_bbox_targets, [-1, 4]), rpn_keep)
    rpn_bbox_inside_weights = tf.gather(tf.reshape(rpn_bbox_inside_weights, [-1, 4]), rpn_keep)
    rpn_bbox_outside_weights = tf.gather(tf.reshape(rpn_bbox_outside_weights, [-1, 4]), rpn_keep)

    rpn_loss_box_n = tf.reduce_sum(
        rpn_bbox_outside_weights * self.smooth_l1_dist(
            rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)),
        reduction_indices=[1])

    rpn_loss_box = tf.reduce_sum(rpn_loss_box_n) / (tf.reduce_sum(tf.cast(fg_keep, tf.float32)) + 1)
    rpn_cross_entropy = tf.reduce_mean(rpn_cross_entropy_n)

    model_loss = rpn_cross_entropy + rpn_loss_box

    regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n(regularization_losses) + model_loss

    return total_loss, model_loss, rpn_cross_entropy, rpn_loss_box
def prepare_loss(self, entropy_beta):
    with tf.device(self._device):
        # taken action (input for policy)
        self.a = tf.placeholder("float", [None, self._action_size])

        # temporal difference (R-V) (input for policy)
        self.td = tf.placeholder("float", [None])

        # avoid NaN with clipping when value in pi becomes zero
        log_pi = tf.log(tf.clip_by_value(self.pi, 1e-20, 1.0))

        # policy entropy
        entropy = -tf.reduce_sum(self.pi * log_pi, reduction_indices=1)

        # policy loss (output)  (Adding minus, because the original paper's
        # objective function is for gradient ascent, but we use a gradient
        # descent optimizer.)
        policy_loss = - tf.reduce_sum(
            tf.reduce_sum(tf.multiply(log_pi, self.a), reduction_indices=1) *
            self.td + entropy * entropy_beta)

        # R (input for value)
        self.r = tf.placeholder("float", [None])

        # value loss (output)
        # (Learning rate for Critic is half of Actor's, so multiply by 0.5)
        value_loss = 0.5 * tf.nn.l2_loss(self.r - self.v)

        # gradients of policy and value are summed up
        self.total_loss = policy_loss + value_loss
def build_model(self):
    tf.compat.v1.reset_default_graph()  #tf.Graph()
    tf.compat.v1.disable_eager_execution()

    # Defining the weights associated with the Network
    with tf.device('/cpu:0'):
        #print(self.n_words)
        #print(self.dim_hidden)
        #return
        #random_tf = tf.random_uniform([8423, 512], -0.1, 0.1)
        #self.word_emb = tf.Variable(random_tf)
        self.word_emb = tf.Variable(tf.random.uniform([self.n_words, self.dim_hidden], -0.1, 0.1),
                                    name='word_emb')
    print("word_emb", self.word_emb)

    self.lstm1 = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(self.dim_hidden, state_is_tuple=False)
    self.lstm2 = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(self.dim_hidden, state_is_tuple=False)
    self.encode_W = tf.Variable(tf.random.uniform([self.dim_image, self.dim_hidden], -0.1, 0.1),
                                name='encode_W')
    self.encode_b = tf.Variable(tf.zeros([self.dim_hidden]), name='encode_b')
    self.word_emb_W = tf.Variable(tf.random.uniform([self.dim_hidden, self.n_words], -0.1, 0.1),
                                  name='word_emb_W')
    self.word_emb_b = tf.Variable(tf.zeros([self.n_words]), name='word_emb_b')

    # Placeholders
    video = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.video_lstm_step, self.dim_image])
    video_mask = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.video_lstm_step])
    caption = tf.compat.v1.placeholder(tf.int32, [self.batch_size, self.caption_lstm_step + 1])
    caption_mask = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.caption_lstm_step + 1])

    video_flat = tf.reshape(video, [-1, self.dim_image])
    print("video_flat", video_flat)
    image_emb = tf.compat.v1.nn.xw_plus_b(video_flat, self.encode_W, self.encode_b)
    print("image_emb", image_emb)
    # using image embedding to reduce the dimension to 512
    image_emb = tf.reshape(image_emb, [self.batch_size, self.lstm_steps, self.dim_hidden])
    print("image_emb_reshaping", image_emb)

    state1 = tf.zeros([self.batch_size, self.lstm1.state_size])
    state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
    padding = tf.zeros([self.batch_size, self.dim_hidden])
    print(self.lstm1.state_size)
    print(self.lstm2.state_size)

    probs = []
    loss = 0.0

    # Encoding Stage
    for i in range(0, self.video_lstm_step):
        if i > 0:
            tf.compat.v1.get_variable_scope().reuse_variables()

        with tf.compat.v1.variable_scope("LSTM1"):
            output1, state1 = self.lstm1(image_emb[:, i, :], state1)
            print("encoding output1 state1", output1, state1)

        with tf.compat.v1.variable_scope("LSTM2"):
            output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)
            print("encoding output2 state2", output2, state2)

    # Decoding Stage to generate Captions
    for i in range(0, self.caption_lstm_step):
        print("iteration:", i)
        with tf.device("/cpu:0"):
            # looks for the ids from the word embedding
            current_embed = tf.compat.v1.nn.embedding_lookup(self.word_emb, caption[:, i])

        tf.compat.v1.get_variable_scope().reuse_variables()

        with tf.compat.v1.variable_scope("LSTM1"):
            print("decoding input state1 from previous loop", output1, state1)
            output1, state1 = self.lstm1(padding, state1)
            print("decoding output1 state1", output1, state1)

        with tf.compat.v1.variable_scope("LSTM2"):
            output2, state2 = self.lstm2(tf.concat([current_embed, output1], 1), state2)
            print("decoding output2 state2", output2, state2)
            print("current_embed:", current_embed)

        labels = tf.expand_dims(caption[:, i + 1], 1)
        print("labels:", labels)
        print("caption:", caption)
        indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
        print("indices:", indices)
        concated = tf.concat([indices, labels], 1)
        onehot_labels = tf.compat.v1.sparse_to_dense(concated,
                                                     tf.stack([self.batch_size, self.n_words]),
                                                     1.0, 0.0)
        print("onehot_labels:", onehot_labels)
        logit_words = tf.compat.v1.nn.xw_plus_b(output2, self.word_emb_W, self.word_emb_b)
        print(logit_words)

        # Computing the loss
        cross_entropy = tf.compat.v1.nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                                          logits=logit_words)
        cross_entropy = cross_entropy * caption_mask[:, i]
        probs.append(logit_words)
        print(logit_words)
        print(output2)
        print(probs)

        current_loss = tf.reduce_sum(cross_entropy) / self.batch_size
        print("current_loss", current_loss)
        loss = loss + current_loss

    with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope(),
                                     reuse=tf.compat.v1.AUTO_REUSE) as scope:
        train_op = tf.compat.v1.train.AdamOptimizer(self.learning_rate).minimize(loss)
        #allops = tf_graph.get_operations()
        #print(allops)
        #return

    return loss, video, video_mask, caption, caption_mask, probs, train_op
real_data = tf.reshape(2 * (real_data_conv - .5),
                       [BATCH_SIZE // len(DEVICES), OUTPUT_DIM])
fake_labels_splits = real_labels
fake_data = Generator('New.', BATCH_SIZE // len(DEVICES), labels=fake_labels_splits)
fake_data_old = Generator('Old.', BATCH_SIZE // len(DEVICES), labels=fake_labels_splits)

disc_real, disc_real_acgan = Discriminator(real_data)
disc_fake, disc_fake_acgan = Discriminator(fake_data)

gen_cost = -tf.reduce_mean(disc_fake)
disc_wgan = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)

alpha = tf.random_uniform(
    shape=[BATCH_SIZE // len(DEVICES), 1],
    minval=0.,
    maxval=1.
)
differences = fake_data - real_data
interpolates = real_data + (alpha * differences)
gradients = tf.gradients(Discriminator(interpolates)[0], interpolates)[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
disc_wgan_pure = disc_wgan
disc_wgan += LAMBDA * gradient_penalty

disc_cost = disc_wgan

if ACGAN:
    disc_real_acgan_costs.append(tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=disc_real_acgan,
                                                       labels=real_labels)))
    disc_fake_acgan_costs.append(tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=disc_fake_acgan,
                                                       labels=fake_labels_splits)))
    disc_cost += ACGAN_SCALE * tf.add_n(disc_real_acgan_costs)
    gen_cost += ACGAN_SCALE_G * tf.add_n(disc_fake_acgan_costs)

    disc_acgan_real_accs.append(tf.reduce_mean(
        tf.cast(tf.equal(tf.to_int32(tf.argmax(disc_real_acgan, dimension=1)),
                         real_labels), tf.float32)))
    disc_acgan_fake_accs.append(tf.reduce_mean(
        tf.cast(tf.equal(tf.to_int32(tf.argmax(disc_fake_acgan, dimension=1)),
                         fake_labels_splits), tf.float32)))
def loss_fn(x, t, w):
    costs = tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=t)
    weighted_costs = tf.multiply(costs, w)
    return tf.reduce_sum(weighted_costs)
def loss_fn(x, t):
    # Huber loss: quadratic for residuals within huber_d, linear outside.
    diff = tf.subtract(x, t)
    abs_diff = tf.abs(diff)
    return tf.reduce_sum(
        tf.where(abs_diff <= huber_d,
                 0.5 * tf.square(diff),
                 huber_d * (abs_diff - 0.5 * huber_d)), 0)
def loss_fn(x, t):
    diff = tf.subtract(x, t)
    return tf.reduce_sum(tf.abs(diff), 0)
def loss_fn(x, t, w):
    diff = tf.subtract(x, t)
    weighted_diff = tf.multiply(diff, w)
    return tf.reduce_sum(tf.square(weighted_diff), 0)
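# The loss_fn variants above share a convention: x are predictions (or logits
# for the cross-entropy variant), t are targets, and w, where present, are
# per-element weights. A minimal evaluation sketch with made-up tensors:
#   x = tf.constant([[0.2, -0.5], [1.0, 0.3]])
#   t = tf.constant([[0.0, 0.0], [1.0, 1.0]])
#   w = tf.constant([[1.0, 0.5], [1.0, 0.0]])
#   with tf.Session() as sess:
#       print(sess.run(loss_fn(x, t, w)))  # whichever variant is in scope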
def build_model(self):
    self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
    self.action = tf.placeholder(tf.float32, [None, self.action_size])
    self.advantage = tf.placeholder(tf.float32, [None])
    # Mean of old action distribution
    self.old_action_dist_mu = tf.placeholder(tf.float32, [None, self.action_size])
    self.old_action_dist_logstd = tf.placeholder(tf.float32, [None, self.action_size])

    # NN framework for action distribution
    self.action_dist_mu, action_dist_logstd = self.build_policy(self.obs)
    # Construct distribution by repeating action_dist_logstd
    self.action_dist_logstd = tf.tile(action_dist_logstd,
                                      (tf.shape(action_dist_logstd)[0], 1))

    # Probability of action under old policy vs. new policy
    self.log_policy = LOG_POLICY(self.action_dist_mu, self.action_dist_logstd,
                                 self.action)
    self.log_old_policy = LOG_POLICY(self.old_action_dist_mu,
                                     self.old_action_dist_logstd, self.action)
    policy_ratio = tf.exp(self.log_policy - self.log_old_policy)

    # Number of observations in batch
    batch_size = tf.cast(tf.shape(self.obs)[0], tf.float32)

    '''
    Equation (14) in paper
    Contribution of a single s_n:
    Expectation over a~q[ (new policy / q(is)) * advantage_old ]
    '''
    surr_single_state = -tf.reduce_mean(policy_ratio * self.advantage)
    # Define KL divergence and Shannon entropy, averaged over a set of
    # inputs (policies)
    kl = GAUSS_KL(self.old_action_dist_mu, self.old_action_dist_logstd,
                  self.action_dist_mu, self.action_dist_logstd) / batch_size
    ent = GAUSS_ENTROPY(self.action_dist_mu, self.action_dist_logstd) / batch_size

    # Define 'loss' quantities to constrain or maximize
    self.losses = [surr_single_state, kl, ent]
    # Get trainable variables for the policy (NN weights)
    tr_vrbs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Policy')
    for i in tr_vrbs:
        print(i.op.name)

    # Maximize surrogate function over policy parameter 'theta' represented
    # by neural network weights
    self.pg = FLAT_GRAD(surr_single_state, tr_vrbs)

    # KL divergence where first argument is fixed
    kl_first_fixed = GAUSS_KL_FIRST_FIX(self.action_dist_mu,
                                        self.action_dist_logstd) / batch_size
    # Gradient of KL divergence w.r.t. theta (NN policy weights)
    first_kl_grads = tf.gradients(kl_first_fixed, tr_vrbs)

    '''
    REVIEW FROM HERE ONWARDS
    #??????????????????????????????????????????????????????????
    '''
    self.flat_tangent = tf.placeholder(tf.float32, [None])
    tangent = list()
    start = 0
    for vrbs in tr_vrbs:
        variable_size = np.prod(vrbs.get_shape().as_list())
        param = tf.reshape(self.flat_tangent[start:(start + variable_size)],
                           vrbs.get_shape())
        tangent.append(param)
        start += variable_size
    '''
    Gradient of KL with tangent vector
    gradient_w_tangent : list of KL_prime*y for each variable
    '''
    gradient_w_tangent = [tf.reduce_sum(kl_g * t)
                          for (kl_g, t) in zip(first_kl_grads, tangent)]
    '''
    From the derivative of KL_prime*y : [dKL/dx1, dKL/dx2, ...]*y
    y -> Ay, where A is an n by n matrix, is hard to implement directly
    (numerically solving (n*n)*(n*1)), so we first multiply the target 'y'
    into the gradient and then differentiate.
    'self.FVP' returns:
        [d2KL/dx1dx1 + d2KL/dx1dx2 + ..., d2KL/dx1dx2 + d2KL/dx2dx2 + ..., ...]*y
    So we get (second derivative of KL divergence)*y for each variable
    => y -> JMJy (Fisher Vector Product)
    '''
    self.FVP = FLAT_GRAD(gradient_w_tangent, tr_vrbs)
    # Get actual parameter values
    self.get_value = GetValue(self.sess, tr_vrbs, name='Policy')
    # Set parameter values
    self.set_value = SetValue(self.sess, tr_vrbs, name='Policy')
    # Estimate of the advantage function
    self.gae = GAE(self.sess, self.observation_size, self.args.gamma,
                   self.args.lamda, self.args.vf_constraint)

    self.sess.run(tf.global_variables_initializer())
def loss_layer(self, feature_map_i, y_true, anchors):
    # size in [h, w] format! don't get messed up!
    grid_size = tf.shape(feature_map_i)[1:3]
    grid_size_ = feature_map_i.shape.as_list()[1:3]

    y_true = tf.reshape(y_true, [-1, grid_size_[0], grid_size_[1], 3, 5 + self._NUM_CLASSES])

    # the downscale ratio in height and width
    ratio = tf.cast(self.img_size / grid_size, tf.float32)
    # N: batch_size
    N = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

    x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = self._reorg_layer(feature_map_i, anchors)

    # shape: take 416x416 input image and 13*13 feature_map for example:
    # [N, 13, 13, 3, 1]
    object_mask = y_true[..., 4:5]
    # shape: [N, 13, 13, 3, 4] & [N, 13, 13, 3] ==> [V, 4]
    # V: num of true gt box
    valid_true_boxes = tf.boolean_mask(y_true[..., 0:4], tf.cast(object_mask[..., 0], 'bool'))

    # shape: [V, 2]
    valid_true_box_xy = valid_true_boxes[:, 0:2]
    valid_true_box_wh = valid_true_boxes[:, 2:4]
    # shape: [N, 13, 13, 3, 2]
    pred_box_xy = pred_boxes[..., 0:2]
    pred_box_wh = pred_boxes[..., 2:4]

    # calc iou
    # shape: [N, 13, 13, 3, V]
    iou = self._broadcast_iou(valid_true_box_xy, valid_true_box_wh,
                              pred_box_xy, pred_box_wh)

    # shape: [N, 13, 13, 3]
    best_iou = tf.reduce_max(iou, axis=-1)

    # get_ignore_mask
    ignore_mask = tf.cast(best_iou < 0.5, tf.float32)
    # shape: [N, 13, 13, 3, 1]
    ignore_mask = tf.expand_dims(ignore_mask, -1)

    # get xy coordinates in one cell from the feature_map
    # numerical range: 0 ~ 1
    # shape: [N, 13, 13, 3, 2]
    true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
    pred_xy = pred_box_xy / ratio[::-1] - x_y_offset

    # get_tw_th, numerical range: 0 ~ 1
    # shape: [N, 13, 13, 3, 2]
    true_tw_th = y_true[..., 2:4] / anchors
    pred_tw_th = pred_box_wh / anchors
    # for numerical stability
    true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
                          x=tf.ones_like(true_tw_th), y=true_tw_th)
    pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
                          x=tf.ones_like(pred_tw_th), y=pred_tw_th)
    true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
    pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))

    # box size punishment:
    # box with smaller area has bigger weight. This is taken from the yolo
    # darknet C source code.
    # shape: [N, 13, 13, 3, 1]
    box_loss_scale = 2. - (y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (
        y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))

    # shape: [N, 13, 13, 3, 1]
    xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * object_mask * box_loss_scale) / N
    wh_loss = tf.reduce_sum(tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale) / N

    # shape: [N, 13, 13, 3, 1]
    conf_pos_mask = object_mask
    conf_neg_mask = (1 - object_mask) * ignore_mask
    conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask,
                                                                            logits=pred_conf_logits)
    conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask,
                                                                            logits=pred_conf_logits)
    conf_loss = tf.reduce_sum(conf_loss_pos + conf_loss_neg) / N

    # shape: [N, 13, 13, 3, 1]
    class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true[..., 5:],
                                                                       logits=pred_prob_logits)
    class_loss = tf.reduce_sum(class_loss) / N

    return xy_loss, wh_loss, conf_loss, class_loss
def optimize_graph(args):
    # we don't need GPU for optimizing the graph
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    tf.logging.set_verbosity(tf.logging.ERROR)
    from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

    config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True)

    config_fp = os.path.join(args.model_dir, 'bert_config.json')
    init_checkpoint = os.path.join(args.model_dir, 'bert_model.ckpt')
    with tf.gfile.GFile(config_fp, 'r') as f:
        bert_config = modeling.BertConfig.from_dict(json.load(f))

    # input placeholders, not sure if they are friendly to XLA
    input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_mask')
    input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_type_ids')

    jit_scope = tf.contrib.compiler.jit.experimental_jit_scope if args.xla else contextlib.suppress

    with jit_scope():
        input_tensors = [input_ids, input_mask, input_type_ids]

        model = modeling.BertModel(config=bert_config,
                                   is_training=False,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=input_type_ids,
                                   use_one_hot_embeddings=False)

        tvars = tf.trainable_variables()

        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        minus_mask = lambda x, m: x - tf.expand_dims(1.0 - m, axis=-1) * 1e30
        mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
        masked_reduce_max = lambda x, m: tf.reduce_max(minus_mask(x, m), axis=1)
        masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
            tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

        with tf.variable_scope("pooling"):
            if len(args.pooling_layer) == 1:
                encoder_layer = model.all_encoder_layers[args.pooling_layer[0]]
            else:
                all_layers = [model.all_encoder_layers[l] for l in args.pooling_layer]
                encoder_layer = tf.concat(all_layers, -1)

            input_mask = tf.cast(input_mask, tf.float32)
            if args.pooling_strategy == PoolingStrategy.REDUCE_MEAN:
                pooled = masked_reduce_mean(encoder_layer, input_mask)
            elif args.pooling_strategy == PoolingStrategy.REDUCE_MAX:
                pooled = masked_reduce_max(encoder_layer, input_mask)
            elif args.pooling_strategy == PoolingStrategy.REDUCE_MEAN_MAX:
                pooled = tf.concat([masked_reduce_mean(encoder_layer, input_mask),
                                    masked_reduce_max(encoder_layer, input_mask)], axis=1)
            elif args.pooling_strategy == PoolingStrategy.FIRST_TOKEN or \
                    args.pooling_strategy == PoolingStrategy.CLS_TOKEN:
                pooled = tf.squeeze(encoder_layer[:, 0:1, :], axis=1)
            elif args.pooling_strategy == PoolingStrategy.LAST_TOKEN or \
                    args.pooling_strategy == PoolingStrategy.SEP_TOKEN:
                seq_len = tf.cast(tf.reduce_sum(input_mask, axis=1), tf.int32)
                rng = tf.range(0, tf.shape(seq_len)[0])
                indexes = tf.stack([rng, seq_len - 1], 1)
                pooled = tf.gather_nd(encoder_layer, indexes)
            elif args.pooling_strategy == PoolingStrategy.NONE:
                pooled = mul_mask(encoder_layer, input_mask)
            else:
                raise NotImplementedError()

        pooled = tf.identity(pooled, 'final_encodes')

        output_tensors = [pooled]
        tmp_g = tf.get_default_graph().as_graph_def()

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        tmp_g = tf.graph_util.convert_variables_to_constants(
            sess, tmp_g, [n.name[:-2] for n in output_tensors])
        dtypes = [n.dtype for n in input_tensors]
        tmp_g = optimize_for_inference(
            tmp_g,
            [n.name[:-2] for n in input_tensors],
            [n.name[:-2] for n in output_tensors],
            [dtype.as_datatype_enum for dtype in dtypes],
            False)

    tmp_file = tempfile.NamedTemporaryFile('w', delete=False).name
    with tf.gfile.GFile(tmp_file, 'wb') as f:
        f.write(tmp_g.SerializeToString())
    return tmp_file
def sum(x, axis=None, keepdims=False):
    axis = None if axis is None else [axis]
    return tf.reduce_sum(x, axis=axis, keep_dims=keepdims)
def gen_tf_ops(self, verbose=False):
    if verbose:
        print("%s Instantiating cohort %s" % (strftime("%H:%M:%S"), self.name))

    stock_returns = self.model.return_ops[0]
    bond_returns = self.model.return_ops[1]

    stock_allocs = self.model.allocation_ops[0]
    bond_allocs = self.model.allocation_ops[1]

    self.port_returns_list = []
    self.port_prespend_list = []
    self.port_end_vals_list = []
    self.spend_amts_list = []
    self.spend_amts_nonzero_list = []

    with self.model.graph.as_default():
        with tf.device("/cpu:0"):
            if verbose:
                print("%s Generating %d years from %d" % (strftime("%H:%M:%S"),
                                                          self.model.ret_years,
                                                          self.cohort_start_year))
            start_year_ix = self.cohort_start_year - self.model.first_year
            for ix in range(self.model.ret_years):
                op_stock_return = stock_returns[start_year_ix + ix]
                op_stock_alloc = stock_allocs[ix]
                op_bond_return = bond_returns[start_year_ix + ix]
                op_bond_alloc = bond_allocs[ix]
                op_const_spend = self.model.const_spending_op
                op_var_spend = self.model.var_spending_ops[ix]

                op_total_real_return = tf.add(
                    tf.mul(op_stock_alloc, op_stock_return,
                           name="%s_stock_%d" % (self.name, ix)),
                    tf.mul(op_bond_alloc, op_bond_return,
                           name="%s_bond_%d" % (self.name, ix)),
                    name="%s_total_return_%d" % (self.name, ix))
                self.port_returns_list.append(op_total_real_return)

                if ix == 0:
                    prev_val = self.model.start_val_op
                else:
                    prev_val = self.port_end_vals_list[ix - 1]

                op_port_end_val_prespend = tf.add(
                    prev_val,
                    tf.mul(prev_val, self.port_returns_list[ix],
                           name="%s_dolreturn_%d" % (self.name, ix)),
                    name="%s_prespend_%d" % (self.name, ix))
                self.port_prespend_list.append(op_port_end_val_prespend)

                desired_spend_amt = tf.add(
                    tf.mul(op_var_spend, op_port_end_val_prespend,
                           name="%s_des_vspend_%d" % (self.name, ix)),
                    op_const_spend,
                    name="%s_desired_spend_amt_%d" % (self.name, ix))
                # spend minimum of tmp_spend_amt, port value
                spend_amt = tf.minimum(desired_spend_amt, op_port_end_val_prespend,
                                       name="%s_actual_spend_amt_%d" % (self.name, ix))
                self.spend_amts_list.append(spend_amt)

                op_port_end_val = tf.sub(op_port_end_val_prespend, spend_amt,
                                         name="%s_endval_%d" % (self.name, ix))
                self.port_end_vals_list.append(op_port_end_val)

            # now that we've computed cohort paths we pack results into
            # 1D Tensors to calc objective
            self.spend_amts = tf.pack(self.spend_amts_list,
                                      name="%s_spend_amts" % self.name)
            self.port_end_vals = tf.pack(self.port_end_vals_list,
                                         name="%s_port_end_vals" % self.name)

            self.mean_spending = tf.reduce_mean(self.spend_amts,
                                                name="%s_mean_spending" % self.name)
            self.sd_spending = tf.sqrt(tf.reduce_mean(
                tf.pow(tf.sub(self.spend_amts, self.mean_spending), 2)),
                name="%s_sd_spending" % self.name)
            self.min_spending = tf.reduce_min(self.spend_amts,
                                              name="%s_min_spending" % self.name)
            self.max_spending = tf.reduce_max(self.spend_amts,
                                              name="%s_max_spending" % self.name)

            if self.model.gamma == 1.0:
                # spend a tiny amount even if spend is 0 so log is not NaN
                # doesn't really seem like best practice but...
                # 0 spend years can't be in final solution
                # and don't want divide by zero errors if optimizer attempts one
                # chain new op off old op but keep a reference to old op
                # around just in case
                self.spend_amts_maybe_zero = self.spend_amts
                self.spend_amts = tf.maximum(self.spend_amts_maybe_zero,
                                             self.model.very_small_amts,
                                             name="%s_actual_spend_nonzero" % self.name)
                self.total_spending = tf.reduce_sum(
                    self.spend_amts, name="%s_total_spending_nonzero" % self.name)
            else:
                self.total_spending = tf.reduce_sum(
                    self.spend_amts, name="%s_total_spending" % self.name)

            if self.model.survival is not None:
                self.ce = self.model.gen_ce_survival(self.spend_amts,
                                                     self.model.survival_tensor,
                                                     "%s_ce" % self.name)
            else:
                self.ce = self.model.gen_ce(self.spend_amts,
                                            "%s_ce" % self.name)
def mse_loss(pred, data):
    return tf.reduce_mean(tf.reduce_sum(tf.square(pred - data), axis=[3]))
def _build_lstms(self):
    # now the LSTMs
    # these will collect the initial states for the forward
    # (and reverse LSTMs if we are doing bidirectional)

    # parse the options
    lstm_dim = self.options['lstm']['dim']
    projection_dim = self.options['lstm']['projection_dim']
    n_lstm_layers = self.options['lstm'].get('n_layers', 1)
    cell_clip = self.options['lstm'].get('cell_clip')
    proj_clip = self.options['lstm'].get('proj_clip')
    use_skip_connections = self.options['lstm'].get('use_skip_connections')
    if use_skip_connections:
        print("USING SKIP CONNECTIONS")

    # the sequence lengths from input mask
    if self.use_character_inputs:
        mask = tf.reduce_any(self.ids_placeholder > 0, axis=2)
    else:
        mask = self.ids_placeholder > 0
    sequence_lengths = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
    batch_size = tf.shape(sequence_lengths)[0]

    # for each direction, we'll store tensors for each layer
    self.lstm_outputs = {'forward': [], 'backward': []}
    self.lstm_state_sizes = {'forward': [], 'backward': []}
    self.lstm_init_states = {'forward': [], 'backward': []}
    self.lstm_final_states = {'forward': [], 'backward': []}

    update_ops = []
    for direction in ['forward', 'backward']:
        if direction == 'forward':
            layer_input = self.embedding
        else:
            layer_input = tf.reverse_sequence(
                self.embedding,
                sequence_lengths,
                seq_axis=1,
                batch_axis=0
            )

        for i in range(n_lstm_layers):
            if projection_dim < lstm_dim:
                # are projecting down output
                lstm_cell = tf.nn.rnn_cell.LSTMCell(
                    lstm_dim, num_proj=projection_dim,
                    cell_clip=cell_clip, proj_clip=proj_clip)
            else:
                lstm_cell = tf.nn.rnn_cell.LSTMCell(
                    lstm_dim,
                    cell_clip=cell_clip, proj_clip=proj_clip)

            if use_skip_connections:
                # ResidualWrapper adds inputs to outputs
                if i == 0:
                    # don't add skip connection from token embedding to
                    # 1st layer output
                    pass
                else:
                    # add a skip connection
                    lstm_cell = tf.nn.rnn_cell.ResidualWrapper(lstm_cell)

            # collect the input state, run the dynamic rnn, collect
            # the output
            state_size = lstm_cell.state_size
            # the LSTMs are stateful.  To support multiple batch sizes,
            # we'll allocate size for states up to max_batch_size,
            # then use the first batch_size entries for each batch
            init_states = [
                tf.Variable(
                    tf.zeros([self._max_batch_size, dim]),
                    trainable=False
                )
                for dim in lstm_cell.state_size
            ]
            batch_init_states = [
                state[:batch_size, :] for state in init_states
            ]

            if direction == 'forward':
                i_direction = 0
            else:
                i_direction = 1
            variable_scope_name = 'RNN_{0}/RNN/MultiRNNCell/Cell{1}'.format(
                i_direction, i)
            with tf.variable_scope(variable_scope_name):
                layer_output, final_state = tf.nn.dynamic_rnn(
                    lstm_cell,
                    layer_input,
                    sequence_length=sequence_lengths,
                    initial_state=tf.nn.rnn_cell.LSTMStateTuple(
                        *batch_init_states),
                )

            self.lstm_state_sizes[direction].append(lstm_cell.state_size)
            self.lstm_init_states[direction].append(init_states)
            self.lstm_final_states[direction].append(final_state)
            if direction == 'forward':
                self.lstm_outputs[direction].append(layer_output)
            else:
                self.lstm_outputs[direction].append(
                    tf.reverse_sequence(
                        layer_output,
                        sequence_lengths,
                        seq_axis=1,
                        batch_axis=0
                    )
                )

            with tf.control_dependencies([layer_output]):
                # update the initial states
                for i in range(2):
                    new_state = tf.concat(
                        [final_state[i][:batch_size, :],
                         init_states[i][batch_size:, :]], axis=0)
                    state_update_op = tf.assign(init_states[i], new_state)
                    update_ops.append(state_update_op)

            layer_input = layer_output

    self.mask = mask
    self.sequence_lengths = sequence_lengths
    self.update_state_op = tf.group(*update_ops)
eta = tf.constant(0.5) step = [ tf.assign(w_1, tf.subtract(w_1, tf.multiply(eta, d_w_1))) , tf.assign(b_1, tf.subtract(b_1, tf.multiply(eta, tf.reduce_mean(d_b_1, axis=[0])))) , tf.assign(w_2, tf.subtract(w_2, tf.multiply(eta, d_w_2))) , tf.assign(b_2, tf.subtract(b_2, tf.multiply(eta, tf.reduce_mean(d_b_2, axis=[0])))) ] acct_mat = tf.equal(tf.argmax(a_2, 1), tf.argmax(y, 1)) acct_res = tf.reduce_sum(tf.cast(acct_mat, tf.float32)) sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) res = '' for i in range(10000): batch_xs, batch_ys = mnist.train.next_batch(10) sess.run(step, feed_dict={a_0: batch_xs, y: batch_ys}) if i % 1000 == 0: res = sess.run(acct_res, feed_dict={a_0: mnist.test.images[:1000], y: mnist.test.labels[:1000]}) print(res)
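# Hedged aside (not from the original script): the hand-rolled assign ops above
# implement plain gradient descent, so with a scalar loss already in the graph
# the same step is usually a one-liner. The `loss` choice below is hypothetical.
loss = tf.reduce_mean(tf.squared_difference(a_2, y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(
    loss, var_list=[w_1, b_1, w_2, b_2])
# sess.run(train_step, feed_dict={a_0: batch_xs, y: batch_ys})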
def createModel(input_data, in_vocabulary_size, sequence_length, slots, slot_size, intent_size, layer_size=128, isTraining=True, embed_dim=64): cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size) cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size) if isTraining == True: cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5, output_keep_prob=0.5) cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5, output_keep_prob=0.5) if arg.use_bert: # we already have the embeddings in this case inputs = input_data else: if arg.embedding_path: embeddings_dict = load_embedding(arg.embedding_path) word_alphabet = create_full_vocabulary() embeddings_weight = build_embedd_table(word_alphabet, embeddings_dict, embedd_dim=embed_dim, caseless=True) embedding = tf.get_variable( name="embedding", shape=embeddings_weight.shape, initializer=tf.constant_initializer(embeddings_weight), trainable=True) else: embedding = tf.get_variable('embedding', [in_vocabulary_size, embed_dim]) print("embedding shape", embedding.shape) inputs = tf.nn.embedding_lookup(embedding, input_data) state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, inputs, sequence_length=sequence_length, dtype=tf.float32) final_state = tf.concat([ final_state[0][0], final_state[0][1], final_state[1][0], final_state[1][1] ], 1) state_outputs = tf.concat([state_outputs[0], state_outputs[1]], 2) state_shape = state_outputs.get_shape() with tf.variable_scope('attention'): slot_inputs = state_outputs if not remove_slot_attn: with tf.variable_scope('slot_attn'): attn_size = state_shape[2].value origin_shape = tf.shape(state_outputs) hidden = tf.expand_dims(state_outputs, 1) hidden_conv = tf.expand_dims(state_outputs, 2) k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size]) hidden_features = tf.nn.conv2d(hidden_conv, k, [1, 1, 1, 1], "SAME") hidden_features = tf.reshape(hidden_features, origin_shape) hidden_features = tf.expand_dims(hidden_features, 1) v = tf.get_variable("AttnV", [attn_size]) slot_inputs_shape = tf.shape(slot_inputs) slot_inputs = tf.reshape(slot_inputs, [-1, attn_size]) y = core_rnn_cell._linear(slot_inputs, attn_size, True) y = tf.reshape(y, slot_inputs_shape) y = tf.expand_dims(y, 2) s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [3]) a = tf.nn.softmax(s) a = tf.expand_dims(a, -1) slot_d = tf.reduce_sum(a * hidden, [2]) slot_reinforce_state = tf.expand_dims(slot_d, 2) else: attn_size = state_shape[2].value slot_d = slot_inputs slot_reinforce_state = tf.expand_dims(slot_inputs, 2) slot_inputs = tf.reshape(slot_inputs, [-1, attn_size]) intent_input = final_state with tf.variable_scope('intent_attn'): attn_size = state_shape[2].value hidden = tf.expand_dims(state_outputs, 2) k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size]) hidden_features = tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME") v = tf.get_variable("AttnV", [attn_size]) y = core_rnn_cell._linear(intent_input, attn_size, True) y = tf.reshape(y, [-1, 1, 1, attn_size]) s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [2, 3]) a = tf.nn.softmax(s) a = tf.expand_dims(a, -1) a = tf.expand_dims(a, -1) d = tf.reduce_sum(a * hidden, [1, 2]) r_intent = d intent_context_states = d if arg.priority_order == 'intent_first': for n in range(arg.iteration_num): with tf.variable_scope('intent_subnet' + str(n - 1)): attn_size = state_shape[2].value hidden = tf.expand_dims(state_outputs, 2) k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size]) k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size]) slot_reinforce_features = 
tf.nn.conv2d( slot_reinforce_state, k1, [1, 1, 1, 1], "SAME") hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME") v1 = tf.get_variable("AttnV", [attn_size]) bias = tf.get_variable("Bias", [attn_size]) s = tf.reduce_sum( v1 * tf.tanh(hidden_features + slot_reinforce_features + bias), [2, 3]) a = tf.nn.softmax(s) a = tf.expand_dims(a, -1) a = tf.expand_dims(a, -1) r = tf.reduce_sum(a * slot_reinforce_state, [1, 2]) r_intent = r + intent_context_states intent_output = tf.concat([r_intent, intent_input], 1) with tf.variable_scope('slot_subnet' + str(n - 1)): intent_gate = core_rnn_cell._linear( r_intent, attn_size, True) intent_gate = tf.reshape( intent_gate, [-1, 1, intent_gate.get_shape()[1].value]) v1 = tf.get_variable("gateV", [attn_size]) relation_factor = v1 * tf.tanh(slot_d + intent_gate) relation_factor = tf.reduce_sum(relation_factor, [2]) relation_factor = tf.expand_dims(relation_factor, -1) slot_reinforce_state1 = slot_d * relation_factor slot_reinforce_state = tf.expand_dims( slot_reinforce_state1, 2) slot_reinforce_vector = tf.reshape(slot_reinforce_state1, [-1, attn_size]) slot_output = tf.concat( [slot_reinforce_vector, slot_inputs], 1) else: for n in range(arg.iteration_num): with tf.variable_scope('slot_subnet' + str(n - 1)): intent_gate = core_rnn_cell._linear( r_intent, attn_size, True) intent_gate = tf.reshape( intent_gate, [-1, 1, intent_gate.get_shape()[1].value]) v1 = tf.get_variable("gateV", [attn_size]) relation_factor = v1 * tf.tanh(slot_d + intent_gate) relation_factor = tf.reduce_sum(relation_factor, [2]) relation_factor = tf.expand_dims(relation_factor, -1) slot_reinforce_state = slot_d * relation_factor slot_reinforce_vector = tf.reshape(slot_reinforce_state, [-1, attn_size]) slot_output = tf.concat( [slot_reinforce_vector, slot_inputs], 1) with tf.variable_scope('intent_subnet' + str(n - 1)): attn_size = state_shape[2].value hidden = tf.expand_dims(state_outputs, 2) slot_reinforce_output = tf.expand_dims( slot_reinforce_state, 2) k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size]) k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size]) slot_features = tf.nn.conv2d(slot_reinforce_output, k1, [1, 1, 1, 1], "SAME") hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME") v1 = tf.get_variable("AttnV", [attn_size]) bias = tf.get_variable("Bias", [attn_size]) s = tf.reduce_sum( v1 * tf.tanh(hidden_features + slot_features + bias), [2, 3]) a = tf.nn.softmax(s) a = tf.expand_dims(a, -1) a = tf.expand_dims(a, -1) r = tf.reduce_sum(a * slot_reinforce_output, [1, 2]) r_intent = r + intent_context_states intent_output = tf.concat([r_intent, intent_input], 1) with tf.variable_scope('intent_proj'): intent = core_rnn_cell._linear(intent_output, intent_size, True) with tf.variable_scope('slot_proj'): slot = core_rnn_cell._linear(slot_output, slot_size, True) if arg.use_crf: nstep = tf.shape(state_outputs)[1] slot = tf.reshape(slot, [-1, nstep, slot_size]) outputs = [slot, intent] return outputs
def decoder(z, reuse=False): with tf.variable_scope('decoder', reuse=reuse): fc1 = fc_tanh(z, 1024) fc2 = fc_tanh(fc1, 1024) logits = tf.contrib.layers.fully_connected(fc2, 784, activation_fn=tf.identity) return logits # Build the computation graph for training z_dim = args.z x_dim = [28, 28, 1] train_x = tf.placeholder(tf.float32, shape=[None] + [784]) train_zmean, train_zstddev = encoder(train_x, z_dim) train_z = train_zmean + tf.multiply(train_zstddev, tf.random_normal(tf.stack([tf.shape(train_x)[0], z_dim]))) zstddev_logdet = tf.reduce_mean(tf.reduce_sum(2.0 * tf.log(train_zstddev + 1e-8), axis=1)) train_xmean = decoder(train_z) # Build the computation graph for generating samples gen_z = tf.placeholder(tf.float32, shape=[None, z_dim]) gen_xmean = decoder(gen_z, reuse=True) def compute_kernel(x, y): x_size = tf.shape(x)[0] y_size = tf.shape(y)[0] dim = tf.shape(x)[1] tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1])) tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1])) return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))
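# In MMD-VAE-style models, compute_kernel is typically folded into a maximum
# mean discrepancy estimate such as the sketch below; this pairing is an
# assumption here, not necessarily the original file's code.
def compute_mmd(x, y):
    x_kernel = compute_kernel(x, x)
    y_kernel = compute_kernel(y, y)
    xy_kernel = compute_kernel(x, y)
    return (tf.reduce_mean(x_kernel) + tf.reduce_mean(y_kernel)
            - 2 * tf.reduce_mean(xy_kernel))

# e.g. a regularizer matching the aggregate posterior to the prior:
# mmd_loss = compute_mmd(tf.random_normal(tf.stack([200, z_dim])), train_z)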
#scenario 3: with 1 GB data set #pd_1 = pd.read_csv('/user-home/1001/data/gdelt1gb.csv', header=None, index_col=0) #scenario 4: with 15GB data set (CPU only) #pd_1 = pd.read_csv('/user-home/1001/data/gdelt-skgm-300-16-8_v2.csv', header=None, index_col=0) df_1 = pd_1.as_matrix() df_ph = tf.placeholder(tf.float64, shape=pd_1.shape) points = tf.get_variable("points", shape=pd_1.shape, dtype=tf.float64, initializer=tf.zeros_initializer()) centroids = tf.get_variable("centroids", shape=[clusters_n, pd_1.shape[1]], dtype=tf.float64, initializer=tf.zeros_initializer()) points_expanded = tf.expand_dims(points, 0) centroids_expanded = tf.expand_dims(centroids.initialized_value(), 1) distances = tf.reduce_sum(tf.square(tf.subtract(points_expanded, centroids_expanded)), 2) assignments = tf.argmin(distances, 0) assignments = tf.to_int32(assignments) partitions = tf.dynamic_partition(points, assignments, clusters_n) new_centroids = tf.concat([tf.expand_dims(tf.reduce_mean(partition, 0), 0) for partition in partitions], 0) update_centroids = tf.assign(centroids, new_centroids) init = tf.global_variables_initializer() # Want TensorFlow to not allocate "all of the memory" for the GPUs visible to it from keras import backend as K config = tf.ConfigProto() config.allow_soft_placement=True config.gpu_options.allow_growth=True
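# A minimal driver loop for the k-means ops above (a sketch; the original
# session code is not shown, and the iteration count is an assumption). In
# practice the centroids would also be seeded, e.g. from a random data subset,
# since they are zero-initialized here.
assign_points = tf.assign(points, df_ph)
with tf.Session(config=config) as sess:
    sess.run(init)
    sess.run(assign_points, feed_dict={df_ph: df_1})
    for _ in range(100):  # hypothetical number of k-means iterations
        sess.run(update_centroids)
    print(sess.run(centroids))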
embed_dim=arg.embed_dim) slots_shape = tf.shape(slots) slots_reshape = tf.reshape(slots, [-1]) slot_outputs = training_outputs[0] with tf.variable_scope('slot_loss'): if arg.use_crf: log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood( slot_outputs, slots, sequence_length) slot_loss = tf.reduce_mean(-log_likelihood) else: crossent = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=slots_reshape, logits=slot_outputs) crossent = tf.reshape(crossent, slots_shape) slot_loss = tf.reduce_sum(crossent * slot_weights, 1) total_size = tf.reduce_sum(slot_weights, 1) total_size += 1e-12 slot_loss = slot_loss / total_size intent_output = training_outputs[1] with tf.variable_scope('intent_loss'): crossent = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=intent, logits=intent_output) intent_loss = tf.reduce_sum(crossent) / tf.cast(arg.batch_size, tf.float32) params = tf.trainable_variables() learning_rate = tf.train.exponential_decay(arg.learning_rate, global_step, arg.decay_steps, arg.decay_rate, staircase=False)
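# The slot and intent losses are then typically combined into one objective (a
# sketch; the plain sum and the Adam choice are assumptions, not necessarily
# what this script does downstream):
loss = tf.reduce_mean(slot_loss) + intent_loss
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss, global_step=global_step, var_list=params)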
# Compute the average NCE loss for the batch. # tf.nce_loss automatically draws a new sample of the negative labels each # time we evaluate the loss. loss = tf.reduce_mean( tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, labels=train_labels, inputs=embed, num_sampled=num_sampled, num_classes=vocabulary_size)) # Construct the SGD optimizer using a learning rate of 1.0. optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) # Compute the cosine similarity between minibatch examples and all embeddings. norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) normalized_embeddings = embeddings / norm valid_embeddings = tf.nn.embedding_lookup( normalized_embeddings, valid_dataset) similarity = tf.matmul( valid_embeddings, normalized_embeddings, transpose_b=True) # Add variable initializer. init = tf.global_variables_initializer() # Step 5: Begin training. num_steps = 100001 with tf.Session(graph=graph) as session: # We must initialize all variables before we use them. init.run()
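# The loop below sketches how training usually continues inside the session
# block above; generate_batch, train_inputs, batch_size, num_skips, and
# skip_window follow the standard word2vec tutorial and are assumptions here.
  average_loss = 0
  for step in range(num_steps):
    batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)
    _, loss_val = session.run([optimizer, loss],
                              feed_dict={train_inputs: batch_inputs,
                                         train_labels: batch_labels})
    average_loss += loss_val
    if step > 0 and step % 2000 == 0:
      print('Average loss at step', step, ':', average_loss / 2000)
      average_loss = 0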
from tensorflow.examples.tutorials.mnist import input_data import tensorflow as tf # Load the data mnist = input_data.read_data_sets('/tmp/tensorflow/mnist/input_data', one_hot=True) # Define the model x = tf.placeholder(tf.float32, [None, 784]) y_ = tf.placeholder(tf.float32, [None, 10]) W = tf.Variable(tf.zeros([784, 10])) b = tf.Variable(tf.zeros([10])) y = tf.nn.softmax(tf.matmul(x, W) + b) # bias weights # Define the cross-entropy loss and the optimizer learning_rate = 0.5 cross_entropy = tf.reduce_mean( -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) train_step = tf.train.GradientDescentOptimizer( learning_rate=learning_rate).minimize(cross_entropy)
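# The usual next step for this tutorial graph (a sketch; the training loop is
# not part of the excerpt above):
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))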
def loss_fn(x, t, w): """Weighted hinge loss: maps {0, 1} targets t to {-1, +1} and returns sum(w * max(0, 1 - t * x)).""" t = tf.multiply(2.0, t) - 1 costs = tf.maximum(0.0, 1.0 - tf.multiply(t, x)) weighted_costs = tf.multiply(costs, w) return tf.reduce_sum(weighted_costs)
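# Quick numeric check of loss_fn (illustrative): targets in {0, 1} map to
# {-1, +1}, so t * x = [2.0, 0.5] and the hinge costs are [0.0, 0.5].
with tf.Session() as check_sess:
    print(check_sess.run(loss_fn(tf.constant([2.0, -0.5]),
                                 tf.constant([1.0, 0.0]),
                                 tf.constant([1.0, 1.0]))))  # -> 0.5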
# We need an operation to copy the online DQN to the target DQN copy_ops = [ target_var.assign(online_vars[var_name]) for var_name, target_var in target_vars.items() ] copy_online_to_target = tf.group(*copy_ops) # Now for the training operations learning_rate = 0.001 momentum = 0.95 with tf.variable_scope("train"): X_action = tf.placeholder(tf.int32, shape=[None]) y = tf.placeholder(tf.float32, shape=[None, 1]) q_value = tf.reduce_sum(online_q_values * tf.one_hot(X_action, n_outputs), axis=1, keep_dims=True) error = tf.abs(y - q_value) clipped_error = tf.clip_by_value(error, 0.0, 1.0) linear_error = 2 * (error - clipped_error) loss = tf.reduce_mean(tf.square(clipped_error) + linear_error) global_step = tf.Variable(0, trainable=False, name='global_step') optimizer = tf.train.MomentumOptimizer(learning_rate, momentum, use_nesterov=True) training_op = optimizer.minimize(loss, global_step=global_step) init = tf.global_variables_initializer() saver = tf.train.Saver()
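# A sketch of how these ops are typically driven (sample_memories, X_state,
# target_q_values, discount_rate, batch_size, copy_steps, and n_training_steps
# are assumed helpers/tensors defined elsewhere in such a script, not part of
# the excerpt above):
import numpy as np
with tf.Session() as sess:
    init.run()
    copy_online_to_target.run()  # start with identical online/target nets
    for step in range(n_training_steps):  # hypothetical count
        X_state_val, X_action_val, rewards, X_next_state_val, continues = (
            sample_memories(batch_size))
        next_q = target_q_values.eval(feed_dict={X_state: X_next_state_val})
        y_val = rewards + continues * discount_rate * np.max(next_q, axis=1,
                                                             keepdims=True)
        training_op.run(feed_dict={X_state: X_state_val,
                                   X_action: X_action_val, y: y_val})
        if step % copy_steps == 0:
            copy_online_to_target.run()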
def policy_activation(self, predictions): """Mixes per-policy activations with a learned meta-policy: stacks each policy's output and returns their meta-policy-weighted sum.""" policies = tf.stack([policy(predictions) for policy in self.policies], axis=-1) meta_policy = tf.reshape(self.meta_policy(), [1, 1, self.num_policies()]) return tf.reduce_sum(policies * meta_policy, axis=-1)
final_output = tf.reshape(train_outputs, [-1, num_nodes[-1]]) # Computing logits logits = tf.matmul(final_output, w) + b # Computing predictions train_prediction = tf.nn.softmax(logits) # Reshape logits to time-major fashion [seq_len, batch_size, vocabulary_size] time_major_train_logits = tf.reshape(logits, [FLAGS.seq_len, FLAGS.batch_size, -1]) # We create train labels in a time major fashion [seq_len, batch_size, vocabulary_size] # so that this could be used with the loss function time_major_train_labels = tf.reshape(tf.concat(train_labels, axis=0), [FLAGS.seq_len, FLAGS.batch_size]) # Perplexity related operation train_perplexity_without_exp = tf.reduce_sum(tf.concat(train_labels_ohe, 0) * -tf.log(train_prediction + 1e-10)) / ( FLAGS.seq_len * FLAGS.batch_size) # ========================================================= # Validation inference logic # Separate state for validation data initial_valid_state = stacked_cell.zero_state(1, dtype=tf.float32) # Validation input related LSTM computation valid_outputs, initial_valid_state = tf.nn.dynamic_rnn( stacked_cell, tf.expand_dims(valid_inputs_embeds, 0), time_major=True, initial_state=initial_valid_state ) # Reshape the final outputs to [1, num_nodes]
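# Perplexity itself is just the exponential of the mean cross-entropy computed
# above; taking the exp is usually done at logging/evaluation time:
train_perplexity = tf.exp(train_perplexity_without_exp)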
def build(self): # ========== eager execution problem in tf2 ============= # create the variable inputs and targets self.inputs = tf.compat.v1.placeholder(tf.int32, [None, self.max_time_steps],name='inputs') self.targets = tf.compat.v1.placeholder(tf.int32, [None, self.max_time_steps],name='targets') # self.inputs = tf.keras.Input(shape=[None, self.max_time_steps],dtype = tf.dtypes.int32) # self.targets = tf.keras.Input(shape=[None, self.max_time_steps],dtype = tf.dtypes.int32) # look up embeddings for the given ids from a list of tensors # self.inputs_emb = tf.nn.embedding_lookup(params=self.embedding, ids=self.inputs) self.inputs_emb = tf.nn.embedding_lookup(params=self.embedding, ids=self.inputs) self.inputs_emb = tf.transpose(a=self.inputs_emb, perm=[1, 0, 2]) self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.emb_dim]) self.inputs_emb = tf.split(self.inputs_emb, self.max_time_steps, 0) # ================ lstm cell ================= if self.biderectional: lstm_cell_fw = self.cell lstm_cell_bw = self.cell # dropout if self.is_training: lstm_cell_fw = tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm_cell_fw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_bw = tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm_cell_bw, output_keep_prob=(1 - self.dropout_rate)) lstm_cell_fw = tf.compat.v1.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] * self.num_layers) lstm_cell_bw = tf.compat.v1.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] * self.num_layers) # get the length of each sample # self.length = tf.reduce_sum(input_tensor=tf.sign(self.inputs), axis=1) self.length = tf.reduce_sum(input_tensor=tf.sign(self.inputs), axis=1) self.length = tf.cast(self.length, tf.int32) # forward and backward # outputs, _, _ = tf.compat.v1.nn.rnn_cell.static_bidirectional_rnn( outputs, _, _ = tf.compat.v1.nn.static_bidirectional_rnn( lstm_cell_fw, lstm_cell_bw, self.inputs_emb, dtype=tf.float32, sequence_length=self.length ) else: lstm_cell = self.cell if self.is_training: lstm_cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=(1 - self.dropout_rate)) lstm_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers) self.length = tf.reduce_sum(input_tensor=tf.sign(self.inputs), axis=1) self.length = tf.cast(self.length, tf.int32) outputs, _ = tf.compat.v1.nn.static_rnn( lstm_cell, self.inputs_emb, dtype=tf.float32, sequence_length=self.length ) # outputs: list_steps[batch, 2*dim] outputs = tf.concat(outputs, 1) outputs = tf.reshape(outputs, [self.batch_size, self.max_time_steps, self.hidden_dim * 2]) # self attention module if self.is_attention: H1 = tf.reshape(outputs, [-1, self.hidden_dim * 2]) W_a1 = tf.compat.v1.get_variable("W_a1", shape=[self.hidden_dim * 2, self.attention_dim], initializer=self.initializer, trainable=True) u1 = tf.matmul(H1, W_a1) H2 = tf.reshape(tf.identity(outputs), [-1, self.hidden_dim * 2]) W_a2 = tf.compat.v1.get_variable("W_a2", shape=[self.hidden_dim * 2, self.attention_dim], initializer=self.initializer, trainable=True) u2 = tf.matmul(H2, W_a2) u1 = tf.reshape(u1, [self.batch_size, self.max_time_steps, self.attention_dim]) u2 = tf.reshape(u2, [self.batch_size, self.max_time_steps, self.attention_dim]) u = tf.matmul(u1, u2, transpose_b=True) # Array of weights for each time step A = tf.nn.softmax(u, name="attention") outputs = tf.matmul(A, tf.reshape(tf.identity(outputs), [self.batch_size, self.max_time_steps, self.hidden_dim * 2])) # linear self.outputs = tf.reshape(outputs, [-1, self.hidden_dim * 2]) self.softmax_w = tf.compat.v1.get_variable("softmax_w", [self.hidden_dim * 2, self.num_classes], initializer=self.initializer) self.softmax_b = tf.compat.v1.get_variable("softmax_b", [self.num_classes], initializer=self.initializer) self.logits = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b self.logits = tf.reshape(self.logits, [self.batch_size, self.max_time_steps, self.num_classes]) # print(self.logits.get_shape().as_list()) if not self.is_crf: # softmax softmax_out = tf.nn.softmax(self.logits, axis=-1) self.batch_pred_sequence = tf.cast(tf.argmax(input=softmax_out, axis=-1), tf.int32) losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.targets) mask = tf.sequence_mask(self.length) self.losses = tf.boolean_mask(tensor=losses, mask=mask) self.loss = tf.reduce_mean(input_tensor=self.losses) else: # crf # self.log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood( # self.logits, self.targets, self.length) self.log_likelihood, self.transition_params = tfa.text.crf.crf_log_likelihood( self.logits, self.targets, self.length) self.batch_pred_sequence, self.batch_viterbi_score = tfa.text.crf.crf_decode(self.logits, self.transition_params, self.length) self.loss = tf.reduce_mean(input_tensor=-self.log_likelihood) self.train_summary = tf.compat.v1.summary.scalar("loss", self.loss) self.dev_summary = tf.compat.v1.summary.scalar("loss", self.loss) self.opt_op = self.optimizer.minimize(self.loss, global_step=self.global_step)
def learn( make_env, make_policy, *, n_episodes, horizon, delta, gamma, max_iters, sampler=None, use_natural_gradient=False, #can be 'exact', 'approximate' fisher_reg=1e-2, iw_method='is', iw_norm='none', bound='J', line_search_type='parabola', save_weights=0, improvement_tol=0., center_return=False, render_after=None, max_offline_iters=100, callback=None, clipping=False, entropy='none', positive_return=False, reward_clustering='none', capacity=10): np.set_printoptions(precision=3) max_samples = horizon * n_episodes if line_search_type == 'binary': line_search = line_search_binary elif line_search_type == 'parabola': line_search = line_search_parabola else: raise ValueError() # Building the environment env = make_env() ob_space = env.observation_space ac_space = env.action_space # Creating the memory buffer memory = Memory(capacity=capacity, batch_size=n_episodes, horizon=horizon, ob_space=ob_space, ac_space=ac_space) # Building the target policy and saving its parameters pi = make_policy('pi', ob_space, ac_space) all_var_list = pi.get_trainable_variables() var_list = [ v for v in all_var_list if v.name.split('/')[1].startswith('pol') ] shapes = [U.intprod(var.get_shape().as_list()) for var in var_list] n_parameters = sum(shapes) # Building a set of behavioral policies behavioral_policies = memory.build_policies(make_policy, pi) # Placeholders ob_ = ob = U.get_placeholder_cached(name='ob') ac_ = pi.pdtype.sample_placeholder([None], name='ac') mask_ = tf.placeholder(dtype=tf.float32, shape=(None), name='mask') rew_ = tf.placeholder(dtype=tf.float32, shape=(None), name='rew') disc_rew_ = tf.placeholder(dtype=tf.float32, shape=(None), name='disc_rew') clustered_rew_ = tf.placeholder(dtype=tf.float32, shape=(None)) gradient_ = tf.placeholder(dtype=tf.float32, shape=(n_parameters, 1), name='gradient') iter_number_ = tf.placeholder(dtype=tf.int32, name='iter_number') active_policies = tf.placeholder(dtype=tf.float32, shape=(capacity), name='active_policies') losses_with_name = [] # Total number of trajectories N_total = tf.reduce_sum(active_policies) * n_episodes # Split operations disc_rew_split = tf.reshape(disc_rew_ * mask_, [-1, horizon]) rew_split = tf.reshape(rew_ * mask_, [-1, horizon]) mask_split = tf.reshape(mask_, [-1, horizon]) # Policy densities target_log_pdf = pi.pd.logp(ac_) * mask_ target_log_pdf_split = tf.reshape(target_log_pdf, [-1, horizon]) behavioral_log_pdfs = tf.stack([ bpi.pd.logp(ac_) * mask_ for bpi in memory.policies ]) # Shape is (capacity, ntraj*horizon) behavioral_log_pdfs_split = tf.reshape(behavioral_log_pdfs, [memory.capacity, -1, horizon]) # Compute renyi divergencies and sum over time, then exponentiate emp_d2_split = tf.reshape( tf.stack([pi.pd.renyi(bpi.pd, 2) * mask_ for bpi in memory.policies]), [memory.capacity, -1, horizon]) emp_d2_split_cum = tf.exp(tf.reduce_sum(emp_d2_split, axis=2)) # Compute arithmetic and harmonic mean of emp_d2 emp_d2_mean = tf.reduce_mean(emp_d2_split_cum, axis=1) emp_d2_arithmetic = tf.reduce_sum( emp_d2_mean * active_policies) / tf.reduce_sum(active_policies) emp_d2_harmonic = tf.reduce_sum(active_policies) / tf.reduce_sum( 1 / emp_d2_mean) # Return processing: clipping, centering, discounting ep_return = clustered_rew_ #tf.reduce_sum(mask_split * disc_rew_split, axis=1) if clipping: rew_split = tf.clip_by_value(rew_split, -1, 1) if center_return: ep_return = ep_return - tf.reduce_mean(ep_return) rew_split = rew_split - (tf.reduce_sum(rew_split) / (tf.reduce_sum(mask_split) + 1e-24)) discounter = [pow(gamma, i) for i in 
range(0, horizon)] # Decreasing gamma discounter_tf = tf.constant(discounter) disc_rew_split = rew_split * discounter_tf # Reward statistics return_mean = tf.reduce_mean(ep_return) return_std = U.reduce_std(ep_return) return_max = tf.reduce_max(ep_return) return_min = tf.reduce_min(ep_return) return_abs_max = tf.reduce_max(tf.abs(ep_return)) return_step_max = tf.reduce_max(tf.abs(rew_split)) # Max step reward return_step_mean = tf.abs(tf.reduce_mean(rew_split)) positive_step_return_max = tf.maximum(0.0, tf.reduce_max(rew_split)) negative_step_return_max = tf.maximum(0.0, tf.reduce_max(-rew_split)) return_step_maxmin = tf.abs(positive_step_return_max - negative_step_return_max) losses_with_name.extend([(return_mean, 'InitialReturnMean'), (return_max, 'InitialReturnMax'), (return_min, 'InitialReturnMin'), (return_std, 'InitialReturnStd'), (emp_d2_arithmetic, 'EmpiricalD2Arithmetic'), (emp_d2_harmonic, 'EmpiricalD2Harmonic'), (return_step_max, 'ReturnStepMax'), (return_step_maxmin, 'ReturnStepMaxmin')]) if iw_method == 'is': # Sum the log prob over time. Shapes: target(Nep, H), behav (Cap, Nep, H) target_log_pdf_episode = tf.reduce_sum(target_log_pdf_split, axis=1) behavioral_log_pdf_episode = tf.reduce_sum(behavioral_log_pdfs_split, axis=2) # To avoid numerical instability, compute the inversed ratio log_inverse_ratio = behavioral_log_pdf_episode - target_log_pdf_episode abc = tf.exp(log_inverse_ratio) * tf.expand_dims(active_policies, -1) iw = 1 / tf.reduce_sum( tf.exp(log_inverse_ratio) * tf.expand_dims(active_policies, -1), axis=0) iwn = iw / n_episodes # Compute the J w_return_mean = tf.reduce_sum(ep_return * iwn) # Empirical D2 of the mixture and relative ESS ess_renyi_arithmetic = N_total / emp_d2_arithmetic ess_renyi_harmonic = N_total / emp_d2_harmonic # Log quantities losses_with_name.extend([ (tf.reduce_max(iw), 'MaxIW'), (tf.reduce_min(iw), 'MinIW'), (tf.reduce_mean(iw), 'MeanIW'), (U.reduce_std(iw), 'StdIW'), (tf.reduce_min(target_log_pdf_episode), 'MinTargetPdf'), (tf.reduce_min(behavioral_log_pdf_episode), 'MinBehavPdf'), (ess_renyi_arithmetic, 'ESSRenyiArithmetic'), (ess_renyi_harmonic, 'ESSRenyiHarmonic') ]) else: raise NotImplementedError() if bound == 'J': bound_ = w_return_mean elif bound == 'max-d2-harmonic': bound_ = w_return_mean - tf.sqrt( (1 - delta) / (delta * ess_renyi_harmonic)) * return_abs_max elif bound == 'max-d2-arithmetic': bound_ = w_return_mean - tf.sqrt( (1 - delta) / (delta * ess_renyi_arithmetic)) * return_abs_max else: raise NotImplementedError() # Policy entropy for exploration ent = pi.pd.entropy() meanent = tf.reduce_mean(ent) losses_with_name.append((meanent, 'MeanEntropy')) # Add policy entropy bonus if entropy != 'none': scheme, v1, v2 = entropy.split(':') if scheme == 'step': entcoeff = tf.cond(iter_number_ < int(v2), lambda: float(v1), lambda: float(0.0)) losses_with_name.append((entcoeff, 'EntropyCoefficient')) entbonus = entcoeff * meanent bound_ = bound_ + entbonus elif scheme == 'lin': ip = tf.cast(iter_number_ / max_iters, tf.float32) entcoeff_decay = tf.maximum( 0.0, float(v2) + (float(v1) - float(v2)) * (1.0 - ip)) losses_with_name.append((entcoeff_decay, 'EntropyCoefficient')) entbonus = entcoeff_decay * meanent bound_ = bound_ + entbonus elif scheme == 'exp': ent_f = tf.exp( -tf.abs(tf.reduce_mean(iw) - 1) * float(v2)) * float(v1) losses_with_name.append((ent_f, 'EntropyCoefficient')) bound_ = bound_ + ent_f * meanent else: raise Exception('Unrecognized entropy scheme.') losses_with_name.append((w_return_mean, 'ReturnMeanIW')) 
losses_with_name.append((bound_, 'Bound')) losses, loss_names = map(list, zip(*losses_with_name)) ''' if use_natural_gradient: p = tf.placeholder(dtype=tf.float32, shape=[None]) target_logpdf_episode = tf.reduce_sum(target_log_pdf_split * mask_split, axis=1) grad_logprob = U.flatgrad(tf.stop_gradient(iwn) * target_logpdf_episode, var_list) dot_product = tf.reduce_sum(grad_logprob * p) hess_logprob = U.flatgrad(dot_product, var_list) compute_linear_operator = U.function([p, ob_, ac_, disc_rew_, mask_], [-hess_logprob]) ''' assert_ops = tf.group(*tf.get_collection('asserts')) print_ops = tf.group(*tf.get_collection('prints')) compute_lossandgrad = U.function([ ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_, active_policies ], losses + [U.flatgrad(bound_, var_list), assert_ops, print_ops]) compute_grad = U.function([ ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_, active_policies ], [U.flatgrad(bound_, var_list), assert_ops, print_ops]) compute_bound = U.function([ ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_, active_policies ], [bound_, assert_ops, print_ops]) compute_losses = U.function([ ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_, active_policies ], losses) #compute_temp = U.function([ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_, active_policies], [log_inverse_ratio, abc, iw]) set_parameter = U.SetFromFlat(var_list) get_parameter = U.GetFlat(var_list) if sampler is None: seg_gen = traj_segment_generator(pi, env, n_episodes, horizon, stochastic=True) sampler = type("SequentialSampler", (object, ), { "collect": lambda self, _: seg_gen.__next__() })() U.initialize() # Starting optimizing episodes_so_far = 0 timesteps_so_far = 0 iters_so_far = 0 tstart = time.time() lenbuffer = deque(maxlen=n_episodes) rewbuffer = deque(maxlen=n_episodes) while True: iters_so_far += 1 if render_after is not None and iters_so_far % render_after == 0: if hasattr(env, 'render'): render(env, pi, horizon) if callback: callback(locals(), globals()) if iters_so_far >= max_iters: print('Finished...') break logger.log('********** Iteration %i ************' % iters_so_far) theta = get_parameter() with timed('sampling'): seg = sampler.collect(theta) add_disc_rew(seg, gamma) lens, rets = seg['ep_lens'], seg['ep_rets'] lenbuffer.extend(lens) rewbuffer.extend(rets) episodes_so_far += len(lens) timesteps_so_far += sum(lens) # Adding batch of trajectories to memory memory.add_trajectory_batch(seg) # Get multiple batches from memory seg_with_memory = memory.get_trajectories() # Get clustered reward reward_matrix = np.reshape( seg_with_memory['disc_rew'] * seg_with_memory['mask'], (-1, horizon)) ep_reward = np.sum(reward_matrix, axis=1) ep_reward = cluster_rewards(ep_reward, reward_clustering) args = ob, ac, rew, disc_rew, clustered_rew, mask, iter_number, active_policies = ( seg_with_memory['ob'], seg_with_memory['ac'], seg_with_memory['rew'], seg_with_memory['disc_rew'], ep_reward, seg_with_memory['mask'], iters_so_far, memory.get_active_policies_mask()) def evaluate_loss(): loss = compute_bound(*args) return loss[0] def evaluate_gradient(): gradient = compute_grad(*args) return gradient[0] if use_natural_gradient: def evaluate_fisher_vector_prod(x): return compute_linear_operator(x, *args)[0] + fisher_reg * x def evaluate_natural_gradient(g): return cg(evaluate_fisher_vector_prod, g, cg_iters=10, verbose=0) else: evaluate_natural_gradient = None with timed('summaries before'): logger.record_tabular("Iteration", iters_so_far) 
logger.record_tabular("InitialBound", evaluate_loss()) logger.record_tabular("EpLenMean", np.mean(lenbuffer)) logger.record_tabular("EpRewMean", np.mean(rewbuffer)) logger.record_tabular("EpThisIter", len(lens)) logger.record_tabular("EpisodesSoFar", episodes_so_far) logger.record_tabular("TimestepsSoFar", timesteps_so_far) logger.record_tabular("TimeElapsed", time.time() - tstart) if save_weights > 0 and iters_so_far % save_weights == 0: logger.record_tabular('Weights', str(get_parameter())) import pickle file = open('checkpoint' + str(iters_so_far) + '.pkl', 'wb') pickle.dump(theta, file) with timed("offline optimization"): theta, improvement = optimize_offline( theta, set_parameter, line_search, evaluate_loss, evaluate_gradient, evaluate_natural_gradient, max_offline_ite=max_offline_iters) set_parameter(theta) with timed('summaries after'): meanlosses = np.array(compute_losses(*args)) for (lossname, lossval) in zip(loss_names, meanlosses): logger.record_tabular(lossname, lossval) logger.dump_tabular() env.close()
def loss(self, predictions, policy, cfv): """Squared-error regression of predictions onto regret-matching targets derived from the counterfactual values cfv; the targets are held constant via stop_gradient.""" r = tf.stop_gradient( cpea.rm_policy(cfv - tf.reduce_sum(cfv * policy, axis=1, keepdims=True))) error = tf.square(r - predictions) / 2.0 return tf.reduce_mean(tf.reduce_sum(error, axis=1))
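# cpea.rm_policy is external to this excerpt; it presumably implements regret
# matching. A self-contained sketch of that transform (an assumption, not the
# cpea module's actual code):
def rm_policy_sketch(regrets):
    # keep only the positive regrets and normalize them into a distribution
    pos = tf.maximum(regrets, 0.0)
    return pos / (tf.reduce_sum(pos, axis=1, keepdims=True) + 1e-12)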
b_fc2 = bias_variable([labels_count]) y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) #print (y.get_shape()) # => (40000, 10) # To evaluate network performance we use [cross-entropy](https://en.wikipedia.org/wiki/Cross_entropy) and to minimise it [ADAM optimiser](http://arxiv.org/pdf/1412.6980v8.pdf) is used. # # ADAM optimiser is a gradient based optimization algorithm, based on adaptive estimates, it's more sophisticated than steepest gradient descent and is well suited for problems with large data or many parameters. # In[ ]: # cost function cross_entropy = -tf.reduce_sum(y_*tf.log(y)) # optimisation function train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cross_entropy) # evaluation correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float')) # To predict values from test data, highest probability is picked from "one-hot vector" indicating that chances of an image being one of the digits are highest. # In[ ]:
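# Picking the highest-probability class, per the note above (a one-line sketch):
predict = tf.argmax(y, 1)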
def loss(self, predictions, policy, cfv): """Policy-gradient-style loss: negative regret-weighted log-policy, with the regret weights held constant via stop_gradient and the policy clipped for numerical stability.""" r = tf.stop_gradient( cpea.rm_policy(cfv - tf.reduce_sum(cfv * policy, axis=1, keepdims=True))) log_policy = tf.log(tf.clip_by_value(policy, 1e-15, 1 - 1e-15)) return -tf.reduce_mean(tf.reduce_sum(r * log_policy, axis=1))
def built_model(self): with tf.variable_scope("position"): weight_depth = tf.get_variable( "weight_depth", shape=[MAX_DEPTH, OUTPUT_LENTH], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.1)) weight_son = tf.get_variable( "weight_son", shape=[MAX_TREE_SIZE, OUTPUT_LENTH], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.1)) gather = tf.gather(weight_son, self.position, axis=0) muti = tf.multiply(gather, weight_depth) self.position = tf.reduce_sum(muti, axis=2) with tf.variable_scope("feature_embedding"): weight = tf.get_variable( "weight", shape=[VOCAB_SIZE, OUTPUT_LENTH], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.1)) self.embedding = tf.nn.embedding_lookup(weight, self.input_tensor) with tf.variable_scope("lstm"): lstm_input = tf.concat([self.position, self.embedding], axis=-1) x = tf.unstack(lstm_input, SEQUENCE_NUM, 1) lstm_fw_cell = [] lstm_bw_cell = [] for _ in range(3): lstm_fw_cell.append( tf.contrib.rnn.BasicLSTMCell(HIDDEN_SIZE, forget_bias=1.0)) lstm_bw_cell.append( tf.contrib.rnn.BasicLSTMCell(HIDDEN_SIZE, forget_bias=1.0)) #lstm_qx = tf.contrib.rnn.BasicLSTMCell(HIDDEN_SIZE,forget_bias =1.0) #lstm_hx = tf.contrib.rnn.BasicLSTMCell(HIDDEN_SIZE,forget_bias = 1.0) mul_lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(lstm_fw_cell) mul_lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(lstm_bw_cell) output, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn( mul_lstm_fw_cell, mul_lstm_bw_cell, x, dtype=tf.float32, sequence_length=self.sequence_num) # lstm_concat = tf.concat(output,axis=-1) # lstm_concat = tf.transpose(lstm_concat,[1,0,2]) self.lstm_output = tf.concat([fw_state[-1].h, bw_state[-1].h], -1) with tf.variable_scope("fc_1"): fc = tf.layers.dense( self.lstm_output, DENSE1, activation=tf.nn.relu, ) self.fc_1 = tf.layers.dropout(fc, self.drop_rate) #1-KEEPDROP with tf.variable_scope("fc_2"): fc = tf.layers.dense(self.fc_1, DENSE2, activation=tf.nn.relu) self.fc_2 = tf.layers.dropout(fc, self.drop_rate) with tf.variable_scope("softmax_layer"): self.output_tensor = tf.layers.dense(self.fc_2, DENSE3)