def mean(mean, variance, std=False):
    '''Mean of ``f(x) = max(x, 0)`` (ReLU) when ``x`` is Gaussian.

    The arguments broadcast against each other, so a single variance can be
    paired with many means, a single mean with many variances, or both can
    be full tensors.

    Args:
        mean: Input mean of size (Batch, Size). Passing ``None`` selects the
            zero-mean shortcut and skips the ``erf``/``exp`` terms.
        variance: Input variance vector (Batch, Size) or scalar v such that
            variance = v * ones(Size).
        std: If true, `variance` is taken to already be the standard
            deviation and no square root is applied.

    Returns:
        Output mean of ReLU for general Gaussian input (Batch, Size).
    '''
    sigma = variance if std else tf.sqrt(variance)
    # Value of the expectation for a zero-mean input: sigma / sqrt(2 * pi).
    centred_term = sigma / tf.sqrt(2.0 * math.pi)
    if mean is None:
        return centred_term

    # scaled = mean / (sqrt(2) * sigma); it feeds both the density term
    # (through exp(-scaled^2)) and the CDF term (through erf).
    scaled = mean / (math.sqrt(2.0) * sigma)
    cdf_term = 0.5 * mean * (1.0 + tf.erf(scaled))
    return centred_term * tf.exp(-scaled ** 2.0) + cdf_term
def soft_triplet_loss(anchor, positive, negative, extra=True, scope="soft_triplet_loss"):
    r"""Loss for triplet networks as described in the paper:
    `Deep Metric Learning using Triplet Network
    <https://arxiv.org/abs/1412.6622>`_ by Hoffer et al.

    It is a softmax loss whose two logits are the Euclidean distances
    :math:`\|anchor-positive\|` and :math:`\|anchor-negative\|`; the target
    class is always the negative distance.

    Args:
        anchor (tf.Tensor): anchor feature vectors of shape [Batch, N].
        positive (tf.Tensor): features of positive match of the same shape.
        negative (tf.Tensor): features of negative match of the same shape.
        extra (bool): also return distances for pos and neg.
        scope (str): name scope under which the ops are created.

    Returns:
        tf.Tensor: triplet-loss as scalar (and optionally average_pos_dist, average_neg_dist)
    """
    # Small constant under the square root keeps the gradient finite when
    # two feature vectors coincide.
    eps = 1e-10
    with tf.name_scope(scope):
        d_pos = tf.sqrt(tf.reduce_sum(tf.square(anchor - positive), 1) + eps)
        d_neg = tf.sqrt(tf.reduce_sum(tf.square(anchor - negative), 1) + eps)

        logits = tf.stack([d_pos, d_neg], axis=1)
        # `d_pos` is already rank 1 after the reduction over axis 1. It must
        # not be squeezed: with a batch of one, squeezing yields a scalar and
        # the labels would no longer line up with the [1, 2] logits.
        labels = tf.ones_like(d_pos, dtype="int32")

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))

        if not extra:
            return loss

        pos_dist = tf.reduce_mean(d_pos, name='pos-dist')
        neg_dist = tf.reduce_mean(d_neg, name='neg-dist')
        return loss, pos_dist, neg_dist
def adam(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    """Build an Adam-style training op combined with Polyak averaging.

    Args:
        params: list of variables to update.
        cost_or_grads: either a list of gradients aligned with `params`, or
            a cost tensor that is differentiated with respect to `params`.
        alpha: base step size.
        hps: hyper-parameter object. The attributes `train_its`,
            `polyak_epochs`, `beta1` and `weight_decay` are read, so it is
            required in practice even though it defaults to None.
        epsilon: constant added to the root of the second moment.

    Returns:
        Tuple `(train_op, polyak_swap_op, ema)`, where the last two come
        from `polyak(params, beta2)`.
    """
    if isinstance(cost_or_grads, list):
        gs = cost_or_grads
    else:
        gs = tf.gradients(cost_or_grads, params)

    # The second-moment decay doubles as the Polyak averaging rate.
    beta2 = 1 - 1. / (hps.train_its * hps.polyak_epochs)

    # all-reduce
    grads = [Z.allreduce_mean(g) for g in gs]

    # The second positional parameter of tf.Variable is `trainable`, not
    # `name`; passing a string there silently marked the optimizer state as
    # trainable. The state is now explicitly non-trainable. Variable names
    # are left auto-generated, exactly as before, so existing checkpoints
    # keep loading.
    t = tf.Variable(1., trainable=False)
    # Step size with the usual Adam bias correction folded in.
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates = [t.assign_add(1)]

    for w, g in zip(params, grads):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), trainable=False)
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), trainable=False)
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g
            updates.append(mom1.assign(mom1_new))
        else:
            # No first-moment smoothing: use the raw gradient.
            mom1_new = g
        m2_new = beta2 * mom2 + (1. - beta2) * tf.square(g)
        delta_t = mom1_new / (tf.sqrt(m2_new) + epsilon)
        # `weight_decay` is a multiplicative factor on the weights, so a
        # value of 1.0 means no decay.
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema
def update_phis(self, ii, dirname):
    """Take one normalised gradient step on the basis `phis` and store it.

    Assumes `self.a_matr` already holds the optimal coefficients; the
    script driving this method must set them after its inner loop.

    Args:
        ii: iteration counter; the basis is pickled whenever it is a
            multiple of 100.
        dirname: directory that receives `phis.pkl`.
    """
    residual = self.data - tf.matmul(self.phis, self.a_matr)

    # Float literal so the factor is not truncated to zero by integer
    # division when `batch_size` is an int on Python 2.
    dbasis = (1.0 / self.batch_size) * tf.matmul(residual, tf.transpose(self.a_matr))
    norm_grad_basis = tf.sqrt(tf.reduce_sum(dbasis ** 2, reduction_indices=0))
    dbasis = dbasis / norm_grad_basis

    # Gradient step followed by renormalisation along axis 0.
    phis = self.phis + self.LR * dbasis
    phi_norm = tf.sqrt(tf.reduce_sum(phis ** 2.0, reduction_indices=0))
    self.phis_so_far = self.sess.run(
        phis / phi_norm,
        feed_dict={self.phis: self.phis_so_far, self.data: self.loaded_data})

    if ii % 100 == 0:
        name_of_pickle_file = dirname + "/" + "phis.pkl"
        # Context manager guarantees the handle is flushed and closed even
        # if pickling raises.
        with open(name_of_pickle_file, 'wb') as output:
            print("Now pickling phis for sparse coding")
            pickle.dump(self.phis_so_far, output)
        print("Done pickling")
        print("The value sum of active coefficients after we do learning",
              np.sum(np.abs(self.sess.run(self.a_matr))))
def batchnormalize(X, eps=1e-8, g=None, b=None):
    """Normalise `X` with statistics computed from `X` itself.

    Rank-4 inputs are reduced over axes 0, 1 and 2; rank-2 inputs over
    axis 0. When both `g` and `b` are given, the normalised tensor is
    scaled by `g` and shifted by `b` (reshaped to broadcast over the
    reduced axes).

    Raises:
        NotImplementedError: for any other input rank.
    """
    rank = X.get_shape().ndims
    if rank == 4:
        reduce_axes, param_shape = [0, 1, 2], [1, 1, 1, -1]
    elif rank == 2:
        reduce_axes, param_shape = 0, [1, -1]
    else:
        raise NotImplementedError

    center = tf.reduce_mean(X, reduce_axes)
    variance = tf.reduce_mean(tf.square(X - center), reduce_axes)
    X = (X - center) / tf.sqrt(variance + eps)

    if g is not None and b is not None:
        X = X * tf.reshape(g, param_shape) + tf.reshape(b, param_shape)
    return X
def _build_iid_normal_model(self, num_timesteps, latent_size, observation_size,
                            transition_variance, observation_variance):
    """Build a model whose outputs are IID normal by construction."""
    transition_variance = self._build_placeholder(transition_variance)
    observation_variance = self._build_placeholder(observation_variance)

    # Rows of an orthogonal matrix project a (potentially high-dimensional)
    # latent space of IID normal variables into a low-dimensional
    # observation that is still IID normal.
    q_factor = np.linalg.qr(np.random.randn(latent_size, latent_size))[0]
    observation_matrix = self._build_placeholder(
        q_factor[:observation_size, :])

    def _isotropic_noise(variance, size):
        # Diagonal Gaussian with the same variance on every coordinate.
        return tfd.MultivariateNormalDiag(
            scale_diag=tf.sqrt(variance) * tf.ones([size], dtype=self.dtype))

    # An all-zero transition matrix means each latent state is pure noise.
    return tfd.LinearGaussianStateSpaceModel(
        num_timesteps=num_timesteps,
        transition_matrix=self._build_placeholder(
            np.zeros([latent_size, latent_size])),
        transition_noise=_isotropic_noise(transition_variance, latent_size),
        observation_matrix=observation_matrix,
        observation_noise=_isotropic_noise(
            observation_variance, observation_size),
        initial_state_prior=_isotropic_noise(
            transition_variance, latent_size),
        validate_args=True)
def build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood. For a derivation of the terms in here, see the associated
    SGPR notebook.
    """
    noise_var = self.likelihood.variance
    inv_noise_std = tf.sqrt(1. / noise_var)

    num_inducing = tf.shape(self.Z)[0]
    # Number of entries in Y, as a float64 scalar.
    n_times_d = tf.cast(tf.shape(self.Y)[0] * tf.shape(self.Y)[1], tf.float64)
    out_dim = tf.cast(tf.shape(self.Y)[1], tf.float64)

    err = self.Y - self.mean_function(self.X)
    Kdiag = self.kern.Kdiag(self.X)
    Kuf = self.kern.K(self.Z, self.X)
    # Small jitter on the diagonal before the Cholesky factorisation.
    Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
    L = tf.cholesky(Kuu)

    # Intermediate matrices
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) * inv_noise_std
    AAT = tf.matmul(A, tf.transpose(A))
    LB = tf.cholesky(AAT + eye(num_inducing))
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, err), lower=True) * inv_noise_std

    # Terms of the log marginal bound, summed in this order.
    const_term = -0.5 * n_times_d * np.log(2 * np.pi)
    logdet_term = -out_dim * tf.reduce_sum(tf.log(tf.user_ops.get_diag(LB)))
    noise_term = -0.5 * n_times_d * tf.log(noise_var)
    fit_term = -0.5 * tf.reduce_sum(tf.square(err)) / noise_var
    projection_term = 0.5 * tf.reduce_sum(tf.square(c))
    trace_term = -0.5 * (tf.reduce_sum(Kdiag) / noise_var
                         - tf.reduce_sum(tf.user_ops.get_diag(AAT)))

    return (const_term + logdet_term + noise_term + fit_term
            + projection_term + trace_term)
def build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.

    With `full_cov` the second return value is built from the full
    covariance `kern.K(Xnew)`; otherwise only from `kern.Kdiag(Xnew)`. In
    both cases it is tiled once per column of `self.Y`.
    """
    inv_noise_std = tf.sqrt(1. / self.likelihood.variance)
    num_inducing = tf.shape(self.Z)[0]

    err = self.Y - self.mean_function(self.X)
    Kuf = self.kern.K(self.Z, self.X)
    # Small jitter on the diagonal before the Cholesky factorisation.
    Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
    Kus = self.kern.K(self.Z, Xnew)

    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) * inv_noise_std
    LB = tf.cholesky(tf.matmul(A, tf.transpose(A)) + eye(num_inducing))
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, err), lower=True) * inv_noise_std

    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tf.transpose(tmp2), c)

    if full_cov:
        var = (self.kern.K(Xnew)
               + tf.matmul(tf.transpose(tmp2), tmp2)
               - tf.matmul(tf.transpose(tmp1), tmp1))
        tile_shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), tile_shape)
    else:
        var = (self.kern.Kdiag(Xnew)
               + tf.reduce_sum(tf.square(tmp2), 0)
               - tf.reduce_sum(tf.square(tmp1), 0))
        tile_shape = tf.pack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), tile_shape)

    return mean + self.mean_function(Xnew), var
def _dist_to_opt(self):
    """Distance to optimum.

    Sets `self._grad_norm`, `self._grad_norm_avg`, `self._d_t` and
    `self._dist_to_opt_avg` as a side effect.

    Returns:
      D_t ops: the two moving-average update ops, in creation order.
    """
    averager = self._moving_averager

    # Running average of the norm of gradient
    self._grad_norm = tf.sqrt(self._grad_norm_squared)
    grad_norm_avg_op = averager.apply([self._grad_norm])
    with tf.control_dependencies([grad_norm_avg_op]):
        self._grad_norm_avg = averager.average(self._grad_norm)
        # Single iteration distance estimation, note here
        # self._grad_norm_avg is per variable
        self._d_t = self._grad_norm_avg / self._grad_norm_squared_avg

    # Running average of distance
    dist_avg_op = averager.apply([self._d_t])
    with tf.control_dependencies([dist_avg_op]):
        self._dist_to_opt_avg = tf.identity(averager.average(self._d_t))
        if self._sparsity_debias:
            self._dist_to_opt_avg /= tf.sqrt(self._sparsity_avg)

    return [grad_norm_avg_op, dist_avg_op]
def pearsoncorrelation(ypred, y):
    """Pearson correlation between `ypred` and `y`, taken over all elements.

    A constant of 1e-10 is added to the denominator so that a constant
    input does not cause a division by zero.
    """
    pred_dev = ypred - tf.reduce_mean(ypred)
    true_dev = y - tf.reduce_mean(y)

    cross_sum = tf.reduce_sum(tf.multiply(pred_dev, true_dev))
    pred_scale = tf.sqrt(tf.reduce_sum(tf.square(pred_dev)))
    true_scale = tf.sqrt(tf.reduce_sum(tf.square(true_dev)))

    return cross_sum / (tf.multiply(pred_scale, true_scale) + 1e-10)
def p_zt(self, prev_state, t):
    """Computes the model p(z_t| z_{t-1}).

    Args:
      prev_state: previous latent state; its leading dimension is used as
        the batch size.
      t: Python integer timestep.

    Returns:
      For t > 0, a Normal centred at `prev_state + self.bs[t - 1]`. For
      t == 0, a two-component Mixture of Normals centred at
      `+self.prior_mode_mean` and `-self.prior_mode_mean`, weighted by
      `self.mixing_coeff` and `1 - self.mixing_coeff`. Every Normal has
      variance `self.variance`.
    """
    tfd = tf.contrib.distributions

    def _normal_at(loc):
        # Same variance on every coordinate; Normal is parameterised by
        # its standard deviation.
        return tfd.Normal(
            loc=loc, scale=tf.sqrt(tf.ones_like(loc) * self.variance))

    batch_size = tf.shape(prev_state)[0]
    if t > 0:
        return _normal_at(prev_state + self.bs[t - 1])

    # p(z_0) is mixture of two Normals
    mode_shape = [batch_size, self.state_size]
    mu_pos = tf.ones(mode_shape, dtype=self.dtype) * self.prior_mode_mean
    mu_neg = tf.ones(mode_shape, dtype=self.dtype) * -self.prior_mode_mean
    z0_pos = _normal_at(mu_pos)
    z0_neg = _normal_at(mu_neg)

    # Use the model dtype rather than a hard-coded float64, so the mixing
    # weights agree with the component distributions built above.
    mode_probs = tf.convert_to_tensor(
        [self.mixing_coeff, 1 - self.mixing_coeff], dtype=self.dtype)
    mode_probs = tf.tile(mode_probs[tf.newaxis, tf.newaxis, :],
                         [batch_size, 1, 1])
    mode_selection_dist = tfd.Categorical(probs=mode_probs)
    return tfd.Mixture(
        cat=mode_selection_dist,
        components=[z0_pos, z0_neg],
        validate_args=False)
def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
    """Gauss-Hermite estimate built from the latent function selected by `Y`.

    `gh_x` and `gh_w` are the quadrature nodes and weights. The result is a
    weighted sum over the nodes of the product, across the non-selected
    latent functions, of their Gaussian CDFs evaluated on a grid centred on
    the selected function.
    """
    labels = tf.reshape(Y, (-1,))
    # One-hot masks: `oh_on` picks the indicated latent function, `oh_off`
    # is its complement.
    oh_on = tf.cast(tf.one_hot(labels, self.num_classes, 1.0, 0.0), float_type)
    oh_off = tf.cast(tf.one_hot(labels, self.num_classes, 0.0, 1.0), float_type)

    # Mean and variance of the indicated latent function.
    mu_selected = tf.reduce_sum(oh_on * mu, 1)
    var_selected = tf.reduce_sum(oh_on * var, 1)

    # Gauss-Hermite grid around the selected mean; the variance is clipped
    # away from zero before the square root.
    grid_scale = tf.sqrt(tf.clip_by_value(2.0 * var_selected, 1e-10, np.inf))
    X = tf.reshape(mu_selected, (-1, 1)) + gh_x * tf.reshape(grid_scale, (-1, 1))

    # Gaussian CDF of every latent function (including the selected one)
    # evaluated on the grid.
    latent_std = tf.sqrt(tf.clip_by_value(var, 1e-10, np.inf))
    dist = (tf.expand_dims(X, 1) - tf.expand_dims(mu, 2)) / tf.expand_dims(latent_std, 2)
    cdfs = 0.5 * (1.0 + tf.erf(dist / np.sqrt(2.0)))
    # Squash into [1e-4, 1 - 1e-4].
    cdfs = cdfs * (1 - 2e-4) + 1e-4

    # Replace the selected function's entries by 1 so they drop out of the
    # product below.
    cdfs = cdfs * tf.expand_dims(oh_off, 2) + tf.expand_dims(oh_on, 2)

    # Product over the latent functions, weighted sum over the GH grid.
    per_node_product = tf.reduce_prod(cdfs, reduction_indices=[1])
    node_weights = tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1))
    return tf.matmul(per_node_product, node_weights)
def cosine_distance(v1, v2):
    """
    Pairwise cosine between the word representations of two sentences.

    Despite the name, the value returned is the dot product divided by both
    norms, i.e. a cosine similarity (larger means more alike).

    Parameters
    ----------
    v1: Tensor
        Tensor of shape (batch_size, 1, num_sentence_words,
        context_rnn_hidden_size) representing the first sentence.

    v2: Tensor
        Tensor of shape (batch_size, num_sentence_words, 1,
        context_rnn_hidden_size) representing the second sentence.

    Returns
    -------
    Tensor
        Shape (batch_size, num_sentence_words, num_sentence_words).
    """
    def _clamped_norm(vectors):
        # L2 norm over the last axis; the squared norm is floored at
        # EPSILON before the square root.
        squared = tf.reduce_sum(tf.square(vectors), axis=-1)
        return tf.sqrt(tf.maximum(squared, EPSILON))

    # Broadcasting the two inputs gives an elementwise product of shape
    # (batch_size, num_sentence_words, num_sentence_words, rnn_hidden_size);
    # summing the last axis leaves one dot product per word pair.
    dot_products = tf.reduce_sum(tf.multiply(v1, v2), axis=-1)

    # v1 norms: (batch_size, 1, num_sentence_words)
    # v2 norms: (batch_size, num_sentence_words, 1)
    return dot_products / _clamped_norm(v1) / _clamped_norm(v2)
def dense(x, num_units, nonlinearity=None, init_scale=1., counters={},init=False, ema=None, train_scale=True, init_w=tf.random_normal_initializer(0, 0.05),**kwargs):
    ''' fully connected layer

    With `init=True` the variables V, g and b are created and the output is
    standardised using the statistics of the current batch. Otherwise the
    existing variables are fetched through `get_vars_maybe_avg` and weight
    normalization is applied.

    NOTE: the mutable default for `counters` is shared between calls and is
    what `get_name` receives when no dict is supplied; do not replace it
    with a per-call dict.
    '''
    layer_name = get_name('dense', counters)
    row_shape = [1, num_units]
    with tf.variable_scope(layer_name):
        if not init:
            V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
            # use weight normalization (Salimans & Kingma, 2016)
            projected = tf.matmul(x, V)
            scaler = g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
            out = tf.reshape(scaler, row_shape) * projected + tf.reshape(b, row_shape)
        else:
            # data based initialization of parameters
            V = tf.get_variable('V', [int(x.get_shape()[1]), num_units],
                                tf.float32, init_w, trainable=True)
            V_norm = tf.nn.l2_normalize(V.initialized_value(), [0])
            projected = tf.matmul(x, V_norm)
            m_init, v_init = tf.nn.moments(projected, [0])
            scale_init = init_scale / tf.sqrt(v_init + 1e-10)
            # g and b are created here with constant initial values (ones
            # and zeros); the data-dependent scale and shift are applied to
            # this call's output only.
            g = tf.get_variable('g', dtype=tf.float32,
                                initializer=tf.constant(np.ones(num_units), tf.float32),
                                trainable=train_scale)
            b = tf.get_variable('b', dtype=tf.float32,
                                initializer=tf.constant(np.zeros(num_units), tf.float32),
                                trainable=True)
            out = tf.reshape(scale_init, row_shape) * (projected - tf.reshape(m_init, row_shape))

        # apply nonlinearity
        if nonlinearity is not None:
            out = nonlinearity(out)
        return out
def xavier_init( n_inputs, n_outputs, uniform=True ):
    """Return a Xavier/Glorot-style initializer for a layer.

    Args:
        n_inputs: number of input units (a Python number).
        n_outputs: number of output units (a Python number).
        uniform: if true, sample uniformly from
            [-sqrt(6 / (n_inputs + n_outputs)), +sqrt(6 / (n_inputs + n_outputs))];
            otherwise use a truncated normal with standard deviation
            sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A TensorFlow initializer object.
    """
    # Local import: this snippet cannot assume `math` is imported at file
    # level.
    import math

    # The bounds are plain Python floats. Computing them with tf.sqrt would
    # create tensors in whichever graph is the default when this function
    # is called, tying the initializer to that graph.
    fan_sum = n_inputs + n_outputs
    if uniform:
        init_range = math.sqrt(6.0 / fan_sum)
        return tf.random_uniform_initializer(-init_range, init_range)

    stddev = math.sqrt(3.0 / fan_sum)
    return tf.truncated_normal_initializer(stddev=stddev)
def summary_gradient_updates(grads, opt, lr):
    """get summary ops for the magnitude of gradient updates

    For every trainable variable that has a gradient, emits a scalar
    summary `UPDATE/<name>` holding the ratio of the update norm to the
    variable norm. When the optimizer exposes an 'accumulator' slot for the
    variable, the update is divided by the square root of that slot.
    """
    # variable name -> [variable, grad, accumulator slot]
    tracked = {v.name: [v, None, None] for v in tf.trainable_variables()}
    for g, v in grads:
        entry = tracked[v.name]
        entry[1] = g
        entry[2] = opt.get_slot(v, 'accumulator')

    summaries = []
    for vname, (v, g, a) in tracked.items():
        if g is None:
            continue

        # For a sparse gradient only the rows that are actually updated
        # contribute to the norm.
        is_sparse = isinstance(g, tf.IndexedSlices)
        updates = lr * (g.values if is_sparse else g)
        if a is not None:
            updates /= tf.sqrt(tf.gather(a, g.indices) if is_sparse else a)

        values_norm = tf.sqrt(tf.reduce_sum(v * v)) + 1.0e-7
        updates_norm = tf.sqrt(tf.reduce_sum(updates * updates))
        summaries.append(
            tf.summary.scalar('UPDATE/' + vname.replace(":", "_"),
                              updates_norm / values_norm))
    return summaries
def __init__(
        self, sequence_length, vocab_size, embedding_size, hidden_units,
        l2_reg_lambda, batch_size, trainableEmbeddings):
    """Build the graph of a two-branch (siamese) recurrent network.

    Args:
        sequence_length: number of token ids per input row.
        vocab_size: number of rows of the embedding matrix.
        embedding_size: width of the embedding matrix.
        hidden_units: forwarded to `self.stackedRNN`.
        l2_reg_lambda: accepted but not used by this constructor.
        batch_size: forwarded to `self.contrastive_loss`.
        trainableEmbeddings: whether the embedding matrix is trainable.
    """
    # Placeholders for input, output and dropout
    self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1")
    self.input_x2 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x2")
    self.input_y = tf.placeholder(tf.float32, [None], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Keeping track of l2 regularization loss (optional; currently unused)
    l2_loss = tf.constant(0.0, name="l2_loss")

    # Embedding layer; the matrix starts at zero, so it is presumably
    # filled with pretrained vectors by the caller - TODO confirm.
    with tf.name_scope("embedding"):
        self.W = tf.Variable(
            tf.constant(0.0, shape=[vocab_size, embedding_size]),
            trainable=trainableEmbeddings, name="W")
        self.embedded_words1 = tf.nn.embedding_lookup(self.W, self.input_x1)
        self.embedded_words2 = tf.nn.embedding_lookup(self.W, self.input_x2)
    # Call form prints the same text on Python 2 and is valid on Python 3.
    print(self.embedded_words1)

    # Encode both sides with the stacked RNN, then compare the encodings.
    with tf.name_scope("output"):
        self.out1 = self.stackedRNN(self.embedded_words1, self.dropout_keep_prob, "side1", embedding_size, sequence_length, hidden_units)
        self.out2 = self.stackedRNN(self.embedded_words2, self.dropout_keep_prob, "side2", embedding_size, sequence_length, hidden_units)
        # Euclidean distance between the encodings, divided by the sum of
        # their norms.
        self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)), 1, keep_dims=True))
        self.distance = tf.div(self.distance, tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.out1), 1, keep_dims=True)), tf.sqrt(tf.reduce_sum(tf.square(self.out2), 1, keep_dims=True))))
        self.distance = tf.reshape(self.distance, [-1], name="distance")

    with tf.name_scope("loss"):
        self.loss = self.contrastive_loss(self.input_y, self.distance, batch_size)

    # Accuracy: similarity is 1 - round(distance), i.e. the distance is
    # thresholded at 0.5 and compared with the labels.
    with tf.name_scope("accuracy"):
        self.temp_sim = tf.subtract(tf.ones_like(self.distance), tf.rint(self.distance), name="temp_sim")  # auto threshold 0.5
        correct_predictions = tf.equal(self.temp_sim, self.input_y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
def xavier_init(input_size, output_size, uniform=True):
    """Return a Xavier/Glorot-style initializer for a layer.

    Args:
        input_size: number of input units (a Python number).
        output_size: number of output units (a Python number).
        uniform: if true, sample uniformly from
            [-sqrt(6 / (input_size + output_size)), +sqrt(6 / (input_size + output_size))];
            otherwise use a truncated normal with standard deviation
            sqrt(3 / (input_size + output_size)).

    Returns:
        A TensorFlow initializer object.
    """
    # Local import: this snippet cannot assume `math` is imported at file
    # level.
    import math

    fan_sum = input_size + output_size
    if uniform:
        # The uniform initializer takes (minval, maxval); the previous
        # `stdevv=` keyword does not exist and raised TypeError on every
        # call.
        init_range = math.sqrt(6.0 / fan_sum)
        return tf.random_uniform_initializer(-init_range, init_range)

    stddev = math.sqrt(3.0 / fan_sum)
    return tf.truncated_normal_initializer(stddev=stddev)
def sample_weights(self, weights):
    """Draw one sample of every decoder weight matrix.

    Each matrix is sampled as `mean + sqrt(exp(logvar)) * eps` with
    standard-normal `eps`, and its contributions from `self.log_p_theta`
    and `self.log_q_theta` are accumulated.

    Args:
        weights: mapping holding `'l<i>mean'` / `'l<i>logvar'` for every
            entry of `network_architecture['decoder_net']`, plus
            `'out_mean_mean'` / `'out_mean_logvar'` for the output layer.

    Returns:
        Tuple `(sampled_weights, log_p, log_q)`; `sampled_weights` lists
        the hidden layers in order followed by the output layer.
    """
    layer_sizes = self.network_architecture['decoder_net']

    def _draw(mean, logvar, fan_in, fan_out):
        # Matrices have `fan_in + 1` rows (the extra row is presumably the
        # bias - TODO confirm). The element count handed to the density
        # helpers must match that shape, hence the parentheses:
        # `fan_in + 1 * fan_out` would only add `fan_out` to `fan_in`.
        n_rows = fan_in + 1
        eps = tf.random_normal((n_rows, fan_out), 0, 1, dtype=tf.float32)
        sample = tf.add(mean, tf.multiply(tf.sqrt(tf.exp(logvar)), eps))
        n_weights = n_rows * fan_out
        return (sample,
                self.log_p_theta(sample, n_weights),
                self.log_q_theta(sample, mean, logvar, n_weights))

    log_p = 0
    log_q = 0
    sampled_weights = []

    # Hidden layers: the first one is fed by the latent code (n_z inputs),
    # each later one by the previous layer.
    fan_in = self.n_z
    for layer_i, fan_out in enumerate(layer_sizes):
        prefix = 'l' + str(layer_i)
        sample, layer_log_p, layer_log_q = _draw(
            weights[prefix + 'mean'], weights[prefix + 'logvar'],
            fan_in, fan_out)
        log_p += layer_log_p
        log_q += layer_log_q
        sampled_weights.append(sample)
        fan_in = fan_out

    # Output layer, fed by the last hidden layer.
    sample, layer_log_p, layer_log_q = _draw(
        weights['out_mean_mean'], weights['out_mean_logvar'],
        layer_sizes[-1], self.n_input)
    sampled_weights.append(sample)
    log_p += layer_log_p
    log_q += layer_log_q

    return sampled_weights, log_p, log_q
def recognition_network(self,weights,biases,batch_norm):
    """Encoder: two batch-normalised hidden layers producing `z_mean` and
    `z_log_sigma_sq`.

    Returns:
        Tuple `(z_mean, z_log_sigma_sq)`. `z_mean` is computed from the
        normalised second layer, `z_log_sigma_sq` from the un-normalised
        one, and no softplus is applied to the latter.
    """
    epsilon = 1e-16

    def _standardize(activations):
        # Zero mean / unit variance over axis 0.
        batch_mean, batch_var = tf.nn.moments(activations, [0])
        return (activations - batch_mean) / tf.sqrt(batch_var + epsilon)

    def _hidden(inputs, weight_key, bias_key):
        return self.transfert_fct(
            tf.add(tf.matmul(inputs, weights[weight_key]), biases[bias_key]))

    bn_x = _standardize(self.x) * batch_norm['gn_x'] + batch_norm['bn_x']
    layer_1 = _hidden(bn_x, 'h1', 'b1')

    # NOTE(review): scale and shift both read 'gn_g1' (and 'gn_g2' below),
    # whereas the input layer pairs 'gn_x' with 'bn_x'. This looks like a
    # copy-paste slip, but the key set of `batch_norm` is not visible here;
    # confirm before changing.
    bn_1 = _standardize(layer_1) * batch_norm['gn_g1'] + batch_norm['gn_g1']
    layer_2 = _hidden(bn_1, 'h2', 'b2')
    bn_2 = _standardize(layer_2) * batch_norm['gn_g2'] + batch_norm['gn_g2']

    z_mean = tf.add(tf.matmul(bn_2, weights['out_mean']), biases['out_mean'])
    z_log_sigma_sq = tf.add(
        tf.matmul(layer_2, weights['out_log_sigma']), biases['out_log_sigma'])
    return (z_mean, z_log_sigma_sq)
def dense(x, num_units, nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
    ''' fully connected layer

    Uses weight normalization (Salimans & Kingma, 2016). With `init=True`
    the output is forced to depend on assignments that rescale `g` and
    shift `b` from the statistics of the current batch.

    NOTE: the mutable default for `counters` is shared between calls and is
    what `get_name` receives when no dict is supplied; do not replace it
    with a per-call dict.
    '''
    layer_name = get_name('dense', counters)
    row_shape = [1, num_units]
    with tf.variable_scope(layer_name):
        in_units = int(x.get_shape()[1])
        V = get_var_maybe_avg('V', ema, shape=[in_units, num_units], dtype=tf.float32,
                              initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
        g = get_var_maybe_avg('g', ema, shape=[num_units], dtype=tf.float32,
                              initializer=tf.constant_initializer(1.), trainable=True)
        b = get_var_maybe_avg('b', ema, shape=[num_units], dtype=tf.float32,
                              initializer=tf.constant_initializer(0.), trainable=True)

        # Weight normalization: every column of V is rescaled to norm g.
        x = tf.matmul(x, V)
        column_norm = tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
        scaler = g / column_norm
        x = tf.reshape(scaler, row_shape) * x + tf.reshape(b, row_shape)

        if init:
            # Data-dependent initialisation: the updates of g and b must
            # run before the output is produced.
            m_init, v_init = tf.nn.moments(x, [0])
            scale_init = init_scale / tf.sqrt(v_init + 1e-10)
            init_updates = [g.assign(g * scale_init),
                            b.assign_add(-m_init * scale_init)]
            with tf.control_dependencies(init_updates):
                x = tf.nn.l2_normalize(x, axis=0)

        # apply nonlinearity
        if nonlinearity is not None:
            x = nonlinearity(x)
        return x
def _encode(self, boxes, anchors):
    """Encodes a box collection with respect to an anchor collection.

    Each box is described by its centre offset from the anchor, in units of
    the anchor side length sqrt(height * width), and by the log ratio of
    the box side length to the anchor side length.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, tl].
    """
    # Work in the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()

    # EPSILON avoids NaN in the divisions and the log below.
    anchor_side = tf.sqrt(ha * wa) + EPSILON
    box_side = tf.sqrt(h * w) + EPSILON

    tx = (xcenter - xcenter_a) / anchor_side
    ty = (ycenter - ycenter_a) / anchor_side
    tl = tf.log(box_side / anchor_side)

    # Scales location targets for joint training.
    factors = self._scale_factors
    if factors:
        ty *= factors[0]
        tx *= factors[1]
        tl *= factors[2]

    return tf.transpose(tf.stack([ty, tx, tl]))
def evalFunction( classVec, attributeVec, groundTruthLabels ):
    """Cosine similarity between every row of `classVec` and every row of
    `attributeVec`.

    Both inputs are scaled to unit L2 norm along axis 1 before the matrix
    product. `groundTruthLabels` is accepted but not used.
    """
    class_norm = tf.sqrt(tf.reduce_sum(tf.square(classVec), 1, keep_dims=True))
    unit_class = classVec / class_norm

    attribute_norm = tf.sqrt(tf.reduce_sum(tf.square(attributeVec), 1, keep_dims=True))
    unit_attribute = attributeVec / attribute_norm

    return tf.matmul(unit_class, unit_attribute, transpose_b=True)
def f1():
    # The tensorflow path if no jump occurs: the state was already evolved
    # by Heff, so just normalize it and move on with the same random number.
    root_norm = tf.sqrt(new_norm)
    vector = inter_vec_temp / root_norm
    propa = prob / root_norm
    return self.r, tf.constant(0), norm, propa, vector
def encoder(inputs, training=True, scope="encoder", reuse=None):
    '''
    Encode token ids into attention keys and values.

    Args:
      inputs: A 2d tensor with shape of [N, Tx], with dtype of int32. Encoder inputs.
      training: Whether or not the layer is in training mode.
      scope: Optional scope for `variable_scope`
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.

    Returns:
      keys: Hidden vectors, the so-called memory. Has the shape of (N, Tx, e).
      vals: `sqrt(0.5) * (keys + embedding)`, same shape as `keys`.
    '''
    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("text_embedding"):
            embedding = embed(inputs, hp.vocab_size, hp.embed_size)  # (N, Tx, e)

        with tf.variable_scope("encoder_prenet"):
            hidden = fc_block(embedding, hp.enc_channels, training=training)  # (N, Tx, c)

        with tf.variable_scope("encoder_conv"):
            for layer_index in range(hp.enc_layers):
                # The dilation rate doubles with every layer.
                conv_out = conv_block(hidden,
                                      size=hp.enc_filter_size,
                                      rate=2 ** layer_index,
                                      training=training,
                                      scope="encoder_conv_{}".format(layer_index))  # (N, Tx, c)
                # Residual connection, rescaled by sqrt(0.5).
                hidden = (conv_out + hidden) * tf.sqrt(0.5)

        with tf.variable_scope("encoder_postnet"):
            keys = fc_block(hidden, hp.embed_size, training=training)  # (N, Tx, e)
            vals = tf.sqrt(0.5) * (keys + embedding)  # (N, Tx, e)

    return keys, vals
def disjunction_of_literals(literals, label="no_label"):
    """Truth value of the disjunction of `literals`, aggregated to one value.

    The literal tensors are concatenated along axis 1 and combined per row
    according to the module-level `default_tnorm` ("product", "yager2",
    "luk" or "goedel"). The per-row values are then reduced according to
    the module-level `default_aggregator` ("product", "mean", "gmean",
    "hmean", "min", "qmean" or "cmean").

    Args:
        literals: objects exposing a `.tensor` attribute.
        label: op name given to the result where the branch sets one.

    Returns:
        The aggregated tensor, or None when `default_aggregator` matches
        none of the known names.
    """
    list_of_literal_tensors = [lit.tensor for lit in literals]
    literals_tensor = tf.concat(1, list_of_literal_tensors)

    # Per-row disjunction (t-conorm).
    if default_tnorm == "product":
        result = 1.0 - tf.reduce_prod(1.0 - literals_tensor, 1, keep_dims=True)
    elif default_tnorm == "yager2":
        result = tf.minimum(1.0, tf.sqrt(tf.reduce_sum(tf.square(literals_tensor), 1, keep_dims=True)))
    elif default_tnorm == "luk":
        # print(...) with a single argument behaves identically on
        # Python 2 and is valid syntax on Python 3.
        print("data aggregator is lukas")
        result = tf.minimum(1.0, tf.reduce_sum(literals_tensor, 1, keep_dims=True))
        PR(result)
    elif default_tnorm == "goedel":
        result = tf.reduce_max(literals_tensor, 1, keep_dims=True, name=label)

    # Aggregation over rows.
    if default_aggregator == "product":
        return tf.reduce_prod(result, keep_dims=True)
    if default_aggregator == "mean":
        print("data aggregator is mean")
        return tf.reduce_mean(result, keep_dims=True, name=label)
    if default_aggregator == "gmean":
        # Geometric mean computed in log space.
        return tf.exp(tf.mul(tf.reduce_sum(tf.log(result), keep_dims=True),
                             tf.inv(tf.to_float(tf.size(result)))),
                      name=label)
    if default_aggregator == "hmean":
        print("data aggregator is hmean")
        return tf.div(tf.to_float(tf.size(result)), tf.reduce_sum(tf.inv(result), keep_dims=True))
    if default_aggregator == "min":
        print("data aggregator is min")
        return tf.reduce_min(result, keep_dims=True, name=label)
    if default_aggregator == "qmean":
        print("data aggregator is qmean")
        return tf.sqrt(tf.reduce_mean(tf.square(result), keep_dims=True), name=label)
    if default_aggregator == "cmean":
        print("data aggregator is cmean")
        return tf.pow(tf.reduce_mean(tf.pow(result, 3), keep_dims=True), tf.inv(tf.to_float(3)), name=label)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients and also return the Adam update as explicit dynamics.

    Args:
        grads_and_vars: iterable of (gradient, variable) pairs.
        global_step: forwarded to the parent optimizer.
        name: forwarded to the parent optimizer; also used as name scope.

    Returns:
        Tuple `(ts, dynamics)`. `ts` is the parent's training op and
        `dynamics` is a list of `(state, next_state)` pairs covering every
        variable with a gradient, its two moment slots, and the two beta
        power accumulators.
    """
    # Materialise once: the pairs are consumed by the parent call and again
    # by the loop below, so a one-shot iterable such as `zip(...)` would be
    # empty the second time.
    grads_and_vars = list(grads_and_vars)
    ts = super().apply_gradients(grads_and_vars, global_step, name)

    mn, vn = self.get_slot_names()
    dynamics = []

    with tf.name_scope(name, 'Adam_Dynamics'):
        b1_pow, b2_pow = self._beta1_power, self._beta2_power
        lr_k = self._lr_t * tf.sqrt(1. - b2_pow) / (1. - b1_pow)

        for g, w in grads_and_vars:
            if g is None:
                # The parent optimizer creates no slots for variables
                # without a gradient, so there is nothing to describe.
                continue

            m = self.get_slot(w, mn)
            v = self.get_slot(w, vn)

            mk = tf.add(self._beta1_t * m, (1. - self._beta1_t) * g, name=m.op.name)
            vk = tf.add(self._beta2_t * v, (1. - self._beta2_t) * g * g, name=v.op.name)

            # IMPORTANT NOTE: epsilon should be outside sqrt as from the
            # original implementation, but this brings to computational
            # instability of the hypergradient.
            wk = tf.subtract(w, lr_k * mk / (tf.sqrt(vk + self._epsilon_t**2)), name=w.op.name)

            dynamics.extend([(w, wk), (m, mk), (v, vk)])

        b1_powk = b1_pow * self._beta1_t
        b2_powk = b2_pow * self._beta2_t
        dynamics.extend([(b1_pow, b1_powk), (b2_pow, b2_powk)])

    return ts, dynamics
def _encode(self, boxes, anchors):
    """Encodes a box collection with respect to an anchor collection.

    Each box is described by the absolute distances from the anchor centre
    to its four edges.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [top, bown, left, right].
    """
    # Only the anchor centres are needed; the anchor sizes are not part of
    # this encoding, so the unused side-length computations were removed.
    ycenter_a, xcenter_a, _, _ = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()

    # |anchor centre - box edge| for the two edges along y and the two
    # along x.
    top = tf.abs(ycenter_a - ycenter + 0.5 * h)
    bown = tf.abs(ycenter_a - ycenter - 0.5 * h)
    left = tf.abs(xcenter_a - xcenter + 0.5 * w)
    right = tf.abs(xcenter_a - xcenter - 0.5 * w)

    # Scales location targets for joint training.
    if self._scale_factors:
        top *= self._scale_factors[0]
        bown *= self._scale_factors[0]
        left *= self._scale_factors[1]
        right *= self._scale_factors[1]

    return tf.transpose(tf.stack([top, bown, left, right]))
def bhattacharyya(self):
    """Approximate bhattacharyya distance between cover and non-cover distances.

    Similar to Mahalanobis distance, but for distributions with different
    variances. Assumes normality, hence approximate.

    Returns:
        tf.Tensor: bhattacharyya distance between distributions of the
            cover and non-cover pairs' distances.
        tf.Tensor: mean cover pair distance
        tf.Tensor: mean non-cover pair distance
    """
    y_A, y_B = self.subnet_A[-1], self.subnet_B[-1]
    squared_dists = tf.reduce_sum(tf.square(y_A - y_B), reduction_indices=1)

    def _distances_where(flag_value):
        # Euclidean distances of the pairs whose `is_cover` entry equals
        # the corresponding entry of `flag_value`.
        rows = tf.where(tf.equal(self.is_cover, flag_value))
        return tf.sqrt(tf.gather(squared_dists, tf.reshape(rows, [-1])))

    pair_dists = _distances_where(tf.ones_like(self.is_cover))
    non_pair_dists = _distances_where(tf.zeros_like(self.is_cover))

    mu_pairs, sigma2_pairs = tf.nn.moments(pair_dists, axes=[0], name='d_pairs')
    mu_non_pairs, sigma2_non_pairs = tf.nn.moments(non_pair_dists, axes=[0], name='d_non_pairs')

    # Closed form for two univariate normals: one term for the variance
    # mismatch, one for the separation of the means.
    variance_term = 0.25 * tf.log(
        0.25 * (sigma2_pairs / sigma2_non_pairs + sigma2_non_pairs / sigma2_pairs + 2))
    mean_term = 0.25 * (mu_pairs - mu_non_pairs)**2 / (sigma2_pairs + sigma2_non_pairs)
    bhatt = tf.add(variance_term, mean_term, name='bhatt')

    return bhatt, mu_pairs, mu_non_pairs
def get_weight_stats(x, axis):
    """ Compute weight statistics over the given axis.

    Args:
        x: tf.Tensor
            a batch of activations, or None.
        axis: int
            axis to perform statistics over.

    Returns:
        list of tf.Tensor
            four tensors, each reshaped to [-1, 1, 1]: mean absolute value,
            root mean square, mean and standard deviation. Empty list when
            `x` is None.
    """
    if x is None:
        return []

    l1 = tf.reduce_mean(tf.abs(x), axis=axis)
    # Small constants under the roots keep them away from zero.
    l2 = tf.sqrt(tf.reduce_mean(x**2, axis=axis) + 1e-6)
    mean, var = tf.nn.moments(x, [axis])
    std = tf.sqrt(var + 1e-8)

    return [tf.reshape(stat, [-1, 1, 1]) for stat in (l1, l2, mean, std)]
def norm(tensor):
    """Scale `tensor` to unit L2 norm along its last axis.

    1e-12 is added to the norm so an all-zero slice does not divide by zero.
    """
    squared_sum = tf.reduce_sum(tf.square(tensor), -1, keep_dims=True)
    length = tf.sqrt(squared_sum) + 1e-12
    return tensor / length
def run_test_sample_consistent_mean_covariance(
        self, sess_run_fn, dist, num_samples=int(1e5), seed=24,
        rtol=1e-2, atol=0.1, cov_rtol=None, cov_atol=None):
    """Tests that sample/mean/covariance are consistent with each other.

    "Consistency" means that `sample`, `mean`, `covariance`, `variance` and
    `stddev` all correspond to the same distribution: the analytical values
    are compared against statistics estimated from `num_samples` draws.

    Args:
      sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
        returning a list of results after running one "step" of TensorFlow
        computation, typically set to `sess.run`.
      dist: Distribution instance or object which implements `sample`,
        `mean`, `covariance`, `variance` and `stddev`.
      num_samples: Python `int` scalar indicating the number of Monte-Carlo
        samples to draw from `dist`.
      seed: Python `int` indicating the seed to use when sampling from `dist`.
        In general it is not recommended to use `None` during a test as this
        increases the likelihood of spurious test failure.
      rtol: Python `float`-type indicating the admissible relative error
        between analytical and sample statistics.
      atol: Python `float`-type indicating the admissible absolute error
        between analytical and sample statistics.
      cov_rtol: Python `float`-type indicating the admissible relative error
        between analytical and sample covariance. Default (`None`): rtol.
      cov_atol: Python `float`-type indicating the admissible absolute error
        between analytical and sample covariance. Default (`None`): atol.
    """
    # Fall back to the generic tolerances only when the covariance ones were
    # omitted; an explicit 0 is a legitimate (strict) tolerance and must not
    # be replaced, which `cov_rtol or rtol` used to do.
    if cov_rtol is None:
        cov_rtol = rtol
    if cov_atol is None:
        cov_atol = atol

    x = dist.sample(num_samples, seed=seed)
    sample_mean = tf.reduce_mean(input_tensor=x, axis=0)
    sample_covariance = tf.reduce_mean(
        input_tensor=_vec_outer_square(x - sample_mean), axis=0)
    sample_variance = tf.linalg.diag_part(sample_covariance)
    sample_stddev = tf.sqrt(sample_variance)

    # Evaluate sample and analytical statistics in a single step so they are
    # computed from the same draw.
    [
        sample_mean_,
        sample_covariance_,
        sample_variance_,
        sample_stddev_,
        mean_,
        covariance_,
        variance_,
        stddev_
    ] = sess_run_fn([
        sample_mean,
        sample_covariance,
        sample_variance,
        sample_stddev,
        dist.mean(),
        dist.covariance(),
        dist.variance(),
        dist.stddev(),
    ])

    self.assertAllClose(mean_, sample_mean_, rtol=rtol, atol=atol)
    self.assertAllClose(covariance_, sample_covariance_,
                        rtol=cov_rtol, atol=cov_atol)
    self.assertAllClose(variance_, sample_variance_, rtol=rtol, atol=atol)
    self.assertAllClose(stddev_, sample_stddev_, rtol=rtol, atol=atol)
def amp(x):
    """Return ``1 + sqrt(1e-8 + sum(x**2))`` with the sum over the last axis.

    The reduced axis is kept (``keepdims=True``); 1e-8 keeps the square root
    away from zero.
    """
    squared_sum = tf.reduce_sum(x**2, axis=-1, keepdims=True)
    magnitude = tf.sqrt(1.e-8 + squared_sum)
    return 1 + magnitude
def configure(inputs, batch_size, target_outputs, is_training, learning_rate,
              beta1, is_depthwise_sep, decay, gen_scale):
    """Operations to calculate network losses and run training operations.

    The behaviour is selected by module-level flags that this function reads
    but does not define: `adversarial`, `use_gradient_penalty`,
    `standard_wass` and `use_l2_loss`. `generator` and `large_discriminator`
    are likewise taken from module scope.

    Args:
        inputs: Tensor fed to the generator.
        batch_size: Number of examples per batch; used to slice the
            discriminator predictions into fake / real / interpolated parts.
        target_outputs: Tensor the generator output is compared against. Its
            static last dimension sets the generator's number of outputs.
        is_training: Forwarded to the generator.
        learning_rate: Accepted for interface compatibility; not used here
            (the optimizers use fixed learning rates).
        beta1: Accepted for interface compatibility; not used here.
        is_depthwise_sep: Forwarded to the generator.
        decay: Accepted for interface compatibility; not used here.
        gen_scale: Accepted for interface compatibility; not used here.

    Returns:
        train_op: ``[gen_train_op, discr_train_op]`` when `adversarial` is
            set, otherwise the single generator training op.
        output_loss: dict with key "Loss" (mean absolute difference between
            the generator's second output and the original targets). When
            `adversarial` is set it also holds "pred_real" and "pred_fake",
            the mean discriminator predictions.
        output0: First output of the generator.
    """
    target_outputs0 = target_outputs

    with tf.variable_scope("gen"):
        output0, phase_components = generator(
            inputs=inputs,
            num_outputs=target_outputs.get_shape().as_list()[-1],
            is_training=is_training,
            is_depthwise_sep=is_depthwise_sep)
    output = output0

    if adversarial:
        # The discriminator sees the inputs concatenated with either the
        # generated or the target channels.
        output = tf.concat([inputs, phase_components], axis=-1)
        target_outputs = tf.concat([inputs, target_outputs], axis=-1)

        if use_gradient_penalty:
            # Random point between generated and target batches.
            x_hat = output + tf.random_uniform(
                output.get_shape().as_list()) * (target_outputs - output)
            discr_batch = tf.concat([output, target_outputs, x_hat], axis=0)
        else:
            discr_batch = tf.concat([output, target_outputs], axis=0)

        with tf.variable_scope("main/discr"):
            preds = large_discriminator(discr_batch)

        # Predictions come back in the order they were concatenated.
        fake_pred = preds[:batch_size]
        real_pred = preds[batch_size:2 * batch_size]

        if use_gradient_penalty:
            x_hat_pred = preds[2 * batch_size:3 * batch_size]
            grad = tf.gradients(x_hat_pred, [x_hat])[0]
            grad_norm2 = tf.sqrt(
                1.e-6 + tf.reduce_sum(tf.square(grad), axis=[1, 2, 3]))
            gradient_penalty = tf.reduce_mean((grad_norm2 - 1.)**2)

        if use_gradient_penalty or standard_wass:
            # Wasserstein-style losses.
            discr_loss = tf.reduce_mean(fake_pred - real_pred)
            gen_loss = -tf.reduce_mean(fake_pred)
        else:
            # Least-squares losses.
            discr_loss = tf.reduce_mean((real_pred - 1)**2 + (fake_pred)**2)
            gen_loss = tf.reduce_mean((fake_pred - 1)**2)

        if standard_wass:
            # Weight-clipping ops are only collected here; running the
            # "clip_weights" collection is left to the caller.
            for v in tf.trainable_variables("main/discr"):
                tf.add_to_collection(
                    "clip_weights",
                    v.assign(tf.clip_by_value(v, -0.01, 0.01)))

        if use_gradient_penalty:
            discr_loss += 10 * gradient_penalty

        if use_l2_loss:
            gen_l2_loss = tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables("gen")])
            discr_l2_loss = tf.add_n([
                tf.nn.l2_loss(v)
                for v in tf.trainable_variables("main/discr")
            ])
            discr_loss += 5.e-5 * discr_l2_loss
            gen_loss += 5.e-5 * gen_l2_loss

        discr_optimizer = tf.train.AdamOptimizer(learning_rate=0.00005,
                                                 beta1=0.5)
    else:
        # Mean squared error per example, then averaged over the batch.
        per_example_mse = 10 * tf.reduce_mean(
            tf.square(output - target_outputs), axis=[1, 2, 3])
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
        gen_loss = tf.reduce_mean(per_example_mse) + 5.e-5 * l2_loss

    gen_optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5)

    # Run the collected update ops (e.g. batch normalisation statistics)
    # before every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if adversarial:
            gen_train_op = gen_optimizer.minimize(
                gen_loss, var_list=tf.trainable_variables("gen"))
            discr_train_op = discr_optimizer.minimize(
                discr_loss, var_list=tf.trainable_variables("main/discr"))
            train_op = [gen_train_op, discr_train_op]
        else:
            train_op = gen_optimizer.minimize(gen_loss)

    output_loss = {
        "Loss": tf.reduce_mean(tf.abs(phase_components - target_outputs0)),
    }
    if adversarial:
        # Discriminator predictions exist only on the adversarial path;
        # referencing them unconditionally raised UnboundLocalError otherwise.
        output_loss["pred_real"] = tf.reduce_mean(real_pred)
        output_loss["pred_fake"] = tf.reduce_mean(fake_pred)

    return train_op, output_loss, output0
def generator(inputs, num_outputs, is_training, is_depthwise_sep=False):
    """Convolutional encoder / residual / decoder network.

    The input is batch-normalised, passed through three stride-2 convolutions
    (encoder), six residual blocks, three stride-2 transposed convolutions
    (decoder) and two further convolutions. The result is divided by its L2
    norm over the last axis and finally multiplied by `inputs`.

    Args:
        inputs: Images tensor with shape [batch_size, heigh, width, channels]
            (as stated by the original author; not checked here).
        num_outputs: Number of channels in network output.
        is_training: Bool indicating whether to use training operations.
        is_depthwise_sep: Forwarded to the `conv` helper for the encoder,
            decoder and the convolution that follows the decoder.

    Returns:
        Tuple ``(x, x0)``: `x0` is the network output normalised over the
        last axis and `x` is ``x0 * inputs``.
    """
    base_size = 32

    x = inputs
    x = tf.contrib.layers.batch_norm(x, is_training=is_training)
    x = conv(x, num_outputs=32, is_training=is_training)

    # Encoder: channel count doubles at every stride-2 step (64, 128, 256).
    for i in range(1, 4):
        x = conv(x, num_outputs=base_size * 2**i, stride=2, is_depthwise_sep=is_depthwise_sep, is_training=is_training, actv_fn=std_actv)
        if i == 2:
            # Kept for a skip connection that is currently disabled below.
            low_level = x

    # Residual blocks
    for _ in range(6): # Number of blocks
        x = residual_block(x, skip=3, is_training=is_training)

    # Decoder: mirrors the encoder with transposed convolutions (128, 64, 32).
    for i in range(2, -1, -1):
        x = conv(x, num_outputs=base_size * 2**i, stride=2, is_depthwise_sep=is_depthwise_sep, is_training=is_training, transpose=True, actv_fn=std_actv)
        # A disabled variant concatenated `low_level` here when the shapes
        # matched and followed it with three extra convolutions.

    # NOTE(review): assumed to run once after the decoder loop; the original
    # layout of this call is ambiguous - confirm.
    x = conv( x, num_outputs=32, is_depthwise_sep=is_depthwise_sep, is_training=is_training, )

    # Project features onto output image
    x = conv(x, num_outputs=num_outputs, biases_initializer=None, actv_fn=None, is_batch_norm=True, is_training=is_training)

    # Normalise over the last axis; 1e-8 keeps the root away from zero.
    x /= tf.sqrt(1.e-8 + tf.reduce_sum(x**2, axis=-1, keepdims=True))

    x0 = x
    x *= inputs

    return x, x0
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """See base class.

    Applies an Adam-style update with decoupled weight decay whose step is
    rescaled per parameter by the ratio of the parameter norm to the update
    norm (LAMB). `global_step` is accepted but not used by this method.
    """
    update_ops = []
    for grad, param in grads_and_vars:
        if grad is None or param is None:
            continue

        param_name = self._get_variable_name(param.name)
        slot_shape = param.shape.as_list()

        # First and second moment accumulators, zero-initialised.
        m = tf.get_variable(
            name=param_name + "/lamb_m",
            shape=slot_shape,
            dtype=tf.float32,
            trainable=False,
            initializer=tf.zeros_initializer())
        v = tf.get_variable(
            name=param_name + "/lamb_v",
            shape=slot_shape,
            dtype=tf.float32,
            trainable=False,
            initializer=tf.zeros_initializer())

        # Standard Adam moment updates.
        m_decayed = tf.multiply(self.beta_1, m)
        m_increment = tf.multiply(1.0 - self.beta_1, grad)
        next_m = m_decayed + m_increment

        v_decayed = tf.multiply(self.beta_2, v)
        v_increment = tf.multiply(1.0 - self.beta_2, tf.square(grad))
        next_v = v_decayed + v_increment

        update = next_m / (tf.sqrt(next_v) + self.epsilon)

        # Weight decay is added to the update itself rather than to the loss,
        # so that it does not interact with the m / v statistics.
        if self._do_use_weight_decay(param_name):
            update += self.weight_decay_rate * param

        # LAMB trust ratio: ||param|| / ||update||, falling back to 1.0
        # whenever either norm is zero.
        param_norm = tf.sqrt(tf.reduce_sum(tf.square(param)))
        update_norm = tf.sqrt(tf.reduce_sum(tf.square(update)))
        param_is_nonzero = tf.greater(param_norm, 0.0)
        ratio_or_one = tf.where(
            tf.greater(update_norm, 0.0), param_norm / update_norm, 1.0)
        trust_ratio = tf.where(param_is_nonzero, ratio_or_one, 1.0)

        scaled_lr = self.learning_rate * trust_ratio
        next_param = param - scaled_lr * update

        update_ops += [
            param.assign(next_param),
            m.assign(next_m),
            v.assign(next_v),
        ]

    return tf.group(*update_ops, name=name)
def build_roi_align_graph(rois, feature_maps, image_meta, crop_size, config):
    """Apply ROI align, taking each ROI from the pyramid level matching its size.

    Args:
        rois: tensor with shape [batch_size, num_rois, 4], split below as
            (y1, x1, y2, x2).
        feature_maps: list of feature maps indexed 0..3 for levels P2..P5,
            each one a tensor with shape [batch_size, H, W, 256].
        image_meta: tensor with shape [batch_size, 12].
        crop_size: output size after roi align.
        config: object providing `sample_ratio`.

    Returns:
        A single tensor holding the pooled features in the original ROI
        order, reshaped so that its two leading dimensions are those of
        `rois` and the remaining ones are those produced by `roi_align`.
    """
    top, left, bottom, right = tf.split(rois, 4, axis=2)
    box_h = bottom - top
    box_w = right - left

    # Use shape of first image. Images in a batch must have the same size.
    image_shape = tensor_utils.parse_image_meta(image_meta)['image_shape'][0]
    image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32)

    # Equation 1 in the Feature Pyramid Networks paper, adjusted for
    # normalized coordinates: a 224x224 pixel ROI maps to P4.
    relative_scale = tf.sqrt(box_h * box_w) / (224.0 / tf.sqrt(image_area))
    level_offset = tf.cast(
        tf.round(tensor_utils.log2(relative_scale)), tf.int32)
    roi_level = tf.minimum(5, tf.maximum(2, 4 + level_offset))
    roi_level = tf.squeeze(roi_level, 2)

    # Pool the ROIs of every level P2..P5 from that level's feature map.
    pooled_per_level = []
    origin_per_level = []
    for level in range(2, 6):
        hits = tf.where(tf.equal(roi_level, level))
        # ROI proposals and their batch indices receive no gradient.
        boxes = tf.stop_gradient(tf.gather_nd(rois, hits))
        batch_ids = tf.stop_gradient(tf.cast(hits[:, 0], tf.int32))

        # Remember which (batch, roi) position each pooled row came from.
        origin_per_level.append(hits)
        pooled_per_level.append(
            roi_align(feature_maps[level - 2],
                      boxes,
                      box_indices=batch_ids,
                      output_size=crop_size,
                      sample_ratio=config.sample_ratio))

    pooled = tf.concat(pooled_per_level, axis=0)

    # Columns: batch index, roi index, row position inside `pooled`.
    box_origin = tf.concat(origin_per_level, axis=0)
    pooled_row = tf.expand_dims(tf.range(tf.shape(box_origin)[0]), 1)
    box_origin = tf.concat(
        [tf.cast(box_origin, tf.int32), pooled_row], axis=1)

    # Sort by batch then roi index. Both columns are merged into one key
    # because there is no two-column sort; top_k sorts descending, hence
    # the reversal.
    sort_key = box_origin[:, 0] * 100000 + box_origin[:, 1]
    order = tf.nn.top_k(
        sort_key, k=tf.shape(box_origin)[0]).indices[::-1]
    pooled = tf.gather(pooled, tf.gather(box_origin[:, 2], order))

    # Re-add the batch dimension.
    out_shape = tf.concat([tf.shape(rois)[:2], tf.shape(pooled)[1:]], axis=0)
    return tf.reshape(pooled, out_shape)
# Training data as [x, y] pairs.
data = [[2, 81], [4, 93], [6, 91], [8, 97]]
x_data = [pair[0] for pair in data]
y_data = [pair[1] for pair in data]

# Start from a random slope a and y-intercept b.
# The slope is drawn from 0 ~ 10 and the intercept from 0 ~ 100.
a = tf.Variable(
    tf.random_uniform([1], minval=0, maxval=10, dtype=tf.float64, seed=0))
b = tf.Variable(
    tf.random_uniform([1], minval=0, maxval=100, dtype=tf.float64, seed=0))

# Linear model y = ax + b.
y = a * x_data + b

# Root mean squared error between the prediction and the data.
rmse = tf.sqrt(tf.reduce_mean(tf.square(y - y_data)))

# Learning rate.
learning_rate = 0.1

# Op that takes one gradient-descent step minimising the RMSE.
gradient_decent = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    rmse)

# Train with TensorFlow.
with tf.Session() as sess:
    # Initialise the variables.
    sess.run(tf.global_variables_initializer())
    # 2001 steps (the count includes step 0).
    for step in range(2001):
        sess.run(gradient_decent)
# time we evaluate the loss. # Explanation of the meaning of NCE loss: # http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/ loss = tf.reduce_mean( tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, labels=train_labels, inputs=embed, num_sampled=num_sampled, num_classes=vocabulary_size)) # Construct the SGD optimizer using a learning rate of 1.0. optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) # Compute the cosine similarity between minibatch examples and all embeddings. norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) normalized_embeddings = embeddings / norm valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset) similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True) # Add variable initializer. init = tf.global_variables_initializer() # Step 5: Begin training. num_steps = 500000 with tf.Session(graph=graph) as session: # We must initialize all variables before we use them.
def eval_once(saver, summary_writer, cifar_top_k_op, mnist_top_k_op, summary_op,itercount):
    """Restore the latest checkpoint, perturb shared weights and evaluate once.

    After restoring, Gaussian noise is added to every trainable variable
    whose name contains 'shared_', then precision is measured for the CIFAR
    and MNIST heads and written to the summary writer and to `resultsFile`
    (a module-level name not defined in this function).

    Args:
      saver: Saver.
      summary_writer: Summary writer.
      cifar_top_k_op: Top K op evaluated for the CIFAR precision.
      mnist_top_k_op: Top K op evaluated for the MNIST precision.
      summary_op: Summary op.
      itercount: Multiplier for the noise level; the noise standard deviation
        is ``sqrt(variance) * 0.01 * itercount`` per variable.

    Returns:
      ``None`` when no checkpoint is found, otherwise the tuple
      ``(global_step, NoiseTest)`` where `global_step` is the string parsed
      from the checkpoint file name.
    """
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            # NOTE(review): keeps only the text before the first '-', so a
            # directory name containing '-' would truncate the path - confirm.
            ckpt_path_and_name = ckpt.model_checkpoint_path.split('-')[0]
        else:
            print('No checkpoint file found')
            return

        # Start the queue runners.
        coord = tf.train.Coordinator()
        try:
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord, daemon=True, start=True))

            NoiseTest = True
            if NoiseTest:
                # The clean weights are re-saved before the noise is added so
                # that every call starts from the same net; the step counter
                # is bumped by one so results can be plotted per call.
                saver.save(sess,ckpt_path_and_name,global_step=int(global_step)+1)

                ## adding noise
                print('Adding noise to the loaded net..')
                train_vars = tf.trainable_variables()
                shared_vars = [var for var in train_vars if 'shared_' in var.name]
                cifar_vars = [var for var in train_vars if 'cifar_' in var.name]
                mnist_vars = [var for var in train_vars if 'mnist_' in var.name]
                # Only the shared variables are perturbed; cifar_vars and
                # mnist_vars are collected but not used below.
                for v in shared_vars:
                    v1 = sess.graph.get_tensor_by_name(v.name)
                    v_shape = tf.shape(v1)
                    # Rank of the variable, so moments reduce over all axes.
                    l = len(v_shape.eval())
                    mean, variance = tf.nn.moments(v1,list(range(l)))
                    # Noise scaled to 1% of the variable's standard deviation
                    # per unit of itercount.
                    noise = tf.random_normal(shape=tf.shape(v1), mean=0.0, stddev=tf.sqrt(variance)*0.01*itercount, dtype=tf.float32)
                    # NOTE(review): new ops are created for every variable on
                    # every call.
                    sess.run(tf.assign(v1,v1+noise))
                # The noisy weights themselves are not saved.

            num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))

            cifar_true_count = 0  # Counts the number of correct predictions.
            cifar_total_sample_count = num_iter * FLAGS.batch_size
            step = 0
            while step < num_iter and not coord.should_stop():
                cifar_predictions = sess.run([cifar_top_k_op])
                cifar_true_count += np.sum(cifar_predictions)
                step += 1

            # Compute precision @ 1.
            cifar_precision = cifar_true_count / cifar_total_sample_count
            print('%s: CIFAR precision @ %d = %.3f' % (datetime.now(),int(global_step), cifar_precision))

            mnist_true_count = 0  # Counts the number of correct predictions.
            mnist_total_sample_count = num_iter * FLAGS.batch_size
            step = 0
            while step < num_iter and not coord.should_stop():
                mnist_predictions = sess.run([mnist_top_k_op])
                mnist_true_count += np.sum(mnist_predictions)
                step += 1

            # Compute precision @ 1.
            mnist_precision = mnist_true_count / mnist_total_sample_count
            print('%s: MNIST precision @ %d = %.3f' % (datetime.now(),int(global_step), mnist_precision))

            summary = tf.Summary()
            summary.ParseFromString(sess.run(summary_op))
            summary.value.add(tag='CIFAR Precision @ 1', simple_value=cifar_precision)
            summary.value.add(tag='MNIST Precision @ 1', simple_value=mnist_precision)
            # global_step is still the string parsed from the file name here.
            summary_writer.add_summary(summary, global_step)
            # One "step;cifar;mnist" line per evaluation.
            resultsFile.write(str(global_step)+";"+str(cifar_precision)+";"+str(mnist_precision)+"\n")
        except Exception as e:  # pylint: disable=broad-except
            # Any failure is handed to the coordinator instead of propagating.
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)
    return global_step, NoiseTest
def body1(self, num, object_num, loss, predict, labels, nilboy):
    """Accumulate the loss terms contributed by ground-truth object `num`.

    The signature and return value mirror each other (index, count, loss
    list, predict, labels, nilboy), which suggests use as a loop body -
    TODO confirm against the caller.

    Args:
      num: index of the label row processed by this call.
      object_num: passed through unchanged.
      loss: sequence of four running losses
        [class, object, no-object, coordinate].
      predict: 3-D tensor indexed [cell, cell, channel]. Channels
        [0:num_classes] are read as class scores, the next boxes_per_cell
        channels as box confidences and the remainder is reshaped to
        boxes_per_cell boxes of 4 values.
      labels: [max_objects, 5] (x_center, y_center, w, h, class)
      nilboy: ignored on input; replaced by the responsibility mask `I`.

    Returns:
      Tuple ``(num + 1, object_num, [four updated losses], predict, labels,
      I)``.
    """
    label = labels[num:num + 1, :]
    label = tf.reshape(label, [-1])

    # calculate objects tensor [CELL_SIZE, CELL_SIZE]: ones over every grid
    # cell touched by the ground-truth box, zeros elsewhere.
    min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
    max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)

    min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
    max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)

    min_x = tf.floor(min_x)
    min_y = tf.floor(min_y)

    max_x = tf.ceil(max_x)
    max_y = tf.ceil(max_y)

    # Block of ones sized to the covered cells ...
    temp = tf.cast(tf.pack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)

    # ... zero-padded out to the full grid.
    temp = tf.cast( tf.pack( [min_y, self.cell_size - max_y, min_x, self.cell_size - max_x]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, "CONSTANT")

    # calculate responsible tensor [CELL_SIZE, CELL_SIZE]: a single one in
    # the cell that contains the box center.
    center_x = label[0] / (self.image_size / self.cell_size)
    center_x = tf.floor(center_x)

    center_y = label[1] / (self.image_size / self.cell_size)
    center_y = tf.floor(center_y)

    response = tf.ones([1, 1], tf.float32)

    temp = tf.cast( tf.pack([ center_y, self.cell_size - center_y - 1, center_x, self.cell_size - center_x - 1 ]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, "CONSTANT")

    # calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]

    predict_boxes = tf.reshape( predict_boxes, [self.cell_size, self.cell_size, self.boxes_per_cell, 4])

    # Scale the first two box values by the cell size and the last two by
    # the image size.
    predict_boxes = predict_boxes * [ self.image_size / self.cell_size, self.image_size / self.cell_size, self.image_size, self.image_size ]

    # Per-cell origin offset (x, y, 0, 0), built in NumPy.
    base_boxes = np.zeros([self.cell_size, self.cell_size, 4])

    for y in range(self.cell_size):
        for x in range(self.cell_size):
            base_boxes[y, x, :] = [ self.image_size / self.cell_size * x, self.image_size / self.cell_size * y, 0, 0 ]
    base_boxes = np.tile( np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]), [1, 1, self.boxes_per_cell, 1])

    predict_boxes = base_boxes + predict_boxes

    iou_predict_truth = self.iou(predict_boxes, label[0:4])

    # calculate C [cell_size, cell_size, boxes_per_cell]: IoU kept only in
    # the responsible cell; used as the confidence target.
    C = iou_predict_truth * tf.reshape(response, [self.cell_size, self.cell_size, 1])

    # calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]: 1 for the
    # box with the highest IoU inside the responsible cell, else 0.
    I = iou_predict_truth * tf.reshape(response, (self.cell_size, self.cell_size, 1))

    max_I = tf.reduce_max(I, 2, keep_dims=True)

    I = tf.cast((I >= max_I), tf.float32) * tf.reshape( response, (self.cell_size, self.cell_size, 1))

    # calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    no_I = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted confidences.
    p_C = predict[:, :, self.num_classes:self.num_classes + self.boxes_per_cell]

    # calculate truth x,y,sqrt_w,sqrt_h 0-D
    x = label[0]
    y = label[1]

    sqrt_w = tf.sqrt(tf.abs(label[2]))
    sqrt_h = tf.sqrt(tf.abs(label[3]))

    # calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]

    # Predicted sizes are clamped to [0, image_size] before the square root.
    p_sqrt_w = tf.sqrt( tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt( tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))

    # calculate truth p 1-D tensor [NUM_CLASSES]
    P = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32)

    # calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
    p_P = predict[:, :, 0:self.num_classes]

    # class_loss: applied to every cell covered by the box (`objects`).
    class_loss = tf.nn.l2_loss( tf.reshape(objects, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale

    # object_loss: confidence error of the responsible box.
    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale

    # noobject_loss: confidence of all other boxes is pushed towards zero.
    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

    # coord_loss: centers are normalised by the cell size, the square-rooted
    # sizes by the image size.
    coord_loss = (tf.nn.l2_loss(I * (p_x - x) / (self.image_size / self.cell_size)) + tf.nn.l2_loss(I * (p_y - y) / (self.image_size / self.cell_size)) + tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w)) / self.image_size + tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) / self.image_size) * self.coord_scale

    nilboy = I

    return num + 1, object_num, [ loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss, loss[3] + coord_loss ], predict, labels, nilboy
def _f(values):
    """Signed square root: ``sqrt(|values|) * sign(values)`` element-wise."""
    magnitude_root = tf.sqrt(tf.abs(values))
    return magnitude_root * tf.sign(values)
def train_model(graph, var_dict, train_data, max_epoch, hyper_param, output_dir,
                test_data=None, ex_printer=None, session=None):
    """Train a model with provided data.

    Args:
        graph: tf.Graph containing the model.
        var_dict: dict mapping logical names ("train_inputs", "train_outputs",
            "seq2seq_feed_previous", "train_input_mask", "train_output_mask",
            "type_masks", "token_accuracy", "sentence_accuracy",
            "total_loss") to tensor names inside `graph`.
        train_data: object exposing `Xs`, `Ys`, `XMasks`, `YMasks` and
            `type_masks`; the first four are sliced per batch along axis 0,
            `type_masks` along axis 1.
        max_epoch: maximum number of epochs.
        hyper_param: dict with "learning_rate" and "batch_size" and optional
            "gradient_clip_norm", "gradient_noise" and "gradient_noise_gamma".
        output_dir: directory for "train.log" and checkpoints, or None to
            disable both.
        test_data: optional data evaluated with `test_model` after each epoch.
        ex_printer: forwarded to `test_model` every 30th and on the last
            epoch.
        session: existing tf.Session to use; a new one is created when None.

    Returns:
        The session used for training.

    Training stops early once the mean training loss of an epoch drops below
    0.05. A checkpoint is written whenever the sentence accuracy (test
    accuracy if `test_data` is given, otherwise training accuracy) improves.
    """
    learning_rate = hyper_param["learning_rate"]
    batch_size = hyper_param["batch_size"]
    log_file = os.path.join(output_dir, "train.log") if output_dir is not None else None

    with graph.as_default():
        # The saver is built before the optimizer, exactly as before, so the
        # set of variables it captures is unchanged.
        saver = tf.train.Saver()
        last_best_accuracy = 0.

        # Placeholders of the model.
        train_inputs = graph.get_tensor_by_name(var_dict["train_inputs"])
        train_outputs = graph.get_tensor_by_name(var_dict["train_outputs"])
        seq2seq_feed_previous = graph.get_tensor_by_name(var_dict["seq2seq_feed_previous"])
        input_mask = graph.get_tensor_by_name(var_dict["train_input_mask"])
        output_mask = graph.get_tensor_by_name(var_dict["train_output_mask"])
        type_masks = graph.get_tensor_by_name(var_dict["type_masks"])

        # Tensors evaluated for reporting.
        token_accuracy = graph.get_tensor_by_name(var_dict["token_accuracy"])
        sentence_accuracy = graph.get_tensor_by_name(var_dict["sentence_accuracy"])
        total_loss = graph.get_tensor_by_name(var_dict["total_loss"])

        # Fed with the epoch index; only used to anneal the gradient noise.
        global_step = tf.placeholder(tf.int32, name="global_step")

        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)

        # Optional gradient processing.
        grad_clip_norm = hyper_param.get("gradient_clip_norm")
        grad_noise = hyper_param.get("gradient_noise")
        grad_noise_gamma = hyper_param.get("gradient_noise_gamma")

        grads_and_vars = optimizer.compute_gradients(total_loss)
        (grads, variables) = zip(*grads_and_vars)

        if grad_clip_norm:
            print("clipping norm: {}".format(grad_clip_norm))
            grads, _ = tf.clip_by_global_norm(grads, grad_clip_norm)

        if grad_noise:
            if grad_noise_gamma:
                # Anneal the noise variance with the fed step.
                grad_noise = grad_noise / tf.pow(
                    1.0 + tf.to_float(global_step), grad_noise_gamma)
            noise_std = tf.sqrt(grad_noise)
            # Noise is added to the current gradients, i.e. the clipped ones
            # when clipping is enabled. Previously the unclipped gradients
            # were used here, which silently discarded the clipping.
            grads = [
                g if g is None else g + noise_std * tf.random_normal(tf.shape(g))
                for g in grads
            ]
            print("noise added")

        train_step = optimizer.apply_gradients(list(zip(grads, variables)))

        session = tf.Session() if session is None else session

        output_log = None
        try:
            with session.as_default():
                if log_file is not None:
                    # Initialize the log file with a header matching the
                    # columns written per epoch.
                    output_log = open(log_file, "w")
                    log_header = "tr_tok_acc, test_tok_acc, tr_sen_acc, test_sen_acc, tr_loss, test_loss"
                    if test_data is None:
                        log_header = "tr_tok_acc, tr_sen_acc, tr_loss"
                    output_log.write(log_header)
                    output_log.write("\n")

                session.run(tf.global_variables_initializer())

                nbatches = int(np.ceil(len(train_data.Xs) / float(batch_size)))

                for n in range(max_epoch):
                    print("================ epoch %d ==================" % n)
                    print("PROGRESS: 00.00%")

                    tr_token_accuracies = []
                    tr_sentence_accuracies = []
                    tr_losses = []

                    for i in range(nbatches):
                        left = i * batch_size
                        right = min((i + 1) * batch_size, len(train_data.Xs))

                        Xt = train_data.Xs[left : right]
                        Yt = train_data.Ys[left : right]
                        XMasks = train_data.XMasks[left : right]
                        YMasks = train_data.YMasks[left : right]
                        ty_masks = train_data.type_masks[:, left : right, :]

                        training_result = session.run(
                            [token_accuracy, sentence_accuracy, total_loss, train_step],
                            feed_dict={
                                train_inputs: Xt,
                                train_outputs: Yt,
                                input_mask: XMasks,
                                output_mask: YMasks,
                                type_masks: ty_masks,
                                seq2seq_feed_previous: False,
                                global_step: n})

                        tr_token_accuracies.append(training_result[0])
                        tr_sentence_accuracies.append(training_result[1])
                        tr_losses.append(training_result[2])

                    mean_loss = np.mean(tr_losses)
                    mean_token_acc = np.mean(tr_token_accuracies)
                    mean_sentence_acc = np.mean(tr_sentence_accuracies)

                    print("training_loss = {:.5f}".format(mean_loss))
                    print("train_token_accuracy = {:.5f}".format(mean_token_acc))
                    print("train_sentence_accuracy = {:.5f}".format(mean_sentence_acc))

                    if test_data is not None:
                        test_result = test_model(
                            graph, var_dict, session, test_data, batch_size,
                            ex_printer=ex_printer if (n % 30 == 0 or n == max_epoch - 1) else None)
                        test_token_accuracy = test_result[0]
                        test_sentence_accuracy = test_result[1]
                        test_loss = test_result[2]
                        log_str = "{}, {}, {}, {}, {}, {}".format(
                            mean_token_acc, test_token_accuracy,
                            mean_sentence_acc, test_sentence_accuracy,
                            mean_loss, test_loss)
                    else:
                        log_str = "{}, {}, {}".format(
                            mean_token_acc, mean_sentence_acc, mean_loss)

                    if output_dir:
                        if test_data is not None:
                            current_seq_acc = test_sentence_accuracy
                        else:
                            current_seq_acc = mean_sentence_acc

                        if current_seq_acc > last_best_accuracy:
                            last_best_accuracy = current_seq_acc
                            # The epoch is used as global step so several
                            # checkpoints can be kept around.
                            saver.save(session,
                                       os.path.join(output_dir, "table_nl_prog"),
                                       global_step=n)

                    if output_log is not None:
                        output_log.write(log_str)
                        output_log.write("\n")

                    if mean_loss < 0.05:
                        break
        finally:
            # The log used to be left open; close it on every exit path.
            if output_log is not None:
                output_log.close()

    return session
def call(self, inputs, training=None, mask=None):
    """Multi-head scaled dot-product attention.

    Args:
        inputs: value accepted by ``self._unpack`` yielding
            (query, key, value); each is used as a rank-3 tensor
            (batch, steps, features).
        training: unused.
        mask: value accepted by ``self._unpack`` yielding
            (query_mask, key_mask, _); each mask may be None, otherwise it is
            used as a (batch, steps) tensor.

    Returns:
        Tensor reshaped to (batch, query steps, feature_dim), where
        feature_dim is the last dimension of the incoming `value`. Positions
        masked out by `query_mask` are zeroed.

    Note:
        Heads are laid out batch-major along the leading axis
        (row ``b * head_num + h``). The key mask tiling and the final merge
        both rely on this layout.
    """
    query, key, value = self._unpack(inputs)
    query_mask, key_mask, _ = self._unpack(mask)

    batch_size = tf.shape(query)[0]
    # Static feature size of the incoming query, used for the scaling below.
    dimension_query = query.get_shape().as_list()[-1]
    seq_len = tf.shape(query)[-2]
    key_len = tf.shape(key)[-2]
    feature_dim = tf.shape(value)[-1]

    # Linear projections; the kernels are tiled over the batch so that a
    # batched matmul can be used.
    query = tf.matmul(
        query,
        tf.tile(tf.expand_dims(self.kernel_query, 0), [batch_size, 1, 1]))
    key = tf.matmul(
        key,
        tf.tile(tf.expand_dims(self.kernel_key, 0), [batch_size, 1, 1]))
    value = tf.matmul(
        value,
        tf.tile(tf.expand_dims(self.kernel_value, 0), [batch_size, 1, 1]))
    if self.use_bias:
        query += self.b_query
        key += self.b_key
        value += self.b_value

    def _reshape_multihead(origin_input):
        """Split the feature axis into heads, batch-major.

        Input shape: (Batch size, steps, features)
        Output shape: (Batch size * head num, steps, features // head num)

        Row ``b * head_num + h`` of the result holds head ``h`` of sample
        ``b``. The previous ``concat(split(axis=2), axis=0)`` produced the
        head-major order ``h * batch + b``, which disagreed with the mask
        tiling and the output merge and therefore mixed samples whenever
        both batch size and head_num exceeded one.
        """
        steps = tf.shape(origin_input)[1]
        width = origin_input.get_shape().as_list()[-1]
        if width is None:
            # Fall back to the dynamic size when it is not known statically.
            width = tf.shape(origin_input)[2]
        head_dim = width // self.head_num
        per_head = tf.reshape(
            origin_input, (-1, steps, self.head_num, head_dim))
        per_head = tf.transpose(per_head, [0, 2, 1, 3])
        return tf.reshape(per_head, (-1, steps, head_dim))

    def _reshape_mask(mask):
        """Repeat the mask for every head, batch-major.

        Input shape: (Batch size, steps)
        Output shape: (Batch size * head num, steps)
        """
        if mask is None:
            return None
        seq_len = tf.shape(mask)[1]
        mask = tf.expand_dims(mask, axis=1)
        mask = tf.tile(mask, [1, self.head_num, 1])
        return tf.reshape(mask, shape=(-1, seq_len))

    query_ = _reshape_multihead(query)
    key_ = _reshape_multihead(key)
    value_ = _reshape_multihead(value)
    key_mask = _reshape_mask(key_mask)

    # (Batch size * head num, query steps, key steps)
    similaritys = tf.matmul(query_, tf.transpose(key_, [0, 2, 1]))
    # Scale by the square root of the (un-projected) query feature size.
    similaritys /= tf.sqrt(tf.cast(dimension_query, tf.float32))

    if self.sequence_mask:
        # Causal mask: subtract a large value above the diagonal so that a
        # position cannot attend to later positions.
        ones = tf.ones((seq_len, key_len))
        similaritys -= (ones - tf.matrix_band_part(ones, -1, 0)) * 1e9
    if key_mask is not None:
        # Masked-out keys get a large negative score.
        similaritys -= (1.0 - tf.cast(tf.expand_dims(key_mask, axis=-2),
                                      tf.float32)) * 1e9

    attention_weights = tf.keras.activations.softmax(similaritys)
    attention_outputs = tf.matmul(attention_weights, value_)

    # Merge the heads back: (batch, head, steps, dim) -> (batch, steps, features).
    attention_outputs = tf.reshape(
        attention_outputs,
        (-1, self.head_num, seq_len, feature_dim // self.head_num))
    attention_outputs = tf.transpose(attention_outputs, [0, 2, 1, 3])
    attention_outputs = tf.reshape(attention_outputs,
                                   (-1, seq_len, feature_dim))

    # Output projection.
    attention_outputs = tf.matmul(
        attention_outputs,
        tf.tile(tf.expand_dims(self.kernel_project, 0), [batch_size, 1, 1]))
    if self.use_bias:
        attention_outputs += self.b_project
    if self.activation is not None:
        attention_outputs = self.activation(attention_outputs)

    if query_mask is not None:
        attention_outputs *= tf.cast(tf.expand_dims(query_mask, axis=-1),
                                     tf.float32)

    return attention_outputs
def __init__(self,
             env_action_space,
             env_observation_space,
             planning_horizon=50,
             max_iterations=5,
             population_size=500,
             num_elite=50,
             num_agents=5,
             alpha_cov=tf.constant(2.0, dtype=tf.float32),
             h_sigma=tf.constant(1.0, dtype=tf.float32)):
    """
    This class defines a Covariance Matrix Adaptation Evolution Strategy
    (CMA-ES) optimizer. (https://arxiv.org/pdf/1604.00772.pdf)

    Note: this optimizer is not optimized for more than one agent.

    Parameters
    ---------
    env_action_space: gym.ActionSpace
        Defines the action space of the gym environment.
    env_observation_space: gym.ObservationSpace
        Defines the observation space of the gym environment.
    planning_horizon: int
        Defines the planning horizon for the optimizer (how many steps to
        look ahead and optimize for).
    max_iterations: int
        Defines the maximum number of iterations for the CMA-ES optimizer to
        refine its guess for the optimal solution.
    population_size: int
        Defines the population size of the particles evaluated at each
        iteration.
    num_elite: int
        Defines the number of elites kept for the next iteration from the
        population.
    num_agents: int
        Defines the number of runners running in parallel.
    alpha_cov: tf.float32
        Defines the alpha covariance to be used.
    h_sigma: tf.float32
        Defines the h sigma to be used.
    """
    super(CMAESOptimizer, self).__init__(
        name=None,
        planning_horizon=planning_horizon,
        max_iterations=max_iterations,
        num_agents=num_agents,
        env_action_space=env_action_space,
        env_observation_space=env_observation_space)
    self._solution_dim = [self._num_agents, self._planning_horizon,
                          self._dim_U]
    self._population_size = population_size
    self._num_elite = num_elite

    # Initial mean: the midpoint of the action bounds, repeated for every
    # (agent, time step) pair and flattened into one vector.
    tiling = [self._planning_horizon * self._num_agents, 1]
    previous_solution_values = tf.constant(
        np.tile((self._action_lower_bound + self._action_upper_bound) / 2,
                tiling),
        dtype=tf.float32)
    previous_solution_values = tf.reshape(previous_solution_values, [-1])
    # Initial variance: (range / 4) ** 2, laid out the same way.
    solution_variance_values = tf.constant(
        np.tile(
            np.square(self._action_lower_bound - self._action_upper_bound)
            / 16, tiling),
        dtype=tf.float32)
    solution_variance_values = tf.reshape(solution_variance_values, [-1])

    # Recombination weights: log-decreasing for the elites, zero for the rest.
    num_elite_f = tf.cast(self._num_elite, dtype=tf.float32)
    self._weights = tf.concat([
        tf.math.log(num_elite_f + 0.5) -
        tf.math.log(tf.range(1, num_elite_f + 1)),
        tf.zeros(shape=(self._population_size - self._num_elite,),
                 dtype=tf.float32),
    ], axis=0)
    # Normalize weights such that they sum to one and reshape into a column
    # matrix.
    self._weights = (self._weights /
                     tf.reduce_sum(self._weights))[:, tf.newaxis]
    self._mu_eff = (tf.reduce_sum(self._weights) ** 2 /
                    tf.reduce_sum(self._weights ** 2))
    self._solution_size = tf.reduce_prod(self._solution_dim)
    # Float copy of the problem dimension, used by every formula below.
    n = tf.cast(self._solution_size, dtype=tf.float32)

    # Step-size control
    self._c_sigma = (self._mu_eff + 2) / (n + self._mu_eff + 5)
    self._d_sigma = (
        1 + 2 * tf.maximum(0, tf.sqrt((self._mu_eff - 1) / (n + 1)) - 1)
        + self._c_sigma)

    # Covariance matrix adaptation
    self._cc = (4 + self._mu_eff / n) / (n + 4 + 2 * self._mu_eff / n)
    self._alpha_cov = alpha_cov
    self._h_sigma = h_sigma
    self._c1 = self._alpha_cov / ((n + 1.3) ** 2 + self._mu_eff)
    c_mu_option_two = (
        self._alpha_cov * (self._mu_eff - 2 + 1 / self._mu_eff) /
        ((n + 2) ** 2 + self._alpha_cov * self._mu_eff / 2))
    self._c_mu = tf.minimum(1 - self._c1, c_mu_option_two)

    # Define trainable parameters
    # Mean
    self._m = tf.Variable(previous_solution_values)
    # Step-size
    self._sigma = tf.Variable(tf.math.sqrt(solution_variance_values))
    # Covariance matrix
    self._C = tf.Variable(tf.eye(num_rows=n, dtype=tf.float32))
    # Evolution path for sigma
    self._p_sigma = tf.Variable(tf.zeros((n,), dtype=tf.float32))
    # Evolution path for C
    self._p_C = tf.Variable(tf.zeros((n,), dtype=tf.float32))
    # Coordinate system (normalized eigenvectors)
    self._B = tf.Variable(tf.eye(num_rows=n, dtype=tf.float32))
    # Scaling (square root of eigenvalues)
    self._D = tf.Variable(tf.eye(num_rows=n, dtype=tf.float32))

    # Approximation of E||N(0, I)||: sqrt(n) * (1 - 1/(4n) + 1/(21 n^2)).
    # The square root applies to n only, not to the whole product.
    self._expectation_of_normal = tf.sqrt(n) * (
        1 - 1 / (4 * n) + 1 / (21 * n ** 2))
def xavier_init(size):
    """Sample initial parameters of shape ``size`` from a scaled normal.

    The standard deviation is ``1 / sqrt(size[0] / 2)``, i.e. it is derived
    from the first entry of ``size``, which is treated as the input dimension.
    """
    fan_in = size[0]
    # Standard deviation used for the initialization.
    stddev = 1. / tf.sqrt(fan_in / 2.)
    # Return the sampled initial values.
    return tf.random_normal(shape=size, stddev=stddev)
def _renorm_correction_and_moments(
    renorm_params,
    mean,
    variance,
    training,
):
    """Returns the correction and update values for renorm.

    Args:
        renorm_params: Object providing `epsilon`, `renorm_mean`,
            `renorm_mean_weight`, `renorm_stddev`, `renorm_stddev_weight`,
            `renorm_clipping` (queried with `.get` for the keys 'rmin',
            'rmax' and 'dmax'), `renorm_momentum`, `moving_mean` and
            `moving_variance`.
        mean: Mean of the current batch.
        variance: Variance of the current batch.
        training: Passed to `utils.smart_cond` to choose between the training
            and the inference branch.

    Returns:
        A tuple `(r, d, new_mean, new_variance)`. `r` and `d` are the
        (optionally clipped) renorm corrections, forced to 1 and 0 when not
        training. `new_mean` and `new_variance` are derived from the renorm
        moving averages.
    """
    stddev = tf.sqrt(variance + renorm_params.epsilon)
    # Compute the average mean and standard deviation, as if they were
    # initialized with this batch's moments.
    mixed_renorm_mean = (renorm_params.renorm_mean +
                         (1. - renorm_params.renorm_mean_weight) * mean)
    mixed_renorm_stddev = (renorm_params.renorm_stddev +
                           (1. - renorm_params.renorm_stddev_weight) * stddev)
    # Compute the corrections for batch renorm.
    r = stddev / mixed_renorm_stddev
    d = (mean - mixed_renorm_mean) / mixed_renorm_stddev
    # Ensure the corrections use pre-update moving averages.
    with ops.control_dependencies([r, d]):
        mean = array_ops.identity(mean)
        stddev = array_ops.identity(stddev)
    # Clipping bounds are optional; a missing key yields None and is skipped.
    rmin, rmax, dmax = [
        renorm_params.renorm_clipping.get(key)
        for key in ['rmin', 'rmax', 'dmax']
    ]
    if rmin is not None:
        r = tf.maximum(r, rmin)
    if rmax is not None:
        r = tf.minimum(r, rmax)
    if dmax is not None:
        # Clip d symmetrically to [-dmax, dmax].
        d = tf.maximum(d, -dmax)
        d = tf.minimum(d, dmax)
    # When not training, use r=1, d=0.
    r = utils.smart_cond(training, lambda: r, lambda: array_ops.ones_like(r))
    d = utils.smart_cond(training, lambda: d, lambda: array_ops.zeros_like(d))

    def _update_renorm_variable(var, weight, value):
        """Updates a moving average and weight, returns the unbiased value."""
        value = array_ops.identity(value)

        def _do_update():
            """Updates `var` and `weight` and returns their updated ratio."""
            # Update the variables without zero debiasing. The debiasing will be
            # accomplished by dividing the exponential moving average by the weight.
            # For example, after a single update, the moving average would be
            # (1-decay) * value. and the weight will be 1-decay, with their ratio
            # giving the value.
            # Make sure the weight is not updated until before r and d computation.
            with ops.control_dependencies([value]):
                weight_value = array_ops.constant(1., dtype=weight.dtype)
            new_var = moving_averages.assign_moving_average(
                var, value, renorm_params.renorm_momentum, zero_debias=False)
            new_weight = moving_averages.assign_moving_average(
                weight, weight_value, renorm_params.renorm_momentum,
                zero_debias=False)
            return new_var / new_weight

        def _fake_update():
            """Returns the current value of `var` without updating anything."""
            return array_ops.identity(var)

        return utils.smart_cond(training, _do_update, _fake_update)

    with ops.colocate_with(renorm_params.moving_mean):
        new_mean = _update_renorm_variable(renorm_params.renorm_mean,
                                           renorm_params.renorm_mean_weight,
                                           mean)
    with ops.colocate_with(renorm_params.moving_variance):
        new_stddev = _update_renorm_variable(
            renorm_params.renorm_stddev, renorm_params.renorm_stddev_weight,
            stddev)
        # Make sqrt(moving_variance + epsilon) = new_stddev.
        new_variance = tf.square(new_stddev) - renorm_params.epsilon

    return (r, d, new_mean, new_variance)
# Fit the current estimator and read its solution back as a NumPy array.
myEstimator.fit(nIter=nIter)
xi = myEstimator.x.numpy()
# Show the solution with the colour range fixed to [0, 1] and save it.
plt.figure()
img = plt.imshow(xi)
img.set_clim(0.0, 1.0)
plt.savefig('results/LLS.png')

# Wrap FP / BP as a linear operator
# (presumably forward and back projection -- confirm against their definitions).
A = aiaiTools.layers.linear.NumpyLinearOperator(FP, BP, x_shape, y_shape)
# Chain A with its transpose, then build a Fourier approximation of that chain.
ATA = aiaiTools.layers.meta.LinearSequence(A, A.transpose())
ATA_fourier = aiaiTools.layers.linear.FourierApproximation2D(ATA, x)
# Operator built from the reciprocal of the Fourier response
# (not used by the statements below).
ATA_inv = aiaiTools.layers.linear.FourierOperator2D(
    ATA_fourier.IFT(1 / ATA_fourier.H))
appodization = aiaiTools.layers.linear.GaussianBlur2D(0.5, x_shape)
# M uses the inverse square root of the Fourier response, multiplied by the
# Gaussian blur response.
M = aiaiTools.layers.linear.FourierOperator2D(
    ATA_fourier.IFT(1 / tf.sqrt(ATA_fourier.H) * appodization.H))
# Variant of M without the Gaussian blur factor:
# M = aiaiTools.layers.linear.FourierOperator2D(ATA_fourier.IFT(1/tf.sqrt(ATA_fourier.H)))
# Solve the least-squares problem again with M chained in front of A.
myForwardModel = aiaiTools.layers.meta.LinearSequence(M, A)
myEstimator = aiaiTools.models.optimization.LeastSquares(myForwardModel,
                                                         y,
                                                         x_shape,
                                                         dynamic=True)
myEstimator.compile(learning_rate=learning_rate)
myEstimator.fit(nIter=nIter)
# The fitted variable is passed through M before being displayed and saved.
xi = M(myEstimator.x.numpy())
plt.figure()
img = plt.imshow(xi)
img.set_clim(0.0, 1.0)
plt.savefig('results/PLLS.png')
def build_graph(self, x1, y_pred_, learning_rate, units, hold_prob):
    """Build the network selected by ``self.trial`` plus its training ops.

    The network is a chain of stages; each trial extends the previous one:

    * trial 0: lstm1
    * trial 1: lstm1 -> dense1
    * trial 2: lstm1 -> dense1 -> lstm2
    * trial 3: lstm1 -> dense1 -> lstm2 -> dense2 -> lstm3

    Every stage is followed by dropout with ``rate = 1 - hold_prob``. The
    final features are flattened to 2-D and fed to the output layer.

    Args:
        x1: Input tensor fed to the first LSTM.
        y_pred_: Target tensor.
        learning_rate: Learning rate for the Adam optimizer.
        units: Indexable with LSTM sizes; ``units[0]`` is used for lstm1 and
            ``units[2]`` for both lstm2 and lstm3.
        hold_prob: Keep probability; dropout uses ``1 - hold_prob`` as rate.

    Returns:
        Tuple ``(train_lstm, cost_lstm, accuracy_lstm, sse_lstm, rse_lstm,
        weights)``.

    Raises:
        ValueError: If ``self.trial`` is not one of 0, 1, 2, 3.
    """
    if self.trial not in (0, 1, 2, 3):
        # Previously this fell through and failed later with an unbound
        # local variable; fail early with a clear message instead.
        raise ValueError('Unsupported trial: %r' % (self.trial,))

    # With a rated value configured the error is left unscaled (divided by
    # 1); otherwise it is divided by the targets.
    if self.rated is not None:
        norm_val = tf.constant(1, tf.float32, name='rated')
    else:
        norm_val = y_pred_

    def _lstm_stage(stage_input, n_units, name):
        """LSTM + dropout; returns (layer, flattened output, 3-D shape)."""
        layer = tf.keras.layers.LSTM(n_units,
                                     name=name,
                                     return_sequences=True,
                                     activation=tf.nn.elu)
        sequence = tf.nn.dropout(layer(stage_input), rate=1 - hold_prob)
        seq_shape = sequence.get_shape().as_list()
        flat = tf.reshape(sequence, [-1, seq_shape[1] * seq_shape[2]])
        return layer, flat, seq_shape

    def _dense_stage(flat_input, seq_shape, name):
        """Same-width Dense + dropout on flat input; returns (layer, out)."""
        layer = tf.keras.layers.Dense(units=seq_shape[1] * seq_shape[2],
                                      activation=tf.nn.elu,
                                      name=name)
        return layer, tf.nn.dropout(layer(flat_input), rate=1 - hold_prob)

    def _unflatten(flat_input, seq_shape):
        """Restore the (batch, steps, features) layout for the next LSTM."""
        return tf.reshape(flat_input, [-1, seq_shape[1], seq_shape[2]])

    with tf.name_scope("build_lstm"):
        layers = []

        layer, full_out_dropout, shape = _lstm_stage(x1, units[0], 'lstm1')
        layers.append(layer)
        if self.trial >= 1:
            layer, full_out_dropout = _dense_stage(full_out_dropout, shape,
                                                   'dense1')
            layers.append(layer)
        if self.trial >= 2:
            layer, full_out_dropout, shape = _lstm_stage(
                _unflatten(full_out_dropout, shape), units[2], 'lstm2')
            layers.append(layer)
        if self.trial >= 3:
            layer, full_out_dropout = _dense_stage(full_out_dropout, shape,
                                                   'dense2')
            layers.append(layer)
            layer, full_out_dropout, shape = _lstm_stage(
                _unflatten(full_out_dropout, shape), units[2], 'lstm3')
            layers.append(layer)

        if self.probabilistic:
            prob_layer = tf.keras.layers.Dense(y_pred_.shape[1],
                                               activation=tf.nn.softmax,
                                               name='dense_prob')
            y_pred = prob_layer(full_out_dropout)
        else:
            y_pred, W, b = self.normal_full_layer(full_out_dropout, 1)

    # Build a fresh list so the lists owned by the layers are never mutated.
    weights = [w for layer in layers for w in layer.trainable_weights]
    if self.probabilistic:
        weights += prob_layer.trainable_weights
    else:
        weights += [W, b]

    with tf.name_scope("train_lstm"):
        if self.probabilistic:
            cost_lstm = tf.losses.softmax_cross_entropy(y_pred_, y_pred)
            optimizer_lstm = tf.compat.v1.train.AdamOptimizer(
                learning_rate=learning_rate)
            train_lstm = optimizer_lstm.minimize(cost_lstm)
            # NOTE(review): tf.metrics.* take (labels, predictions) and
            # return a (value, update_op) pair, so these three lines do not
            # look runnable as written. Left unchanged until the intended
            # metrics are confirmed.
            accuracy_lstm = 1 / tf.metrics.accuracy(y_pred - y_pred_)
            sse_lstm = 1 / tf.metrics.recall(y_pred - y_pred_)
            rse_lstm = 1 / tf.metrics.precision(y_pred - y_pred_)
        else:
            err = tf.divide(tf.abs(y_pred - y_pred_), norm_val)
            cost_lstm = tf.reduce_mean(tf.square(err))
            optimizer_lstm = tf.compat.v1.train.AdamOptimizer(
                learning_rate=learning_rate)
            train_lstm = optimizer_lstm.minimize(cost_lstm)
            accuracy_lstm = tf.reduce_mean(err)
            sse_lstm = tf.reduce_sum(tf.square(err))
            rse_lstm = tf.sqrt(tf.reduce_mean(tf.square(err)))
    return train_lstm, cost_lstm, accuracy_lstm, sse_lstm, rse_lstm, weights
def init_weights(self, shape, Fin, Fout):
    """Sample a weight tensor uniformly from ``[-s, s]``.

    The bound is ``s = sqrt(2 / (Fin + Fout))``.

    Args:
        shape: Shape of the tensor to sample.
        Fin: First term of the denominator of the bound.
        Fout: Second term of the denominator of the bound.

    Returns:
        The sampled tensor of the requested shape.
    """
    bound = tf.sqrt(2.0 / (Fin + Fout))
    return tf.random_uniform(shape, minval=-bound, maxval=bound)
def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
    """Create the statistics, variables and ops of an observation normalizer.

    The normalizer keeps running sums so that observations can be shifted
    and scaled to roughly zero mean and unit variance.

    Args:
        size (int): the size of the observation to be normalized
        eps (float): lower bound for the computed standard deviation
        default_clip_range (float): stored on the instance as the default
            clipping range for normalized observations
        sess (object): the TensorFlow session to be used; the default
            session is used when omitted
    """
    self.size = size
    self.eps = eps
    self.default_clip_range = default_clip_range
    if sess is None:
        sess = tf.get_default_session()
    self.sess = sess

    # Local NumPy accumulators.
    self.local_sum = np.zeros(self.size, np.float32)
    self.local_sumsq = np.zeros(self.size, np.float32)
    self.local_count = np.zeros(1, np.float32)

    def _frozen_variable(name, shape, initializer):
        """Create a non-trainable float32 variable."""
        return tf.get_variable(initializer=initializer,
                               shape=shape,
                               name=name,
                               trainable=False,
                               dtype=tf.float32)

    def _float_placeholder(name, shape):
        """Create a float32 placeholder."""
        return tf.placeholder(name=name, shape=shape, dtype=tf.float32)

    # Graph-side running statistics; the count starts at one.
    self.sum_tf = _frozen_variable('sum', self.local_sum.shape,
                                   tf.zeros_initializer())
    self.sumsq_tf = _frozen_variable('sumsq', self.local_sumsq.shape,
                                     tf.zeros_initializer())
    self.count_tf = _frozen_variable('count', self.local_count.shape,
                                     tf.ones_initializer())
    self.mean = _frozen_variable('mean', (self.size, ),
                                 tf.zeros_initializer())
    self.std = _frozen_variable('std', (self.size, ),
                                tf.ones_initializer())

    # Placeholders carrying the increments for the running statistics.
    self.count_pl = _float_placeholder('count_pl', (1, ))
    self.sum_pl = _float_placeholder('sum_pl', (self.size, ))
    self.sumsq_pl = _float_placeholder('sumsq_pl', (self.size, ))

    # Add the fed increments to the running statistics.
    self.update_op = tf.group(self.count_tf.assign_add(self.count_pl),
                              self.sum_tf.assign_add(self.sum_pl),
                              self.sumsq_tf.assign_add(self.sumsq_pl))

    # mean = sum / count
    # std  = sqrt(max(eps^2, sumsq / count - (sum / count)^2))
    mean_update = tf.assign(self.mean, self.sum_tf / self.count_tf)
    min_variance = tf.square(self.eps)
    second_moment = self.sumsq_tf / self.count_tf
    squared_mean = tf.square(self.sum_tf / self.count_tf)
    std_update = tf.assign(
        self.std,
        tf.sqrt(tf.maximum(min_variance, second_moment - squared_mean)))
    self.recompute_op = tf.group(mean_update, std_update)

    self.lock = threading.Lock()
def Conv2D(name,
           input_dim,
           output_dim,
           filter_size,
           inputs,
           he_init=True,
           mask_type=None,
           stride=1,
           weightnorm=None,
           biases=True,
           gain=1.):
    """Create a (optionally masked / weight-normalized) 2-D convolution.

    Args:
        name: Name scope and prefix of the parameter names.
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        filter_size: Height and width of the square filter.
        inputs: tensor of shape (batch size, num channels, height, width)
        he_init: Use ``sqrt(4 / (fan_in + fan_out))`` as filter stdev instead
            of the normalized ``sqrt(2 / (fan_in + fan_out))``.
        mask_type: ``None`` or a pair ``(kind, n_channels)`` where ``kind``
            is one of 'a', 'b'.
        stride: Spatial stride.
        weightnorm: Whether to apply weight normalization; ``None`` falls
            back to the module-level ``_default_weightnorm``.
        biases: Whether to add a per-output-channel bias.
        gain: Factor the initial filter values are multiplied by.

    Returns:
        tensor of shape (batch size, num channels, height, width)
    """
    with tf.name_scope(name):

        if mask_type is not None:
            mask_type, mask_n_channels = mask_type

            mask = np.ones((filter_size, filter_size, input_dim, output_dim),
                           dtype='float32')
            center = filter_size // 2

            # Mask out future locations
            # filter shape is (height, width, input channels, output channels)
            mask[center + 1:, :, :, :] = 0.
            mask[center, center + 1:, :, :] = 0.

            # Mask out future channels
            # (`range` instead of `xrange` so this also runs on Python 3.)
            for i in range(mask_n_channels):
                for j in range(mask_n_channels):
                    if (mask_type == 'a' and i >= j) or (mask_type == 'b'
                                                         and i > j):
                        mask[center, center, i::mask_n_channels,
                             j::mask_n_channels] = 0.

        def uniform(stdev, size):
            """Uniform float32 samples with standard deviation `stdev`."""
            return np.random.uniform(low=-stdev * np.sqrt(3),
                                     high=stdev * np.sqrt(3),
                                     size=size).astype('float32')

        fan_in = input_dim * filter_size**2
        fan_out = output_dim * filter_size**2 / (stride**2)

        if mask_type is not None:  # only approximately correct
            fan_in /= 2.
            fan_out /= 2.

        if he_init:
            filters_stdev = np.sqrt(4. / (fan_in + fan_out))
        else:  # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2. / (fan_in + fan_out))

        # A module-level `_weights_stdev` overrides the computed stdev.
        filter_shape = (filter_size, filter_size, input_dim, output_dim)
        if _weights_stdev is not None:
            filter_values = uniform(_weights_stdev, filter_shape)
        else:
            filter_values = uniform(filters_stdev, filter_shape)

        filter_values *= gain

        filters = lib.param(name + '.Filters', filter_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # Rescale every output channel's filter to a learned norm `g`,
            # initialized to the norm of the initial filter values.
            norm_values = np.sqrt(
                np.sum(np.square(filter_values), axis=(0, 1, 2)))
            target_norms = lib.param(name + '.g', norm_values)
            with tf.name_scope('weightnorm'):
                norms = tf.sqrt(
                    tf.reduce_sum(tf.square(filters),
                                  reduction_indices=[0, 1, 2]))
                filters = filters * (target_norms / norms)

        if mask_type is not None:
            with tf.name_scope('filter_mask'):
                filters = filters * mask

        result = tf.nn.conv2d(input=inputs,
                              filter=filters,
                              strides=[1, 1, stride, stride],
                              padding='SAME',
                              data_format='NCHW')

        if biases:
            _biases = lib.param(name + '.Biases',
                                np.zeros(output_dim, dtype='float32'))
            result = tf.nn.bias_add(result, _biases, data_format='NCHW')

        return result
def lstm(params):
    """Train a 3-layer LSTM that predicts the next word embedding.

    The function loads word2vec data, builds the training and sampling
    graphs, runs the training loop, and at summary steps prints the average
    loss, samples text, saves a checkpoint and evaluates on a validation set.

    Args:
        params: dict providing the keys 'num_unrollings', 'batch_size',
            'num_nodes', 'max_k', 'beta_regularization_value',
            'start_learning_rate', 'epochs', 'summary_frequency',
            'savefile', 'resume' and 'dropout'.
    """
    data, count, dictionary, embeddings, normalized_embeddings, weights, biases = word2vec.get_word2vec(
        2, False)
    words_size = embeddings.shape[0]
    embedding_size = embeddings.shape[1]
    print('Most common words (+UNK)', count[:5])
    print('embedding size:%s data:%s' %
          (embedding_size, [dictionary[word] for word in data[:100]]))

    # Create a small validation set.
    valid_size = 1000
    valid_text = data[:valid_size]
    train_text = data[valid_size:]
    train_size = len(train_text)

    p_num_unrollings = params['num_unrollings']
    p_batch_size = params['batch_size']

    class BatchGenerator(object):
        """Yields batches of embeddings read from `batch_size` text cursors."""

        def __init__(self, text, batch_size, num_unrollings):
            assert batch_size >= 1
            assert num_unrollings >= 1
            self._text = text
            self._text_size = len(text)
            self._batch_size = batch_size
            self._num_unrollings = num_unrollings
            # Each batch row starts reading at its own evenly spaced offset.
            segment = self._text_size // batch_size
            self._cursor_boundary = [
                offset * segment for offset in range(batch_size)
            ]
            self._cursor = self._cursor_boundary[:]
            self._last_batch = self._next_batch()

        def _next_batch(self):
            """Generate a single batch from the current cursor position in the data."""
            # `float` is what the removed `np.float` alias pointed to.
            batch = np.zeros(shape=(self._batch_size, embedding_size),
                             dtype=float)
            for b in range(self._batch_size):
                batch[b] = normalized_embeddings[self._text[self._cursor[b]]]
                self._cursor[b] = (self._cursor[b] + 1)
            # Once the last cursor runs off the text, rewind all cursors.
            if self._cursor[self._batch_size - 1] == self._text_size:
                self._cursor = self._cursor_boundary[:]
            return batch

        def next(self):
            """Generate the next array of batches from the data.

            The array consists of the last batch of the previous array,
            followed by num_unrollings new ones.
            """
            batches = [self._last_batch]
            for _ in range(self._num_unrollings):
                batches.append(self._next_batch())
            self._last_batch = batches[-1]
            return batches

    def batches2string(batches):
        """Convert a sequence of batches back into their (most likely) string
        representation."""
        s = [''] * batches[0].shape[0]
        for b in batches:
            # Nearest word = largest dot product with the normalized
            # embeddings.
            words = [
                dictionary[w]
                for w in np.argmax(np.matmul(b, normalized_embeddings.T), 1)
            ]
            s = [' '.join(x) for x in zip(s, words)]
        return s

    train_batches = BatchGenerator(train_text, p_batch_size, p_num_unrollings)
    valid_batches = BatchGenerator(valid_text, 1, 1)

    print(batches2string(train_batches.next()))
    print(batches2string(train_batches.next()))
    print(batches2string(train_batches.next()))
    print(batches2string(valid_batches.next()))
    print(batches2string(valid_batches.next()))
    print(batches2string(valid_batches.next()))

    def logprob(predictions, labels):
        """Log-probability of the true labels in a predicted batch."""
        # Note: clamps `predictions` in place.
        predictions[predictions < 1e-10] = 1e-10
        return np.sum(
            -np.log([predictions[i, label]
                     for i, label in enumerate(labels)])) / labels.shape[0]

    graph = tf.Graph()
    with graph.as_default():
        p_num_nodes = params['num_nodes']
        p_max_k = params['max_k']

        def create_trainable_variables():
            '''
            Parameters:
            num_nodes*0:num_nodes*1 : Input gate
            num_nodes*1:num_nodes*2 : Forget gate
            num_nodes*2:num_nodes*3 : Output gate
            num_nodes*3:num_nodes*4 : New memory cell
            '''
            W = {
                'L1_W':
                tf.Variable(
                    tf.truncated_normal([embedding_size, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L1_W")),
                'L1_U':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L1_U")),
                'L1_b':
                tf.Variable(tf.zeros([1, p_num_nodes * 4]), name="L1_b"),
                'L2_W':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L2_W")),
                'L2_U':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L2_U")),
                'L2_b':
                tf.Variable(tf.zeros([1, p_num_nodes * 4]), name="L2_b"),
                'L3_W':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L3_W")),
                'L3_U':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L3_U")),
                'L3_b':
                tf.Variable(tf.zeros([1, p_num_nodes * 4]), name="L3_b"),
                'L4_W':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, embedding_size],
                                        mean=0,
                                        stddev=0.1,
                                        name="L4_W")),
                'L4_b':
                tf.Variable(tf.zeros([embedding_size]), name="L4_b"),
            }
            return W

        def create_variables(batch_size, num_unrollings):
            """Create input placeholders and the state-carrying variables."""
            # Input data.
            train_data = list()
            for _ in range(num_unrollings + 1):
                train_data.append(
                    tf.placeholder(tf.float32,
                                   shape=[batch_size, embedding_size]))
            inputs = {
                'inputs': train_data[:num_unrollings],
                'labels':
                train_data[1:],  # labels are inputs shifted by one time step.
                'data': train_data,
                'dropout': tf.placeholder(tf.float32, name="dropout"),
            }

            # Variables saving state across unrollings.
            last_state = {
                'h1':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="h1"),
                'c1':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="c1"),
                'h2':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="h2"),
                'c2':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="c2"),
                'h3':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="h3"),
                'c3':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="c3"),
            }
            return inputs, last_state

        # Definition of the cell computation.
        def lstm_cell(x, h, c, W, U, b):
            """Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
            Note that in this formulation, we omit the various connections between the
            previous c (i.e. state) and the gates."""
            raw_data = tf.matmul(x, W) + tf.matmul(h, U) + b
            gates = tf.sigmoid(raw_data[:, :p_num_nodes * 3])
            input_gate = gates[:, :p_num_nodes]  # p_batch_size x p_num_nodes
            forget_gate = gates[:, p_num_nodes:p_num_nodes *
                                2]  # p_batch_size x p_num_nodes
            output_gate = gates[:, p_num_nodes * 2:p_num_nodes *
                                3]  # p_batch_size x p_num_nodes
            new_memory_cell = raw_data[:, p_num_nodes *
                                       3:]  # p_batch_size x p_num_nodes
            c_next = forget_gate * c + input_gate * tf.tanh(
                new_memory_cell)  # p_batch_size x p_num_nodes
            h_next = output_gate * tf.tanh(c_next)
            return h_next, c_next

        def create_model(W, inputs, last_state):
            """Unroll the stacked LSTM over the inputs and build the loss."""
            ys = list()
            h1 = last_state['h1']
            c1 = last_state['c1']
            h2 = last_state['h2']
            c2 = last_state['c2']
            h3 = last_state['h3']
            c3 = last_state['c3']
            # construct 3 layer LSTM
            for x in inputs['inputs']:
                h1, c1 = lstm_cell(x, h1, c1, W['L1_W'], W['L1_U'], W['L1_b'])
                x2 = tf.nn.dropout(h1, inputs['dropout'], name="dropout")
                h2, c2 = lstm_cell(x2, h2, c2, W['L2_W'], W['L2_U'],
                                   W['L2_b'])
                x3 = tf.nn.dropout(h2, inputs['dropout'], name="dropout")
                h3, c3 = lstm_cell(x3, h3, c3, W['L3_W'], W['L3_U'],
                                   W['L3_b'])
                ys.append(h3)

            # State saving across unrollings.
            with tf.control_dependencies([
                    last_state['h1'].assign(h1), last_state['c1'].assign(c1),
                    last_state['h2'].assign(h2), last_state['c2'].assign(c2),
                    last_state['h3'].assign(h3), last_state['c3'].assign(c3)
            ]):
                # Classifier.
                Y_pred = tf.nn.xw_plus_b(tf.concat(0, ys), W['L4_W'],
                                         W['L4_b'])
                norm = tf.sqrt(
                    tf.reduce_sum(tf.square(Y_pred), 1, keep_dims=True))
                normalized_Y_pred = Y_pred / norm
                Y = tf.concat(0, inputs['labels'])
                l2_loss = params['beta_regularization_value'] * (
                    tf.nn.l2_loss(W['L1_W']) + tf.nn.l2_loss(W['L2_W']) +
                    tf.nn.l2_loss(W['L3_W']) + tf.nn.l2_loss(W['L4_W']))
                loss = tf.contrib.losses.cosine_distance(
                    normalized_Y_pred, Y, dim=1) + l2_loss

            model = {
                'loss': loss,
                'Y_pred': Y_pred,
            }
            return model

        # Convert vec to word
        norm_embeddings = tf.constant(normalized_embeddings.T)

        W = create_trainable_variables()
        inputs, last_state = create_variables(p_batch_size, p_num_unrollings)

        # Unrolled LSTM loop.
        model = create_model(W, inputs, last_state)

        # Optimizer.
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(
            params['start_learning_rate'],
            global_step,
            5000,
            0.1,
            staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        gradients, v = zip(*optimizer.compute_gradients(model['loss']))
        gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
        optimizer = optimizer.apply_gradients(zip(gradients, v),
                                              global_step=global_step)
        # RMS of gradients and variables, for debugging.
        grad_sum = [
            tf.sqrt(tf.reduce_mean(tf.square(gradient)))
            for gradient in gradients[:len(gradients) - 2]
        ]
        v_sum = [
            tf.sqrt(tf.reduce_mean(tf.square(variable)))
            for variable in v[:len(gradients) - 2]
        ]
        grad_v_sum = [grad / v for grad, v in zip(grad_sum, v_sum)]
        grad_sum_string = tf.Print(grad_sum, [grad_sum], message="grad_sum: ")
        v_sum_string = tf.Print(v_sum, [v_sum], message="v_sum: ")
        grad_v_sum_string = tf.Print(grad_v_sum, [grad_v_sum],
                                     message="grad_v_sum: ")

        # Sampling and validation eval: batch 1, no unrolling.
        sample_batch_size = 1
        sample_num_unrollings = 1
        sample_inputs, sample_last_state = create_variables(
            sample_batch_size, sample_num_unrollings)
        sample_model = create_model(W, sample_inputs, sample_last_state)
        reset_sample_state = tf.group(
            sample_last_state['h1'].assign(
                tf.zeros([sample_batch_size, p_num_nodes])),
            sample_last_state['c1'].assign(
                tf.zeros([sample_batch_size, p_num_nodes])),
            sample_last_state['h2'].assign(
                tf.zeros([sample_batch_size, p_num_nodes])),
            sample_last_state['c2'].assign(
                tf.zeros([sample_batch_size, p_num_nodes])),
            sample_last_state['h3'].assign(
                tf.zeros([sample_batch_size, p_num_nodes])),
            sample_last_state['c3'].assign(
                tf.zeros([sample_batch_size, p_num_nodes])))
        similarity = tf.matmul(sample_model['Y_pred'], norm_embeddings)
        sample_next = tf.nn.top_k(similarity, p_max_k)[1]

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

    p_epochs = params['epochs']
    p_summary_frequency = params['summary_frequency']

    def sample(candiate_indices):
        """Pick one of the top-k candidate word indices, avoiding UNK (0)."""
        # check https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py#L62
        k = int(abs(random.normalvariate(0, p_max_k / 2))) % p_max_k
        index = candiate_indices[k]
        # Skip UNK
        if len(candiate_indices) > 1:
            while index == 0:
                k = int(abs(random.normalvariate(0, p_max_k / 2))) % p_max_k
                index = candiate_indices[k]
        return index

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print('Initialized')
        if os.path.exists(params['savefile']) and params['resume']:
            # Restore variables from disk.
            saver.restore(session, params['savefile'])
            print("Model restored.")
        start_time = time.time()
        n_batch = len(data) // p_batch_size

        for epoch in range(int(math.ceil(p_epochs))):
            # p_epochs can be 0.001 to test overfit
            fraction = p_epochs - epoch
            if (fraction) < 1:
                n_batch = n_batch * fraction
            total_step = int(math.ceil(n_batch))
            mean_loss = 0
            print("Epoch %s start / total p_epochs %s, total steps %s" %
                  (epoch, p_epochs, total_step))
            for step in range(total_step):
                batches = train_batches.next()
                inputs_dict = dict()
                for i in range(p_num_unrollings + 1):
                    inputs_dict[inputs['data'][i]] = batches[i]
                inputs_dict[inputs['dropout']] = params['dropout']
                _, loss_e, learning_rate_e = session.run(
                    [optimizer, model['loss'], learning_rate],
                    feed_dict=inputs_dict)
                mean_loss += loss_e
                if step % p_summary_frequency == 0:
                    # At step 0 only a single batch has been accumulated, so
                    # there is nothing to average yet.
                    if step > 0:
                        mean_loss = mean_loss / p_summary_frequency
                    # The mean loss is an estimate of the loss over the last few batches.
                    # PP = exp(CE) = exp(-log(prediction)) = 1/prediction. max PP = 1 / (1/50000) = 50000
                    print(
                        'Average loss at step(%d):%f learning rate:%.2f time:%s'
                        % (step, mean_loss, learning_rate_e,
                           timedelta(seconds=(time.time() - start_time))))
                    mean_loss = 0

                    if step % (p_summary_frequency * 10) == 0:
                        # Generate some samples.
                        print('=' * 80)
                        for _ in range(5):
                            word = int(
                                random.uniform(0, 1) * words_size) % words_size
                            feed = np.array([embeddings[word]])
                            sentence = dictionary[word]
                            reset_sample_state.run()
                            for _ in range(79):
                                prediction = sample_next.eval({
                                    sample_inputs['inputs'][0]:
                                    feed,
                                    sample_inputs['dropout']:
                                    1,
                                })
                                index = sample(prediction[0, :])
                                feed = np.array([embeddings[index]])
                                sentence += ' ' + dictionary[index]
                            print(sentence)
                        print('=' * 80)
                        # Save the variables to disk.
                        save_path = saver.save(session, params['savefile'])

                    # Measure validation set perplexity.
                    valid_mean_loss = 0
                    reset_sample_state.run()
                    for _ in range(valid_size):
                        validation_batches = valid_batches.next()
                        sample_feeds = {
                            sample_inputs['inputs'][0]: validation_batches[0],
                            sample_inputs['labels'][0]: validation_batches[1],
                            sample_inputs['dropout']: 1,
                        }
                        valid_loss = session.run([sample_model['loss']],
                                                 feed_dict=sample_feeds)
                        valid_mean_loss += valid_loss[0]
                    print('Validation set loss: %.2f. saved:%s' %
                          (valid_mean_loss / valid_size, save_path))
def __init__(self, args):
    """Build the TensorFlow graph of a GRU sequence auto-encoder with a
    Gaussian latent code (encoder -> mu / log sigma^2 -> sampled z -> decoder).

    Args:
        args: configuration object. The attributes read here are
            ``batch_size``, ``map_size`` (two factors whose product is the
            vocabulary size), ``x_latent_size`` (embedding width),
            ``rnn_size``, ``eval``, ``neg_size`` and ``learning_rate``.

    Attributes set on ``self``:
        input_form: ``[inputs, mask, seq_length]`` placeholders to feed.
        batch_post_embedded: the latent code ``z`` handed to the decoder.
        rec_loss, latent_loss, loss: scalar training losses.
        train_op: Adam update minimising ``loss``.
        batch_likelihood: per-sequence masked mean log-sigmoid score of
            the target ids.
        save, restore: bound methods of a ``tf.train.Saver`` over the
            trainable variables.
    """
    # NOTE(review): the source formatting was flattened, so the extent of the
    # two `with tf.variable_scope(...)` blocks below is reconstructed, not
    # observed. Confirm against the variable names stored in existing
    # checkpoints (e.g. "mu_w" vs "encoder/mu_w") before relying on it.

    # Integer id sequences, a float mask of the same shape, and one length
    # per batch row. The time dimension is left dynamic (None).
    inputs = tf.placeholder(shape=(args.batch_size, None), dtype=tf.int32, name='inputs')
    mask = tf.placeholder(shape=(args.batch_size, None), dtype=tf.float32, name='inputs_mask')
    seq_length = tf.placeholder(shape=args.batch_size, dtype=tf.float32, name='seq_length')
    self.input_form = [inputs, mask, seq_length]

    encoder_inputs = inputs
    # Decoder input is the sequence with one zero id prepended; the targets
    # are the sequence with one zero id appended, so both are one step longer
    # than `inputs`. The mask gets a trailing 0 so that extra step is ignored.
    decoder_inputs = tf.concat(
        [tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32), inputs], axis=1)
    decoder_targets = tf.concat(
        [inputs, tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32)], axis=1)
    decoder_mask = tf.concat(
        [mask, tf.zeros(shape=(args.batch_size, 1), dtype=tf.float32)], axis=1)

    # Vocabulary size (input and output) is the product of the two map sizes.
    x_size = out_size = args.map_size[0] * args.map_size[1]
    # One embedding table shared by encoder and decoder inputs.
    embeddings = tf.Variable(tf.random_uniform(
        [x_size, args.x_latent_size], -1.0, 1.0), dtype=tf.float32)
    encoder_inputs_embedded = tf.nn.embedding_lookup(
        embeddings, encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(
        embeddings, decoder_inputs)

    with tf.variable_scope("encoder"):
        encoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
        # Only the final GRU state is kept; per-step outputs are discarded.
        _, encoder_final_state = tf.nn.dynamic_rnn(
            encoder_cell, encoder_inputs_embedded,
            sequence_length=seq_length,
            dtype=tf.float32,
        )

    # Linear heads mapping the final encoder state to the posterior mean and
    # log-variance, both of width rnn_size.
    mu_w = tf.get_variable("mu_w", [args.rnn_size, args.rnn_size], tf.float32,
                           tf.random_normal_initializer(stddev=0.02))
    mu_b = tf.get_variable("mu_b", [args.rnn_size], tf.float32,
                           initializer=tf.constant_initializer(0.0))
    sigma_w = tf.get_variable("sigma_w", [args.rnn_size, args.rnn_size], tf.float32,
                              tf.random_normal_initializer(stddev=0.02))
    sigma_b = tf.get_variable("sigma_b", [args.rnn_size], tf.float32,
                              initializer=tf.constant_initializer(0.0))

    mu = tf.matmul(encoder_final_state, mu_w) + mu_b
    log_sigma_sq = tf.matmul(encoder_final_state, sigma_w) + sigma_b
    eps = tf.random_normal(shape=tf.shape(log_sigma_sq), mean=0, stddev=1, dtype=tf.float32)

    if args.eval:
        # In eval mode the decoder is started from an all-zero latent code
        # instead of a posterior sample.
        z = tf.zeros(shape=(args.batch_size, args.rnn_size), dtype=tf.float32)
    else:
        # Reparameterised sample: z = mu + sigma * eps.
        z = mu + tf.sqrt(tf.exp(log_sigma_sq)) * eps
    self.batch_post_embedded = z

    with tf.variable_scope("decoder"):
        decoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
        decoder_init_state = z
        decoder_outputs, _ = tf.nn.dynamic_rnn(
            decoder_cell, decoder_inputs_embedded,
            initial_state=decoder_init_state,
            sequence_length=seq_length,
            dtype=tf.float32,
        )

    # Output projection used both by the sampled-softmax loss and by the
    # likelihood score below.
    out_w = tf.get_variable("out_w", [out_size, args.rnn_size], tf.float32,
                            tf.random_normal_initializer(stddev=0.02))
    out_b = tf.get_variable("out_b", [out_size], tf.float32,
                            initializer=tf.constant_initializer(0.0))

    # Sampled-softmax loss is computed on the flattened (batch * time) steps,
    # reshaped back to (batch, time), masked, and averaged over time.
    batch_rec_loss = tf.reduce_mean(decoder_mask * tf.reshape(
        tf.nn.sampled_softmax_loss(
            weights=out_w,
            biases=out_b,
            labels=tf.reshape(decoder_targets, [-1, 1]),
            inputs=tf.reshape(decoder_outputs, [-1, args.rnn_size]),
            num_sampled=args.neg_size,
            num_classes=out_size), [args.batch_size, -1]), axis=-1)
    # -0.5 * sum(1 + log sigma^2 - mu^2 - sigma^2): the closed-form KL
    # divergence between N(mu, sigma^2) and a standard normal.
    batch_latent_loss = -0.5 * tf.reduce_sum(
        1 + log_sigma_sq - tf.square(mu) - tf.exp(log_sigma_sq), axis=1)

    self.rec_loss = rec_loss = tf.reduce_mean(batch_rec_loss)
    self.latent_loss = latent_loss = tf.reduce_mean(batch_latent_loss)
    # Total loss is the plain average of the two terms (equal weighting).
    self.loss = loss = tf.reduce_mean([rec_loss, latent_loss])
    self.train_op = tf.train.AdamOptimizer(
        args.learning_rate).minimize(loss)

    # Per-step score of the true target id: log-sigmoid of the dot product
    # between the decoder output and that id's output weights, plus its bias.
    target_out_w = tf.nn.embedding_lookup(out_w, decoder_targets)
    target_out_b = tf.nn.embedding_lookup(out_b, decoder_targets)
    self.batch_likelihood = tf.reduce_mean(decoder_mask * tf.log_sigmoid(
        tf.reduce_sum(decoder_outputs * target_out_w, -1) + target_out_b),
        axis=-1, name="batch_likelihood")

    # Checkpoint only trainable variables (optimizer slots are not saved).
    saver = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES), max_to_keep=10)
    self.save, self.restore = saver.save, saver.restore
def xavier_init(size):
    """Create a normally distributed tensor scaled by its input dimension.

    Args:
        size: shape of the tensor to create; ``size[0]`` is used as the
            input dimension.

    Returns:
        A ``tf.random_normal`` tensor of shape ``size`` with standard
        deviation ``1 / sqrt(size[0] / 2)``.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)
def __init__(self, learning_rate=0.001, n_particles=1):
    """Build the graph for a two-component importance-weighted objective.

    Two proposal components, each parameterised by a mean and a
    log-variance, are sampled with the reparameterisation
    ``z = mean + sqrt(exp(log_var)) * eps``. Their samples are pooled and
    weighted against the mixture ``pi1 * q1 + pi2 * q2``, and Adam
    maximises ``log(mean(p / q_mix))``.

    ``self.log_q_z`` and ``self.p_z`` are defined elsewhere on the class
    (presumably the proposal log-density and the target density).

    Args:
        learning_rate: step size passed to the Adam optimizer.
        n_particles: number of samples drawn per component.
    """
    self.learning_rate = learning_rate
    self.n_particles = n_particles
    self.D = 1

    # Recognition model: two components, identical starting values.
    self.mean1 = tf.Variable([-1.])
    self.log_var1 = tf.Variable([1.])
    self.mean2 = tf.Variable([-1.])
    self.log_var2 = tf.Variable([1.])

    # Standard-normal noise, one row per particle.
    noise_shape = (self.n_particles, self.D)
    self.eps1 = tf.random_normal(noise_shape, 0, 1, dtype=tf.float32)
    self.eps2 = tf.random_normal(noise_shape, 0, 1, dtype=tf.float32)

    # Reparameterised samples from each component.
    std1 = tf.sqrt(tf.exp(self.log_var1))
    self.z1 = tf.add(self.mean1, tf.mul(std1, self.eps1))
    std2 = tf.sqrt(tf.exp(self.log_var2))
    self.z2 = tf.add(self.mean2, tf.mul(std2, self.eps2))

    # Log-density of each sample under the component that produced it.
    self.log_q1 = self.log_q_z(self.z1, self.mean1, self.log_var1)
    self.log_q2 = self.log_q_z(self.z2, self.mean2, self.log_var2)

    self.p_z1 = self.p_z(self.z1)
    self.p_z2 = self.p_z(self.z2)

    # Per-component importance weights p / q.
    self.w1 = self.p_z1 / tf.exp(self.log_q1)
    self.w2 = self.p_z2 / tf.exp(self.log_q2)

    # Mixture proportions are each component's share of the total weight.
    sum_w1 = tf.reduce_sum(self.w1)
    sum_w2 = tf.reduce_sum(self.w2)
    self.w_total = sum_w1 + sum_w2
    self.pi1 = sum_w1 / self.w_total
    self.pi2 = sum_w2 / self.w_total

    # Pool the samples and evaluate them under the full mixture.
    self.z_all = tf.concat(0, [self.z1, self.z2])
    mix_term1 = self.pi1 * tf.exp(
        self.log_q_z(self.z_all, self.mean1, self.log_var1))
    mix_term2 = self.pi2 * tf.exp(
        self.log_q_z(self.z_all, self.mean2, self.log_var2))
    self.q_all = mix_term1 + mix_term2

    self.p_all = tf.concat(0, [self.p_z1, self.p_z2])
    self.w_all = self.p_all / self.q_all

    # Objective: log of the average mixture importance weight.
    # Earlier variants that were tried and left disabled here: the plain
    # SVI objective mean(log_p - log_q), clipping log_p / log_q to [-8, 8],
    # and IWAE-style log-mean-exp forms shifted by the max or min log-weight.
    self.elbo = tf.log(tf.reduce_mean(self.w_all))

    # Adam on the negated objective (i.e. maximise elbo).
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate, epsilon=1e-04).minimize(-self.elbo)
def iterate(t, mean):
    """Run one CMA-ES generation and return the advanced loop state.

    Samples a population around the current mean, clips it to the action
    bounds (penalising the clipped distance), ranks it by reward, and then
    updates the mean, step size, evolution paths and covariance factors
    stored on ``self``.

    ``current_state``, ``time_step`` and ``self`` are read from the
    enclosing scope.

    Args:
        t: integer loop counter tensor.
        mean: not read by the body; the current mean is taken from
            ``self._m`` (presumably kept so the signature matches the
            loop variables -- confirm against the caller).

    Returns:
        Tuple ``(t + 1, m)`` where ``m`` is the updated mean.
    """
    # -----------------------------------------------------
    # (1) Sample a new population of solutions ~ N(m, sigma^2 C)
    # -----------------------------------------------------
    z = tf.random.normal([self._population_size, self._solution_size], dtype=tf.float32)
    # Each row of z is N(0, I). With C = B D^2 B^T (see step 6) a row sample
    # of N(0, C) is z_row @ (B D)^T = z_row @ D @ B^T. Multiplying by B D
    # without the transpose gives covariance D^2, which drops the rotation B.
    y = tf.matmul(z, tf.matmul(self._B, self._D), transpose_b=True)
    samples = self._m + self._sigma * y
    samples = tf.reshape(samples, [self._population_size, *self._solution_dim])

    # -------------------------------------------------
    # (1b) Enforce the action bounds
    # -------------------------------------------------
    # Clip to the feasible box and charge each sample the squared distance
    # it was moved, computed separately per agent.
    samples_feasible = tf.clip_by_value(samples, self._action_lower_bound_horizon,
                                        self._action_upper_bound_horizon)
    penalty = tf.norm(tf.reshape(samples - samples_feasible,
                                 [self._population_size, self._num_agents, -1]),
                      axis=2) ** 2
    samples = samples_feasible

    # -------------------------------------------------
    # (2) Selection and Recombination: Moving the Mean
    # -------------------------------------------------
    # Evaluate and sort solutions (best reward first).
    rewards = self._trajectory_evaluator(current_state, samples, time_step) - penalty
    rewards = tf.reduce_sum(rewards, axis=1)
    # TODO: double check this, very flaky
    self._x_sorted = tf.gather(samples, tf.argsort(rewards, direction='DESCENDING'))

    # The new mean is a weighted average of the top-mu solutions
    # (assumes self._weights broadcasts against (population, solution)
    # rows and is zero beyond the top mu -- confirm where it is built).
    x_diff = (tf.reshape(self._x_sorted, [self._population_size, self._solution_size])
              - self._m)
    x_mean = tf.reduce_sum(tf.multiply(x_diff, self._weights), axis=0)
    m = self._m + x_mean

    # ----------------------
    # (3) Step-size control
    # ----------------------
    y_mean = x_mean / self._sigma
    # C^(-1/2) = B D^(-1) B^T, built from the factors of the previous
    # generation's covariance.
    D_inv = tf.linalg.tensor_diag(tf.math.reciprocal(tf.linalg.diag_part(self._D)))
    C_inv_half = tf.matmul(tf.matmul(self._B, D_inv), tf.transpose(self._B))
    p_sigma = ((1 - self._c_sigma) * self._p_sigma) + (
        tf.math.sqrt(self._c_sigma * (2 - self._c_sigma) * self._mu_eff)
        * tf.squeeze(tf.matmul(C_inv_half, y_mean[:, tf.newaxis])))
    sigma = self._sigma * tf.exp(
        (self._c_sigma / self._d_sigma)
        * ((tf.norm(p_sigma) / self._expectation_of_normal) - 1))

    # -----------------------------------
    # (4) Adapting the Covariance Matrix
    # -----------------------------------
    p_C = ((1 - self._cc) * self._p_C
           + (self._h_sigma * tf.sqrt(self._cc * (2 - self._cc) * self._mu_eff) * y_mean))
    p_C_matrix = p_C[:, tf.newaxis]
    y_mean_unweighted = x_diff / self._sigma
    # Per-sample outer products y_i y_i^T, then their weighted sum.
    y_mean_unweighted_squared = tf.map_fn(fn=lambda e: e * tf.transpose(e),
                                          elems=y_mean_unweighted[:, tf.newaxis])
    y_s = tf.reduce_sum(tf.multiply(y_mean_unweighted_squared,
                                    self._weights[:, tf.newaxis]), axis=0)
    C = ((1 - self._c1 - self._c_mu) * self._C
         + self._c1 * p_C_matrix * tf.transpose(p_C_matrix)
         + self._c_mu * y_s)

    # -----------------------------------
    # (5) Ensure the symmetry of the covariance matrix here
    # -----------------------------------
    # Keep the upper triangle and mirror it onto the lower one.
    C_upper = tf.linalg.band_part(C, 0, -1)
    C_upper_no_diag = C_upper - tf.linalg.tensor_diag(tf.linalg.diag_part(C_upper))
    C = C_upper + tf.transpose(C_upper_no_diag)

    # -----------------------------------
    # (6) Update the values
    # -----------------------------------
    # tf.linalg.svd returns (singular values, left vectors, right vectors);
    # for the symmetric C this yields C = B diag(s) B^T, so D = diag(sqrt(s)).
    singular_values, B, _ = tf.linalg.svd(C)
    diag_D = tf.sqrt(singular_values)
    D = tf.linalg.tensor_diag(diag_D)

    # Assign values
    self._p_C.assign(p_C)
    self._p_sigma.assign(p_sigma)
    self._C.assign(C)
    self._sigma.assign(sigma)
    self._B.assign(B)
    self._D.assign(D)
    self._m.assign(m)

    return t + tf.constant(1, dtype=tf.int32), m
def timeflow(self, t=0.0):
    """Advance the body and all leg segments by one time step of ``dtime``.

    Updates, in place on ``self``: each sub-leg's ``omega``, ``theta`` and
    ``Q``, and the body's ``wb``, ``Q``, ``vs`` and ``rs``.

    Reads the names ``numLeg``, ``numsubleg``, ``dtime``, ``Mtot``, ``g``,
    ``Fadded``, ``Fsubed``, ``Offset`` and ``Fricscale`` from the enclosing
    scope.

    Args:
        t: time value forwarded to ``self.setalpha``.

    Returns:
        Tuple ``(Momentum, positions)``: ``Momentum`` is accumulated from
        the state at the start of the step (body plus every sub-leg), and
        ``positions`` is the list of every ``lbtomots`` entry of every leg
        offset by the updated ``self.body.rs``.
    """
    self.setalpha(t)
    # Body contribution to the momentum: (wb * Ib) rotated by Q, plus
    # m * (rs x vs).
    Momentum = tf.matmul(tf.matmul(
        self.body.wb, self.body.Ib), self.body.Q) + tf.scalar_mul(
            self.body.m, tf.cross(self.body.rs, self.body.vs))
    # Accumulators for the force equation (Feqc) and the torque equation
    # (Teqalpha: matrix multiplying the angular acceleration, Teqc: constant
    # term). NOTE(review): Feqa, Crossvec and sumDs are written below but
    # never read again inside this method.
    Feqc = tf.scalar_mul(Mtot, g)
    Feqa = tf.diag([Mtot, Mtot, Mtot])
    Crossvec = tf.zeros((1, 3), dtype=tf.float64)
    Teqalpha = tf.zeros((3, 3), dtype=tf.float64)
    Teqc = tf.zeros((1, 3), dtype=tf.float64)
    mlsum = tf.zeros((1, 3), dtype=tf.float64)
    sumDs = tf.zeros((3, 3), dtype=tf.float64)
    wbs = tf.matmul(self.body.wb, self.body.Q)  # [1,3] matrix
    tot_lbtomots = []
    for p in range(numLeg):
        for i in range(numsubleg):
            # update omega over time
            self.leg[p].sub[i].omega += self.leg[p].sub[
                i].alpha * dtime
            # update theta over time
            self.leg[p].sub[i].theta += self.leg[p].sub[
                i].omega * dtime
            # Rotation of this joint about `axis` by `theta`:
            # cos(theta) * I + (1 - cos(theta)) * axis^T axis
            # + sin(theta) * cross(axis tiled to 3 rows, I).
            self.leg[p].sub[i].Q = tf.scalar_mul(tf.cos(self.leg[p].sub[i].theta), tf.eye(3, dtype=tf.float64)) + \
            tf.scalar_mul(1.-tf.cos(self.leg[p].sub[i].theta), tf.matmul(self.leg[p].sub[i].axis, self.leg[p].sub[i].axis, transpose_a = True)) + \
            tf.scalar_mul(tf.sin(self.leg[p].sub[i].theta), tf.cross(tf.tile(self.leg[p].sub[i].axis,[3,1]), tf.eye(3, dtype=tf.float64)))
        # Qs is the coordinate transform from the i-th subleg to space
        # List of rotation matrices of each sublegs in space frame
        # Type : list of [3,3] Tensor
        Qs = [tf.matmul(self.leg[p].sub[0].Q, self.body.Q)]
        for i in range(1, numsubleg):
            # Chain each joint rotation onto the previous segment's frame.
            Qs.append(tf.matmul(self.leg[p].sub[i].Q, Qs[i - 1]))
        # Segment inertia expressed through Qs: Qs^T * Ib * Qs.
        Is = [
            tf.matmul(
                tf.matmul(Qs[i], self.leg[p].sub[i].Ib, transpose_a=True),
                Qs[i]) for i in range(numsubleg)
        ]
        e = [
            tf.matmul(self.leg[p].sub[i].axis, Qs[i])
            for i in range(numsubleg)
        ]
        # List of axes of each sublegs in space frame
        # Type : list of [None,3] Tensor
        # Joint angular accelerations along their axes, and running sums
        # down the chain.
        Qalpha = [
            tf.scalar_mul(self.leg[p].sub[i].alpha, e[i])
            for i in range(numsubleg)
        ]
        Qalphasum = [Qalpha[0]]
        for i in range(1, numsubleg):
            Qalphasum.append(Qalphasum[i - 1] + Qalpha[i])
        # Joint angular velocities along their axes; ws accumulates them on
        # top of the body angular velocity wbs.
        Qw = [
            tf.scalar_mul(self.leg[p].sub[i].omega, e[i])
            for i in range(numsubleg)
        ]
        ws = [wbs + Qw[0]]
        for i in range(1, numsubleg):
            ws.append(ws[i - 1] + Qw[i])
        # ws mapped back through Qs^T (per-segment frame).
        w = [
            tf.matmul(ws[i], Qs[i], transpose_b=True)
            for i in range(numsubleg)
        ]
        # Both link vectors l[0], l[1] of every segment, rotated by Qs.
        ls = [[
            tf.matmul(self.leg[p].sub[i].l[0], Qs[i]),
            tf.matmul(self.leg[p].sub[i].l[1], Qs[i])
        ] for i in range(numsubleg)]  # ls = 2Dtensor
        lbtomotbs = tf.matmul(self.body.lbtomot[p], self.body.Q)  # lbtomotbs = 2Dtensor
        # Position of each segment relative to the body: walk down the leg
        # adding the far half of the previous link and the near half of the
        # current one.
        lbtomots = [lbtomotbs + ls[0][0]]  # lbtomots = 2Dtensor
        for i in range(1, numsubleg):
            lbtomots.append(lbtomots[i - 1] + ls[i - 1][1] + ls[i][0])
        for i in range(numsubleg):
            mlsum += tf.scalar_mul(self.leg[p].sub[i].m, lbtomots[i])
        # for angular momentum debugging
        vmotbs = [tf.cross(wbs, lbtomotbs) + tf.cross(ws[0], ls[0][0])]
        for i in range(1, numsubleg):
            vmotbs.append(vmotbs[i - 1] +
                          tf.cross(ws[i - 1], ls[i - 1][1]) +
                          tf.cross(ws[i], ls[i][0]))

        # Calculating External Forces
        vs = self.body.vs
        for i in range(numsubleg):
            # 1.0 in every component where the segment end point
            # (lbtomots + far link + body position) is below zero.
            Collisiontemp = tf.cast(
                tf.less(lbtomots[i] + ls[i][1] + self.body.rs,
                        tf.zeros((1, 3), dtype=tf.float64)), tf.float64)
            # Keep only the z flag.
            Collisionz = tf.multiply(Collisiontemp,
                                     tf.constant([[0, 0, 1]], tf.float64))
            # Copy the z flag into the x and y slots (used to gate friction).
            Collisionxy = tf.matmul(
                Collisionz,
                tf.constant([[0, 0, 0], [0, 0, 0], [1, 1, 0]], tf.float64))
            # the amount of computation could probably be reduced further; find a way
            vs += tf.cross(ws[i], ls[i][0] + ls[i][1])
            # 1.0 in every component where the accumulated velocity is negative.
            vCollision = tf.cast(
                tf.less(vs, tf.zeros((1, 3), dtype=tf.float64)), tf.float64)
            # Contact force, applied only where the z collision flag is set.
            Ftemp = tf.multiply(
                Collisionz, Fadded + tf.multiply(
                    (vCollision - Offset), Fsubed))
            Feqc += Ftemp
            Teqc += tf.cross(lbtomots[i] + ls[i][1], Ftemp)
            # Friction opposing vs in x/y, gated by the collision flag.
            # (author note) sigh.. the forces are all wrong
            FrictionTemp = -tf.multiply(tf.scalar_mul(
                Fricscale, vs), Collisionxy)
            Feqc += FrictionTemp
            Teqc += tf.cross(lbtomots[i] + ls[i][1], FrictionTemp)

        # Acceleration terms of each segment that do not involve the unknown
        # body acceleration: alpha x l and w x (w x l) contributions.
        A = [
            tf.cross(wbs, tf.cross(wbs, lbtomotbs)) +
            tf.cross(Qalphasum[0], ls[0][0]) +
            tf.cross(ws[0], tf.cross(ws[0], ls[0][0]))
        ]
        for i in range(1, numsubleg):
            A.append(
                tf.cross(Qalphasum[i - 1], ls[i - 1][1]) +
                tf.cross(Qalphasum[i], ls[i][0]) +
                tf.cross(ws[i - 1], tf.cross(ws[i - 1], ls[i - 1][1])) +
                tf.cross(ws[i], tf.cross(ws[i], ls[i][0])))

        # sum_i m_i * |lbtomots_i|^2 (scalar) ...
        mlsquare = tf.zeros((1), dtype=tf.float64)
        for i in range(numsubleg):
            mlsquare += tf.scalar_mul(
                self.leg[p].sub[i].m,
                tf.matmul(lbtomots[i], lbtomots[i], transpose_b=True))
        mlsquare = tf.reshape(mlsquare, [-1])
        # ... and sum_i m_i * lbtomots_i^T lbtomots_i (3x3 outer products).
        Dya = tf.zeros([3, 3], dtype=tf.float64)
        for i in range(numsubleg):
            Dya += tf.scalar_mul(
                self.leg[p].sub[i].m,
                tf.matmul(lbtomots[i], lbtomots[i], transpose_a=True))
        ###############
        # Ds = (sum m l^2) * I - sum m l^T l
        Ds = tf.diag(tf.concat([mlsquare, mlsquare, mlsquare], axis=0)) - Dya
        Teqalpha += Ds
        sumDs += Ds
        # Qb * Ib * Qb.transpose()
        for i in range(numsubleg):
            Feqc -= tf.scalar_mul(self.leg[p].sub[i].m, A[i])
            Crossvec += tf.scalar_mul(self.leg[p].sub[i].m, lbtomots[i])
            Teqc += tf.matmul(
                tf.cross(tf.matmul(w[i], self.leg[p].sub[i].Ib), w[i]),
                Qs[i])
            Teqc -= tf.matmul(Qalphasum[i], Is[i])
            Teqalpha += Is[i]
            # Qs_i * I_i * Qs_i^T
        for i in range(numsubleg):
            # Segment contribution to the momentum: rotated (w * Ib) plus
            # m * (position x velocity).
            Momentum += tf.matmul(tf.matmul(w[i], self.leg[p].sub[i].Ib), Qs[i])
            Momentum += tf.scalar_mul(
                self.leg[p].sub[i].m,
                tf.cross(lbtomots[i] + self.body.rs,
                         vmotbs[i] + self.body.vs))
        # leg update
        # float32 -> float64 conversion : 171013 Fine
        # update 'Q's of leg - 20171012 fine
        tot_lbtomots += lbtomots

    # Body terms of the torque equation.
    Teqalpha += tf.matmul(
        tf.matmul(self.body.Q, self.body.Ib, transpose_a=True), self.body.Q)
    Teqc += tf.matmul(
        tf.cross(tf.matmul(self.body.wb, self.body.Ib), self.body.wb),
        self.body.Q)
    Teqc += tf.cross(mlsum, g)
    Teqanorm = tf.reduce_sum(tf.square(mlsum))
    # Solve the combined force/torque equations for the body's angular
    # acceleration (alphabs), then for its linear acceleration (asb).
    alphabs = tf.matmul(
        Teqc - tf.scalar_mul(1. / Mtot, tf.cross(mlsum, Feqc)),
        tf.matrix_inverse(Teqalpha + tf.scalar_mul(
            1. / Mtot, Teqanorm * tf.eye(3, dtype=tf.float64) -
            tf.matmul(mlsum, mlsum, transpose_a=True))  # (author note) this part is very confusing
        ))
    asb = tf.scalar_mul(1. / Mtot, Feqc - tf.cross(mlsum, alphabs))
    alphab = tf.matmul(alphabs, self.body.Q, transpose_b=True)
    # Explicit Euler updates of the body state.
    self.body.wb += tf.scalar_mul(dtime, alphab)
    self.body.Q += tf.scalar_mul(
        dtime, tf.cross(tf.concat([wbs, wbs, wbs], axis=0), self.body.Q))
    self.body.vs += tf.scalar_mul(dtime, asb)
    self.body.rs += tf.scalar_mul(dtime, self.body.vs)

    # The additive update above does not keep Q a rotation matrix; it is
    # rebuilt from a normalised quaternion (presumably to re-orthonormalise).
    # Q to quaternion
    qw = tf.scalar_mul(
        0.5, tf.sqrt(tf.reduce_sum(tf.diag_part(self.body.Q)) + 1.))
    qv = -tf.reduce_sum(tf.cross(self.body.Q, tf.eye(3, dtype=tf.float64)),
                        axis=0) / tf.scalar_mul(4., qw)

    # quaternion normalization
    # NOTE(review): qvsquare is computed before qv is normalised and is
    # reused unchanged when rebuilding Q below -- confirm this is intended.
    qvsquare = tf.reduce_sum(tf.square(qv))
    qnorm = tf.sqrt(tf.square(qw) + qvsquare)
    qw /= qnorm
    qv /= qnorm

    # quaternion to Q
    self.body.Q = tf.scalar_mul(qw*qw-qvsquare,tf.eye(3, dtype = tf.float64))\
        + 2 * tf.matmul(tf.reshape(qv, [3, 1]), tf.reshape(qv, [1, 3]))\
        + 2 * qw * tf.cross(tf.tile(tf.reshape(qv, [1,3]), [3,1]), tf.eye(3, dtype = tf.float64))

    return Momentum, [x + self.body.rs for x in tot_lbtomots]
# Regression losses evaluated on `x_vals` against a fixed target of 0 and
# plotted together. `x_vals`, `sess` and `plt` are defined earlier in the file.
target = tf.constant(0.)

# L2 loss: the squared difference between target and prediction.
l2_y_vals = tf.square(target - x_vals)
l2_y_out = sess.run(l2_y_vals)

# L1 loss: the absolute difference between target and prediction.
l1_y_vals = tf.abs(target - x_vals)
l1_y_out = sess.run(l1_y_vals)

# Pseudo-Huber loss is a continuous and smooth approximation to the Huber
# loss function:
#     delta^2 * (sqrt(1 + ((target - x) / delta)^2) - 1)
# The residual must be divided by delta inside the square root; without it
# the two curves below are the same shape merely rescaled by delta^2.
delta1 = 0.25
delta2 = 0.5
phuber1_y_vals = tf.multiply(
    tf.square(delta1),
    tf.sqrt(1. + tf.square((target - x_vals) / delta1)) - 1.)
phuber1_y_out = sess.run(phuber1_y_vals)
phuber2_y_vals = tf.multiply(
    tf.square(delta2),
    tf.sqrt(1. + tf.square((target - x_vals) / delta2)) - 1.)
phuber2_y_out = sess.run(phuber2_y_vals)

# Plot all four curves over the same x range.
x_array = sess.run(x_vals)
plt.plot(x_array, l2_y_out, 'b--', label='L2 Loss')
plt.plot(x_array, l1_y_out, 'r--', label='L1 Loss')
plt.plot(x_array, phuber1_y_out, 'k--', label='Huber Loss D=0.25')
plt.plot(x_array, phuber2_y_out, 'g--', label='Huber Loss D=0.5')
plt.legend(loc='lower right', prop={'size': 11})
plt.show()

# For classification function
def _pairwise_distance_computation(input_tensor, margin):
    """Compute all pairwise Euclidean distances between the rows of a tensor.

    Args:
        input_tensor: 2-D tensor; each row is one vector. (Rank 2 follows
            from the rank-3 transpose applied after ``expand_dims``.)
        margin: value from which the distances are subtracted before the
            exponential.

    Returns:
        Tuple ``(tf.exp(margin - d), d)`` where ``d[i, j]`` is
        ``sqrt(||row_i - row_j||^2 + 1e-8)``.
    """
    # (N, D) -> (N, 1, D); subtracting its (1, N, D) transpose broadcasts to
    # every pair of rows.
    expanded = tf.expand_dims(input_tensor, 1)
    pair_diff = expanded - tf.transpose(expanded, (1, 0, 2))
    # Sum over the feature axis. The reduced axis is dropped by default; the
    # old `keep_dims=` keyword is omitted because TF 2.x no longer accepts it.
    d_sq = tf.reduce_sum(tf.square(pair_diff), 2)
    # The small constant keeps the argument of sqrt strictly positive on the
    # diagonal (presumably to avoid an infinite gradient at zero distance).
    d = tf.sqrt(d_sq + 1e-8)
    return tf.exp(margin - d), d