def frechet_inception_distance(x_features, y_features, batch_size, sqrt=False):
    batch_scores = list()
    batches = int(x_features.shape.as_list()[0] / batch_size)
    for i in range(batches):
        if batches - 1 == i:
            x_features_batch = x_features[i * batch_size:, :]
            y_features_batch = y_features[i * batch_size:, :]
        else:
            x_features_batch = x_features[i * batch_size:(i + 1) * batch_size, :]
            y_features_batch = y_features[i * batch_size:(i + 1) * batch_size, :]
        samples = x_features_batch.shape.as_list()[0]
        x_feat = tf.reshape(x_features_batch, (samples, -1))
        y_feat = tf.reshape(y_features_batch, (samples, -1))
        x_mean = tf.reduce_mean(x_feat, axis=0)
        y_mean = tf.reduce_mean(y_feat, axis=0)
        # Review these two lines.
        x_cov = covariance(x_feat)
        y_cov = covariance(y_feat)
        means = dot_product(x_mean, x_mean) + dot_product(y_mean, y_mean) - 2 * dot_product(x_mean, y_mean)
        cov_s = linalg.sqrtm(tf.matmul(x_cov, y_cov), True)
        cov_s = cov_s.real
        covas = tf.trace(x_cov + y_cov - 2 * cov_s)
        fid = means + covas
        if sqrt:
            fid = tf.sqrt(fid)
        batch_scores.append(np.array(fid))
    return np.mean(batch_scores), np.std(batch_scores)
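# Reference sketch (not part of the function above): the closed-form Fréchet
# distance between Gaussians fitted to two feature sets, in plain NumPy/SciPy.
# The helpers `covariance` and `dot_product` used above are assumed to behave
# like np.cov / np.dot.
import numpy as np
from scipy import linalg

def fid_numpy(x_feat, y_feat):
    # fit a Gaussian to each feature set
    mu_x, mu_y = x_feat.mean(axis=0), y_feat.mean(axis=0)
    cov_x = np.cov(x_feat, rowvar=False)
    cov_y = np.cov(y_feat, rowvar=False)
    # matrix square root of the product of the covariances
    cov_sqrt = linalg.sqrtm(cov_x.dot(cov_y)).real
    return np.sum((mu_x - mu_y) ** 2) + np.trace(cov_x + cov_y - 2.0 * cov_sqrt)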
def gauss_kl(min_q_mu, q_sq, K):
    q_mu = -1 * min_q_mu
    # q_sqrt = tf.cholesky(tf.squeeze(q_sqrt))  # K is a variance... we sqrt later
    '''
    N = 1
    Q = 5
    q_mu = tf.random_normal([Q, 1], dtype=tf.float64)
    q_var = tf.random_normal([Q, Q], dtype=tf.float64)
    q_var = q_var + tf.transpose(q_var, [1, 0]) + 1e+1 * np.eye(Q)
    K = q_var
    q_sqrt = tf.cholesky(q_var)
    q_sqrt = tf.expand_dims(q_sqrt, -1)
    num_latent = 1
    s = tf.Session()
    s.run(tf.initialize_all_variables())
    '''
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix; each column contains a mean.

    q_sqrt is a 3D tensor; each matrix within is a lower-triangular
    square-root matrix of the covariance of q.

    K is a positive-definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal
    to the number of columns of q_mu and the last dim of q_sqrt).

    q_sqrt = tf.cholesky(K)
    L = tf.cholesky(q_sq)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += 0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0], tf.float64)
    Lq = tf.batch_matrix_band_part(q_sqrt, -1, 0)
    # Log determinant of q covariance:
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
    LiLq = tf.matrix_triangular_solve(L, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term.
    """
    V2 = tf.cholesky(K)
    V1 = tf.cholesky(q_sq)
    KL = h.Mul(tf.transpose(q_mu), tf.cholesky_solve(V2, q_mu))
    KL += tf.trace(tf.cholesky_solve(V2, q_sq))
    KL -= h.get_dim(K, 0)
    KL += tf.reduce_sum(2 * tf.log(tf.diag_part(V2)) - 2 * tf.log(tf.diag_part(V1)))
    return KL / 2
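# Reference sketch (standalone): the closed form that the TensorFlow code
# above implements, KL(N(m, S) || N(0, K)) for full-rank S and K, in NumPy.
import numpy as np

def gauss_kl_numpy(m, S, K):
    mahalanobis = float(m.T @ np.linalg.solve(K, m))   # m^T K^{-1} m
    trace_term = np.trace(np.linalg.solve(K, S))       # tr(K^{-1} S)
    d = K.shape[0]
    logdet_K = np.linalg.slogdet(K)[1]
    logdet_S = np.linalg.slogdet(S)[1]
    return 0.5 * (mahalanobis + trace_term - d + logdet_K - logdet_S)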
def F_bound2_v2(y, S, Kmm, Knm, Kmnnm, Tr_Knn, sigma):
    # matrices to be used
    N = get_dim(y, 0)
    Kmm_chol = tf.cholesky(Kmm)
    Q_nn = tf.square(sigma) * np.eye(N) + Mul(Knm, tf.cholesky_solve(Kmm_chol, tf.transpose(Knm)))
    bound = -0.5 * (Tr_Knn - tf.trace(tf.cholesky_solve(Kmm_chol, Kmnnm))) / tf.square(sigma)
    bound += multivariate_normal(y, tf.zeros([N, 1], dtype=tf.float32), tf.cholesky(Q_nn))
    return bound
def F2_bound(y, Kmm, Knm, Knn, mu, Sigma):
    Eye = tf.constant(np.eye(N, N), shape=[N, N], dtype=tf.float32)
    sigEye = tf.multiply(1.0, Eye)
    # print(s.run(tf.matrix_inverse(sigEye)))  # debug leftover; `s` (a session) is not defined here
    # sigEye = tf.multiply(tf.square(sigma), Eye)
    Kmn = tf.transpose(Knm)
    prec = Matrix_Inversion_Lemma(sigEye, Knm, Kmm, Kmn)
    zeros = tf.constant(np.zeros(N), shape=[N, 1], dtype=tf.float32)
    log_den = log_density(y, zeros, prec)
    Kmm_inv = tf.matrix_inverse(Kmm)
    trace_term = tf.trace(Knn - Mul(Knm, Kmm_inv, Kmn)) * 0.5
    return log_den - trace_term
def F_bound(y, Kmm, Knm, Knn, sigma):
    # matrices to be used
    N = get_dim(Knn, 0)
    sig_sq = tf.square(sigma)
    sigEye = lambda_Eye(sig_sq, N)
    sigEye_I = lambda_Eye(1 / sig_sq, N)
    zeros = tf.constant(np.zeros(N), shape=[N, 1], dtype=tf.float32)
    Kmn = tf.transpose(Knm)
    # main calculations
    prec = Matrix_Inversion_Lemma(sigEye_I, Knm, Kmm, Kmn)
    log_det_cov = log_det_lemma(tf.log(sig_sq) * N, sigEye_I, Knm, Kmm, Kmn)
    log_den = log_density(y, zeros, prec, log_det_cov)
    trace_term = tf.trace(Knn - Mul(Knm, safe_chol(Kmm, Kmn)))
    return log_den - trace_term
def tree_energy_expval_check(isos_012, H):
    L = len(isos_012)
    states = all_states_1site(isos_012)
    ens = []
    Hl = H
    for l in range(L):
        en = _energy_expval_env(isos_012[l:], *Hl, states[l + 1:])
        ens.append(en / (2**L))
        if l < L - 1:
            Hl = ascend_op_local(*Hl, isos_012[l],
                                 tf.transpose(isos_012[l], (0, 2, 1)))
    H_top = ascend_op_local_top(*Hl, isos_012[-1],
                                tf.transpose(isos_012[-1], (0, 2, 1)))
    en = tf.trace(H_top)
    ens.append(en / (2**L))
    return tf.convert_to_tensor(ens)
def add_confusion_matrix_summaries_(self, outputs_collector, net_out, data_dict):
    """
    This method defines several monitoring metrics that are derived
    from the confusion matrix.
    """
    labels = tf.reshape(tf.cast(data_dict['label'], tf.int64), [-1])
    prediction = tf.reshape(tf.argmax(net_out, -1), [-1])
    num_classes = self.classification_param.num_classes
    conf_mat = tf.contrib.metrics.confusion_matrix(labels, prediction, num_classes)
    conf_mat = tf.to_float(conf_mat) / float(self.net_param.batch_size)
    if self.classification_param.num_classes == 2:
        outputs_collector.add_to_collection(
            var=conf_mat[1][1], name='true_positives',
            average_over_devices=True, summary_type='scalar',
            collection=TF_SUMMARIES)
        outputs_collector.add_to_collection(
            var=conf_mat[1][0], name='false_negatives',
            average_over_devices=True, summary_type='scalar',
            collection=TF_SUMMARIES)
        outputs_collector.add_to_collection(
            var=conf_mat[0][1], name='false_positives',
            average_over_devices=True, summary_type='scalar',
            collection=TF_SUMMARIES)
        outputs_collector.add_to_collection(
            var=conf_mat[0][0], name='true_negatives',
            average_over_devices=True, summary_type='scalar',
            collection=TF_SUMMARIES)
    else:
        outputs_collector.add_to_collection(
            var=conf_mat[tf.newaxis, :, :, tf.newaxis], name='confusion_matrix',
            average_over_devices=True, summary_type='image',
            collection=TF_SUMMARIES)
    outputs_collector.add_to_collection(
        var=tf.trace(conf_mat), name='accuracy',
        average_over_devices=True, summary_type='scalar',
        collection=TF_SUMMARIES)
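# Standalone NumPy check of the 'accuracy' summary above: once the confusion
# matrix is normalised by the number of samples, its trace equals the
# classification accuracy.
import numpy as np

labels = np.array([0, 1, 1, 0, 1])
predictions = np.array([0, 1, 0, 0, 1])
conf = np.zeros((2, 2))
for t, p in zip(labels, predictions):
    conf[t, p] += 1
conf /= len(labels)
print(np.trace(conf))                  # 0.8
print(np.mean(labels == predictions))  # 0.8, the same value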
def getSparcityPrior(inputX, C_init=None, lambda1=0.01, lambda2=10000,
                     optimizer='Adam', epochs=10000, learning_rate=0.1,
                     print_step=50):
    tf.reset_default_graph()
    n_feat, n_sample = inputX.shape
    X = tf.placeholder(dtype=tf.float32, shape=[n_feat, n_sample], name='X')
    if C_init is None:
        C = tf.Variable(tf.random_uniform([n_sample, n_sample], -1, 1), name='C')
    else:
        C = tf.Variable(C_init, name='C')

    loss = X - tf.matmul(X, C)
    loss = tf.reduce_mean(tf.square(loss))
    # Create sparseness in C
    reg_lossC = tf.reduce_mean(abs(C))  # L1 loss for C
    # Force the entries on the diagonal of C to be zero
    reg_lossD = tf.trace(tf.square(C)) / n_sample
    cost = loss + lambda1 * reg_lossC + lambda2 * reg_lossD

    optimizer = optimize(cost, learning_rate, optimizer)
    saver = tf.train.Saver()

    # Optimizing the function
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        print("Calculating C ...")
        for i in range(1, epochs + 1):
            sess.run(optimizer, feed_dict={X: inputX})
            loss = sess.run(cost, feed_dict={X: inputX})
            if i % print_step == 0:
                print('epoch {0}: global loss = {1}'.format(i, loss))
            if i % 50 == 0:
                save_path = saver.save(sess, "./model_C_" + str(i) + ".ckpt")
                print("Model saved in file: %s" % save_path)
        C_val = sess.run(C)
    return C_val
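# Standalone NumPy check of the `reg_lossD` term above: with an elementwise
# square (as in tf.square), trace(C**2) is the sum of the squared diagonal
# entries, so the penalty only pushes the diagonal of C towards zero.
import numpy as np

C = np.array([[0.5, 2.0],
              [3.0, -1.0]])
print(np.trace(C ** 2))         # 0.25 + 1.0 = 1.25 (elementwise square)
print(np.sum(np.diag(C) ** 2))  # same value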
def Bound2(phi_0, phi_1, phi_2, sigma_noise, K_mm, mean_y):
    # Preliminary Bound
    beta = 1 / tf.square(sigma_noise)
    bound = 0
    N = h.get_dim(mean_y, 0)
    M = h.get_dim(K_mm, 0)
    W_inv_part = beta * phi_2 + K_mm
    global phi_200
    phi_200 = tf.matrix_solve(W_inv_part, tf.transpose(phi_1))
    W = beta * np.eye(N) - tf.square(beta) * h.Mul(phi_1, tf.matrix_solve(W_inv_part, tf.transpose(phi_1)))
    # Computations
    bound += N * tf.log(beta)
    bound += h.log_det(K_mm + 1e-3 * np.eye(M))
    bound -= h.Mul(tf.transpose(mean_y), W, mean_y)
    global matrix_determinant
    matrix_determinant = tf.ones(1)  # h.log_det(W_inv_part + 1e2 * np.eye(M))  # -1e-40*tf.exp(h.log_det(W_inv_part))
    bound -= h.log_det(W_inv_part + 1e-3 * tf.reduce_mean(W_inv_part) * np.eye(M))
    bound -= beta * phi_0
    bound += beta * tf.trace(tf.cholesky_solve(tf.cholesky(K_mm), phi_2))
    bound = bound * 0.5
    return bound
def prepare_states(rhos, property_name="purity", n_qumodes=1, cutoff=3,
                   n_iters_min=1000, n_iters_max=3000, n_layers=20,
                   property_reg=10, lambda_reg=0, lr=2e-3):
    if property_name == "purity":
        property_fct = purity_fct
        property_mse = purity_mse
    elif property_name == "entropy":
        property_fct = entropy_fct
        property_mse = entropy_mse

    size_system = n_qumodes * 2
    size_hilbert = cutoff**n_qumodes

    # ================= Placeholders =================
    rho_input = tf.placeholder(tf.complex64, [size_hilbert, size_hilbert])
    lr_placeholder = tf.placeholder(tf.float32)

    # ================= Parameters ===================
    passive_std = 0.1
    active_std = 0.001
    # squeeze gate
    sq_r = tf.Variable(
        tf.random_normal(shape=[n_layers, size_system], stddev=active_std))
    sq_phi = tf.Variable(
        tf.random_normal(shape=[n_layers, size_system], stddev=passive_std))
    # displacement gate
    d_r = tf.Variable(
        tf.random_normal(shape=[n_layers, size_system], stddev=active_std))
    d_phi = tf.Variable(
        tf.random_normal(shape=[n_layers, size_system], stddev=passive_std))
    # interferometer
    inter_theta = tf.Variable(
        tf.random_normal(
            shape=[n_layers * 2, int(size_system * (size_system - 1) / 2)],
            stddev=passive_std))
    inter_phi = tf.Variable(
        tf.random_normal(
            shape=[n_layers * 2, int(size_system * (size_system - 1) / 2)],
            stddev=passive_std))
    inter_rphi = tf.Variable(
        tf.random_normal(shape=[n_layers * 2, size_system - 1],
                         stddev=passive_std))
    # kerr gate
    kappa = tf.Variable(
        tf.random_normal(shape=[n_layers, size_system], stddev=active_std))
    parameters = [
        sq_r, sq_phi, d_r, d_phi, inter_theta, inter_phi, inter_rphi, kappa
    ]

    # ================== Circuit ===================
    print("Prepare circuit...")
    engine, q = sf.Engine(n_qumodes * 2)
    with engine:
        state_preparation_network(q, n_layers, parameters)
    state = engine.run('tf', cutoff_dim=cutoff, eval=False,
                       modes=range(n_qumodes))
    if n_qumodes == 1:
        rho_output = state.dm()
    if n_qumodes == 2:
        rho_output = tf.reshape(tf.einsum('ijkl->ikjl', state.dm()),
                                (size_hilbert, size_hilbert))
    elif n_qumodes > 2:
        raise ValueError("n_qumodes > 2 not yet supported")
    property_output = property_fct(rho_output)
    trace_output = tf.real(tf.trace(rho_output))

    # ============== Cost and optimizer =============
    print("Prepare cost and optimizer...")
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    cost = trace_distance(rho_output, rho_input) + property_reg * property_mse(
        rho_output, rho_input)
    optimiser = tf.train.AdamOptimizer(learning_rate=lr_placeholder)
    min_cost = optimiser.minimize(cost)

    # ================== Training ====================
    print("Prepare session...")
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    print("trace before: ", sess.run(tf.trace(rho_output)))

    print("Start training...")
    params_list = []
    trace_distance_list = []
    property_mse_list = []
    n_iters = n_iters_max
    for i, rho in enumerate(rhos):
        print("\n\n~~~~~~~~~~~~~~~~~~~~~~ Density Matrix {}/{} ~~~~~~~~~~~~~~~~~~~~~~\n"
              .format(i + 1, len(rhos)))
        print("Property: {:.7f}\n\n".format(property_fct(rho, "np")))
        cost_list = []
        property_list = []
        if i == 1:
            n_iters = n_iters_min
        for j in range(n_iters):
            _, curr_cost = sess.run([min_cost, cost],
                                    feed_dict={rho_input: rho,
                                               lr_placeholder: lr})
            curr_property, curr_rho, trace_dm = sess.run(
                [property_output, rho_output, trace_output])
            cost_list.append(curr_cost)
            property_list.append(curr_property)
            print('Step {}/{} -- Cost: {: .7f} -- Property: {:.7f} -- Trace: {:.7f}'
                  .format(j, n_iters, cost_list[-1], property_list[-1], trace_dm),
                  end="\r")
        trace_distance_list.append(sess.run(trace_distance(curr_rho, rho)))
        property_mse_list.append(sess.run(property_mse(curr_rho, rho, "np")))
        print("Property MSE:", property_mse_list[-1])
        print("Trace distance:", trace_distance_list[-1])
        print('\nCost: {: .7f} -- Property: {:.7f}'.format(
            cost_list[-1], property_list[-1]))
        params_list.append(sess.run(parameters))
    return params_list, trace_distance_list, property_mse_list
def __init__(self, vocab_size, n_hidden, n_topic, n_sample, learning_rate,
             batch_size, n_householder, non_linearity):
    self.vocab_size = vocab_size
    self.n_hidden = n_hidden
    self.n_topic = n_topic
    self.n_sample = n_sample
    self.non_linearity = non_linearity
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.n_householder = n_householder

    self.x = tf.placeholder(tf.float32, [batch_size, vocab_size], name='input')
    self.mask = tf.placeholder(tf.float32, [None], name='mask')  # mask paddings

    # encoder
    with tf.variable_scope('encoder'):
        self.enc_vec = utils.mlp(self.x, [self.n_hidden], self.non_linearity)
        self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
        self.logsigm = utils.linear(self.enc_vec, self.n_topic,
                                    bias_start_zero=True,
                                    matrix_start_zero=True,
                                    scope='logsigm')

        # -------------------- calculate the Householder matrix --------------------
        self.tmp_mean = tf.expand_dims(
            tf.expand_dims(tf.rsqrt(tf.reduce_sum(tf.square(self.mean), 1)), 1)
            * self.mean, 2)
        self.tmp_mean_t = tf.transpose(self.tmp_mean, perm=[0, 2, 1])
        self.vk = self.tmp_mean
        self.Hk = tf.expand_dims(tf.eye(self.n_topic), 0) - \
            2 * tf.matmul(self.tmp_mean, self.tmp_mean_t)
        self.U = self.Hk
        self.tmp_vk = self.vk
        self.invalid = []
        self.vk_show = tf.constant(-1.0)
        for k in range(1, self.n_householder + 1):
            self.tmp_vk = self.vk
            self.tmp_vk = tf.expand_dims(
                tf.rsqrt(tf.reduce_sum(tf.square(self.tmp_vk), 1))
                * tf.squeeze(self.tmp_vk, [2, 2]), 2)
            self.vk = tf.matmul(self.Hk, self.vk)
            self.Hk = tf.expand_dims(tf.eye(self.n_topic), 0) - \
                2 * tf.matmul(self.tmp_vk, tf.transpose(self.tmp_vk, perm=[0, 2, 1]))
            self.U = tf.matmul(self.U, self.Hk)
        self.Umean = tf.squeeze(tf.matmul(self.U, self.tmp_mean), [2, 2])

        # ------------------------ KL divergence after Householder ------------------------
        self.kld = -0.5 * (tf.reduce_sum(
            1 - tf.square(self.Umean) + 2 * self.logsigm, 1) - \
            tf.trace(tf.matmul(
                tf.transpose(tf.multiply(tf.expand_dims(tf.exp(2 * self.logsigm), 2),
                                         tf.transpose(self.U, perm=[0, 2, 1])),
                             perm=[0, 2, 1]),
                tf.transpose(self.U, perm=[0, 2, 1]))))
        # kk = tf.trace(tf.matmul(tf.transpose(tf.multiply(tf.expand_dims(tf.exp(2 * self.logsigm), 2), tf.transpose(self.U, perm=[0,2,1])), perm=[0,2,1]), tf.transpose(self.U, perm=[0,2,1])))
        self.log_squre = tf.trace(tf.matmul(
            tf.transpose(tf.multiply(tf.expand_dims(tf.exp(2 * self.logsigm), 2),
                                     tf.transpose(self.U, perm=[0, 2, 1])),
                         perm=[0, 2, 1]),
            tf.transpose(self.U, perm=[0, 2, 1])))
        self.mean_squre = tf.reduce_sum(tf.square(self.Umean), 1)
        self.kld = self.mask * self.kld  # mask paddings

    if self.n_sample == 1:  # single sample
        eps = tf.random_normal((batch_size, self.n_topic), 0, 1)
        doc_vec = tf.multiply(tf.exp(self.logsigm), eps) + self.mean
    else:
        doc_vec_list = []
        for i in range(self.n_sample):
            epsilon = tf.random_normal((self.batch_size, self.n_topic), 0, 1)
            doc_vec_list.append(self.mean + tf.multiply(epsilon, tf.exp(self.logsigm)))
        doc_vec = tf.add_n(doc_vec_list) / self.n_sample
    doc_vec = tf.squeeze(tf.matmul(self.U, tf.expand_dims(doc_vec, 2)))
    self.theta = tf.nn.softmax(tf.layers.dense(doc_vec, self.n_topic))

    with tf.variable_scope('decoder'):
        topic_vec = tf.get_variable('topic_vec', shape=[self.n_topic, self.n_hidden])
        word_vec = tf.get_variable('word_vec', shape=[self.vocab_size, self.n_hidden])
        # self.log_lambd = tf.layers.dense(self.enc_vec, 1)
        # self.lambd = tf.exp(self.log_lambd) + 1e-5
        self.lambd = tf.constant(shape=[self.batch_size, 1], value=.5)

        # n_topic x vocab_size
        beta = tf.matmul(topic_vec, tf.transpose(word_vec))
        logits = tf.nn.log_softmax(tf.matmul(doc_vec, beta))
        self.beta = tf.nn.softmax(beta)

        mean = tf.reduce_mean(self.theta, -1, keep_dims=True)  # bs x 1
        self.variance = tf.sqrt(
            tf.reduce_sum(tf.square(self.theta - tf.tile(mean, [1, self.n_topic])), -1)
            / self.n_topic)
        self.log_prob = (-self.n_topic - (1 / self.lambd)) * tf.log(
            tf.reduce_sum(tf.pow(self.theta, -self.lambd), -1, keep_dims=True)
            + self.n_topic - 1)
        # self.log_prob = tf.clip_by_value(self.log_prob, -500, np.inf)
        constant_term = 0.0
        for i in range(self.n_topic):
            constant_term += tf.log(1 + i * self.lambd)
        self.log_prob += constant_term
        self.log_prob += 200
        self.log_prob = tf.clip_by_value(self.log_prob, 0, np.inf)

    self.recons_loss = -tf.reduce_sum(tf.multiply(logits, self.x), 1)
    self.objective = self.recons_loss + self.kld
    self.loss_func = self.objective + 1 * self.log_prob

    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    fullvars = tf.trainable_variables()
    enc_vars = utils.variable_parser(fullvars, 'encoder')
    dec_vars = utils.variable_parser(fullvars, 'decoder')
    enc_grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss_func, enc_vars), 5)
    dec_grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss_func, dec_vars), 5)
    self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
    self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def compare(self, x):
    np_ans = np.trace(x, axis1=-2, axis2=-1)
    with self.test_session(use_gpu=True):
        tf_ans = tf.trace(x).eval()
    self.assertAllClose(tf_ans, np_ans)
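# Standalone sketch of what the test above checks: np.trace with
# axis1=-2, axis2=-1 takes the trace over the last two dimensions, returning
# one value per leading (batch) index, which matches tf.trace's behaviour.
import numpy as np

x = np.arange(2 * 3 * 3).reshape(2, 3, 3)
print(np.trace(x, axis1=-2, axis2=-1))  # [12 39], one trace per 3x3 matrix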
def restore_helper_power_method(tensors,
                                init=None,
                                precision=1E-12,
                                nmax=100000,
                                pinv=1E-30):
    """
    Helper function for putting InfiniteMPSCentralGauge into central form
    using TMeigs_power_method.

    Args:
        init (tf.Tensor): initial guess for the eigenvector
        precision (float): desired precision of the dominant eigenvalue
        nmax (int): max number of iterations
        pinv (float): pseudoinverse cutoff

    Returns:
        As (list of tf.Tensor): the MPS matrices
        mat (tf.Tensor): center matrix
        connector (tf.Tensor): connector matrix
        right_mat (tf.Tensor): right boundary matrix
    """
    As = copy.copy(tensors)  # [t for t in tensors]  # won't compile without this
    newAs = []
    if not np.all(As[0].dtype == t.dtype for t in As):
        raise TypeError(
            'TMeigs_power_method: all As have to have the same dtype')
    dtype = As[0].dtype
    if init:
        x = init
    else:
        x = tf.diag(tf.ones(shape=[As[0].shape[0]], dtype=As[0].dtype))
    if not As[0].dtype == x.dtype:
        raise TypeError(
            'TMeigs_power_method: `init` has other dtype than `As`')

    x /= tf.linalg.norm(x)
    dtype = x.dtype

    def do_step_left(n, eta, state, diff):
        newstate = transfer_op(As, As, 'l', state)
        eta = tf.linalg.norm(newstate)
        newstate /= eta
        diff = tf.cast(tf.linalg.norm(state - newstate), dtype.real_dtype)
        return n + 1, eta, newstate, diff

    def do_step_right(n, eta, state, diff):
        newstate = transfer_op(As, As, 'r', state)
        eta = tf.linalg.norm(newstate)
        newstate /= eta
        diff = tf.cast(tf.linalg.norm(state - newstate), dtype.real_dtype)
        return n + 1, eta, newstate, diff

    def stopping_criterion(n, eta, state, diff):
        return tf.less(tf.cast(precision, dtype.real_dtype), diff)

    def cond(n, eta, state, diff):
        return tf.cond(
            tf.less(0, n),
            lambda: tf.cond(tf.less(n, nmax),
                            lambda: stopping_criterion(n, eta, state, diff),
                            lambda: False),
            lambda: True)

    _, eta, l, _ = tf.while_loop(
        cond, do_step_left,
        (0, tf.cast(0.0, dtype), x, tf.cast(1.0, dtype.real_dtype)))
    _, eta, r, _ = tf.while_loop(
        cond, do_step_right,
        (0, tf.cast(0.0, dtype), x, tf.cast(1.0, dtype.real_dtype)))

    sqrteta = tf.cast(tf.sqrt(tf.real(eta)), dtype)
    As[0] /= sqrteta

    l = l / tf.trace(l)
    l = (l + tf.conj(tf.transpose(l))) / 2.0
    eigvals_left, u_left = tf.linalg.eigh(l)
    eigvals_left /= tf.reduce_sum(eigvals_left, axis=0)
    abseigvals_left = tf.abs(eigvals_left)
    mask = tf.greater(abseigvals_left, pinv)
    eigvals_left = tf.where(mask, eigvals_left,
                            tf.zeros(eigvals_left.shape, dtype=dtype))
    inveigvals_left = tf.where(mask, 1.0 / eigvals_left,
                               tf.zeros(eigvals_left.shape, dtype=dtype))
    y = ncon([u_left, tf.diag(tf.sqrt(eigvals_left))], [[-2, 1], [1, -1]])
    invy = ncon([tf.diag(tf.sqrt(inveigvals_left)), tf.conj(u_left)],
                [[-2, 1], [-1, 1]])

    r = r / tf.trace(r)
    r = (r + tf.conj(tf.transpose(r))) / 2.0
    eigvals_right, u_right = tf.linalg.eigh(r)
    eigvals_right /= tf.reduce_sum(eigvals_right, axis=0)
    abseigvals_right = tf.abs(eigvals_right)
    mask = tf.greater(abseigvals_right, pinv)
    eigvals_right = tf.where(mask, eigvals_right,
                             tf.zeros(eigvals_right.shape, dtype=dtype))
    inveigvals_right = tf.where(mask, 1.0 / eigvals_right,
                                tf.zeros(eigvals_right.shape, dtype=dtype))
    x = ncon([u_right, tf.diag(tf.sqrt(eigvals_right))], [[-1, 1], [1, -2]])
    invx = ncon([tf.diag(tf.sqrt(inveigvals_right)), tf.conj(u_right)],
                [[-1, 1], [-2, 1]])

    lam, U, V = tf.linalg.svd(ncon([y, x], [[-1, 1], [1, -2]]))
    lam = tf.cast(lam, dtype)

    As[0] = ncon(  # absorb everything on the left end
        [tf.diag(lam), tf.conj(V), invx, As[0]],
        [[-1, 1], [2, 1], [2, 3], [3, -2, -3]])
    As[-1] = ncon([As[-1], invy, U], [[-1, -2, 1], [1, 2], [2, -3]])

    for n in range(len(As) - 1):
        tensor, mat, _ = prepare_tensor_QR(As[n], direction=1)
        As[n] = tensor
        As[n + 1] = ncon([mat, As[n + 1]], [[-1, 1], [1, -2, -3]])

    Z = ncon([As[-1], tf.conj(As[-1])], [[1, 2, 3], [1, 2, 3]]) / tf.cast(
        As[-1].shape[2], dtype)
    As[-1] /= tf.sqrt(Z)

    lam = lam / tf.linalg.norm(lam)
    mat = tf.diag(lam)
    connector = tf.diag(1.0 / lam)
    right_mat = tf.diag(lam)

    return As, mat, connector, right_mat
def restore_helper(tensors,
                   init=None,
                   precision=1E-12,
                   ncv=50,
                   nmax=100000,
                   numeig=1,
                   pinv=1E-30):
    """
    Helper function for putting InfiniteMPSCentralGauge into central form
    using TMeigs.

    Args:
        init (tf.Tensor): initial guess for the eigenvector
        precision (float): desired precision of the dominant eigenvalue
        ncv (int): number of Krylov vectors
        nmax (int): max number of iterations
        numeig (int): hyperparameter, passed to scipy.sparse.linalg.eigs;
            number of eigenvectors to be returned by scipy.sparse.linalg.eigs;
            leave at 6 to avoid problems with arpack
        pinv (float): pseudoinverse cutoff

    Returns:
        As (list of tf.Tensor): the MPS matrices
        mat (tf.Tensor): center matrix
        connector (tf.Tensor): connector matrix
        right_mat (tf.Tensor): right boundary matrix
    """
    As = copy.copy(tensors)  # [t for t in tensors]  # won't compile without this
    if not np.all(As[0].dtype == t.dtype for t in As):
        raise TypeError(
            'TMeigs_power_method: all As have to have the same dtype')
    dtype = As[0].dtype

    eta, l = TMeigs(tensors=As,
                    direction='left',
                    init=init,
                    nmax=nmax,
                    precision=precision,
                    ncv=ncv,
                    numeig=numeig)
    sqrteta = tf.cast(tf.sqrt(tf.real(eta)), dtype)
    As[0] /= sqrteta

    l = l / tf.trace(l)
    l = (l + tf.conj(tf.transpose(l))) / 2.0
    eigvals_left, u_left = tf.linalg.eigh(l)
    eigvals_left /= tf.reduce_sum(eigvals_left, axis=0)
    abseigvals_left = tf.abs(eigvals_left)
    mask = tf.greater(abseigvals_left, pinv)
    eigvals_left = tf.where(mask, eigvals_left,
                            tf.zeros(eigvals_left.shape, dtype=dtype))
    inveigvals_left = tf.where(mask, 1.0 / eigvals_left,
                               tf.zeros(eigvals_left.shape, dtype=dtype))
    y = ncon([u_left, tf.diag(tf.sqrt(eigvals_left))], [[-2, 1], [1, -1]])
    invy = ncon([tf.diag(tf.sqrt(inveigvals_left)), tf.conj(u_left)],
                [[-2, 1], [-1, 1]])

    eta, r = TMeigs(tensors=As,
                    direction='right',
                    init=init,
                    nmax=nmax,
                    precision=precision,
                    ncv=ncv,
                    numeig=numeig)
    r = r / tf.trace(r)
    r = (r + tf.conj(tf.transpose(r))) / 2.0
    eigvals_right, u_right = tf.linalg.eigh(r)
    eigvals_right /= tf.reduce_sum(eigvals_right, axis=0)
    abseigvals_right = tf.abs(eigvals_right)
    mask = tf.greater(abseigvals_right, pinv)
    eigvals_right = tf.where(mask, eigvals_right,
                             tf.zeros(eigvals_right.shape, dtype=dtype))
    inveigvals_right = tf.where(mask, 1.0 / eigvals_right,
                                tf.zeros(eigvals_right.shape, dtype=dtype))
    x = ncon([u_right, tf.diag(tf.sqrt(eigvals_right))], [[-1, 1], [1, -2]])
    invx = ncon([tf.diag(tf.sqrt(inveigvals_right)), tf.conj(u_right)],
                [[-1, 1], [-2, 1]])

    lam, U, V = tf.linalg.svd(ncon([y, x], [[-1, 1], [1, -2]]))
    lam = tf.cast(lam, dtype)

    As[0] = ncon(  # absorb everything on the left end
        [tf.diag(lam), tf.conj(V), invx, As[0]],
        [[-1, 1], [2, 1], [2, 3], [3, -2, -3]])
    As[-1] = ncon([As[-1], invy, U], [[-1, -2, 1], [1, 2], [2, -3]])

    for n in range(len(As) - 1):
        tensor, mat, _ = prepare_tensor_QR(As[n], direction=1)
        As[n] = tensor
        As[n + 1] = ncon([mat, As[n + 1]], [[-1, 1], [1, -2, -3]])

    Z = ncon([As[-1], tf.conj(As[-1])], [[1, 2, 3], [1, 2, 3]]) / tf.cast(
        As[-1].shape[2], dtype)
    As[-1] /= tf.sqrt(Z)

    lam = lam / tf.linalg.norm(lam)
    mat = tf.diag(lam)
    connector = tf.diag(1.0 / lam)
    right_mat = tf.diag(lam)

    return As, mat, connector, right_mat
# (fragment: the lines below are the tail of a two-layer GP `predict` helper;
# its `def` line is not part of this excerpt)
    mean = h.Mul(K_nm_2, tf.matrix_solve(K_mm_1, mu))
    variance = K_nn_2 - h.Mul(K_nm_2, h.safe_chol(K_mm_2, tf.transpose(K_nm_2)))
    var_terms = 2 * tf.sqrt(tf.reshape(tf.diag_part(variance) + tf.square(sigma_2), [N, 1]))
    return mean, var_terms

# layer 1
X_m_1 = tf.Variable(tf.random_uniform([M, 1], minval=0, maxval=15), name='X_m', dtype=tf.float32)
sigma_1 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32, name='sigma')
noise_1 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32, name='sigma')
len_sc_1 = tf.square(tf.Variable(tf.ones([1, 1]), dtype=tf.float32)) + 0.3
K_nm_1 = h.tf_SE_K(Xtr, X_m_1, len_sc_1, noise_1)
K_mm_1 = h.tf_SE_K(X_m_1, X_m_1, len_sc_1, noise_1) + h.tol * np.eye(M, M)
K_nn_1 = h.tf_SE_K(Xtr, Xtr, len_sc_1, noise_1)
K_mn_1 = h.tf.transpose(K_nm_1)
K_mnnm_1 = h.Mul(K_mn_1, K_nm_1)
Tr_Knn_1 = tf.trace(K_nn_1)
# mean1, var1 = predict(K_mn_1, sigma_1, K_mm_1, K_nn_1)

# layer 2
h_mu = tf.Variable(Ytr)
h_S_std = tf.Variable(np.ones((N, 1)), dtype=tf.float32)
h_S = tf.square(h_S_std)
X_m_2 = tf.Variable(tf.random_uniform([M, 1], minval=-20, maxval=20), dtype=tf.float32)
sigma_2 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32)
noise_2 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32)
len_sc_2 = tf.square(tf.Variable(tf.ones([1, 1]), dtype=tf.float32)) + 0.3
'''
K_nm_2 = h.tf_SE_K(h_mu, X_m_2, len_sc_2, noise_2)
K_mm_2 = h.tf_SE_K(X_m_2, X_m_2, len_sc_2, noise_2) + h.tol * np.eye(M, M)
K_nn_2 = h.tf_SE_K(h_mu, h_mu, len_sc_2, noise_2)
def uncertain_conditional(Xnew_mu, Xnew_var, feat, kern, q_mu, q_sqrt, *,
                          mean_function=None, full_output_cov=False,
                          full_cov=False, white=False):
    """
    Calculates the conditional for uncertain inputs Xnew, p(Xnew) = N(Xnew_mu, Xnew_var).
    See ``conditional`` documentation for further reference.

    :param Xnew_mu: mean of the inputs, size N x Din
    :param Xnew_var: covariance matrix of the inputs, size N x Din x Din
    :param feat: gpflow.InducingFeature object, only InducingPoints is supported
    :param kern: gpflow kernel or ekernel object.
    :param q_mu: mean inducing points, size M x Dout
    :param q_sqrt: cholesky of the covariance matrix of the inducing points, size Dout x M x M
    :param full_output_cov: boolean whether to compute covariance between output dimensions.
        Influences the shape of return value ``fvar``. Default is False
    :param white: boolean whether to use whitened representation. Default is False.

    :return fmean, fvar: mean and covariance of the conditional, size ``fmean`` is N x Dout,
        size ``fvar`` depends on ``full_output_cov``: if True ``f_var`` is N x Dout x Dout,
        if False then ``f_var`` is N x Dout
    """
    # TODO(VD): Tensorflow 1.7 doesn't support broadcasting in ``tf.matmul`` and
    # ``tf.matrix_triangular_solve``. This is reported in issue 216.
    # As a temporary workaround, we are using ``tf.einsum`` for the matrix
    # multiplications and tiling in the triangular solves.
    # The code that should be used once the bug is resolved is added in comments.

    if not isinstance(feat, InducingPoints):
        raise NotImplementedError

    if full_cov:
        # TODO(VD): ``full_cov`` True would return a ``fvar`` of shape N x N x D x D,
        # encoding the covariance between input datapoints as well.
        # This is not implemented as this feature is only used for plotting purposes.
        raise NotImplementedError

    pXnew = Gaussian(Xnew_mu, Xnew_var)

    num_data = tf.shape(Xnew_mu)[0]  # number of new inputs (N)
    num_ind = tf.shape(q_mu)[0]  # number of inducing points (M)
    num_func = tf.shape(q_mu)[1]  # output dimension (D)

    q_sqrt_r = tf.matrix_band_part(q_sqrt, -1, 0)  # D x M x M

    eKuf = tf.transpose(expectation(pXnew, (kern, feat)))  # M x N (psi1)
    Kuu = feat.Kuu(kern, jitter=settings.numerics.jitter_level)  # M x M
    Luu = tf.cholesky(Kuu)  # M x M

    if not white:
        q_mu = tf.matrix_triangular_solve(Luu, q_mu, lower=True)
        Luu_tiled = tf.tile(Luu[None, :, :], [num_func, 1, 1])  # remove line once issue 216 is fixed
        q_sqrt_r = tf.matrix_triangular_solve(Luu_tiled, q_sqrt_r, lower=True)

    Li_eKuf = tf.matrix_triangular_solve(Luu, eKuf, lower=True)  # M x N
    fmean = tf.matmul(Li_eKuf, q_mu, transpose_a=True)

    eKff = expectation(pXnew, kern)  # N (psi0)
    eKuffu = expectation(pXnew, (kern, feat), (kern, feat))  # N x M x M (psi2)
    Luu_tiled = tf.tile(Luu[None, :, :], [num_data, 1, 1])  # remove this line, once issue 216 is fixed
    Li_eKuffu = tf.matrix_triangular_solve(Luu_tiled, eKuffu, lower=True)
    Li_eKuffu_Lit = tf.matrix_triangular_solve(Luu_tiled, tf.matrix_transpose(Li_eKuffu), lower=True)  # N x M x M
    cov = tf.matmul(q_sqrt_r, q_sqrt_r, transpose_b=True)  # D x M x M

    if mean_function is None or isinstance(mean_function, mean_functions.Zero):
        e_related_to_mean = tf.zeros((num_data, num_func, num_func), dtype=settings.float_type)
    else:
        # Update mean: \mu(x) + m(x)
        fmean = fmean + expectation(pXnew, mean_function)

        # Calculate: m(x) m(x)^T + m(x) \mu(x)^T + \mu(x) m(x)^T,
        # where m(x) is the mean_function and \mu(x) is fmean
        e_mean_mean = expectation(pXnew, mean_function, mean_function)  # N x D x D
        Lit_q_mu = tf.matrix_triangular_solve(Luu, q_mu, adjoint=True)
        e_mean_Kuf = expectation(pXnew, mean_function, (kern, feat))  # N x D x M
        # einsum isn't able to infer the rank of e_mean_Kuf, hence we explicitly set the rank of the tensor:
        e_mean_Kuf = tf.reshape(e_mean_Kuf, [num_data, num_func, num_ind])
        e_fmean_mean = tf.einsum("nqm,mz->nqz", e_mean_Kuf, Lit_q_mu)  # N x D x D
        e_related_to_mean = e_fmean_mean + tf.matrix_transpose(e_fmean_mean) + e_mean_mean

    if full_output_cov:
        fvar = (
            tf.matrix_diag(tf.tile((eKff - tf.trace(Li_eKuffu_Lit))[:, None], [1, num_func])) +
            tf.matrix_diag(tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov)) +
            # tf.matrix_diag(tf.trace(tf.matmul(Li_eKuffu_Lit, cov))) +
            tf.einsum("ig,nij,jh->ngh", q_mu, Li_eKuffu_Lit, q_mu) -
            # tf.matmul(q_mu, tf.matmul(Li_eKuffu_Lit, q_mu), transpose_a=True) -
            fmean[:, :, None] * fmean[:, None, :] +
            e_related_to_mean
        )
    else:
        fvar = (
            (eKff - tf.trace(Li_eKuffu_Lit))[:, None] +
            tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov) +
            tf.einsum("ig,nij,jg->ng", q_mu, Li_eKuffu_Lit, q_mu) -
            fmean ** 2 +
            tf.matrix_diag_part(e_related_to_mean)
        )

    return fmean, fvar
def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood.
    """
    Kt = self.dynamic_kern.K(self.T)
    iKt = tf.matrix_inverse(Kt)

    # Have to reform how we calculate the expectation.
    # Reparameterize
    Mq = tf.matmul(Kt, self.mu_bar_q)  # N x Q
    Sq = tf.matrix_inverse(iKt + tf.matrix_diag(
        tf.transpose(tf.square(self.lambda_q))))  # Q x N x N
    qX = TGaussian(Mq, Sq)

    num_inducing = len(self.feature)

    # Compute Psi statistics
    psi0 = tf.reduce_sum(expectation(qX, self.kern))
    psi1 = expectation(qX, (self.kern, self.feature))  # N x M
    psi2 = tf.reduce_sum(expectation(qX, (self.kern, self.feature),
                                     (self.kern, self.feature)), axis=0)  # M x M

    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)  # M x M
    L = tf.cholesky(Kuu)  # K_mm ^ 1/2
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)

    # Compute intermediate matrices
    A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
    AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma

    # Compute the log marginal lower bound, exactly as in the standard model.
    # The lower bound only involves the data; in essence, we are computing
    # bound <= \int q(X) log p(Y|X) dX
    D = tf.cast(tf.shape(self.Y)[1], settings.float_type)
    ND = tf.cast(tf.size(self.Y), settings.float_type)

    bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
    bound += -0.5 * D * log_det_B
    bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
    bound += 0.5 * tf.reduce_sum(tf.square(c))
    bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                         tf.reduce_sum(tf.matrix_diag_part(AAT)))

    # KL[q(x) || p(x|t)]
    # Only this term involves the dynamical prior
    log_det_Kt = tf.log(tf.matrix_determinant(Kt))
    log_det_Sq = tf.log(tf.matrix_determinant(Sq))
    NQ = tf.cast(tf.size(Mq), settings.float_type)
    KL = self.num_latent * log_det_Kt + tf.reduce_sum(log_det_Sq) - NQ
    for i in range(self.num_latent):
        KL += tf.trace(
            tf.matmul(iKt, Sq[i, :, :]) +
            tf.matmul(iKt, tf.matmul(tf.expand_dims(Mq[:, i], 1),
                                     tf.expand_dims(Mq[:, i], 0))))
    KL = 0.5 * KL

    return bound - KL
def __init__(self, M, Lr, Lc, Odata, Otraining, Otest,
             order_chebyshev_col=5, order_chebyshev_row=5,
             num_iterations=10, gamma=1.0, learning_rate=1e-4,
             idx_gpu='/gpu:2'):
    # order of the spectral filters
    self.ord_col = order_chebyshev_col
    self.ord_row = order_chebyshev_row
    self.num_iterations = num_iterations
    self.n_conv_feat = 32

    with tf.Graph().as_default() as g:
        tf.logging.set_verbosity(tf.logging.ERROR)
        self.graph = g
        tf.set_random_seed(0)
        with tf.device(idx_gpu):
            # loading of the laplacians
            self.Lr = tf.constant(Lr.astype('float32'))
            self.Lc = tf.constant(Lc.astype('float32'))
            self.norm_Lr = self.Lr - tf.diag(tf.ones([Lr.shape[0], ]))
            self.norm_Lc = self.Lc - tf.diag(tf.ones([Lc.shape[0], ]))

            # compute all chebyshev polynomials a priori
            self.list_row_cheb_pol = list()
            self.compute_cheb_polynomials(self.norm_Lr, self.ord_row, self.list_row_cheb_pol)
            self.list_col_cheb_pol = list()
            self.compute_cheb_polynomials(self.norm_Lc, self.ord_col, self.list_col_cheb_pol)

            # definition of constant matrices
            self.M = tf.constant(M, dtype=tf.float32)
            self.Odata = tf.constant(Odata, dtype=tf.float32)
            self.Otraining = tf.constant(Otraining, dtype=tf.float32)  # training mask
            self.Otest = tf.constant(Otest, dtype=tf.float32)  # test mask

            # definition of the NN variables
            self.W_conv = tf.get_variable("W_conv",
                                          shape=[self.ord_col * self.ord_row, self.n_conv_feat],
                                          initializer=tf.contrib.layers.xavier_initializer())
            self.b_conv = tf.Variable(tf.zeros([self.n_conv_feat, ]))

            # recurrent NN parameters
            self.W_f = tf.get_variable("W_f", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.W_i = tf.get_variable("W_i", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.W_o = tf.get_variable("W_o", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.W_c = tf.get_variable("W_c", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.U_f = tf.get_variable("U_f", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.U_i = tf.get_variable("U_i", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.U_o = tf.get_variable("U_o", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.U_c = tf.get_variable("U_c", shape=[self.n_conv_feat, self.n_conv_feat],
                                       initializer=tf.contrib.layers.xavier_initializer())
            self.b_f = tf.Variable(tf.zeros([self.n_conv_feat, ]))
            self.b_i = tf.Variable(tf.zeros([self.n_conv_feat, ]))
            self.b_o = tf.Variable(tf.zeros([self.n_conv_feat, ]))
            self.b_c = tf.Variable(tf.zeros([self.n_conv_feat, ]))

            # output parameters
            self.W_out = tf.get_variable("W_out", shape=[self.n_conv_feat, 1],
                                         initializer=tf.contrib.layers.xavier_initializer())
            self.b_out = tf.Variable(tf.zeros([1, 1]))

            # ######### definition of the NN
            self.X = tf.multiply(self.M, self.Odata)  # we may initialize it at random here
            self.list_X = list()
            self.list_X.append(tf.identity(self.X))
            self.X = tf.reshape(self.X, [-1, ])

            # RNN
            self.h = tf.zeros([M.shape[0] * M.shape[1], self.n_conv_feat])
            self.c = tf.zeros([M.shape[0] * M.shape[1], self.n_conv_feat])

            for k in range(self.num_iterations):
                # bidimensional convolution
                self.x_conv = self.bid_conv(self.W_conv, self.b_conv)  # N, N, n_conv_feat
                self.x_conv = tf.reshape(self.x_conv, [-1, self.n_conv_feat])
                self.f = tf.sigmoid(tf.matmul(self.x_conv, self.W_f) + tf.matmul(self.h, self.U_f) + self.b_f)
                self.i = tf.sigmoid(tf.matmul(self.x_conv, self.W_i) + tf.matmul(self.h, self.U_i) + self.b_i)
                self.o = tf.sigmoid(tf.matmul(self.x_conv, self.W_o) + tf.matmul(self.h, self.U_o) + self.b_o)
                self.update_c = tf.sigmoid(tf.matmul(self.x_conv, self.W_c) + tf.matmul(self.h, self.U_c) + self.b_c)
                self.c = tf.multiply(self.f, self.c) + tf.multiply(self.i, self.update_c)
                self.h = tf.multiply(self.o, tf.sigmoid(self.c))

                # compute update of matrix X
                self.delta_x = tf.tanh(tf.matmul(self.c, self.W_out) + self.b_out)
                self.X += tf.squeeze(self.delta_x)
                self.list_X.append(tf.identity(tf.reshape(self.X, [tf.shape(self.M)[0], tf.shape(self.M)[1]])))

            self.X = tf.reshape(self.X, [tf.shape(self.M)[0], tf.shape(self.M)[1]])

            # ######### loss definition
            # computation of the accuracy term
            self.norm_X = 1 + 4 * (self.X - tf.reduce_min(self.X)) / (tf.reduce_max(self.X - tf.reduce_min(self.X)))
            frob_tensor = tf.multiply(self.Otraining + self.Odata, self.norm_X - M)
            self.loss_frob = tf.square(self.frobenius_norm(frob_tensor)) / np.sum(Otraining + Odata)

            # computation of the regularization terms
            trace_col_tensor = tf.matmul(tf.matmul(self.X, self.Lc), self.X, transpose_b=True)
            self.loss_trace_col = tf.trace(trace_col_tensor)
            trace_row_tensor = tf.matmul(tf.matmul(self.X, self.Lr, transpose_a=True), self.X)
            self.loss_trace_row = tf.trace(trace_row_tensor)

            # training loss definition
            self.loss = self.loss_frob + (gamma / 2) * (self.loss_trace_col + self.loss_trace_row)

            # test loss definition
            self.predictions = tf.multiply(self.Otest, self.norm_X - self.M)
            self.predictions_error = self.frobenius_norm(self.predictions)

            # definition of the solver
            self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)

            self.var_grad = tf.gradients(self.loss, tf.trainable_variables())
            self.norm_grad = self.frobenius_norm(
                tf.concat([tf.reshape(g, [-1]) for g in self.var_grad], 0))

            # Create a session for running Ops on the Graph.
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.allow_growth = True
            self.session = tf.Session(config=config)

            # Run the Op to initialize the variables.
            init = tf.initialize_all_variables()
            self.session.run(init)
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            self.vars = {var.name: var for var in variables}
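# Side note (standalone NumPy check): for a combinatorial graph Laplacian
# L = D - W, the trace regularisers above, tr(X^T L X), equal the graph
# Dirichlet energy (1/2) * sum_ij w_ij ||x_i - x_j||^2, i.e. they reward
# matrices X that vary smoothly along the row/column graphs.
import numpy as np

W = np.array([[0., 1., 0.],
              [1., 0., 2.],
              [0., 2., 0.]])
L = np.diag(W.sum(axis=1)) - W
X = np.random.randn(3, 4)
lhs = np.trace(X.T @ L @ X)
rhs = 0.5 * sum(W[i, j] * np.sum((X[i] - X[j]) ** 2)
                for i in range(3) for j in range(3))
print(np.allclose(lhs, rhs))  # True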
import tensorflow as tf
import numpy as np

g = tf.Variable(tf.truncated_normal([10, 10]))
h = tf.placeholder(shape=[10, 10], dtype=tf.float32, name='Y')  # fed below but not used by f
f = tf.trace(g)

# build the optimizer before initializing, so Adam's slot variables are covered
optimiser = tf.train.AdamOptimizer(learning_rate=0.02).minimize(f)

Sess = tf.Session()
initialiser = tf.global_variables_initializer()
Sess.run(initialiser)

for i in range(100):
    rand_arr = np.random.random((10, 10))
    loss, _ = Sess.run([f, optimiser], feed_dict={h: rand_arr})
    print(' f ' + str(loss))
def restore_helper(tensors,
                   init=None,
                   precision=1E-12,
                   ncv=50,
                   nmax=100000,
                   numeig=1,
                   pinv=1E-30):
    """
    Helper function for putting InfiniteMPSCentralGauge into central form.

    Parameters:
    ------------------------------
    init: tf.Tensor
        initial guess for the eigenvector
    precision: float
        desired precision of the dominant eigenvalue
    ncv: int
        number of Krylov vectors
    nmax: int
        max number of iterations
    numeig: int
        hyperparameter, passed to scipy.sparse.linalg.eigs; number of
        eigenvectors to be returned by scipy.sparse.linalg.eigs;
        leave at 6 to avoid problems with arpack
    pinv: float
        pseudoinverse cutoff

    Returns:
    ----------------------------------
    (As, mat, connector, right_mat)
    As: list of tf.Tensor
    mat: tf.Tensor
    connector: tf.Tensor
    right_mat: tf.Tensor
    """
    As = copy.copy(tensors)  # [t for t in tensors]  # won't compile without this
    if not np.all(As[0].dtype == t.dtype for t in As):
        raise TypeError('TMeigs_power_method: all As have to have the same dtype')
    dtype = As[0].dtype

    eta, l = TMeigs(tensors=As,
                    direction='left',
                    init=init,
                    nmax=nmax,
                    precision=precision,
                    ncv=ncv,
                    numeig=numeig)
    sqrteta = tf.cast(tf.sqrt(tf.real(eta)), dtype)
    As[0] /= sqrteta

    l = l / tf.trace(l)
    l = (l + tf.conj(tf.transpose(l))) / 2.0
    eigvals_left, u_left = tf.linalg.eigh(l)
    eigvals_left /= tf.reduce_sum(eigvals_left, axis=0)
    abseigvals_left = tf.abs(eigvals_left)
    mask = tf.greater(abseigvals_left, pinv)
    eigvals_left = tf.where(mask, eigvals_left,
                            tf.zeros(eigvals_left.shape, dtype=dtype))
    inveigvals_left = tf.where(mask, 1.0 / eigvals_left,
                               tf.zeros(eigvals_left.shape, dtype=dtype))
    y = ncon([u_left, tf.diag(tf.sqrt(eigvals_left))], [[-2, 1], [1, -1]])
    invy = ncon([tf.diag(tf.sqrt(inveigvals_left)), tf.conj(u_left)],
                [[-2, 1], [-1, 1]])

    eta, r = TMeigs(tensors=As,
                    direction='right',
                    init=init,
                    nmax=nmax,
                    precision=precision,
                    ncv=ncv,
                    numeig=numeig)
    r = r / tf.trace(r)
    r = (r + tf.conj(tf.transpose(r))) / 2.0
    eigvals_right, u_right = tf.linalg.eigh(r)
    eigvals_right /= tf.reduce_sum(eigvals_right, axis=0)
    abseigvals_right = tf.abs(eigvals_right)
    mask = tf.greater(abseigvals_right, pinv)
    eigvals_right = tf.where(mask, eigvals_right,
                             tf.zeros(eigvals_right.shape, dtype=dtype))
    inveigvals_right = tf.where(mask, 1.0 / eigvals_right,
                                tf.zeros(eigvals_right.shape, dtype=dtype))
    x = ncon([u_right, tf.diag(tf.sqrt(eigvals_right))], [[-1, 1], [1, -2]])
    invx = ncon([tf.diag(tf.sqrt(inveigvals_right)), tf.conj(u_right)],
                [[-1, 1], [-2, 1]])

    lam, U, V = tf.linalg.svd(ncon([y, x], [[-1, 1], [1, -2]]))
    lam = tf.cast(lam, dtype)

    As[0] = ncon(  # absorb everything on the left end
        [tf.diag(lam), tf.conj(V), invx, As[0]],
        [[-1, 1], [2, 1], [2, 3], [3, -2, -3]])
    As[-1] = ncon([As[-1], invy, U], [[-1, -2, 1], [1, 2], [2, -3]])

    for n in range(len(As) - 1):
        tensor, mat, _ = prepare_tensor_QR(As[n], direction=1)
        As[n] = tensor
        As[n + 1] = ncon([mat, As[n + 1]], [[-1, 1], [1, -2, -3]])

    Z = ncon([As[-1], tf.conj(As[-1])], [[1, 2, 3], [1, 2, 3]]) / tf.cast(
        As[-1].shape[2], dtype)
    As[-1] /= tf.sqrt(Z)

    lam = lam / tf.linalg.norm(lam)
    mat = tf.diag(lam)
    connector = tf.diag(1.0 / lam)
    right_mat = tf.diag(lam)

    return As, mat, connector, right_mat
def free_energy_stable(y, x_mean, x_var, x_u, gamma, alpha, beta):
    """
    TODO

    :param y:
    :param x_mean:
    :param x_var:
    :param x_u:
    :param gamma:
    :param alpha:
    :param beta:
    :return:
    """
    # Determine number of samples, number of inducing points, and number of latent dimensions.
    assert np.shape(x_mean) == np.shape(x_var), \
        'Shape of mean and variance of q(X) must be the same.'
    [n, q] = np.shape(x_mean)
    [m, qu] = np.shape(x_u)
    [ny, d] = np.shape(y)
    assert q == qu, 'Latent dimensionality of X and inducing input must be the same.'
    assert n == ny, 'Number of observations in X and Y must be equal.'
    assert n > m, 'Number of observations must be greater than number of inducing points.'
    assert q == np.size(gamma), 'ARD weights must be same size as latent dimensionality.'

    # Calculate covariance matrices and psi-statistics.
    k_uu = k_ard_rbf_covariance_matrix_naive(input_0=x_u, gamma=gamma, alpha=alpha,
                                             beta=beta, include_noise=False,
                                             include_jitter=True)  # [M x M].
    psi_0 = k_ard_rbf_psi_0_naive(num_samples=n, alpha=alpha)  # Scalar.
    psi_1 = k_ard_rbf_psi_1_naive(x_mean=x_mean, x_var=x_var, x_u=x_u,
                                  gamma=gamma, alpha=alpha)  # [N x M].
    psi_2 = k_ard_rbf_psi_2_naive(x_mean=x_mean, x_var=x_var, x_u=x_u,
                                  gamma=gamma, alpha=alpha)  # [M x M].

    # Calculate f_hat term from the evidence lower bound (ELBO).
    # Using stable calculation for f_hat.
    beta_11 = tf.expand_dims(beta, axis=-1)  # [1 x 1].
    l_uu = tf.cholesky(k_uu)  # [M x M].
    l_uu_inv_psi_2 = tf.matrix_triangular_solve(l_uu, psi_2, lower=True)  # [M x M].
    l_uu_inv_psi_2_inv_transpose = tf.transpose(
        tf.matrix_triangular_solve(l_uu, tf.transpose(l_uu_inv_psi_2),
                                   lower=True))  # [M x M].
    a = beta_11 * l_uu_inv_psi_2_inv_transpose + tf.eye(m, dtype=TF_DTYPE)  # [M x M].
    l_a = tf.cholesky(a)  # [M x M].
    log_det_l_a = tf.reduce_sum(tf.log(tf.diag_part(l_a)))  # Scalar.
    # [M x N].
    l_uu_inv_psi_1_transpose = tf.matrix_triangular_solve(l_uu, tf.transpose(psi_1), lower=True)
    c = tf.matrix_triangular_solve(l_a, l_uu_inv_psi_1_transpose, lower=True)  # [M x N].
    c_transpose_c = tf.matmul(c, c, transpose_a=True)  # [N x N].
    yy_transpose = tf.matmul(y, y, transpose_b=True)  # [N x N].

    f_hat = 0.5 * n * d * (tf.reduce_sum(tf.log(beta)) - np.log(2.0 * np.pi)) - \
        d * log_det_l_a + \
        0.5 * d * tf.reduce_sum(beta * (tf.trace(l_uu_inv_psi_2_inv_transpose) - psi_0)) + \
        0.5 * tf.reduce_sum(tf.square(beta) * tf.trace(tf.matmul(c_transpose_c, yy_transpose))) - \
        0.5 * tf.reduce_sum(beta * tf.trace(yy_transpose))

    # # Cool way to evaluate a node within the graph.
    # sess = tf.Session()
    # with sess.as_default():
    #     assert tf.get_default_session() is sess
    #     print('Stable: {}'.format(f_hat.eval()))

    return f_hat
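# Side note (standalone NumPy check): `log_det_l_a` above exploits the
# identity log det A = 2 * sum(log(diag(chol(A)))) for positive-definite A;
# the code keeps the implicit factor 1/2 by summing log diag(l_a) directly.
import numpy as np

A = np.array([[4.0, 1.0],
              [1.0, 3.0]])
L_chol = np.linalg.cholesky(A)
print(2.0 * np.sum(np.log(np.diag(L_chol))))  # log det A
print(np.linalg.slogdet(A)[1])                # same value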
def call(self, x):
    r1 = tf.constant([1e-4])
    r2 = tf.constant([1e-4])
    eps = tf.constant([1e-12])
    o1 = o2 = tf.shape(x)[1] // 2

    H1 = T.transpose(x[:, 0:o1])
    H2 = T.transpose(x[:, o1:o1 + o2])

    one = tf.constant([1.0])
    m = tf.shape(H1)[1]
    m_float = tf.cast(m, 'float')

    # subtract the mean value
    partition = tf.divide(one, m_float)
    H1bar = H1 - partition * tf.matmul(H1, tf.ones([m, m]))
    H2bar = H2 - partition * tf.matmul(H2, tf.ones([m, m]))

    # calculate the auto-covariance and cross-covariance
    partition2 = tf.divide(one, (m_float - 1))
    SigmaHat12 = partition2 * tf.matmul(H1bar, tf.transpose(H2bar))
    SigmaHat11 = partition2 * tf.matmul(H1bar, tf.transpose(H1bar)) + r1 * tf.eye(o1)
    SigmaHat22 = partition2 * tf.matmul(H2bar, tf.transpose(H2bar)) + r2 * tf.eye(o2)

    # calculate the root inverse of the covariance matrices via eigendecomposition
    [D1, V1] = tf.py_func(my_eigen, [SigmaHat11], [tf.float32, tf.float32])
    [D2, V2] = tf.py_func(my_eigen, [SigmaHat22], [tf.float32, tf.float32])

    # for stability
    D1_indices = tf.where(D1 > eps)
    D1_indices = tf.squeeze(D1_indices)
    V1 = tf.gather(V1, D1_indices)
    D1 = tf.gather(D1, D1_indices)
    D2_indices = tf.where(D2 > eps)
    D2_indices = tf.squeeze(D2_indices)
    V2 = tf.gather(V2, D2_indices)
    D2 = tf.gather(D2, D2_indices)

    pow_value = tf.constant([-0.5])
    SigmaHat11RootInv = tf.matmul(tf.matmul(V1, tf.diag(tf.pow(D1, pow_value))), tf.transpose(V1))
    SigmaHat22RootInv = tf.matmul(tf.matmul(V2, tf.diag(tf.pow(D2, pow_value))), tf.transpose(V2))

    Tval = tf.matmul(tf.matmul(SigmaHat11RootInv, SigmaHat12), SigmaHat22RootInv)

    if self.use_all_singular_values:
        # all singular values are used to calculate the correlation
        corr = tf.trace(T.sqrt(tf.matmul(tf.transpose(Tval), Tval)))
    else:
        # just the top outdim_size singular values are used
        TT = tf.matmul(tf.transpose(Tval), Tval)
        U, V = tf.self_adjoint_eig(TT)
        U_sort, _ = tf.nn.top_k(U, self.cca_space_dim)
        corr = T.sum(T.sqrt(U_sort))
    return -corr
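# Side note (standalone NumPy/SciPy check): with a *matrix* square root, the
# "all singular values" correlation tr((T^T T)^{1/2}) is the nuclear norm of
# Tval, i.e. the sum of its singular values. An elementwise sqrt (what
# `T.sqrt` computes above) only coincides with this when T^T T is diagonal.
import numpy as np
from scipy.linalg import sqrtm

Tval = np.random.randn(4, 3)
print(np.trace(sqrtm(Tval.T @ Tval)).real)
print(np.sum(np.linalg.svd(Tval, compute_uv=False)))  # same value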
def measure_homodyne(self, phi, mode, select=None, **kwargs):
    """
    Measures 'modes' in the basis of quadrature eigenstates (rotated by phi)
    and updates remaining modes conditioned on this result.
    After measurement, the states in 'modes' are reset to the vacuum.

    Args:
        phi (float): phase angle of quadrature to measure
        mode (int): which mode to measure.
        select (float): user-specified measurement value (used instead of random sampling)
        **kwargs: can be used to pass a session or a feed_dict. Otherwise a temporary
            session and no feed_dict will be used.

    Returns:
        The measured value (or a list of measured values when running in batch mode).
    """
    if not isinstance(mode, int):
        raise ValueError("Specified modes are not valid.")
    else:
        if mode < 0 or mode >= self._num_modes:
            raise ValueError("Specified modes are not valid.")

    m_omega_over_hbar = 1 / self._hbar
    if self._state_is_pure:
        mode_size = 1
    else:
        mode_size = 2
    if self._batched:
        batch_offset = 1
        batch_size = self._batch_size
    else:
        batch_offset = 0
        batch_size = 1

    with self.graph.as_default():
        phi = tf.cast(phi, ops.def_type)
        phi = self._maybe_batch(phi)
        evaluate_results, session, feed_dict, close_session = ops._check_for_eval(kwargs)

        if select is not None:
            meas_result = self._maybe_batch(select)
            homodyne_sample = tf.cast(meas_result, tf.float64, name="Meas_result")
        else:
            # create reduced state on mode to be measured
            reduced_state = ops.reduced_density_matrix(
                self._state, mode, self._state_is_pure, self._batched)

            # rotate to homodyne basis
            # pylint: disable=invalid-unary-operand-type
            reduced_state = ops.phase_shifter(-phi, 0, reduced_state,
                                              self._cutoff_dim, False,
                                              self._batched)

            # create pdf for homodyne measurement
            # We use the following quadrature wavefunction for the Fock states:
            # \psi_n(x) = 1/sqrt[2^n n!](\frac{m \omega}{\pi \hbar})^{1/4}
            #             \exp{-\frac{m \omega}{2\hbar} x^2} H_n(\sqrt{\frac{m \omega}{\pi}} x)
            # where H_n(x) is the (physicists') nth Hermite polynomial
            if "max" in kwargs:
                q_mag = kwargs["max"]
            else:
                q_mag = 10
            if "num_bins" in kwargs:
                num_bins = kwargs["num_bins"]
            else:
                num_bins = 100000
            if "q_tensor" in self._cache:
                # use cached q_tensor
                q_tensor = self._cache["q_tensor"]
            else:
                q_tensor = tf.constant(np.linspace(-q_mag, q_mag, num_bins))
                self._cache["q_tensor"] = q_tensor
            x = np.sqrt(m_omega_over_hbar) * q_tensor
            if "hermite_polys" in self._cache:
                # use cached polynomials
                hermite_polys = self._cache["hermite_polys"]
            else:
                H0 = 0 * x + 1.0
                H1 = 2 * x
                hermite_polys = [H0, H1]
                Hn = H1
                Hn_m1 = H0
                for n in range(1, self._cutoff_dim - 1):
                    Hn_p1 = ops.H_n_plus_1(Hn, Hn_m1, n, x)
                    hermite_polys.append(Hn_p1)
                    Hn_m1 = Hn
                    Hn = Hn_p1
                self._cache["hermite_polys"] = hermite_polys

            number_state_indices = [k for k in product(range(self._cutoff_dim), repeat=2)]
            terms = [
                1 / np.sqrt(2**n * factorial(n) * 2**m * factorial(m))
                * hermite_polys[n] * hermite_polys[m]
                for n, m in number_state_indices
            ]

            hermite_matrix = tf.scatter_nd(
                number_state_indices, terms,
                [self._cutoff_dim, self._cutoff_dim, num_bins])
            hermite_terms = tf.multiply(
                tf.expand_dims(reduced_state, -1),
                tf.expand_dims(tf.cast(hermite_matrix, ops.def_type), 0))

            rho_dist = tf.cast(tf.reduce_sum(hermite_terms, axis=[1, 2]), tf.float64) \
                * (m_omega_over_hbar / np.pi) ** 0.5 \
                * tf.exp(- x ** 2) \
                * (q_tensor[1] - q_tensor[0])  # Delta_q for normalization (only works if the bins are equally spaced)

            # use tf.multinomial to sample
            logprobs = tf.log(rho_dist)
            samples_idx = tf.multinomial(logprobs, 1)
            homodyne_sample = tf.gather(q_tensor, samples_idx)
            homodyne_sample = tf.squeeze(homodyne_sample)

        if evaluate_results:
            meas_result = homodyne_sample.eval(feed_dict, session)
            if close_session:
                session.close()
        else:
            meas_result = tf.identity(homodyne_sample, name="Meas_result")

        # project remaining modes into conditional state
        if self._num_modes == 1:
            # in this case, all modes were measured and we put everything into vacuum
            self.reset(pure=self._state_is_pure)
        else:
            # only some modes were measured: put unmeasured modes in conditional state,
            # while resetting measured modes to vacuum
            inf_squeezed_vac = tf.convert_to_tensor(
                [(-0.5)**(m // 2) * np.sqrt(factorial(m)) / factorial(m // 2)
                 if m % 2 == 0 else 0.
                 for m in range(self._cutoff_dim)],
                dtype=ops.def_type)
            if self._batched:
                inf_squeezed_vac = tf.tile(tf.expand_dims(inf_squeezed_vac, 0), [batch_size, 1])
            displacement_size = tf.stack(
                tf.convert_to_tensor(meas_result * np.sqrt(m_omega_over_hbar / 2)))
            quad_eigenstate = ops.displacement(displacement_size, 0,
                                               inf_squeezed_vac,
                                               self._cutoff_dim, True,
                                               self._batched)
            homodyne_eigenstate = ops.phase_shifter(phi, 0, quad_eigenstate,
                                                    self._cutoff_dim, True,
                                                    self._batched)
            conditional_state = ops.conditional_state(self._state,
                                                      homodyne_eigenstate,
                                                      mode,
                                                      self._state_is_pure,
                                                      batched=self._batched)

            # normalize
            if self._state_is_pure:
                norm = tf.norm(tf.reshape(conditional_state, [batch_size, -1]), axis=1)
            else:
                # calculate the norm of conditional_state;
                # cheap hack, since tensorflow doesn't allow an einsum equation for trace:
                r = conditional_state
                for _ in range(self._num_modes - 2):
                    r = ops.partial_trace(r, 0, False, self._batched)
                norm = tf.trace(r)

            # for broadcasting
            norm_reshape = [1] * len(conditional_state.shape[batch_offset:])
            if self._batched:
                norm_reshape = [self._batch_size] + norm_reshape

            normalized_conditional_state = conditional_state / tf.reshape(norm, norm_reshape)

            # reset measured modes into vacuum
            meas_mode_vac = self._single_mode_pure_vac if self._state_is_pure \
                else self._single_mode_mixed_vac
            batch_index = indices[:batch_offset]
            meas_mode_indices = indices[batch_offset:batch_offset + mode_size]
            conditional_indices = indices[batch_offset + mode_size:batch_offset + mode_size * self._num_modes]
            eqn_lhs = batch_index + meas_mode_indices + "," + batch_index + conditional_indices
            eqn_rhs = ''
            meas_ctr = 0
            cond_ctr = 0
            for m in range(self._num_modes):
                if m == mode:
                    # use measured indices
                    eqn_rhs += meas_mode_indices[mode_size * meas_ctr:mode_size * (meas_ctr + 1)]
                    meas_ctr += 1
                else:
                    # use conditional indices
                    eqn_rhs += conditional_indices[mode_size * cond_ctr:mode_size * (cond_ctr + 1)]
                    cond_ctr += 1
            eqn = eqn_lhs + "->" + batch_index + eqn_rhs
            new_state = tf.einsum(eqn, meas_mode_vac, normalized_conditional_state)
            self._update_state(new_state)

        return meas_result
def measure_fock(self, modes, select=None, **kwargs): """ Measures 'modes' in the Fock basis and updates remaining modes conditioned on this result. After measurement, the states in 'modes' are reset to the vacuum. Args: modes (Sequence[int]): which modes to measure (in increasing order). select (Sequence[int]): user-specified measurement value (used instead of random sampling) **kwargs: can be used to pass a session or a feed_dict. Otherwise a temporary session and no feed_dict will be used. Returns: A list with the Fock number measurement results for each mode. """ # allow integer (non-list) arguments # not part of the API, but provided for convenience if isinstance(modes, int): modes = [modes] if isinstance(select, int): select = [select] # convert lists to np arrays if isinstance(modes, list): modes = np.array(modes) if isinstance(select, list): select = np.array(select) # check for valid 'modes' argument if len(modes) == 0 or len(modes) > self._num_modes or len( modes) != len(set(modes)): #pylint: disable=len-as-condition raise ValueError("Specified modes are not valid.") if np.any(modes != sorted(modes)): raise ValueError("'modes' must be sorted in increasing order.") # check for valid 'select' argument if select is not None: if np.any(select == None): #pylint: disable=singleton-comparison raise NotImplementedError( "Post-selection lists must only contain numerical values.") if self._batched: num_meas_modes = len(modes) # in this case, select must either be: # np array of shape (M,), or # np array of shape (B,M) # where B is the batch_size and M is the number of measured modes shape_err = False if len(select.shape) == 1: # non-batched list, must broadcast if select.shape[0] != num_meas_modes: shape_err = True else: select = np.vstack([select] * self._batch_size) elif len(select.shape) == 2: # batch of lists, no need to broadcast if select.shape != (self._batch_size, num_meas_modes): shape_err = True else: shape_err = True if shape_err: raise ValueError( "The shape of 'select' is incompatible with 'modes'.") else: # in this case, select should be a vector if select.shape != modes.shape: raise ValueError( "'select' must be have the same shape as 'modes'") # carry out the operation with self.graph.as_default(): evaluate_results, session, feed_dict, close_session = ops._check_for_eval( kwargs) num_reduced_state_modes = len(modes) reduced_state = self._state if self._state_is_pure: mode_size = 1 else: mode_size = 2 if self._batched: batch_size = self._batch_size batch_offset = 1 else: batch_size = 1 batch_offset = 0 if select is not None: # just use the supplied measurement results meas_result = select else: # compute and sample measurement result if self._state_is_pure and len(modes) == self._num_modes: # in this case, measure directly on the pure state probs = tf.abs(self._state)**2 logprobs = tf.log(probs) sample = tf.multinomial( tf.reshape(logprobs, [batch_size, -1]), 1) sample_tensor = tf.squeeze(sample) else: # otherwise, trace out unmeasured modes and sample using diagonal of reduced state removed_ctr = 0 red_state_is_pure = self._state_is_pure for m in range(self._num_modes): if m not in modes: new_mode_idx = m - removed_ctr reduced_state = ops.partial_trace( reduced_state, new_mode_idx, red_state_is_pure, self._batched) red_state_is_pure = False removed_ctr += 1 # go from bra_A,ket_A,bra_B,ket_B,... -> bra_A,bra_B,ket_A,ket_B,... 
# since this is what diag_part expects
# workaround for getting a multi-index diagonal, since tensorflow doesn't
# support getting the diag of more than one subsystem at once
if num_reduced_state_modes > 1:
    state_indices = np.arange(batch_offset + 2 * num_reduced_state_modes)
    batch_index = state_indices[:batch_offset]
    bra_indices = state_indices[batch_offset::2]
    ket_indices = state_indices[batch_offset + 1::2]
    transpose_list = np.concatenate([batch_index, bra_indices, ket_indices])
    reduced_state_reshuffled = tf.transpose(reduced_state, transpose_list)
else:
    reduced_state_reshuffled = reduced_state
diag_indices = [self._cutoff_dim**num_reduced_state_modes] * 2
if self._batched:
    diag_indices = [self._batch_size] + diag_indices
diag_tensor = tf.reshape(reduced_state_reshuffled, diag_indices)
diag_entries = tf.matrix_diag_part(diag_tensor)
# hack so we can use tf.multinomial for sampling
logprobs = tf.log(tf.cast(diag_entries, tf.float64))
sample = tf.multinomial(tf.reshape(logprobs, [batch_size, -1]), 1)
sample_tensor = tf.squeeze(sample)
# sample is a single integer for each batch entry; we need to convert it
# to the corresponding photon numbers [n0, n1, n2, ...]
meas_result = ops.unravel_index(sample_tensor,
                                [self._cutoff_dim] * num_reduced_state_modes)
if not self._batched:
    meas_result = meas_result[0]  # no batch index, can get rid of first axis
# unstack here because that's how the result should be returned
meas_result = tf.unstack(meas_result, axis=-1, name="Meas_result")

# project remaining modes into the conditional state
if len(modes) == self._num_modes:
    # all modes were measured: put everything in vacuum by resetting
    self.reset(pure=self._state_is_pure)
else:
    # only some modes were measured: put unmeasured modes in the conditional
    # state, while resetting measured modes to vacuum
    fock_state = tf.one_hot(tf.stack(meas_result, axis=-1),
                            depth=self._cutoff_dim, dtype=ops.def_type)
    conditional_state = self._state
    for idx, mode in enumerate(modes):
        if self._batched:
            f = fock_state[:, idx]
        else:
            f = fock_state[idx]
        conditional_state = ops.conditional_state(conditional_state, f, mode,
                                                  self._state_is_pure,
                                                  batched=self._batched)

    if self._state_is_pure:
        norm = tf.norm(tf.reshape(conditional_state, [batch_size, -1]), axis=1)
    else:
        # calculate the norm of conditional_state;
        # cheap workaround since tensorflow doesn't allow an einsum equation for trace:
        r = conditional_state
        for _ in range(self._num_modes - num_reduced_state_modes - 1):
            r = ops.partial_trace(r, 0, False, self._batched)
        norm = tf.trace(r)

    # for broadcasting
    norm_reshape = [1] * len(conditional_state.shape[batch_offset:])
    if self._batched:
        norm_reshape = [self._batch_size] + norm_reshape
    normalized_conditional_state = conditional_state / tf.reshape(norm, norm_reshape)

    # reset measured modes into vacuum
    single_mode_vac = (self._single_mode_pure_vac if self._state_is_pure
                       else self._single_mode_mixed_vac)
    if len(modes) == 1:
        meas_modes_vac = single_mode_vac
    else:
        meas_modes_vac = ops.combine_single_modes([single_mode_vac] * len(modes),
                                                  self._batched)
    batch_index = indices[:batch_offset]
    meas_mode_indices = indices[batch_offset:batch_offset + mode_size * len(modes)]
    conditional_indices = indices[batch_offset + mode_size * len(modes):
                                  batch_offset + mode_size * self._num_modes]
    eqn_lhs = batch_index + meas_mode_indices + "," + batch_index + conditional_indices
    eqn_rhs = ''
    meas_ctr = 0
    cond_ctr = 0
    for m in range(self._num_modes):
        if m in modes:
            # use measured indices
            eqn_rhs += meas_mode_indices[mode_size * meas_ctr:mode_size * (meas_ctr + 1)]
            meas_ctr += 1
        else:
            # use conditional indices
            eqn_rhs += conditional_indices[mode_size * cond_ctr:mode_size * (cond_ctr + 1)]
            cond_ctr += 1
    eqn = eqn_lhs + "->" + batch_index + eqn_rhs
    new_state = tf.einsum(eqn, meas_modes_vac, normalized_conditional_state)
    self._update_state(new_state)

# return measurement result
if evaluate_results:
    _meas = [t.eval(feed_dict, session) for t in meas_result]
    if close_session:
        session.close()
else:
    _meas = meas_result
return tuple(_meas)
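# A minimal standalone sketch (not part of the backend above) of the sampling
# trick used there: flatten the diagonal of a multi-mode density matrix into
# one categorical distribution, draw with tf.multinomial, then unravel the
# flat index back into per-mode photon numbers. The cutoff and the toy
# probabilities below are made-up illustration values.
import numpy as np
import tensorflow as tf

cutoff, num_modes = 3, 2
probs = np.random.rand(cutoff**num_modes)
probs /= probs.sum()  # stand-in for the diagonal of a 2-mode density matrix

logprobs = tf.log(tf.constant(probs[None, :], dtype=tf.float64))
flat_sample = tf.multinomial(logprobs, 1)  # shape [1, 1]

with tf.Session() as sess:
    idx = sess.run(flat_sample)[0, 0]
    # the backend does this step with its own unravel_index op
    print(np.unravel_index(idx, [cutoff] * num_modes))  # e.g. (1, 2)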
ytt = tf.constant([[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
yss = tf.argmax(yss, axis=1)
ytt = tf.argmax(ytt, axis=1)
ns = BATCH_SIZE
nt = BATCH_SIZE
N = 0
z = tf.constant(value=0, shape=[BATCH_SIZE, 1], dtype=tf.float32)
s = tf.constant(value=1 / ns, shape=[ns, 1])
t = tf.constant(value=1 / nt, shape=[nt, 1])
for c in range(4):
    es = tf.where(tf.equal(yss, c), s, z)
    et = tf.where(tf.equal(ytt, c), t, z)
    e = tf.concat([es, et], 0)
    N = N + tf.matmul(e, tf.transpose(e))
mmd_con = tf.trace(tf.matmul(tf.matmul(X, N), tf.transpose(X)))

c = 1
ns = 6
a = tf.boolean_mask(yss, tf.equal(yss, c))
b = tf.shape(a)[0]
# b is a tensor, so 1 / b is a float64 tensor; cast down to float32
s_con = tf.cast(tf.Variable(1 / b, trainable=False), tf.float32)
c = tf.reshape(s_con, [1, 1])
multi = tf.tile(input=c, multiples=[ns, 1])
a = tf.Variable(0, trainable=False, name='temp_')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(a.eval())
    tf.global_variables_initializer().run()
    print(c.eval())
    print(multi.eval())
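# Hedged sketch of what the fragment above computes: for each class c, an
# indicator-weighted vector e = [es; et] is accumulated into N = sum_c e e^T,
# and the conditional-MMD term is tr(X N X^T). Labels and shapes below are
# toy values, not taken from the original script.
import numpy as np
import tensorflow as tf

ns = nt = 3
ys = tf.constant([0, 1, 1], dtype=tf.int64)  # source labels
yt = tf.constant([1, 0, 1], dtype=tf.int64)  # target labels
X = tf.constant(np.random.rand(4, ns + nt), dtype=tf.float32)  # features as columns

z = tf.zeros([ns, 1], tf.float32)
s = tf.fill([ns, 1], 1.0 / ns)
t = tf.fill([nt, 1], 1.0 / nt)
N = tf.zeros([ns + nt, ns + nt], tf.float32)
for c in range(2):
    es = tf.where(tf.equal(ys, c), s, z)
    et = tf.where(tf.equal(yt, c), t, z)
    e = tf.concat([es, et], 0)
    N += tf.matmul(e, tf.transpose(e))
mmd_con = tf.trace(tf.matmul(tf.matmul(X, N), tf.transpose(X)))
with tf.Session() as sess:
    print(sess.run(mmd_con))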
def traceOp(self, x, dtype, expected_ans, use_gpu=False):
    with self.test_session(use_gpu=use_gpu):
        tf_ans = tf.trace(x.astype(dtype))
        out = tf_ans.eval()
    self.assertAllClose(out, expected_ans)
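# Usage sketch for the test helper above: tf.trace sums the main diagonal,
# matching np.trace. Toy values only.
import numpy as np
import tensorflow as tf

x = np.arange(9, dtype=np.float32).reshape(3, 3)
with tf.Session() as sess:
    print(sess.run(tf.trace(x)))  # 0 + 4 + 8 = 12.0
    print(np.trace(x))            # same value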
def __init__(self, params_values, n_pair):
    self.params_values = params_values
    G1 = np.squeeze(params_values['G1'])
    G2 = np.squeeze(params_values['G2'])
    Q0 = params_values['Q0']
    Q1 = params_values['Q1']
    R0 = params_values['R0']
    R1 = params_values['R1']
    # R2 = params_values['R2']
    n_act = G1.shape[1]       # number of active actuators on the DM
    n_pix = G1.shape[0]       # number of pixels in the dark hole
    n_image = 2 * n_pair + 1  # number of probe images in each control step

    # define the placeholders for the computation graph
    u1c = tf.placeholder(tf.float64, shape=(None, n_act))
    u2c = tf.placeholder(tf.float64, shape=(None, n_act))
    u1p = tf.placeholder(tf.float64, shape=(None, n_image, n_act))
    u2p = tf.placeholder(tf.float64, shape=(None, n_image, n_act))
    Enp_old = tf.placeholder(tf.complex128, shape=(None, n_pix))
    Ip = tf.placeholder(tf.float64, shape=(None, n_image, n_pix))
    P_old = tf.placeholder(tf.float64, shape=(None, n_pix, 2, 2))
    learning_rate = tf.placeholder(tf.float64, shape=())
    learning_rate2 = tf.placeholder(tf.float64, shape=())

    # define the optical model as a state space model (SSM) or a neural network
    # model = SSM(G1, G2, Q0, Q1, R0, R1, R2, n_image)
    model = SSM(G1, G2, Q0, Q1, R0, R1, n_image)

    # define the relations of the control/probe inputs, camera images and
    # hidden electric fields
    Enp_pred = model.transition(Enp_old, u1c, u2c)
    Qco = model.transition_covariance(Enp_old, u1c, u2c)
    Enp_est, P_est, H = LSEnet(model, Ip, u1p, u2p)
    Enp_est2, P_est2, _ = LSEnet(model, Ip, u1p, u2p)
    # Enp_est, P_est = KFnet(model, Ip, Enp_old, P_old, u1c, u2c, u1p, u2p)
    _, Ip_pred = model.observation(Enp_est, u1p, u2p)
    Ip_pred_err = tf.tile(tf.expand_dims(tf.trace(P_est), 1), [1, n_image, 1])
    obs_cov = 0.
    # obs_cov = 4 * tf.tensordot(tf.expand_dims(tf.reduce_mean(Ip, -1), -1),
    #                            tf.ones((1, n_pix), dtype=tf.float64), [-1, 0]) \
    #           * tf.tile(tf.expand_dims(tf.trace(P_est), 1), [1, n_image, 1])
    Rp = model.observation_covariance(Ip, u1p, u2p)
    # Ip_pred_diff = Ip_pred[:, 1::2, :] - Ip_pred[:, 2::2, :]
    # Ip_diff = Ip[:, 1::2, :] - Ip[:, 2::2, :]
    # Rp_diff = Rp[:, 1::2, :] + Rp[:, 2::2, :]
    # HPHt = tf.matmul(tf.matmul(H, P_est), tf.transpose(H, [0, 1, 3, 2]))
    # obs_bias = tf.transpose(tf.linalg.diag_part(HPHt), [0, 2, 1])

    # evidence lower bound (ELBO): cost function for system identification;
    # we need to maximize the ELBO for system ID
    elbo = - tf.reduce_sum((tf.abs(Ip - Ip_pred - Ip_pred_err)**2 + obs_cov) / Rp) \
           - tf.reduce_sum(tf.log(2 * np.pi * Rp)) \
           - (tf.reduce_sum(tf.abs(Enp_pred - Enp_est)**2 / Qco)
              + tf.reduce_sum(2 * tf.log(Qco))
              - tf.reduce_sum(tf.linalg.logdet(P_est))
              + tf.reduce_sum(tf.trace(P_est) / Qco))
    # elbo = - (tf.reduce_sum(tf.abs(Enp_pred-Enp_est)**2 / Qco) + tf.reduce_sum(2 * tf.log(Qco)) - \
    #           tf.reduce_sum(tf.linalg.logdet(P_est)) + tf.reduce_sum(tf.trace(P_est) / Qco))
    # elbo = - tf.reduce_sum((tf.abs(Ip_diff-Ip_pred_diff)**2 + obs_bias) / Rp_diff) - tf.reduce_sum(tf.log(2*np.pi*Rp)) - \
    #        (tf.reduce_sum(tf.abs(Enp_pred-Enp_est)**2 / Qco) + tf.reduce_sum(2 * tf.log(Qco)) - \
    #         tf.reduce_sum(tf.linalg.logdet(P_est)) + tf.reduce_sum(tf.trace(P_est) / Qco))

    params_list = model.get_params()  # parameters to be identified

    self.model = model
    self.Enp_est2 = Enp_est2
    self.P_est2 = P_est2
    self.Enp_est = Enp_est
    self.P_est = P_est
    self.Ip = Ip
    self.u1p = u1p
    self.u2p = u2p
    self.u1c = u1c
    self.u2c = u2c
    self.Enp_old = Enp_old
    self.P_old = P_old
    self.learning_rate = learning_rate
    self.learning_rate2 = learning_rate2
    self.elbo = elbo
    # mean squared error (MSE): a metric for checking the system ID results
    self.MSE = tf.reduce_sum(tf.abs(Ip - Ip_pred)**2)

    # start identifying/learning the model parameters
    self.train_Jacobian = tf.train.AdamOptimizer(
        learning_rate=learning_rate, beta1=0.99, beta2=0.9999,
        epsilon=1e-08).minimize(-elbo, var_list=params_list[0:4])
    # self.train_noise_coef = tf.train.AdamOptimizer(learning_rate=learning_rate2,
    #     beta1=0.99, beta2=0.9999, epsilon=1e-08).minimize(-elbo, var_list=params_list[4::])
    self.train_noise_coef = tf.train.AdamOptimizer(
        learning_rate=learning_rate2, beta1=0.99, beta2=0.9999,
        epsilon=1e-08).minimize(-elbo, var_list=[params_list[4], params_list[5]])
    self.train_group = tf.group(self.train_Jacobian, self.train_noise_coef)
    self.init = tf.global_variables_initializer()
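# Hedged sketch of the training pattern used above: two Adam optimizers with
# separate learning rates minimize the same negative ELBO over disjoint
# variable groups, then run together via tf.group. `w` and `noise` are
# made-up stand-ins for the Jacobian and noise-coefficient parameters.
import tensorflow as tf

w = tf.Variable(1.0)
noise = tf.Variable(0.5)
neg_elbo = tf.square(w - 3.0) + tf.square(noise - 0.1)  # stand-in loss

train_w = tf.train.AdamOptimizer(1e-2).minimize(neg_elbo, var_list=[w])
train_noise = tf.train.AdamOptimizer(1e-3).minimize(neg_elbo, var_list=[noise])
train_group = tf.group(train_w, train_noise)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(500):
        sess.run(train_group)
    print(sess.run([w, noise]))  # approaches [3.0, 0.1]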
def opt_tree_energy(isos_012,
                    H,
                    itr,
                    itr_l,
                    verbose=0,
                    graphed=False,
                    decomp_mode="svd_full_iso",
                    decomp_device=None,
                    envsq_dtype=None,
                    ham_shift="auto",
                    callback=None):
    """Variationally minimize the energy of a binary tree tensor network.

    Spatial uniformity is assumed: the tree tensor network consists of a
    single isometric tensor per layer.

    The Hamiltonian, assumed to be translation invariant, is provided as a
    single nearest-neighbor term `H`. See for example `get_ham_ising()`,
    which constructs an appropriate object for the Ising model. The size of
    the second and third dimensions of the first-layer tensor `isos_012[0]`
    must match the physical dimension of the Hamiltonian.

    A number `itr` of variational sweeps are carried out. For each sweep,
    the tensor specifying each layer is optimized using a linear
    approximation, with `itr_l` iterations per layer.

    Args:
        isos_012: List of tensors specifying the tree tensor network; one
            tensor for each layer. Assumed to be isometries.
        H: The local term of the Hamiltonian as an MPO.
        itr: The number of variational sweeps to perform.
        itr_l: The number of iterations per layer. Typically, 1 is enough.
        verbose: Set to >0 to print some status information.
        graphed: If `True`, build a graph for a complete sweep for best
            performance.
        decomp_mode: Which decomposition scheme to use for tensor updates.
        decomp_device: TensorFlow device on which to perform decompositions.
        envsq_dtype: Data type to use for the squared environment. Only
            applicable if `decomp_mode` is `"svd"` or `"eigh"`.
        ham_shift: Amount by which to shift the energies of the local
            Hamiltonian term. A small positive value typically improves
            convergence.
        callback: A function to be called after each sweep. Takes 7 arguments.

    Returns:
        isos_012: The optimized tensors of the tree tensor network.
    """
    with tf.device(decomp_device):
        H, shift = shift_ham(H, ham_shift)
    print("Hamiltonian shift:", shift)

    L = len(isos_012)
    # Ascend through any trivial layers only once
    bottom = 0
    for l in range(L):
        shp = isos_012[l].shape
        if shp[0] == shp[1] * shp[2]:
            if graphed:
                H = ascend_op_local_graph(*H, isos_012[l],
                                          tf.transpose(isos_012[l], (0, 2, 1)))
            else:
                H = ascend_op_local(*H, isos_012[l],
                                    tf.transpose(isos_012[l], (0, 2, 1)))
            bottom = l + 1
        else:
            break

    t0 = time.time()
    for j in range(itr):
        if graphed:
            states = all_states_1site_graph(isos_012[bottom:])
        else:
            states = all_states_1site(isos_012[bottom:])
        states = [None] * bottom + states

        Hl = H
        svs = [None] * L
        tes_sweep = 0.0
        tds_sweep = 0.0
        for l in range(bottom, L):
            if verbose > 1:
                print("Optimizing level {}".format(l))
            isos_012[l], s, tes, tds = opt_energy_layer(
                isos_012[l:],
                *Hl,
                states[l + 1:],
                itr_l,
                graphed=graphed,
                decomp_mode=decomp_mode,
                decomp_device=decomp_device,
                envsq_dtype=envsq_dtype)
            svs[l] = s
            tes_sweep += tes
            tds_sweep += tds

            if l < L - 1:
                if graphed:
                    Hl = ascend_op_local_graph(
                        *Hl, isos_012[l], tf.transpose(isos_012[l], (0, 2, 1)))
                else:
                    Hl = ascend_op_local(
                        *Hl, isos_012[l], tf.transpose(isos_012[l], (0, 2, 1)))

        if graphed:
            H_top = ascend_op_local_top_graph(
                *Hl, isos_012[-1], tf.transpose(isos_012[-1], (0, 2, 1)))
        else:
            H_top = ascend_op_local_top(
                *Hl, isos_012[-1], tf.transpose(isos_012[-1], (0, 2, 1)))
        en = tf.trace(H_top) / (2**L) + shift * H_top.shape[0]

        tes_sweep = tes_sweep / (L + 1 - bottom)
        tds_sweep = tds_sweep / (L + 1 - bottom)

        if verbose > 0:
            minsv = np.min([sv.numpy().min() for sv in svs[bottom:]])
            print("sweeps: {}, energy density: {}, min_sv: {}, run-time: {}".
                  format(j, en.numpy().real, minsv, time.time() - t0))

        if callback is not None:
            stop_request = callback(isos_012, svs, j, en,
                                    time.time() - t0, tes_sweep, tds_sweep)
            if stop_request:
                break

    return isos_012
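# Hedged note: the energy density reported above is tr(H_top) / 2**L plus the
# shift correction times the top dimension. Quick numeric check with made-up
# values (plain numpy, so it is independent of eager vs. graph mode):
import numpy as np

L_layers, shift = 3, 0.5
H_top = np.array([[1.0, 0.2], [0.2, -1.0]])
en = np.trace(H_top) / 2**L_layers + shift * H_top.shape[0]
print(en)  # 0.0 / 8 + 0.5 * 2 = 1.0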
def get_1st_loss(H, adj_mini_batch):
    D = tf.diag(tf.reduce_sum(adj_mini_batch, 1))
    L = D - adj_mini_batch  # L is the graph Laplacian matrix
    return 2 * tf.trace(tf.matmul(tf.matmul(tf.transpose(H), L), H))
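# Hedged note: for a symmetric adjacency A with Laplacian L = D - A, the loss
# above satisfies 2 * tr(H^T L H) = sum_ij A_ij * ||h_i - h_j||^2, i.e. it
# penalizes connected nodes whose embeddings are far apart. Quick numeric
# check using the function just defined (toy values):
import numpy as np
import tensorflow as tf

A = np.array([[0., 1.], [1., 0.]], dtype=np.float32)
Hm = np.array([[0., 0.], [1., 2.]], dtype=np.float32)
loss = get_1st_loss(tf.constant(Hm), tf.constant(A))
pairwise = sum(A[i, j] * np.sum((Hm[i] - Hm[j])**2)
               for i in range(2) for j in range(2))
with tf.Session() as sess:
    print(sess.run(loss), pairwise)  # both 10.0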
    beta = 1 / tf.square(sigma)
    A_I = beta * K_mnnm + K_mm
    Sig = h.Mul(K_mm, tf.matrix_solve(A_I, K_mm))
    mu = beta * h.Mul(K_mm, tf.matrix_solve(A_I, K_mn), Y)
    return mu, Sig

# layer 1
X_m_1 = tf.Variable(tf.random_uniform([M, 1], minval=0, maxval=100),
                    name='X_m', dtype=tf.float32)
sigma_1 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32, name='sigma')
noise_1 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32, name='sigma')
len_sc_1 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32)
K_nm_1 = h.tf_SE_K(Xtr, X_m_1, len_sc_1, noise_1)
K_mm_1 = h.tf_SE_K(X_m_1, X_m_1, len_sc_1, noise_1)
K_nn_1 = h.tf_SE_K(Xtr, Xtr, len_sc_1, noise_1)
K_mn_1 = tf.transpose(K_nm_1)  # fixed: was h.tf.transpose(K_nm_1)
Tr_Knn_1 = tf.trace(K_nn_1)
# mean1, var1 = predict(K_mn_1, sigma_1, K_mm_1, K_nn_1)

# layer 2
h_mu = tf.Variable(np.ones((N, 1)), dtype=tf.float32)
# h_mu_odd = [tf.slice(h_mu, i) for i in ]  (left unfinished in the original)
h_S_std = tf.Variable(np.ones((N, 1)), dtype=tf.float32)
h_S = tf.square(h_S_std)
X_m_2 = tf.Variable(tf.random_uniform([M, 1], minval=-5, maxval=5), dtype=tf.float32)
sigma_2 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32)
noise_2 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32)
len_sc_2 = tf.Variable(tf.ones([1, 1]), dtype=tf.float32)
Tr_Knn, K_nm_2, K_mnnm_2 = KS.build_psi_stats_rbf(X_m_2, tf.square(noise_2),
                                                  len_sc_2, h_mu, h_S)
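# Hedged note: the mu/Sig returned above look like the standard variational
# sparse-GP posterior over inducing outputs (cf. Titsias 2009): with
# beta = 1/sigma^2 and A = K_mm + beta * K_mn K_nm,
#   Sig = K_mm A^{-1} K_mm,   mu = beta * K_mm A^{-1} K_mn y.
# Minimal numpy sketch with a toy RBF kernel; all names here are local.
import numpy as np

def rbf(a, b, ls=1.0):
    d = a[:, None] - b[None, :]
    return np.exp(-0.5 * (d / ls)**2)

X = np.linspace(0, 5, 20)
Z = np.linspace(0, 5, 4)  # inducing inputs
y = np.sin(X)[:, None]
sigma2 = 0.1
Kmm = rbf(Z, Z) + 1e-6 * np.eye(len(Z))
Kmn = rbf(Z, X)
A = Kmm + Kmn @ Kmn.T / sigma2
mu = Kmm @ np.linalg.solve(A, Kmn @ y) / sigma2
Sig = Kmm @ np.linalg.solve(A, Kmm)
print(mu.ravel())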
def get_link_loss(H, adj_mini_batch):
    D = tf.diag(tf.reduce_sum(adj_mini_batch, 1))
    L = D - adj_mini_batch
    # may need to change; this is the Laplacian loss
    return 2 * tf.trace(tf.matmul(tf.matmul(tf.transpose(H), L), H))
def restore_helper_power_method(tensors,
                                init=None,
                                precision=1E-12,
                                nmax=100000,
                                pinv=1E-30):
    """
    Helper function for putting InfiniteMPSCentralGauge into central form
    using the power method.

    Parameters:
    ------------------------------
    init: tf.Tensor
        initial guess for the eigenvector
    precision: float
        desired precision of the dominant eigenvalue
    nmax: int
        max number of iterations
    pinv: float
        pseudoinverse cutoff

    Returns:
    ----------------------------------
    (As, mat, connector, right_mat)
    As: list of tf.Tensors
    mat: tf.Tensor
    connector: tf.Tensor
    right_mat: tf.Tensor
    """
    As = copy.copy(tensors)  # [t for t in tensors] won't compile without this
    newAs = []
    # fixed: np.all over a generator is always truthy, so the check never fired
    if not all(As[0].dtype == t.dtype for t in As):
        raise TypeError('TMeigs_power_method: all As have to have the same dtype')
    dtype = As[0].dtype
    if init is not None:  # fixed: the truth value of a tf.Tensor is ambiguous
        x = init
    else:
        x = tf.diag(tf.ones(shape=[As[0].shape[0]], dtype=As[0].dtype))
    if not As[0].dtype == x.dtype:
        raise TypeError('TMeigs_power_method: `init` has other dtype than `As`')

    x /= tf.linalg.norm(x)
    dtype = x.dtype

    def do_step_left(n, eta, state, diff):
        newstate = transfer_op(As, As, 'l', state)
        eta = tf.linalg.norm(newstate)
        newstate /= eta
        diff = tf.cast(tf.linalg.norm(state - newstate), dtype.real_dtype)
        return n + 1, eta, newstate, diff

    def do_step_right(n, eta, state, diff):
        newstate = transfer_op(As, As, 'r', state)
        eta = tf.linalg.norm(newstate)
        newstate /= eta
        diff = tf.cast(tf.linalg.norm(state - newstate), dtype.real_dtype)
        return n + 1, eta, newstate, diff

    def stopping_criterion(n, eta, state, diff):
        return tf.less(tf.cast(precision, dtype.real_dtype), diff)

    def cond(n, eta, state, diff):
        return tf.cond(
            tf.less(0, n),
            lambda: tf.cond(tf.less(n, nmax),
                            lambda: stopping_criterion(n, eta, state, diff),
                            lambda: False),
            lambda: True)

    _, eta, l, _ = tf.while_loop(
        cond, do_step_left,
        (0, tf.cast(0.0, dtype), x, tf.cast(1.0, dtype.real_dtype)))
    _, eta, r, _ = tf.while_loop(
        cond, do_step_right,
        (0, tf.cast(0.0, dtype), x, tf.cast(1.0, dtype.real_dtype)))

    sqrteta = tf.cast(tf.sqrt(tf.real(eta)), dtype)
    As[0] /= sqrteta

    l = l / tf.trace(l)
    l = (l + tf.conj(tf.transpose(l))) / 2.0
    eigvals_left, u_left = tf.linalg.eigh(l)
    eigvals_left /= tf.reduce_sum(eigvals_left, axis=0)
    abseigvals_left = tf.abs(eigvals_left)
    mask = tf.greater(abseigvals_left, pinv)
    eigvals_left = tf.where(mask, eigvals_left,
                            tf.zeros(eigvals_left.shape, dtype=dtype))
    inveigvals_left = tf.where(mask, 1.0 / eigvals_left,
                               tf.zeros(eigvals_left.shape, dtype=dtype))
    y = ncon([u_left, tf.diag(tf.sqrt(eigvals_left))], [[-2, 1], [1, -1]])
    invy = ncon([tf.diag(tf.sqrt(inveigvals_left)), tf.conj(u_left)],
                [[-2, 1], [-1, 1]])

    r = r / tf.trace(r)
    r = (r + tf.conj(tf.transpose(r))) / 2.0
    eigvals_right, u_right = tf.linalg.eigh(r)
    eigvals_right /= tf.reduce_sum(eigvals_right, axis=0)
    abseigvals_right = tf.abs(eigvals_right)
    mask = tf.greater(abseigvals_right, pinv)
    eigvals_right = tf.where(mask, eigvals_right,
                             tf.zeros(eigvals_right.shape, dtype=dtype))
    inveigvals_right = tf.where(mask, 1.0 / eigvals_right,
                                tf.zeros(eigvals_right.shape, dtype=dtype))
    x = ncon([u_right, tf.diag(tf.sqrt(eigvals_right))], [[-1, 1], [1, -2]])
    invx = ncon([tf.diag(tf.sqrt(inveigvals_right)), tf.conj(u_right)],
                [[-1, 1], [-2, 1]])

    lam, U, V = tf.linalg.svd(ncon([y, x], [[-1, 1], [1, -2]]))
    lam = tf.cast(lam, dtype)

    As[0] = ncon(  # absorb everything on the left end
        [tf.diag(lam), tf.conj(V), invx, As[0]],
        [[-1, 1], [2, 1], [2, 3], [3, -2, -3]])
    As[-1] = ncon([As[-1], invy, U], [[-1, -2, 1], [1, 2], [2, -3]])

    for n in range(len(As) - 1):
        tensor, mat, _ = prepare_tensor_QR(As[n], direction=1)
        As[n] = tensor
        As[n + 1] = ncon([mat, As[n + 1]], [[-1, 1], [1, -2, -3]])

    Z = ncon([As[-1], tf.conj(As[-1])],
             [[1, 2, 3], [1, 2, 3]]) / tf.cast(As[-1].shape[2], dtype)
    As[-1] /= tf.sqrt(Z)

    lam = lam / tf.linalg.norm(lam)
    mat = tf.diag(lam)
    connector = tf.diag(1.0 / lam)
    right_mat = tf.diag(lam)
    return As, mat, connector, right_mat
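# Hedged sketch: the core of the routine above is a power iteration for the
# dominant eigenpair of the MPS transfer operator. Same pattern with a plain
# matrix standing in for transfer_op (toy values):
import numpy as np

def power_method(A, precision=1e-12, nmax=100000):
    x = np.random.rand(A.shape[0])
    x /= np.linalg.norm(x)
    eta, diff, n = 0.0, 1.0, 0
    while diff > precision and n < nmax:
        new = A @ x
        eta = np.linalg.norm(new)  # converges to |dominant eigenvalue|
        new /= eta
        diff = np.linalg.norm(x - new)
        x, n = new, n + 1
    return eta, x

A = np.array([[2.0, 1.0], [1.0, 3.0]])
eta, v = power_method(A)
print(eta)  # ~3.618, the largest eigenvalue of A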
def build_OAMP(prob, T, savefile, Mr=4, Nt=4, mu=2, version=0, lr=1e-3,
               maxit=1000, better_wait=100, total_batch=50, batch_size=100,
               union_test=False, savefileCE='', trainOAMP=False, SNR=20,
               input_holder=None, output=None):
    layers = []  # layerinfo: (name, xhat_, newvars)
    H_ = prob.H_
    x_ = prob.x_
    y_ = prob.y_
    sigma2 = prob.sigma2_
    sample_size = prob.sample_size_

    # precompute some tensorflow constants
    OneOver2N = tf.constant(float(1) / (2 * Nt), dtype=tf.float32)
    NormalConstant = tf.constant(float(1) / (2 * np.pi)**0.5, dtype=tf.float32)
    epsilon = tf.constant(1e-2, dtype=tf.float32)
    HT_ = tf.transpose(H_, perm=[0, 2, 1])
    HHT = tf.matmul(H_, HT_)
    OneOver_trHTH = tf.reshape(1 / tf.trace(tf.matmul(HT_, H_)),
                               [sample_size, 1, 1])
    sigma2Over4N = sigma2 / (4 * Nt)
    sigma2_I = sigma2 / 2 * tf.eye(2 * Mr, batch_shape=[sample_size],
                                   dtype=tf.float32)
    I = tf.eye(2 * Nt, batch_shape=[sample_size], dtype=tf.float32)
    x_hat = tf.zeros_like(x_, dtype=tf.float32)

    for t in range(T):
        theta_ = tf.Variable(float(1), dtype=tf.float32, name='theta_' + str(t))
        gamma_ = tf.Variable(float(1), dtype=tf.float32, name='gamma_' + str(t))
        if version == 1:
            phi_ = tf.Variable(float(1), dtype=tf.float32, name='phi_' + str(t))
            xi_ = tf.Variable(float(0), dtype=tf.float32, name='xi_' + str(t))
        p_noise = y_ - tf.matmul(H_, x_hat)
        v_sqr = (tf.reshape(tf.square(tf.norm(p_noise, axis=(1, 2))),
                            [sample_size, 1, 1]) - Mr * sigma2) * OneOver_trHTH
        v_sqr = tf.maximum(v_sqr, epsilon)
        # with tf.device("/cpu:0"):
        w_hat = v_sqr * tf.matmul(HT_, tf.linalg.inv(v_sqr * HHT + sigma2_I))
        w = 2 * Nt / tf.reshape(tf.trace(tf.matmul(w_hat, H_)),
                                [sample_size, 1, 1]) * w_hat
        r = x_hat + gamma_ * tf.matmul(w, p_noise)
        C = I - theta_ * tf.matmul(w, H_)
        tau_sqr = OneOver2N * tf.reshape(
            tf.trace(tf.matmul(C, tf.transpose(C, perm=[0, 2, 1]))),
            [sample_size, 1, 1]) * v_sqr \
            + tf.square(theta_) * sigma2Over4N * tf.reshape(
                tf.trace(tf.matmul(w, tf.transpose(w, perm=[0, 2, 1]))),
                [sample_size, 1, 1])
        tau_sqr = tf.maximum(tau_sqr, epsilon)

        if mu == 2:  # {-1, +1}
            # clipping
            r = tf.maximum(r, -2. * tf.ones_like(r, dtype=tf.float32))
            r = tf.minimum(r, 2. * tf.ones_like(r, dtype=tf.float32))
            P0 = tf.exp(-tf.square(-1 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P1 = tf.exp(-tf.square(1 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            x_hat = (P1 - P0) / (P1 + P0)
            if version == 1:
                x_hat = phi_ * (x_hat - xi_ * r)  # (18)
        elif mu == 4:  # {-3, -1, +1, +3}
            # clipping
            r = tf.maximum(r, -4. * tf.ones_like(r, dtype=tf.float32))
            r = tf.minimum(r, 4. * tf.ones_like(r, dtype=tf.float32))
            # P_3 = tf.minimum(
            #     tf.maximum(tf.exp(-tf.square(-3-r)/(2*tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr),
            #                -3.e+38*tf.ones_like(r, dtype=tf.float32)),
            #     3.e+38*tf.ones_like(r, dtype=tf.float32))
            P_3 = tf.exp(-tf.square(-3 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P_1 = tf.exp(-tf.square(-1 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P1 = tf.exp(-tf.square(1 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P3 = tf.exp(-tf.square(3 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            x_hat = (-3 * P_3 - P_1 + P1 + 3 * P3) / (P_3 + P_1 + P1 + P3)
            if version == 1:
                x_hat = phi_ * (x_hat - xi_ * r)  # (18)
        else:  # {-7, -5, -3, -1, +1, +3, +5, +7}
            r = tf.maximum(r, -8. * tf.ones_like(r, dtype=tf.float32))
            r = tf.minimum(r, 8. * tf.ones_like(r, dtype=tf.float32))
            P_7 = tf.exp(-tf.square(-7 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P_5 = tf.exp(-tf.square(-5 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P_3 = tf.exp(-tf.square(-3 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P_1 = tf.exp(-tf.square(-1 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P1 = tf.exp(-tf.square(1 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P3 = tf.exp(-tf.square(3 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P5 = tf.exp(-tf.square(5 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            P7 = tf.exp(-tf.square(7 - r) / (2 * tau_sqr)) * NormalConstant / tf.sqrt(tau_sqr)
            x_hat = (-7 * P_7 - 5 * P_5 - 3 * P_3 - P_1 + P1 + 3 * P3 + 5 * P5 + 7 * P7) \
                    / (P_7 + P_5 + P_3 + P_1 + P1 + P3 + P5 + P7)
            if version == 1:
                x_hat = phi_ * (x_hat - xi_ * r)  # (18)

        # layers.append(('OAMP T={0}'.format(t), x_hat, (theta_, gamma_,), P0, P1, tau_sqr, r))
        if version == 0:
            layers.append(('OAMP T={0}'.format(t), x_hat, (theta_, gamma_,)))
        else:
            layers.append(('OAMP T={0}'.format(t), x_hat, (theta_, gamma_, phi_, xi_,)))

    loss_ = tf.nn.l2_loss(x_hat - x_)
    lr_ = tf.Variable(lr, name='lr', trainable=False)
    if tf.trainable_variables():  # returns a list; empty means nothing to train
        train = tf.train.AdamOptimizer(lr_).minimize(
            loss_, var_list=tf.trainable_variables())

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    state = load_trainable_vars(sess, savefile)
    done = state.get('done', [])
    log = str(state.get('log', ''))
    for name, _, var_list in layers:
        if len(var_list):
            describe_var_list = 'extending ' + ','.join([v.name for v in var_list])
        else:
            describe_var_list = 'fine tuning all ' + ','.join(
                [v.name for v in tf.trainable_variables()])
        done = np.append(done, name)
        print(name + ' ' + describe_var_list)
    print(log)

    if union_test:
        state = load_trainable_vars(sess, savefileCE)
        log = str(state.get('log', ''))
        print(log)
        return sess, x_hat

    if trainOAMP:
        # load previously saved variables
        other = {}
        try:
            tv = dict([(str(v.name), v) for v in tf.global_variables()])
            for k, d in np.load(savefileCE).items():
                if k in tv:
                    print('restoring ' + k)
                    sess.run(tf.assign(tv[k], d))
                else:
                    other[k] = d
            log = str(other.get('log', ''))
            print(log)
        except IOError:
            pass

        loss_history = []
        save = {}  # snapshot of the best model
        ivl = 1
        # y, x, H, sigma2 = prob(sess)
        # prob is a TFGenerator instance; prob(sess) runs
        # sess.run((self.ygen_, self.xgen_))
        yval, xval, Hval, sigma2val = sample_gen_for_OAMP(
            batch_size * total_batch, SNR, sess, input_holder, output,
            training_flag=False)
        y, x, H, sigma2 = sample_gen_for_OAMP(batch_size * total_batch, SNR,
                                              sess, input_holder, output)
        for i in range(maxit + 1):
            # if i % 1000 == 0:
            #     yval, xval, Hval, sigma2val = sample_gen_for_OAMP(batch_size*total_batch, SNR,
            #         sess, input_holder, output, training_flag=False)
            #     y, x, H, sigma2 = sample_gen_for_OAMP(batch_size*total_batch, SNR,
            #         sess, input_holder, output)
            if i % ivl == 0:
                # validation: don't run the optimizer
                loss = sess.run(loss_, feed_dict={
                    prob.y_: yval,
                    prob.x_: xval,
                    prob.H_: Hval,
                    prob.sigma2_: sigma2val,
                    prob.sample_size_: batch_size * total_batch
                })  # validation samples and labels
                # loss = sess.run(loss_, feed_dict={prob.y_: prob.yval,
                #     prob.x_: prob.xval, prob.H_: prob.Hval, prob.sigma2_: prob.sigma2val,
                #     prob.sample_size_: prob.sample_sizeval})
                if np.isnan(loss):
                    # print(np.amin(P_3_))
                    raise RuntimeError('loss is NaN')
                loss_history = np.append(loss_history, loss)
                loss_best = loss_history.min()
                # snapshot the best model
                if loss == loss_best:
                    for v in tf.trainable_variables():
                        save[str(v.name)] = sess.run(v)
                sys.stdout.write('\ri={i:<6d} loss={loss:.9f} (best={best:.9f})'.format(
                    i=i, loss=loss, best=loss_best))
                sys.stdout.flush()
                if i % (100 * ivl) == 0:
                    print('')
                    # how long ago was the best nmse?
                    age_of_best = len(loss_history) - loss_history.argmin() - 1
                    if age_of_best * ivl >= better_wait:
                        print('move along')
                        break  # no improvement on the best answer for a while: move along
            for m in range(total_batch):  # iterate over mini-batches
                sess.run(train, feed_dict={
                    prob.y_: y[m * batch_size:(m + 1) * batch_size],
                    prob.x_: x[m * batch_size:(m + 1) * batch_size],
                    prob.H_: H[m * batch_size:(m + 1) * batch_size],
                    prob.sigma2_: sigma2[m * batch_size:(m + 1) * batch_size],
                    prob.sample_size_: batch_size
                })  # training samples and labels
        # done = np.append(done, name)
        # restore the best model (guards against late-training degradation)
        tv = dict([(str(v.name), v) for v in tf.trainable_variables()])
        for k, d in save.items():
            if k in tv:
                sess.run(tf.assign(tv[k], d))
                print('restoring ' + k + ' = ' + str(d))
        # log = log + '\nloss={loss:.9f} in {i} iterations'.format(loss=loss, i=i)
        log = log + '\nloss={loss:.9f} in {i} iterations best={best:.9f} in {j} iterations'.format(
            loss=loss, i=i, best=loss_best, j=loss_history.argmin())
        state['done'] = done
        state['log'] = log
        save_trainable_vars(sess, savefile, **state)

    return sess, x_hat
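# Hedged note: the {-1,+1} denoiser above is the posterior mean of a binary
# symbol under Gaussian noise, and (P1 - P0) / (P1 + P0) collapses to
# tanh(r / tau^2). Quick numeric check (toy values):
import numpy as np

r, tau_sqr = 0.3, 0.5
P0 = np.exp(-(-1 - r)**2 / (2 * tau_sqr))
P1 = np.exp(-(1 - r)**2 / (2 * tau_sqr))
print((P1 - P0) / (P1 + P0), np.tanh(r / tau_sqr))  # both ~0.537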