def __init__(self, dim, input_dim=0, kern=None, Z=None, n_ind_pts=100,
             mean_fn=None, Q_diag=None, Umu=None, Ucov_chol=None,
             jitter=gps.numerics.jitter_level, name=None):
    super().__init__(name=name)
    self.OBSERVATIONS_AS_INPUT = False
    self.dim = dim
    self.input_dim = input_dim
    self.jitter = jitter
    self.Q_sqrt = Param(np.ones(self.dim) if Q_diag is None else Q_diag ** 0.5,
                        transform=gtf.positive)
    self.n_ind_pts = n_ind_pts if Z is None else \
        (Z[0].shape[-2] if isinstance(Z, list) else Z.shape[-2])

    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        Z_list = [np.random.randn(self.n_ind_pts, self.dim + self.input_dim)
                  for _ in range(self.dim)] if Z is None else [z for z in Z]
        self.Z = mf.SeparateIndependentMof(
            [gp.features.InducingPoints(z) for z in Z_list])

    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.dim)
    else:
        kern_list = kern or [gp.kernels.Matern32(self.dim + self.input_dim, ARD=True)
                             for _ in range(self.dim)]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.dim)
    self.Umu = Param(np.zeros((self.dim, self.n_ind_pts))
                     if Umu is None else Umu)  # Lm^-1(Umu - m(Z))
    transform = gtf.LowerTriangular(self.n_ind_pts, num_matrices=self.dim,
                                    squeeze=False)
    self.Ucov_chol = Param(
        np.tile(np.eye(self.n_ind_pts)[None, ...], [self.dim, 1, 1])
        if Ucov_chol is None else Ucov_chol,
        transform=transform)  # Lm^-1(Ucov_chol)
    self._Kzz = None
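# The comments above describe a whitened parameterization: assuming
# Kzz = Lm @ Lm.T, the (unwhitened) variational posterior over the inducing
# outputs is recovered, per output dimension d, as a sketch:
#
#     q(U_d) = N(m(Z) + Lm @ Umu[d],
#                Lm @ Ucov_chol[d] @ Ucov_chol[d].T @ Lm.T)
#
# so the stored Umu and Ucov_chol live in the whitened space.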
def test_mixed_mok_with_Id_vs_independent_mok(session_tf):
    # Per the fixture name, data.W is an identity-like mixing matrix, so the
    # mixed kernel should reduce to independent outputs.
    data = DataMixedKernelWithEye

    # Independent model
    k1 = mk.SharedIndependentMok(RBF(data.D, variance=0.5, lengthscales=1.2), data.L)
    f1 = InducingPoints(data.X[:data.M, ...].copy())
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), f1,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=data.MAXITER)

    # Mixed model
    kern_list = [RBF(data.D, variance=0.5, lengthscales=1.2) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, data.W)
    f2 = InducingPoints(data.X[:data.M, ...].copy())
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), f2,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
def main():
    X = np.loadtxt("../data/neur.X.txt")
    Y = np.loadtxt("../data/neur.Y.txt")

    gpflow.reset_default_graph_and_session()
    name = 'test'
    minibatch_size = 500

    W1_init = normalize(np.random.random(size=(C, K1)))
    W2_init = normalize(np.random.random(size=(G, K2)))

    with gpflow.defer_build():
        kernel = mk.SharedIndependentMok(
            gpflow.kernels.RBF(1, active_dims=[0]), K1 * K2)
        Z = np.linspace(0, 1, T)[:, None].astype(np.float64)
        feature = gpflow.features.InducingPoints(Z)
        feature = mf.SharedIndependentMof(feature)
        model = SplitGPM(X, Y, np.log(W1_init + 1e-5), np.log(W2_init + 1e-5),
                         kernel, gpflow.likelihoods.Gaussian(), feat=feature,
                         minibatch_size=minibatch_size, name=name)

    model.compile()

    model.W1.set_trainable(True)          # learn cell assignments
    model.W2.set_trainable(True)          # learn gene assignments
    model.feature.set_trainable(True)     # move inducing points
    model.kern.set_trainable(True)        # learn kernel parameters
    model.likelihood.set_trainable(True)  # learn likelihood parameters

    adam = gpflow.train.AdamOptimizer(0.005)
    adam.minimize(model, maxiter=10000)

    save_model(model)
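# The snippet above calls normalize() and save_model(), which are not shown.
# A minimal sketch of the assumed normalize helper (an assumption, not the
# original implementation): row-normalize each random weight matrix so every
# row sums to one before the logs are taken.
def normalize(W, axis=1):
    return W / W.sum(axis=axis, keepdims=True)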
def train(self, verbose=True, maxiter=1000):
    with gpflow.settings.temp_settings(self.gpflow_config):
        # Default parameters
        if self.kern is None:
            self.kern = gpflow.kernels.SquaredExponential(
                input_dim=self.n_in_dims,
                variance=self.dtype(0.2),
                lengthscales=self.dtype(1.0))
        if self.lh is None:
            self.lh = gpflow.likelihoods.Gaussian(variance=self.dtype(0.02))
        if self.feature is None:
            self.feature = gpf.features.InducingPoints(self.Z)

        if self.multi_output:
            self.kern = mok.SharedIndependentMok(
                self.kern, output_dimensionality=self.n_out_dims)
            self.feature = mof.SharedIndependentMof(self.feature)

        self.m = gpflow.models.SVGP(self.X, self.Y, self.kern,
                                    likelihood=self.lh,
                                    feat=self.feature,
                                    mean_function=self.mf,
                                    minibatch_size=self.batch_size)

        opt = gpflow.train.tensorflow_optimizer.AdamOptimizer(
            learning_rate=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8)
        opt.minimize(self.m, maxiter=maxiter)

        if verbose:
            pd.set_option('display.max_rows', 20)
            pd.set_option('display.max_columns', 10)
            print(self.m.as_pandas_table())
            print('Log likelihood: ', self.m.compute_log_likelihood())
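# After train() returns, predictions come from the wrapped SVGP via GPflow 1.x
# autoflow methods, which accept NumPy arrays directly. A usage sketch
# (`reg` is a hypothetical instance of the class above; Xnew must have
# n_in_dims columns):
#
#     mean, var = reg.m.predict_y(Xnew)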
def test_separate_independent_mof(session_tf):
    """
    Same test as above, but we use different (i.e. separate) inducing features
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1,
              q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    m1.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([np.tril(np.random.randn(Data.M, Data.M))
                         for _ in range(Data.P)])  # P x M x M
    kernel_2 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feat_list_2 = [InducingPoints(Data.X[:Data.M, ...].copy())
                   for _ in range(Data.P)]
    feature_2 = mf.SeparateIndependentMof(feat_list_2)
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2,
              q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    m2.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3 (inefficient): an identical feature is used P times
    # and treated as a separate feature.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array([np.tril(np.random.randn(Data.M, Data.M))
                         for _ in range(Data.P)])  # P x M x M
    kern_list = [RBF(Data.D, variance=0.5, lengthscales=1.2)
                 for _ in range(Data.P)]
    kernel_3 = mk.SeparateIndependentMok(kern_list)
    feat_list_3 = [InducingPoints(Data.X[:Data.M, ...].copy())
                   for _ in range(Data.P)]
    feature_3 = mf.SeparateIndependentMof(feat_list_3)
    m3 = SVGP(Data.X, Data.Y, kernel_3, Gaussian(), feature_3,
              q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    m3.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
def test_shared_independent_mok(session_tf):
    """
    In this test we use the same kernel and the same inducing features for
    each of the outputs. The outputs are considered to be uncorrelated. This
    is how GPflow handled multiple outputs before the multioutput framework
    was added. We compare three models here:
    1) an inefficient one, where we use a SharedIndependentMok with
       InducingPoints. This combination uses a Kff of size N x P x N x P and
       a Kfu of size N x P x M x P, which is extremely inefficient as most of
       the elements are zero.
    2) the efficient one: SharedIndependentMok with SharedIndependentMof.
       This combination uses the most efficient form of the matrices.
    3) the old, efficient way: using a plain Kernel with InducingPoints.
    Models 2) and 3) follow more or less the same code path.
    """
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1,
              q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([np.tril(np.random.randn(Data.M, Data.M))
                         for _ in range(Data.P)])  # P x M x M
    kernel_2 = RBF(Data.D, variance=0.5, lengthscales=1.2)
    feature_2 = InducingPoints(Data.X[:Data.M, ...].copy())
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2,
              q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([np.tril(np.random.randn(Data.M, Data.M))
                         for _ in range(Data.P)])  # P x M x M
    kernel_3 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_3 = mf.SharedIndependentMof(
        InducingPoints(Data.X[:Data.M, ...].copy()))
    m3 = SVGP(Data.X, Data.Y, kernel_3, Gaussian(), feature_3,
              q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
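# For reference, a minimal sketch (an assumption, not the original helper) of
# what check_equality_predictions verifies: all models parameterize the same
# posterior, so their predictions should agree after optimization. GPflow 1.x
# autoflow methods such as predict_f accept NumPy arrays directly.
def check_equality_predictions_sketch(models, Xnew, atol=1e-4):
    mean_0, var_0 = models[0].predict_f(Xnew)
    for m in models[1:]:
        mean, var = m.predict_f(Xnew)
        assert np.allclose(mean, mean_0, atol=atol)
        assert np.allclose(var, var_0, atol=atol)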
def __init__(self, args, is_training=True):
    # Store the arguments
    self.args = args

    # Input data: a batch of single-channel images
    self.input_data = tf.placeholder(tf.float32, [
        args.batch_size, args.target_image_size[0], args.target_image_size[1], 1
    ])
    self.pred_data = tf.placeholder(
        tf.float32,
        [None, args.target_image_size[0], args.target_image_size[1], 1])

    # Target data: 7-DoF pose (3-D translation + 4-D quaternion rotation)
    self.target_data = tf.placeholder(tf.float32, [args.batch_size, 7])
    self.trans_target, self.rot_target = tf.split(self.target_data, [3, 4], axis=1)

    if args.is_normalization:
        with tf.variable_scope('param'):
            self.norm_mean = tf.Variable(args.norm_mean, dtype=tf.float32,
                                         trainable=False, name="norm_mean")
            self.norm_std = tf.Variable(args.norm_std, dtype=tf.float32,
                                        trainable=False, name="norm_std")
        target_trans, target_rot = tf.split(self.target_data, [3, 4], axis=1)
        target_trans_centered = target_trans - tf.tile(
            tf.reshape(self.norm_mean, [1, 3]), [tf.shape(target_trans)[0], 1])
        target_trans_normed = target_trans_centered / tf.tile(
            tf.reshape(self.norm_std, [1, 3]), [tf.shape(target_trans)[0], 1])
        target_normed = tf.concat([target_trans_normed, target_rot], axis=1)
    else:
        target_normed = self.target_data

    network = networks.catalogue[args.network](args)
    output, trans_feat, rot_feat = network.build(self.input_data, is_training)
    _, rot_pred = tf.split(output, [3, 4], axis=1)
    pose_feat = tf.concat([trans_feat, rot_feat], axis=1)
    trans_target, rot_target = tf.split(target_normed, [3, 4], axis=1)

    f_X = tf.cast(trans_feat, dtype=float_type)
    Y = tf.cast(trans_target, dtype=float_type)

    # Gaussian process for translation regression
    with tf.variable_scope('gp'):
        kernel = mk.SharedIndependentMok(
            gpflow.kernels.RBF(args.feat_dim, ARD=False, name="rbf_ard"),
            args.output_dim)
        # kernel = mk.SeparateIndependentMok(
        #     [gpflow.kernels.RBF(128, ARD=True, name="rbf_ard" + str(i))
        #      for i in range(3)])
        q_mu = np.zeros((args.batch_size, args.output_dim)).reshape(
            args.batch_size * args.output_dim, 1)
        q_sqrt = np.eye(args.batch_size * args.output_dim).reshape(
            1, args.batch_size * args.output_dim,
            args.batch_size * args.output_dim)
        self.gp_model = gpflow.models.SVGP(
            X=f_X, Y=Y, kern=kernel,
            likelihood=gpflow.likelihoods.Gaussian(name="lik"),
            Z=np.zeros((args.batch_size, args.feat_dim)),
            q_mu=q_mu, q_sqrt=q_sqrt, name="svgp")

    if is_training:
        with tf.variable_scope('adam'):
            cnn_tvars = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES,
                scope='vggnet_localization/regressor')
            gp_tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='gp')
            # Learning rate
            self.lr = tf.Variable(args.learning_rate, trainable=False,
                                  name="learning_rate")
            # Global step
            self.global_step = tf.Variable(0, trainable=False,
                                           name="global_step")
            self.lamda_weights = tf.Variable(args.lamda_weights,
                                             trainable=False,
                                             name="category_weights",
                                             dtype=float_type)

            self.trans_loss = -self.gp_model.likelihood_tensor / args.batch_size

            # Rotation loss: q and -q encode the same rotation, so take the
            # smaller of the two squared errors.
            rot_loss_1 = tf.reduce_mean(tf.square(rot_pred - rot_target), axis=1)
            rot_loss_2 = tf.reduce_mean(tf.square(rot_pred + rot_target), axis=1)
            tmp = tf.stack([rot_loss_1, rot_loss_2], axis=1)
            tmp = tf.reduce_min(tmp, axis=1)
            self.rot_loss = tf.cast(tf.reduce_mean(tmp), dtype=float_type)

            self.total_loss = self.trans_loss + self.lamda_weights * self.rot_loss

            gp_optimizer = tf.train.AdamOptimizer(self.lr)
            gp_grad_vars = gp_optimizer.compute_gradients(
                loss=self.total_loss, var_list=gp_tvars)
            cnn_optimizer = tf.train.AdamOptimizer(self.lr * 0.1)
            cnn_grad_vars = cnn_optimizer.compute_gradients(
                loss=self.total_loss, var_list=cnn_tvars)
            self.train_op = tf.group(
                gp_optimizer.apply_gradients(gp_grad_vars,
                                             global_step=self.global_step),
                cnn_optimizer.apply_gradients(cnn_grad_vars))
    else:
        pred_output, pred_trans_feat, pred_rot_feat = network.build(
            self.pred_data, is_training)
        pred_feat = tf.concat([pred_trans_feat, pred_rot_feat], axis=1)

        c_mean, c_var = self.gp_model._build_predict(
            tf.cast(pred_trans_feat, dtype=float_type),
            full_cov=False, full_output_cov=False)
        y_mean, y_var = self.gp_model.likelihood.predict_mean_and_var(c_mean, c_var)
        trans_pred = tf.cast(y_mean, dtype=tf.float32)
        _, rot_pred = tf.split(pred_output, [3, 4], axis=1)

        dist = tfd.Normal(loc=tf.reshape(c_mean, [1, 3]),
                          scale=tf.reshape(c_var * 1000., [1, 3]))
        samples = tf.cast(tf.reshape(dist.sample([100]), [100, 3]),
                          dtype=tf.float32)

        self.distribution_mean = tf.cast(c_mean, dtype=tf.float32)
        self.distribution_cov = tf.cast(c_var, dtype=tf.float32)

        if args.is_normalization:
            target_trans_unscaled = trans_pred * tf.tile(
                tf.reshape(self.norm_std, [1, 3]), [tf.shape(trans_pred)[0], 1])
            target_trans_uncentered = target_trans_unscaled + tf.tile(
                tf.reshape(self.norm_mean, [1, 3]), [tf.shape(trans_pred)[0], 1])
            samples_unscaled = samples * tf.tile(
                tf.reshape(self.norm_std, [1, 3]), [tf.shape(samples)[0], 1])
            samples_uncentered = samples_unscaled + tf.tile(
                tf.reshape(self.norm_mean, [1, 3]), [tf.shape(samples)[0], 1])
            self.samples = samples_uncentered
            self.trans_prediction = target_trans_uncentered
            self.rot_prediction = rot_pred
        else:
            self.trans_prediction = trans_pred
            self.rot_prediction = rot_pred
            self.samples = samples
x = x[:, :, :G]
y = y[:, :, :G]

mask = ~np.isnan(y)
Y = y[mask][:, None]
X = x[mask][:, None]
weight_idx = np.tile(np.arange(N * G).reshape(N, G)[None], (T, 1, 1))[mask]

# gp objects
if global_trajectories:
    num_clusters = K * L + L
else:
    num_clusters = K * L

kernel = mk.SharedIndependentMok(gpflow.kernels.RBF(1), num_clusters)
feature = mf.SharedIndependentMof(
    gpflow.features.InducingPoints(
        np.arange(T).astype(np.float64).reshape(-1, 1)))
likelihood = gpflow.likelihoods.Gaussian()

# model -- for hyperparameter learning
with gpflow.defer_build():
    m = MixtureSVGP(X, Y, weight_idx,
                    kern=kernel,
                    num_latent=num_clusters,
                    num_data=X.shape[0],
                    likelihood=likelihood,
                    feat=feature,
def __init__(self, latent_dim, Y, inputs=None, emissions=None,
             px1_mu=None, px1_cov=None,
             kern=None, Z=None, n_ind_pts=100, mean_fn=None,
             Q_diag=None, Umu=None, Ucov_chol=None,
             qx1_mu=None, qx1_cov=None, As=None, bs=None, Ss=None,
             n_samples=100, seed=None, parallel_iterations=10,
             jitter=gps.numerics.jitter_level, name=None):
    super().__init__(name=name)
    self.latent_dim = latent_dim
    self.T, self.obs_dim = Y.shape
    self.Y = Param(Y, trainable=False)

    self.inputs = None if inputs is None else Param(inputs, trainable=False)
    self.input_dim = 0 if self.inputs is None else self.inputs.shape[1]

    self.qx1_mu = Param(np.zeros(self.latent_dim) if qx1_mu is None else qx1_mu)
    self.qx1_cov_chol = Param(
        np.eye(self.latent_dim) if qx1_cov is None
        else np.linalg.cholesky(qx1_cov),
        transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

    self.As = Param(np.ones((self.T - 1, self.latent_dim)) if As is None else As)
    self.bs = Param(np.zeros((self.T - 1, self.latent_dim)) if bs is None else bs)

    self.Q_sqrt = Param(np.ones(self.latent_dim) if Q_diag is None else Q_diag ** 0.5,
                        transform=gtf.positive)

    if Ss is False:
        self.S_chols = None
    else:
        self.S_chols = Param(
            np.tile(self.Q_sqrt.value.copy()[None, ...], [self.T - 1, 1])
            if Ss is None else
            (np.sqrt(Ss) if Ss.ndim == 2 else np.linalg.cholesky(Ss)),
            transform=gtf.positive if (Ss is None or Ss.ndim == 2)
            else gtf.LowerTriangular(self.latent_dim,
                                     num_matrices=self.T - 1,
                                     squeeze=False))

    self.emissions = emissions or GaussianEmissions(
        latent_dim=self.latent_dim, obs_dim=self.obs_dim)

    self.px1_mu = Param(np.zeros(self.latent_dim) if px1_mu is None else px1_mu,
                        trainable=False)
    self.px1_cov_chol = None if px1_cov is None else \
        Param(np.sqrt(px1_cov) if px1_cov.ndim == 1
              else np.linalg.cholesky(px1_cov),
              trainable=False,
              transform=gtf.positive if px1_cov.ndim == 1
              else gtf.LowerTriangular(self.latent_dim, squeeze=True))

    self.n_samples = n_samples
    self.seed = seed
    self.parallel_iterations = parallel_iterations
    self.jitter = jitter

    # Inference-specific attributes (see gpssm_models.py for appropriate choices):
    nans = tf.constant(np.zeros((self.T, self.n_samples, self.latent_dim)) * np.nan,
                       dtype=gps.float_type)
    self.sample_fn = lambda **kwargs: (nans, None)
    self.sample_kwargs = {}
    self.KL_fn = lambda *fs: tf.constant(np.nan, dtype=gps.float_type)

    # GP transitions:
    self.n_ind_pts = n_ind_pts if Z is None else \
        (Z[0].shape[-2] if isinstance(Z, list) else Z.shape[-2])

    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        Z_list = [np.random.randn(self.n_ind_pts, self.latent_dim + self.input_dim)
                  for _ in range(self.latent_dim)] if Z is None else [z for z in Z]
        self.Z = mf.SeparateIndependentMof(
            [gp.features.InducingPoints(z) for z in Z_list])

    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.latent_dim)
    else:
        kern_list = kern or [gp.kernels.Matern32(self.latent_dim + self.input_dim, ARD=True)
                             for _ in range(self.latent_dim)]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.latent_dim)
    self.Umu = Param(np.zeros((self.latent_dim, self.n_ind_pts))
                     if Umu is None else Umu)  # (Lm^-1)(Umu - m(Z))
    LT_transform = gtf.LowerTriangular(self.n_ind_pts,
                                       num_matrices=self.latent_dim,
                                       squeeze=False)
    self.Ucov_chol = Param(
        np.tile(np.eye(self.n_ind_pts)[None, ...], [self.latent_dim, 1, 1])
        if Ucov_chol is None else Ucov_chol,
        transform=LT_transform)  # (Lm^-1)Lu
    self._Kzz = None
def shared_independent(self):
    return mk.SharedIndependentMok(make_kernel(), Datum.P)
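# A usage sketch (assumptions: make_kernel returns a single-output GPflow
# kernel and Datum.P is the number of outputs; neither is shown here).
# SharedIndependentMok reuses one kernel for all P independent outputs, so it
# pairs naturally with a SharedIndependentMof wrapping a single feature:
#
#     kernel = mk.SharedIndependentMok(make_kernel(), Datum.P)
#     feature = mf.SharedIndependentMof(InducingPoints(Z))  # Z: M x D inducing inputs
#     model = SVGP(X, Y, kernel, Gaussian(), feat=feature)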
def __init__(self, args, is_training=True):
    # Store the arguments
    self.args = args

    with tf.variable_scope("param"):
        self.norm_mean = tf.Variable(args.norm_mean, dtype=tf.float32,
                                     trainable=False, name="norm_mean")
        self.norm_std = tf.Variable(args.norm_std, dtype=tf.float32,
                                    trainable=False, name="norm_std")

    if not is_training:
        batch_size = 1
    else:
        batch_size = args.batch_size

    # Input data at time <t-1>: a batch of single-channel images
    self.input_data = tf.placeholder(tf.float32, [
        batch_size, args.target_image_size[0], args.target_image_size[1], 1
    ])
    # Target data: 7-DoF pose (3-D translation + 4-D quaternion rotation)
    self.target_data = tf.placeholder(tf.float32, [batch_size, 7])
    target_data_normed = self.normalize(self.target_data, self.norm_mean,
                                        self.norm_std)

    dense_feat, _ = networks.resnet.resnet_v1_50(self.input_data,
                                                 global_pool=False,
                                                 num_classes=None,
                                                 is_training=is_training,
                                                 reuse=tf.AUTO_REUSE,
                                                 scope="dense_feat")

    # Context stack for global pose learning
    global_context = networks.catalogue[args.network](
        args, name="context_stack_global")
    global_context_feat = global_context.build(dense_feat, is_training,
                                               opt="context")

    # Regressor for global pose learning
    global_regressor = networks.catalogue[args.network](
        args, name="regressor_global")
    global_output, trans_feat, rot_feat = global_regressor.build(
        global_context_feat, is_training, opt="regressor")
    _, rot_pred = tf.split(global_output, [3, 4], axis=1)
    pose_feat = tf.concat([trans_feat, rot_feat], axis=1)
    trans_target, rot_target = tf.split(target_data_normed, [3, 4], axis=1)

    f_X_t = tf.cast(trans_feat, dtype=float_type)
    Y_t = tf.cast(trans_target, dtype=float_type)
    f_X_r = tf.cast(rot_feat, dtype=float_type)
    Y_r = tf.cast(rot_target, dtype=float_type)

    # Gaussian processes for pose regression
    with tf.variable_scope('gp'):
        # GP for translation learning
        kernel_t = mk.SharedIndependentMok(
            gpflow.kernels.RBF(args.feat_dim, ARD=False, name="rbf_ard"),
            args.output_dim)
        q_mu_t = np.zeros((args.batch_size, args.output_dim)).reshape(
            args.batch_size * args.output_dim, 1)
        q_sqrt_t = np.eye(args.batch_size * args.output_dim).reshape(
            1, args.batch_size * args.output_dim,
            args.batch_size * args.output_dim)
        self.gp_model_t = gpflow.models.SVGP(
            X=f_X_t, Y=Y_t, kern=kernel_t,
            likelihood=gpflow.likelihoods.Gaussian(name="lik"),
            Z=np.zeros((args.batch_size, args.feat_dim)),
            q_mu=q_mu_t, q_sqrt=q_sqrt_t, name="svgp")

        # GP for rotation learning
        kernel_r = mk.SharedIndependentMok(
            gpflow.kernels.RBF(args.feat_dim, ARD=False, name="rbf_ard"),
            args.output_dim)
        q_mu_r = np.zeros((args.batch_size, args.output_dim)).reshape(
            args.batch_size * args.output_dim, 1)
        q_sqrt_r = np.eye(args.batch_size * args.output_dim).reshape(
            1, args.batch_size * args.output_dim,
            args.batch_size * args.output_dim)
        self.gp_model_r = gpflow.models.SVGP(
            X=f_X_r, Y=Y_r, kern=kernel_r,
            likelihood=gpflow.likelihoods.Gaussian(name="lik"),
            Z=np.zeros((args.batch_size, args.feat_dim)),
            q_mu=q_mu_r, q_sqrt=q_sqrt_r, name="svgp")

    if is_training:
        with tf.variable_scope('adam'):
            cnn_tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='regressor_global')
            gp_tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='gp')
            # Learning rate
            self.lr = tf.Variable(args.learning_rate, trainable=False,
                                  name="learning_rate")
            # Global step
            self.global_step = tf.Variable(0, trainable=False,
                                           name="global_step")
            self.lamda_weights = tf.Variable(args.lamda_weights,
                                             trainable=False,
                                             name="category_weights",
                                             dtype=float_type)

            # Translation loss
            self.trans_loss = -self.gp_model_t.likelihood_tensor / args.batch_size
            # Rotation loss
            self.rot_loss = -self.gp_model_r.likelihood_tensor / args.batch_size
            self.total_loss = self.trans_loss + self.rot_loss

            gp_optimizer = tf.train.AdamOptimizer(self.lr)
            gp_grad_vars = gp_optimizer.compute_gradients(
                loss=self.total_loss, var_list=gp_tvars)
            cnn_optimizer = tf.train.AdamOptimizer(self.lr * 0.1)
            cnn_grad_vars = cnn_optimizer.compute_gradients(
                loss=self.total_loss, var_list=cnn_tvars)
            self.train_op = tf.group(
                gp_optimizer.apply_gradients(gp_grad_vars,
                                             global_step=self.global_step),
                cnn_optimizer.apply_gradients(cnn_grad_vars))
    else:
        c_mean_t, c_var_t = self.gp_model_t._build_predict(
            tf.cast(trans_feat, dtype=float_type),
            full_cov=False, full_output_cov=False)
        y_mean_t, y_var_t = self.gp_model_t.likelihood.predict_mean_and_var(
            c_mean_t, c_var_t)
        trans_pred = tf.cast(y_mean_t, dtype=tf.float32)

        c_mean_r, c_var_r = self.gp_model_r._build_predict(
            tf.cast(rot_feat, dtype=float_type),
            full_cov=False, full_output_cov=False)
        y_mean_r, y_var_r = self.gp_model_r.likelihood.predict_mean_and_var(
            c_mean_r, c_var_r)
        rot_pred = tf.cast(y_mean_r, dtype=tf.float32)

        dist = tfd.Normal(loc=tf.reshape(c_mean_t, [1, 3]),
                          scale=tf.reshape(c_var_t * 1000., [1, 3]))
        samples = tf.cast(tf.reshape(dist.sample([100]), [100, 3]),
                          dtype=tf.float32)

        self.distribution_mean = tf.cast(c_mean_t, dtype=tf.float32)
        self.distribution_cov = tf.cast(c_var_t, dtype=tf.float32)

        trans_pred_denormed = self.denomalize_navie(trans_pred, self.norm_mean,
                                                    self.norm_std)
        samples_denormed = self.denomalize_navie(samples, self.norm_mean,
                                                 self.norm_std)

        self.trans_prediction = trans_pred_denormed
        self.rot_prediction = rot_pred
        self.samples = samples_denormed