def initialize_variances_no_Z(self, F, G):
    """Initialize the FF and GG covariance estimates from a single batch.

    Both matrices are passed through ``symmetric_average`` afterwards to
    cancel any numerical asymmetry introduced by the matrix products.
    """
    # Scale factor: presumably rescales the batch outer-product to the
    # full training-set size -- TODO confirm against self.train_size usage.
    scale = float(self.train_size) / F.size()[0]
    self.FF = symmetric_average(t(F).mm(F).mul(scale))
    self.GG = symmetric_average(t(G).mm(G).mul(scale))
def update_variances_no_Z(self, F, G):
    """Exponential-moving-average update of the FF and GG covariances.

    The previous estimates are kept with weight ``rho`` and the current
    batch's (rescaled) outer products enter with weight ``1 - rho``; the
    results are symmetrized afterwards.
    """
    rho = self.rho
    batch_weight = (1 - rho) * float(self.train_size) / F.size()[0]
    batch_FF = t(F).mm(F).mul(batch_weight)
    batch_GG = t(G).mm(G).mul(batch_weight)
    # The old estimates are detached so gradients flow only through the
    # current batch's contribution.
    self.FF = symmetric_average(add(self.FF.mul(rho).detach(), batch_FF))
    self.GG = symmetric_average(add(self.GG.mul(rho).detach(), batch_GG))
def update_conditional_variables(self, F, G, Z):
    """Compute the Z-conditional variables: F_Z, G_Z, FF_Z, GG_Z.

    Residualizes F and G on Z (subtracting the linear-regression means
    mu_{F|Z}, mu_{G|Z}) and forms the conditional covariances
    Sigma_{FF|Z} and Sigma_{GG|Z}. All symmetric matrices are passed
    through ``symmetric_average`` to suppress numerical asymmetry.
    """
    # Regularized inverse of Sigma_ZZ, symmetrized.
    self.ZZ_inverse = compute_mat_pow(self.ZZ, -1, self.epsilon)
    self.ZZ_inverse = symmetric_average(self.ZZ_inverse)
    self.ZZ_inverse_mul_ZF = self.ZZ_inverse.mm(self.ZF)
    self.ZZ_inverse_mul_ZG = self.ZZ_inverse.mm(self.ZG)
    # Conditional means mu_{F|Z} = Z Sigma_ZZ^{-1} Sigma_ZF (and G analog).
    self.mu_F_Z = Z.mm(self.ZZ_inverse_mul_ZF)
    self.mu_G_Z = Z.mm(self.ZZ_inverse_mul_ZG)
    self.F_Z = F.sub(self.mu_F_Z)  # F given Z
    self.G_Z = G.sub(self.mu_G_Z)  # G given Z
    # Mean outer products Sigma_FZ Sigma_ZZ^{-1} Sigma_ZF (and G analog).
    self.mu_F_Z_mu_F_Z = t(self.ZF).mm(self.ZZ_inverse_mul_ZF)
    self.mu_G_Z_mu_G_Z = t(self.ZG).mm(self.ZZ_inverse_mul_ZG)
    # BUG FIX: the original assigned the symmetrized F-side matrix to a
    # misspelled attribute (mu_F_z_mu_F_Z, lowercase 'z'), so the value
    # actually subtracted into FF_Z below was never symmetrized, unlike
    # the G-side. Assign back to the correct attribute.
    self.mu_F_Z_mu_F_Z = symmetric_average(self.mu_F_Z_mu_F_Z)
    self.mu_G_Z_mu_G_Z = symmetric_average(self.mu_G_Z_mu_G_Z)
    self.FF_Z = (self.FF).sub(self.mu_F_Z_mu_F_Z)  # Sigma_FF given Z
    self.GG_Z = (self.GG).sub(self.mu_G_Z_mu_G_Z)  # Sigma_GG given Z
    self.FF_Z = symmetric_average(self.FF_Z)  # Ensuring matrix is symmetric
    self.GG_Z = symmetric_average(self.GG_Z)  # Ensuring matrix is symmetric
'train', cfg.feats, cfg.batch_size_train, train_mode=True): # Forward pass F_train = model_F(Variable(from_numpy(batch[0]))) G_train = model_G(Variable(from_numpy(batch[1]))) Z_train = model_Z(Variable(from_numpy(batch[2]))) # Updating co-variances cfg.update_variances(F_train, G_train, Z_train) # Computing conditional variables and co-variances cfg.update_conditional_variables(F_train, G_train, Z_train) # Computing right side of the loss FF_Z_inv_half = compute_mat_pow(cfg.FF_Z, -0.5, cfg.epsilon) GG_Z_inv_half = compute_mat_pow(cfg.GG_Z, -0.5, cfg.epsilon) FF_Z_inv_half = symmetric_average(FF_Z_inv_half) GG_Z_inv_half = symmetric_average(GG_Z_inv_half) # Fixing right side of the loss F_pred = (cfg.F_Z).mm(FF_Z_inv_half).detach() G_pred = (cfg.G_Z).mm(GG_Z_inv_half).detach() # Computing loss loss_F = loss_function(cfg.F_Z, G_pred) loss_G = loss_function(cfg.G_Z, F_pred) # Checking for nan's if np.isnan(loss_F.data.numpy()) or np.isnan( loss_G.data.numpy()): raise SystemExit('loss is Nan') # Reseting gradients, performing a backward pass, and updating the weights optimizer_F.zero_grad() loss_F.backward(retain_graph=True) optimizer_F.step()