# Imports assumed by the training-loop excerpts below (Variable is from the
# pre-0.4 PyTorch API, which these snippets use):
import numpy as np
import torch
from torch.autograd import Variable

# --- Excerpt 1: per-task update with statistics_Net / generative_Net, showing the
# VAE and uncertainty-net branches; the loss is computed on the query set (X_test, y_test). ---
                y_pred = generative_Net(X_test, statistics)
                loss, KLD = criterion(y_pred, y_test, mu=statistics_mu, logvar=statistics_logvar)
                KLD_total = KLD_total + KLD
            else:
                if is_uncertainty_net:
                    statistics_mu, statistics_logvar = statistics_Net(torch.cat([X_train, y_train], 1))
                    y_pred = generative_Net(X_test, statistics_mu)
                    y_pred_logstd = generative_Net_logstd(X_test, statistics_logvar)
                    loss = criterion(y_pred, y_test, log_std=y_pred_logstd)
                else:
                    statistics = statistics_Net(torch.cat([X_train, y_train], 1))
                    if is_regulated_net:
                        statistics = get_regulated_statistics(generative_Net, statistics)
                    y_pred = generative_Net(X_test, statistics)
                    loss = criterion(y_pred, y_test)
            # Add the regularization term, then backprop and update:
            reg = get_reg(reg_dict, statistics_Net=statistics_Net, generative_Net=generative_Net, is_cuda=is_cuda)
            loss = loss + reg
            loss.backward(retain_graph=True)
            optimizer.step()

    # Perform gradient on the KL-divergence (accumulated over the task batch):
    if is_VAE:
        KLD_total = KLD_total / batch_size_task
        optimizer.zero_grad()
        KLD_total.backward()
        optimizer.step()
        record_data(data_record, [KLD_total], ["KLD_total"])
elif optim_mode == "sum":
    # "sum" mode: accumulate the losses of all chosen tasks, then take a single step.
    optimizer.zero_grad()
    loss_total = Variable(torch.FloatTensor([0]), requires_grad=False)
    if is_cuda:
        loss_total = loss_total.cuda()
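# The VAE branch above assumes a criterion that, when given mu/logvar, returns both
# the prediction loss and the KL divergence of the inferred task statistics. Below is
# a minimal sketch of such a criterion; the name VAECriterion and the MSE
# reconstruction term are illustrative assumptions, not necessarily this codebase's
# implementation.
import torch
import torch.nn as nn

class VAECriterion(nn.Module):
    """Prediction loss plus KL divergence against a standard-normal prior."""
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss(reduction="sum")

    def forward(self, y_pred, y_target, mu=None, logvar=None):
        loss = self.mse(y_pred, y_target)   # reconstruction / prediction term
        if mu is None or logvar is None:
            return loss
        # KL( N(mu, exp(logvar)) || N(0, 1) ), summed over latent dimensions:
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return loss, KLD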
# --- Excerpt 2: full training loop for a single net, with "indi" (one update per task)
# and "sum" (losses accumulated across tasks) optimization modes. ---
# Training:
for i in range(num_iter + 1):
    # Sample a batch of tasks for this iteration:
    chosen_task_keys = np.random.choice(list(tasks_train.keys()), batch_size_task, replace=False).tolist()
    if optim_mode == "indi":
        # "indi" mode: take optimizer steps for each chosen task individually.
        for task_key, task in tasks_train.items():
            if task_key not in chosen_task_keys:
                continue
            ((X_train, y_train), (X_test, y_test)), _ = task
            for k in range(num_backwards):
                optimizer.zero_grad()
                y_pred = net(X_train)
                loss = criterion(y_pred, y_train)
                reg = get_reg(reg_dict, net=net, is_cuda=is_cuda)
                loss = loss + reg
                loss.backward(retain_graph=True)
                optimizer.step()
    elif optim_mode == "sum":
        # "sum" mode: accumulate the losses of all chosen tasks, then take a single step.
        optimizer.zero_grad()
        loss_total = Variable(torch.FloatTensor([0]), requires_grad=False)
        if is_cuda:
            loss_total = loss_total.cuda()
        for task_key, task in tasks_train.items():
            if task_key not in chosen_task_keys:
                continue
            ((X_train, y_train), (X_test, y_test)), _ = task
            y_pred = net(X_train)
            loss = criterion(y_pred, y_train)
            reg = get_reg(reg_dict, net=net, is_cuda=is_cuda)
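# get_reg(reg_dict, ...) above is assumed to turn reg_dict into a scalar penalty over
# the parameters of the networks it is given. Below is a minimal sketch under the
# assumption that reg_dict maps a penalty type ("L1" or "L2") to a strength; the actual
# keys and behavior of the repo's get_reg may differ.
import torch

def get_reg_sketch(reg_dict, is_cuda=False, **nets):
    reg = torch.zeros(1)
    if is_cuda:
        reg = reg.cuda()
    for net in nets.values():   # e.g. net=..., statistics_Net=..., generative_Net=...
        if net is None:
            continue
        for param in net.parameters():
            if "L1" in reg_dict:
                reg = reg + reg_dict["L1"] * param.abs().sum()
            if "L2" in reg_dict:
                reg = reg + reg_dict["L2"] * param.pow(2).sum()
    return reg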
# --- Excerpt 3: variant where the loss is computed on the support set (X_train, y_train). ---
            if is_uncertainty_net:
                statistics_mu, statistics_logvar = statistics_Net(torch.cat([X_train, y_train], 1))
                y_pred = generative_Net(X_train, statistics_mu)
                y_pred_logstd = generative_Net_logstd(X_train, statistics_logvar)
                loss = criterion(y_pred, y_train, log_std=y_pred_logstd)
            else:
                statistics = statistics_Net(torch.cat([X_train, y_train], 1))
                y_pred = generative_Net(X_train, statistics)
                loss = criterion(y_pred, y_train)
            reg = get_reg(reg_dict, statistics_Net=statistics_Net, generative_Net=generative_Net)
            loss = loss + reg
            loss.backward(retain_graph=True)
            optimizer.step()

    # Perform gradient on the KL-divergence:
    if is_VAE:
        KLD_total = KLD_total / batch_size_task
        optimizer.zero_grad()
        KLD_total.backward()
        optimizer.step()
        record_data(data_record, [KLD_total], ["KLD_total"])
elif optim_mode == "sum":
    optimizer.zero_grad()
    loss_total = Variable(torch.FloatTensor([0]), requires_grad=False)
    for task_key, task in tasks_train.items():
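# The uncertainty-net branch passes a predicted log standard deviation to the
# criterion. A natural choice for such a criterion (assumed here for illustration,
# not confirmed by the source) is the Gaussian negative log-likelihood, which falls
# back to a mean-squared error when no log_std is given:
import math
import torch

def gaussian_nll_sketch(y_pred, y_target, log_std=None):
    if log_std is None:
        return ((y_pred - y_target) ** 2).mean()
    # 0.5 * (y - mu)^2 / sigma^2 + log(sigma) + 0.5 * log(2*pi), averaged over elements:
    inv_var = torch.exp(-2 * log_std)
    nll = 0.5 * (y_pred - y_target) ** 2 * inv_var + log_std + 0.5 * math.log(2 * math.pi)
    return nll.mean()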
# --- Excerpt 4: variant with regulated statistics, an optional autoencoder mode,
# and a per-iteration regularization multiplier reg_multiplier[i]. ---
                    statistics_mu, statistics_logvar = statistics_Net(torch.cat([X_train, y_train], 1))
                    y_pred = generative_Net(X_train, statistics_mu)
                    y_pred_logstd = generative_Net_logstd(X_train, statistics_logvar)
                    loss = criterion(y_pred, y_train, log_std=y_pred_logstd)
                else:
                    statistics = statistics_Net(torch.cat([X_train, y_train], 1))
                    if is_regulated_net:
                        statistics = get_regulated_statistics(generative_Net, statistics)
                    if is_autoencoder:
                        # Autoencoder mode: condition the generative net on the statistics,
                        # then obtain forward predictions:
                        generative_Net.set_latent_param(statistics)
                        y_pred = get_forward_pred(generative_Net, X_train, forward_steps)
                        loss = criterion(X_train, y_pred, X_train_obs, y_train_obs, autoencoder, verbose=True)
                    else:
                        y_pred = generative_Net(X_train, statistics)
                        loss = criterion(y_pred, y_train)
            reg = get_reg(reg_dict, statistics_Net=statistics_Net, generative_Net=generative_Net, autoencoder=autoencoder, is_cuda=is_cuda)
            loss = loss + reg * reg_multiplier[i]
            loss.backward(retain_graph=True)
            optimizer.step()

    # Perform gradient on the KL-divergence:
    if is_VAE:
        KLD_total = KLD_total / batch_size_task
        optimizer.zero_grad()
        KLD_total.backward()
        optimizer.step()
        record_data(data_record, [KLD_total], ["KLD_total"])
elif optim_mode == "sum":
    optimizer.zero_grad()
    loss_total = Variable(torch.FloatTensor([0]), requires_grad=False)
    if is_cuda:
        loss_total = loss_total.cuda()
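# record_data(data_record, [KLD_total], ["KLD_total"]) above appears to log values
# under matching keys for later inspection. A minimal sketch assuming data_record is
# a dict of lists (the repo's actual helper may detach tensors or do more):
def record_data_sketch(data_record, values, keys):
    for key, value in zip(keys, values):
        if hasattr(value, "item"):
            value = value.item()   # store plain Python numbers for 1-element tensors
        data_record.setdefault(key, []).append(value)

# Example usage:
# data_record = {}
# record_data_sketch(data_record, [0.37], ["KLD_total"])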