def train(args):
    # set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # init model and optimizer
    if args.verbose:
        print("Training baseline model:" if args.baseline else "Training HNN model:")
    S_net = MLP(int(args.input_dim / 2), 140, int(args.input_dim / 2)**2, args.nonlinearity)
    U_net = MLP(int(args.input_dim / 2), 140, 1, args.nonlinearity)
    model = Lagrangian(int(args.input_dim / 2), S_net, U_net, dt=1e-3)
    num_parm = get_model_parm_nums(model)
    print('model contains {} parameters'.format(num_parm))
    optim = torch.optim.Adam(model.parameters(), args.learn_rate, weight_decay=1e-4)

    # arrange data
    data = get_lag_dataset(seed=args.seed)
    x = torch.tensor(data['x'], requires_grad=False, dtype=torch.float32)
    # append zero control
    u = torch.zeros_like(x[:, 0]).unsqueeze(-1)
    x = torch.cat((x, u), -1)

    test_x = torch.tensor(data['test_x'], requires_grad=False, dtype=torch.float32)
    # append zero control, sized to the test set rather than reusing u from the train set
    test_u = torch.zeros_like(test_x[:, 0]).unsqueeze(-1)
    test_x = torch.cat((test_x, test_u), -1)

    dxdt = torch.Tensor(data['dx'])
    test_dxdt = torch.Tensor(data['test_dx'])

    # vanilla train loop
    stats = {'train_loss': [], 'test_loss': []}
    for step in range(args.total_steps + 1):
        # train step
        dq, dp, du = model.time_derivative(x).split(1, 1)
        dxdt_hat = torch.cat((dq, dp), -1)
        loss = L2_loss(dxdt, dxdt_hat)
        loss.backward()
        optim.step()
        optim.zero_grad()

        # run test data
        dq_test, dp_test, du_test = model.time_derivative(test_x).split(1, 1)
        test_dxdt_hat = torch.cat((dq_test, dp_test), -1)
        test_loss = L2_loss(test_dxdt, test_dxdt_hat)

        # logging
        stats['train_loss'].append(loss.item())
        stats['test_loss'].append(test_loss.item())
        if args.verbose and step % args.print_every == 0:
            print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(step, loss.item(), test_loss.item()))

    # final per-sample squared errors and their standard errors
    train_dq, train_dp, train_du = model.time_derivative(x).split(1, 1)
    train_dxdt_hat = torch.cat((train_dq, train_dp), -1)
    train_dist = (dxdt - train_dxdt_hat)**2
    test_dq, test_dp, test_du = model.time_derivative(test_x).split(1, 1)
    test_dxdt_hat = torch.cat((test_dq, test_dp), -1)
    test_dist = (test_dxdt - test_dxdt_hat)**2
    print('Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
          .format(train_dist.mean().item(), train_dist.std().item() / np.sqrt(train_dist.shape[0]),
                  test_dist.mean().item(), test_dist.std().item() / np.sqrt(test_dist.shape[0])))

    return model, stats
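
# A minimal sketch of the supervision used above: L2_loss is defined elsewhere in the
# repository, so the helper below (illustrative name, not the repo's) is an assumption
# showing the mean-squared-error form consistent with how the training loops call it.
def _l2_loss_sketch(u, v):
    # elementwise squared error between target and predicted time derivatives,
    # averaged over the whole batch
    return (u - v).pow(2).mean()
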
def train(args):
    # set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')

    # init model and optimizer
    if args.conv:
        autoencoder = ConvAutoencoder().to(device)
    else:
        autoencoder = MLP_VAE(args.input_dim, args.hidden_dim, args.latent_dim,
                              nonlinearity='relu').to(device)
    model = PixelSymODEN_R(int(args.latent_dim / 2), autoencoder=autoencoder,
                           nonlinearity=args.nonlinearity, dt=1e-3, device=device)

    if args.verbose:
        print("Training baseline model:" if args.baseline else "Training HNN model:")
    num_parm = get_model_parm_nums(model)
    print('model contains {} parameters'.format(num_parm))
    optim = torch.optim.Adam(model.parameters(), args.learn_rate, weight_decay=1e-5)

    # get dataset
    data = get_dataset('pendulum', args.save_dir, verbose=True, seed=args.seed)

    x = torch.tensor(data['pixels'], dtype=torch.float32).to(device)
    test_x = torch.tensor(data['test_pixels'], dtype=torch.float32).to(device)
    next_x = torch.tensor(data['next_pixels'], dtype=torch.float32).to(device)
    test_next_x = torch.tensor(data['test_next_pixels'], dtype=torch.float32).to(device)

    # vanilla ae train loop
    stats = {'train_loss': [], 'test_loss': []}
    for step in tqdm(range(args.total_steps + 1)):
        # train step on a random mini-batch
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        loss = pixelhnn_loss(x[ixs], next_x[ixs], model, device)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if args.verbose and step % args.print_every == 0:
            # run validation
            test_ixs = torch.randperm(test_x.shape[0])[:args.batch_size]
            test_loss = pixelhnn_loss(test_x[test_ixs], test_next_x[test_ixs], model, device)
            stats['test_loss'].append(test_loss.item())
            print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(step, loss.item(), test_loss.item()))

    # The final losses are accumulated through generators (itertools.tee) rather than
    # stacked tensors because the job kept being killed for memory use; the generators
    # seem to keep that from happening.  TODO: clean up.
    train_ind = list(range(0, x.shape[0], args.batch_size))
    train_ind.append(x.shape[0] - 1)
    train_dist1, train_dist2 = tee(
        pixelhnn_loss(x[i].unsqueeze(0), next_x[i].unsqueeze(0), model, device).detach().cpu().numpy()
        for i in train_ind)
    train_avg = sum(train_dist1) / len(train_ind)
    # report a standard error over the subsampled losses, matching the other training scripts
    train_std = np.sqrt(sum((v - train_avg)**2 for v in train_dist2)) / len(train_ind)

    test_ind = list(range(0, test_x.shape[0], args.batch_size))
    test_ind.append(test_x.shape[0] - 1)
    test_dist1, test_dist2 = tee(
        pixelhnn_loss(test_x[i].unsqueeze(0), test_next_x[i].unsqueeze(0), model, device).detach().cpu().numpy()
        for i in test_ind)
    test_avg = sum(test_dist1) / len(test_ind)
    test_std = np.sqrt(sum((v - test_avg)**2 for v in test_dist2)) / len(test_ind)

    print('Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
          .format(train_avg, train_std, test_avg, test_std))

    return model, stats
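
# Illustrative driver (an assumption, not part of the original script): the pixel trainer
# above reads the attributes below from `args`; every default here is a placeholder.
def _example_pixel_args():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--conv', action='store_true')
    parser.add_argument('--input_dim', type=int, default=784)   # placeholder pixel dimension
    parser.add_argument('--hidden_dim', type=int, default=200)
    parser.add_argument('--latent_dim', type=int, default=2)
    parser.add_argument('--nonlinearity', type=str, default='tanh')
    parser.add_argument('--learn_rate', type=float, default=1e-3)
    parser.add_argument('--total_steps', type=int, default=2000)
    parser.add_argument('--batch_size', type=int, default=200)
    parser.add_argument('--print_every', type=int, default=200)
    parser.add_argument('--save_dir', type=str, default='.')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--baseline', action='store_true')
    return parser.parse_args([])  # parse defaults; pass sys.argv[1:] in a real run

# model, stats = train(_example_pixel_args())
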
def train(args):
    device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')

    # reproducibility: set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # init model and optimizer
    if args.verbose:
        print("Start training with num of points = {} and solver {}.".format(args.num_points, args.solver))

    if args.baseline and not args.structure:
        nn_model = MLP(args.input_dim, 600, args.input_dim, args.nonlinearity)
        model = SymODEN_R(args.input_dim, H_net=nn_model, device=device, baseline=True)
    elif not args.baseline and not args.structure:
        H_net = MLP(args.input_dim, 400, 1, args.nonlinearity)
        g_net = MLP(int(args.input_dim / 2), 200, int(args.input_dim / 2))
        model = SymODEN_R(args.input_dim, H_net=H_net, g_net=g_net, device=device, baseline=False)
    elif not args.baseline and args.structure:
        M_net = MLP(int(args.input_dim / 2), 300, int(args.input_dim / 2))
        V_net = MLP(int(args.input_dim / 2), 50, 1)
        g_net = MLP(int(args.input_dim / 2), 200, int(args.input_dim / 2))
        model = SymODEN_R(args.input_dim, M_net=M_net, V_net=V_net, g_net=g_net,
                          device=device, baseline=False, structure=True)
    else:
        raise RuntimeError('arguments *baseline* and *structure* cannot both be true')

    num_parm = get_model_parm_nums(model)
    print('model contains {} parameters'.format(num_parm))
    optim = torch.optim.Adam(model.parameters(), args.learn_rate, weight_decay=1e-4)

    # modified to use the hnn data pipeline
    data = get_dataset(seed=args.seed)

    x = torch.tensor(data['x'], requires_grad=True, dtype=torch.float32)  # [1125, 2], Bx2
    # append zero control
    u = torch.zeros_like(x[:, 0]).unsqueeze(-1)
    x = torch.cat((x, u), -1)

    test_x = torch.tensor(data['test_x'], requires_grad=True, dtype=torch.float32)
    # append zero control, sized to the test set rather than reusing u from the train set
    test_u = torch.zeros_like(test_x[:, 0]).unsqueeze(-1)
    test_x = torch.cat((test_x, test_u), -1)

    dxdt = torch.Tensor(data['dx'])  # [1125, 2], Bx2
    test_dxdt = torch.Tensor(data['test_dx'])

    # training loop
    stats = {'train_loss': [], 'test_loss': []}
    for step in range(args.total_steps + 1):
        # train step, modified to match the hnn loop
        dq, dp, du = model.time_derivative(x).split(1, 1)
        dxdt_hat = torch.cat((dq, dp), -1)
        loss = L2_loss(dxdt, dxdt_hat)
        loss.backward()
        optim.step()
        optim.zero_grad()

        # run test data
        dq_test, dp_test, du_test = model.time_derivative(test_x).split(1, 1)
        test_dxdt_hat = torch.cat((dq_test, dp_test), -1)
        test_loss = L2_loss(test_dxdt, test_dxdt_hat)

        # logging
        stats['train_loss'].append(loss.item())
        stats['test_loss'].append(test_loss.item())
        if args.verbose and step % args.print_every == 0:
            print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(step, loss.item(), test_loss.item()))

    # final per-sample squared errors and their standard errors
    train_dq, train_dp, train_du = model.time_derivative(x).split(1, 1)
    train_dxdt_hat = torch.cat((train_dq, train_dp), -1)
    train_dist = (dxdt - train_dxdt_hat)**2
    test_dq, test_dp, test_du = model.time_derivative(test_x).split(1, 1)
    test_dxdt_hat = torch.cat((test_dq, test_dp), -1)
    test_dist = (test_dxdt - test_dxdt_hat)**2
    print('Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
          .format(train_dist.mean().item(), train_dist.std().item() / np.sqrt(train_dist.shape[0]),
                  test_dist.mean().item(), test_dist.std().item() / np.sqrt(test_dist.shape[0])))

    return model, stats
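
# Optional rollout sketch (an assumption, not part of the original script): integrate the
# learned vector field with a plain Euler step. It mirrors the loop above in assuming that
# model.time_derivative takes a [q, p, u] batch and returns (dq, dp, du) columns.
def _euler_rollout(model, q0, p0, steps=200, dt=1e-3):
    q = torch.tensor([[q0]], dtype=torch.float32)
    p = torch.tensor([[p0]], dtype=torch.float32)
    traj = []
    for _ in range(steps):
        # zero control, matching how the training data are augmented above
        xu = torch.cat((q, p, torch.zeros_like(q)), -1).requires_grad_(True)
        dq, dp, _ = model.time_derivative(xu).split(1, 1)
        q = (q + dt * dq).detach()
        p = (p + dt * dp).detach()
        traj.append((q.item(), p.item()))
    return traj
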