def train_hnn(args):
    """Train a PixelHNN on the 'pendulum' pixel dataset.

    Args:
        args: Namespace-like object. Attributes read here: ``seed``,
            ``input_dim``, ``hidden_dim``, ``latent_dim``, ``nonlinearity``,
            ``learn_rate``, ``save_dir``, ``total_steps``, ``batch_size``,
            ``verbose`` and ``print_every``.

    Returns:
        tuple: ``(model, stats)``. ``stats['train_loss']`` and
        ``stats['test_loss']`` each receive one ``[mean, std]`` pair per
        step, taken from ``model.get_l2_loss`` on the full train/test data.
    """
    # Seed both RNGs before any random work (model init, batch sampling).
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Model: an MLP autoencoder wrapped by the pixel HNN (never the baseline).
    ae = MLPAutoencoder(args.input_dim, args.hidden_dim, args.latent_dim,
                        nonlinearity='relu')
    model = PixelHNN(args.latent_dim, args.hidden_dim, autoencoder=ae,
                     nonlinearity=args.nonlinearity, baseline=False)
    n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("HNN has {} paramerters in total".format(n_trainable))
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=1e-5)

    # Dataset: current/next frames for the train and test splits.
    data = get_dataset('pendulum', args.save_dir, verbose=True, seed=args.seed)

    def as_float_tensor(key):
        return torch.tensor(data[key], dtype=torch.float32)

    x = as_float_tensor('pixels')
    test_x = as_float_tensor('test_pixels')
    next_x = as_float_tensor('next_pixels')
    test_next_x = as_float_tensor('test_next_pixels')

    stats = {'train_loss': [], 'test_loss': []}
    with tqdm(total=args.total_steps) as progress:
        for step in range(args.total_steps):
            # One optimizer step on a random mini-batch.
            batch = torch.randperm(x.shape[0])[:args.batch_size]
            loss = pixelhnn_loss(x[batch], next_x[batch], model)
            loss.backward()
            optim.step()
            optim.zero_grad()

            # Full-dataset L2 losses are logged after every step.
            full_train = model.get_l2_loss(x, next_x).cpu().numpy()
            full_test = model.get_l2_loss(test_x, test_next_x).cpu().numpy()
            stats['train_loss'].append([full_train.mean(), full_train.std()])
            stats['test_loss'].append([full_test.mean(), full_test.std()])
            progress.set_postfix(
                train_loss='{:.9f}'.format(full_train.mean()),
                test_loss='{:.9f}'.format(full_test.mean()))

            if args.verbose and step % args.print_every == 0:
                # Validation on a random test mini-batch, for the console only.
                val_batch = torch.randperm(test_x.shape[0])[:args.batch_size]
                val_loss = pixelhnn_loss(test_x[val_batch],
                                         test_next_x[val_batch], model)
                print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(
                    step, loss.item(), val_loss.item()))
            progress.update()

    # Final summary: mean loss +/- standard error of the mean.
    train_dist = pixelhnn_loss(x, next_x, model, return_scalar=False)
    test_dist = pixelhnn_loss(test_x, test_next_x, model, return_scalar=False)

    def standard_error(dist):
        return dist.std().item() / np.sqrt(dist.shape[0])

    print('Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'.format(
        train_dist.mean().item(), standard_error(train_dist),
        test_dist.mean().item(), standard_error(test_dist)))
    return model, stats
def train(args):
    """Train an autoencoder-backed PixelHNN on one sub-column of the dataset.

    Args:
        args: Namespace-like object. Attributes read here: ``cpu``, ``seed``,
            ``folder``, ``speed``, ``scaled``, ``experiment_dir``,
            ``sub_columns`` (0-3, selects the signal), ``hidden_dim``,
            ``latent_dim``, ``dropout_rate_ae``, ``dropout_rate``,
            ``nonlinearity``, ``baseline``, ``learn_rate``, ``weight_decay``,
            ``total_steps``, ``batch_size`` and ``print_every``.

    Returns:
        The trained model. Per-step losses are collected locally but are
        not returned.
    """
    use_gpu = torch.cuda.is_available() and not args.cpu
    if not use_gpu:
        device = torch.device("cpu")
        print("Running on the CPU")
    else:
        device = torch.device("cuda:0")
        # New tensors default to CUDA floats from here on.
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")

    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Load the four signals; there is no test split for now.
    angular_velo, acc_1, acc_2, sound = get_dataset_split(
        args.folder, args.speed, scaled=args.scaled,
        experiment_dir=args.experiment_dir)

    # sub_columns index -> [signal, autoencoder input width, short tag]
    sub_col = {
        0: [angular_velo, 1, 'v'],
        1: [acc_1, 3, 'a1'],
        2: [acc_2, 3, 'a2'],
        3: [sound, 1, 's'],
    }
    signal, signal_dim, signal_tag = sub_col[args.sub_columns]
    print("Data from {} {}, column: {}".format(args.folder, args.speed, signal_tag))

    # Consecutive samples form the (current, next) training pairs.
    x = torch.tensor(signal[:-1], dtype=torch.float)
    x_next = torch.tensor(signal[1:], dtype=torch.float)

    latent_width = args.latent_dim * 2
    autoencoder = MLPAutoencoder(signal_dim, args.hidden_dim, latent_width,
                                 dropout_rate=args.dropout_rate_ae)
    model = PixelHNN(latent_width, args.hidden_dim, autoencoder=autoencoder,
                     nonlinearity=args.nonlinearity, baseline=args.baseline,
                     dropout_rate=args.dropout_rate)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=args.weight_decay)

    stats = {'train_loss': []}
    n_pairs = x.shape[0]
    for step in range(args.total_steps + 1):
        # One optimizer step on a random mini-batch.
        picked = torch.randperm(n_pairs)[:args.batch_size]
        batch_now = x[picked].to(device)
        batch_next = x_next[picked].to(device)
        loss = hnn_ae_loss(batch_now, batch_next, model)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss.item()))

    # NOTE: the final full-dataset loss summary is intentionally disabled.
    return model
def train(args):
    """Train an autoencoder-backed PixelHNN on the dataset from ``get_dataset``.

    Args:
        args: Namespace-like object. Attributes read here: ``cpu``, ``seed``,
            ``folder``, ``speed``, ``scaled``, ``split_data``,
            ``experiment_dir``, ``input_dim``, ``hidden_dim``, ``latent_dim``,
            ``dropout_rate_ae``, ``dropout_rate``, ``nonlinearity``,
            ``baseline``, ``learn_rate``, ``weight_decay``, ``total_steps``,
            ``batch_size`` and ``print_every``.

    Returns:
        The trained model. Per-step losses are collected locally but are
        not returned.
    """
    use_gpu = torch.cuda.is_available() and not args.cpu
    if not use_gpu:
        device = torch.device("cpu")
        print("Running on the CPU")
    else:
        device = torch.device("cuda:0")
        # New tensors default to CUDA floats from here on.
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")

    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    print("{} {}".format(args.folder, args.speed))
    if args.scaled:
        print("Training scaled model:")
    else:
        print("Training noisy model:")
    print('{} pairs of coords in latent space '.format(args.latent_dim))

    # Load the sequence; there is no test split for now.
    x_m = get_dataset(args.folder, args.speed, scaled=args.scaled,
                      split=args.split_data,
                      experiment_dir=args.experiment_dir)
    # Consecutive samples form the (current, next) training pairs.
    x = torch.tensor(x_m[:-1], dtype=torch.float)
    x_next = torch.tensor(x_m[1:], dtype=torch.float)

    latent_width = args.latent_dim * 2
    autoencoder = MLPAutoencoder(args.input_dim, args.hidden_dim, latent_width,
                                 dropout_rate=args.dropout_rate_ae)
    model = PixelHNN(latent_width, args.hidden_dim, autoencoder=autoencoder,
                     nonlinearity=args.nonlinearity, baseline=args.baseline,
                     dropout_rate=args.dropout_rate)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=args.weight_decay)

    stats = {'train_loss': [], 'test_loss': []}
    n_pairs = x.shape[0]
    for step in range(args.total_steps + 1):
        # One optimizer step on a random mini-batch.
        picked = torch.randperm(n_pairs)[:args.batch_size]
        batch_now = x[picked].to(device)
        batch_next = x_next[picked].to(device)
        loss = hnn_ae_loss(batch_now, batch_next, model)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss.item()))

    # NOTE: the final full-dataset loss summary is intentionally disabled.
    return model
def train(args):
    """Train a PixelSymODEN_R model on the 'cartpole' pixel dataset.

    Args:
        args: Namespace-like object. Attributes read here: ``seed``, ``gpu``
            (-1 selects the CPU), ``input_dim``, ``hidden_dim``,
            ``latent_dim``, ``nonlinearity``, ``verbose``, ``baseline``
            (only used for the banner message), ``learn_rate``,
            ``save_dir``, ``total_steps``, ``batch_size`` and
            ``print_every``.

    Returns:
        tuple: ``(model, stats)``. ``stats['train_loss']`` has one entry per
        step; ``stats['test_loss']`` is only filled on verbose print steps.
    """
    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    if args.gpu == -1:
        device = 'cpu'
    else:
        device = torch.device(
            'cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')

    # Init model and optimizer.
    autoencoder = MLPAutoencoder(args.input_dim, args.hidden_dim,
                                 args.latent_dim, nonlinearity='relu').to(device)
    model = PixelSymODEN_R(int(args.latent_dim / 2), autoencoder=autoencoder,
                           nonlinearity=args.nonlinearity, dt=1e-3,
                           device=device)
    if args.verbose:
        print("Training baseline model:" if args.baseline else "Training HNN model:")
    num_parm = get_model_parm_nums(model)
    print('model contains {} parameters'.format(num_parm))
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=1e-5)

    # Control inputs handed to the dataset generator.
    u = [[0.0, 0.0], [0.0, 1.0], [0.0, -1.0], [0.0, 2.0], [0.0, -2.0],
         [1.0, 0.0], [-1.0, 0.0], [2.0, 0.0], [-2.0, 0.0]]
    data = get_dataset('cartpole', args.save_dir, u, verbose=True,
                       seed=args.seed)
    x = torch.tensor(data['pixels'], dtype=torch.float32).to(device)
    test_x = torch.tensor(data['test_pixels'], dtype=torch.float32).to(device)
    next_x = torch.tensor(data['next_pixels'], dtype=torch.float32).to(device)
    test_next_x = torch.tensor(data['test_next_pixels'], dtype=torch.float32).to(device)
    ctrl = torch.tensor(data['ctrls'], dtype=torch.float32).to(device)
    test_ctrl = torch.tensor(data['test_ctrls'], dtype=torch.float32).to(device)

    stats = {'train_loss': [], 'test_loss': []}
    for step in tqdm(range(args.total_steps + 1)):
        # One optimizer step on a random mini-batch.
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        loss = pixelhnn_loss(x[ixs], next_x[ixs], ctrl[ixs], model, device)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if args.verbose and step % args.print_every == 0:
            # Validation on a random test mini-batch.
            test_ixs = torch.randperm(test_x.shape[0])[:args.batch_size]
            test_loss = pixelhnn_loss(test_x[test_ixs], test_next_x[test_ixs],
                                      test_ctrl[test_ixs], model, device)
            stats['test_loss'].append(test_loss.item())
            print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(
                step, loss.item(), test_loss.item()))

    # The final summary is evaluated one sample at a time on a strided
    # subset, because whole-dataset evaluation was being killed for memory.
    train_avg, train_std = _strided_loss_stats(
        x, next_x, ctrl, model, device, args.batch_size)
    test_avg, test_std = _strided_loss_stats(
        test_x, test_next_x, test_ctrl, model, device, args.batch_size)
    print(
        'Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
        .format(train_avg, train_std, test_avg, test_std))
    return model, stats


def _strided_loss_stats(inputs, next_inputs, controls, model, device, stride):
    """Return ``(mean, std)`` of single-sample losses on a strided subset.

    The loss is evaluated at samples ``0, stride, 2*stride, ...`` plus the
    last sample, and only Python floats are kept. Both statistics are
    normalised by the number of samples actually evaluated, and the spread
    is a population standard deviation. The previous code divided by the
    dataset size and reported a variance.
    """
    n_samples = inputs.shape[0]
    if n_samples == 0:
        return float('nan'), float('nan')
    picks = list(range(0, n_samples, stride))
    if picks[-1] != n_samples - 1:
        # Include the final sample, but never count it twice.
        picks.append(n_samples - 1)
    losses = np.array([
        pixelhnn_loss(inputs[i].unsqueeze(0), next_inputs[i].unsqueeze(0),
                      controls[i].unsqueeze(0), model, device).item()
        for i in picks
    ])
    return losses.mean(), losses.std()
def train(args):
    """Fit an HNN to latent dynamics produced by a saved PixelHNN.

    A saved PixelHNN checkpoint is loaded, the dataset is encoded with its
    autoencoder, and its ``time_derivative`` on those latent coordinates
    becomes the regression target for a fresh ``HNN(2, MLP(...))``.

    Args:
        args: Namespace-like object. Attributes read here: ``cpu``, ``seed``,
            ``folder``, ``speed``, ``scaled``, ``latent_dim``,
            ``input_dim_ae``, ``hidden_dim``, ``nonlinearity``, ``baseline``,
            ``save_dir``, ``ae_path``, ``split_data``, ``experiment_dir``,
            ``input_dim``, ``output_dim``, ``learn_rate``, ``weight_decay``,
            ``total_steps``, ``batch_size`` and ``print_every``.

    Returns:
        The trained HNN. Per-step losses are collected locally but are not
        returned.
    """
    if torch.cuda.is_available() and not args.cpu:
        device = torch.device("cuda:0")
        # New tensors default to CUDA floats from here on.
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")

    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    print("{} {}".format(args.folder, args.speed))
    print("Training scaled model:" if args.scaled else "Training noisy model:")
    print('{} pairs of coords in latent space '.format(args.latent_dim))

    # Rebuild the saved PixelHNN so its autoencoder can pre-encode the data.
    autoencoder = MLPAutoencoder(args.input_dim_ae, args.hidden_dim,
                                 args.latent_dim * 2, nonlinearity='relu')
    full_model = PixelHNN(args.latent_dim * 2, args.hidden_dim,
                          autoencoder=autoencoder,
                          nonlinearity=args.nonlinearity,
                          baseline=args.baseline)
    path = "{}/saved_models/{}.tar".format(args.save_dir, args.ae_path)
    # map_location lets a checkpoint written on a GPU load on a CPU-only
    # host; without it torch.load tries to restore to the saving device.
    full_model.load_state_dict(torch.load(path, map_location=device))
    full_model.eval()
    autoencoder_model = full_model.autoencoder

    # Get the dataset (no test data for now) and encode it.
    data = get_dataset(args.folder, args.speed, scaled=args.scaled,
                       split=args.split_data,
                       experiment_dir=args.experiment_dir, tensor=True)
    gcoords = autoencoder_model.encode(data).cpu().detach().numpy()
    x = torch.tensor(gcoords, dtype=torch.float, requires_grad=True)
    # Targets come from the pretrained model and are detached from its graph.
    dx_np = full_model.time_derivative(
        torch.tensor(gcoords, dtype=torch.float, requires_grad=True)
    ).cpu().detach().numpy()
    dx = torch.tensor(dx_np, dtype=torch.float)

    nnmodel = MLP(args.input_dim, args.hidden_dim, args.output_dim)
    model = HNN(2, nnmodel)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=args.weight_decay)

    stats = {'train_loss': [], 'test_loss': []}
    for step in range(args.total_steps + 1):
        # One optimizer step on a random mini-batch.
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        x_train, dxdt = x[ixs].to(device), dx[ixs].to(device)
        dxdt_hat = model.time_derivative(x_train)
        loss = L2_loss(dxdt, dxdt_hat)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss.item()))

    # NOTE: the final full-dataset loss summary is intentionally disabled.
    return model
def train(args):
    """Train an HNN on the 'statrect' arrays, then save two diagnostic plots.

    Inputs and targets are loaded from ``statrectinputs.npy`` and
    ``statrectoutputs.npy`` in the working directory. Non-finite targets
    are zeroed and both arrays are rescaled with ``give_min_and_dist`` and
    ``scale``. The first 80% of rows are used for training and the
    remaining rows for testing.

    After training, a 3D scatter of the encoding of up to 10000 training
    rows is written to ``lrep.png`` and an orbit from ``get_orbit`` is
    written to ``orbit.png``.

    Args:
        args: Namespace-like object. Attributes read here: ``seed``,
            ``verbose``, ``baseline``, ``input_dim``, ``hidden_dim``,
            ``latent_dim``, ``nonlinearity``, ``field_type``,
            ``learn_rate``, ``total_steps``, ``batch_size`` and
            ``print_every``.

    Returns:
        tuple: ``(model, stats)`` with per-step ``'train_loss'`` and
        ``'test_loss'`` lists.
    """
    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # Init model and optimizer.
    if args.verbose:
        print("Training baseline model:" if args.baseline else "Training HNN model:")
    nn_model = MLPAutoencoder(args.input_dim, args.hidden_dim,
                              args.latent_dim, args.nonlinearity)
    nn_model.to(device)
    model = HNN(args.input_dim, differentiable_model=nn_model,
                field_type=args.field_type, baseline=args.baseline,
                device=device)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=0)

    # Arrange data.
    X = np.load('statrectinputs.npy')
    Y = np.load('statrectoutputs.npy')
    Y[~np.isfinite(Y)] = 0
    xm, xd = give_min_and_dist(X)
    ym, yd = give_min_and_dist(Y)
    X = scale(X, xm, xd)
    Y = scale(Y, ym, yd)

    # Disjoint split. The old test slice X[:-int(0.2*n)] was the first 80%
    # of the rows, i.e. the training set itself.
    n_egs = X.shape[0]
    n_train = int(0.8 * n_egs)

    def to_device_tensor(array):
        # float32 everywhere, so targets match the model's predictions.
        return torch.tensor(array, dtype=torch.float32, device=device)

    x = to_device_tensor(X[:n_train])
    dxdt = to_device_tensor(Y[:n_train])
    test_x = to_device_tensor(X[n_train:])
    test_dxdt = to_device_tensor(Y[n_train:])

    stats = {'train_loss': [], 'test_loss': []}
    for step in range(args.total_steps + 1):
        # Train step. The batch is a fresh tensor that requires grad; the
        # full training set ``x`` is never overwritten, which previously
        # shrank the dataset to the first batch.
        ixs = torch.randperm(x.shape[0], device=device)[:args.batch_size]
        batch_x = x[ixs].requires_grad_(True)
        dxdt_hat = model.time_derivative(batch_x)
        loss = L2_loss(dxdt[ixs], dxdt_hat)
        loss.backward()
        # Flattened gradient snapshot for logging; parameters that got no
        # gradient are skipped.
        grad = torch.cat([p.grad.flatten() for p in model.parameters()
                          if p.grad is not None]).clone()
        optim.step()
        optim.zero_grad()

        # Run test data on a random test mini-batch.
        test_ixs = torch.randperm(test_x.shape[0], device=device)[:args.batch_size]
        test_dxdt_hat = model.time_derivative(test_x[test_ixs].requires_grad_(True))
        test_loss = L2_loss(test_dxdt[test_ixs], test_dxdt_hat)

        # Logging.
        stats['train_loss'].append(loss.item())
        stats['test_loss'].append(test_loss.item())
        if args.verbose and step % args.print_every == 0:
            # NOTE: the value labelled "grad norm" is grad @ grad, the
            # squared L2 norm.
            print("step {}, train_loss {:.4e}, test_loss {:.4e}, grad norm {:.4e}, grad std {:.4e}"
                  .format(step, loss.item(), test_loss.item(),
                          (grad @ grad).item(), grad.std().item()))

    # Encode a random sample of training rows for the latent scatter plot.
    ixs = torch.randperm(x.shape[0], device=device)[:10000]
    sample = x[ixs].requires_grad_(True)
    # .cpu() is required before .numpy() when the model runs on a GPU.
    enc = model.encoding(sample).detach().cpu().numpy()
    print(sample.shape)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # plt.hot() returns None, so the colormap is named explicitly instead.
    img = ax.scatter(enc[:, 0], enc[:, 3], enc[:, 2], c=enc[:, 1], cmap='hot')
    fig.colorbar(img)
    fig.savefig('lrep.png')

    y0 = torch.tensor([0.4, 0.3, 1 / np.sqrt(2), 1 / np.sqrt(2)],
                      dtype=torch.float32)

    def update_fn(t, y):
        return model_update(t, y, model)

    orbit, _ = get_orbit(y0, t_points=10, t_span=[0, 10], update_fn=update_fn)
    print(orbit)
    # New figure, so the orbit is not drawn on top of the 3D latent plot.
    plt.figure()
    plt.scatter(orbit[:, 0], orbit[:, 1])
    plt.savefig('orbit.png')

    return model, stats
def train(args):
    """Fit an HNN to latent dynamics of one sub-column from a saved PixelHNN.

    A saved PixelHNN checkpoint for the selected sub-column is loaded, the
    signal is encoded with its autoencoder, and its ``time_derivative`` on
    those latent coordinates becomes the regression target for a fresh
    ``HNN(2, MLP(...))``.

    Args:
        args: Namespace-like object. Attributes read here: ``cpu``, ``seed``,
            ``folder``, ``speed``, ``scaled``, ``experiment_dir``,
            ``sub_columns`` (0-3, selects the signal), ``hidden_dim``,
            ``latent_dim``, ``dropout_rate_ae``, ``dropout_rate``,
            ``nonlinearity``, ``baseline``, ``save_dir``, ``ae_path``,
            ``input_dim``, ``output_dim``, ``learn_rate``, ``weight_decay``,
            ``total_steps``, ``batch_size`` and ``print_every``.

    Returns:
        The trained HNN. Per-step losses are collected locally but are not
        returned.
    """
    if torch.cuda.is_available() and not args.cpu:
        device = torch.device("cuda:0")
        # New tensors default to CUDA floats from here on.
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")

    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Load the four signals as tensors; there is no test split for now.
    angular_velo, acc_1, acc_2, sound = get_dataset_split(
        args.folder, args.speed, scaled=args.scaled,
        experiment_dir=args.experiment_dir, tensor=True)

    # sub_columns index -> [signal, autoencoder input width, short tag]
    sub_col = {
        0: [angular_velo, 1, 'v'],
        1: [acc_1, 3, 'a1'],
        2: [acc_2, 3, 'a2'],
        3: [sound, 1, 's'],
    }
    col2use = sub_col[args.sub_columns][0]

    # Rebuild the saved PixelHNN so its autoencoder can pre-encode the data.
    autoencoder = MLPAutoencoder(sub_col[args.sub_columns][1], args.hidden_dim,
                                 args.latent_dim * 2,
                                 dropout_rate=args.dropout_rate_ae)
    full_model = PixelHNN(args.latent_dim * 2, args.hidden_dim,
                          autoencoder=autoencoder,
                          nonlinearity=args.nonlinearity,
                          baseline=args.baseline,
                          dropout_rate=args.dropout_rate)
    path = "{}/saved_models/{}-{}.tar".format(args.save_dir, args.ae_path,
                                              sub_col[args.sub_columns][2])
    # map_location lets a checkpoint written on a GPU load on a CPU-only
    # host; without it torch.load tries to restore to the saving device.
    full_model.load_state_dict(torch.load(path, map_location=device))
    full_model.eval()
    autoencoder_model = full_model.autoencoder

    gcoords = autoencoder_model.encode(col2use).cpu().detach().numpy()
    x = torch.tensor(gcoords, dtype=torch.float, requires_grad=True)
    # Targets come from the pretrained model and are detached from its graph.
    dx_np = full_model.time_derivative(
        torch.tensor(gcoords, dtype=torch.float, requires_grad=True)
    ).cpu().detach().numpy()
    dx = torch.tensor(dx_np, dtype=torch.float)

    nnmodel = MLP(args.input_dim, args.hidden_dim, args.output_dim)
    model = HNN(2, nnmodel)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=args.weight_decay)

    print("Data from {} {}, column: {}".format(args.folder, args.speed,
                                               sub_col[args.sub_columns][2]))

    stats = {'train_loss': []}
    for step in range(args.total_steps + 1):
        # One optimizer step on a random mini-batch.
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        x_train, dxdt = x[ixs].to(device), dx[ixs].to(device)
        dxdt_hat = model.time_derivative(x_train)
        loss = L2_loss(dxdt, dxdt_hat)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss.item()))

    # NOTE: the final full-dataset loss summary is intentionally disabled.
    return model