Ejemplo n.º 1
0
def train_hnn(args):
    """Train a PixelHNN (with an MLP autoencoder) on the 'pendulum' dataset.

    Reads ``seed``, ``input_dim``, ``hidden_dim``, ``latent_dim``,
    ``nonlinearity``, ``learn_rate``, ``save_dir``, ``total_steps``,
    ``batch_size``, ``verbose`` and ``print_every`` from ``args``.

    Returns:
        A ``(model, stats)`` pair. ``stats['train_loss']`` and
        ``stats['test_loss']`` hold one ``[mean, std]`` entry per step,
        computed from ``model.get_l2_loss`` on the full train/test tensors.
    """
    # Seed both RNGs so that batch sampling is reproducible.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Model and optimizer.
    ae = MLPAutoencoder(args.input_dim, args.hidden_dim, args.latent_dim,
                        nonlinearity='relu')
    model = PixelHNN(args.latent_dim, args.hidden_dim, autoencoder=ae,
                     nonlinearity=args.nonlinearity, baseline=False)
    n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("HNN has {} paramerters in total".format(n_trainable))
    optim = torch.optim.Adam(model.parameters(), args.learn_rate,
                             weight_decay=1e-5)

    # Dataset: consecutive frame pairs for the train and test splits.
    data = get_dataset('pendulum', args.save_dir, verbose=True, seed=args.seed)

    def as_float32(key):
        return torch.tensor(data[key], dtype=torch.float32)

    x = as_float32('pixels')
    test_x = as_float32('test_pixels')
    next_x = as_float32('next_pixels')
    test_next_x = as_float32('test_next_pixels')

    stats = {'train_loss': [], 'test_loss': []}
    with tqdm(total=args.total_steps) as progress:
        for step in range(args.total_steps):
            # One optimisation step on a random mini-batch.
            batch = torch.randperm(x.shape[0])[:args.batch_size]
            loss = pixelhnn_loss(x[batch], next_x[batch], model)
            loss.backward()
            optim.step()
            optim.zero_grad()

            # Full-dataset L2 statistics, recorded on every step.
            train_l2 = model.get_l2_loss(x, next_x).cpu().numpy()
            test_l2 = model.get_l2_loss(test_x, test_next_x).cpu().numpy()
            stats['train_loss'].append([train_l2.mean(), train_l2.std()])
            stats['test_loss'].append([test_l2.mean(), test_l2.std()])
            progress.set_postfix(
                train_loss='{:.9f}'.format(train_l2.mean()),
                test_loss='{:.9f}'.format(test_l2.mean()))

            if args.verbose and step % args.print_every == 0:
                # Mini-batch validation loss, only used for the log line.
                val_batch = torch.randperm(test_x.shape[0])[:args.batch_size]
                val_loss = pixelhnn_loss(test_x[val_batch],
                                         test_next_x[val_batch], model)
                print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(
                    step, loss.item(), val_loss.item()))
            progress.update()

    def mean_and_sem(dist):
        # Mean and standard error of the mean of the per-sample losses.
        return dist.mean().item(), dist.std().item() / np.sqrt(dist.shape[0])

    train_dist = pixelhnn_loss(x, next_x, model, return_scalar=False)
    test_dist = pixelhnn_loss(test_x, test_next_x, model, return_scalar=False)
    train_mean, train_sem = mean_and_sem(train_dist)
    test_mean, test_sem = mean_and_sem(test_dist)
    summary = ('Final train loss {:.4e} +/- {:.4e}\n'
               'Final test loss {:.4e} +/- {:.4e}')
    print(summary.format(train_mean, train_sem, test_mean, test_sem))
    return model, stats
Ejemplo n.º 2
0
def train(args):
    """Train a PixelHNN autoencoder on one column group of the split dataset.

    ``args.sub_columns`` (0-3) selects which of the four arrays returned by
    ``get_dataset_split`` is used; consecutive rows form the
    (current, next) training pairs.

    Returns:
        The trained model. Per-step training losses are collected locally
        but are not returned.
    """
    # Same condition as "cuda available and not forced to CPU", inverted.
    if not torch.cuda.is_available() or args.cpu:
        device = torch.device("cpu")
        print("Running on the CPU")
    else:
        device = torch.device("cuda:0")
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")

    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Load the four signal groups (no test data for now).
    angular_velo, acc_1, acc_2, sound = get_dataset_split(
        args.folder,
        args.speed,
        scaled=args.scaled,
        experiment_dir=args.experiment_dir)

    # index -> (data, input width, short label)
    choices = {
        0: (angular_velo, 1, 'v'),
        1: (acc_1, 3, 'a1'),
        2: (acc_2, 3, 'a2'),
        3: (sound, 1, 's'),
    }
    series, width, label = choices[args.sub_columns]

    print("Data from {} {}, column: {}".format(args.folder, args.speed, label))

    # Each sample is paired with its successor.
    x = torch.tensor(series[:-1], dtype=torch.float)
    x_next = torch.tensor(series[1:], dtype=torch.float)

    latent_size = args.latent_dim * 2
    autoencoder = MLPAutoencoder(width,
                                 args.hidden_dim,
                                 latent_size,
                                 dropout_rate=args.dropout_rate_ae)
    model = PixelHNN(latent_size,
                     args.hidden_dim,
                     autoencoder=autoencoder,
                     nonlinearity=args.nonlinearity,
                     baseline=args.baseline,
                     dropout_rate=args.dropout_rate)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(),
                             args.learn_rate,
                             weight_decay=args.weight_decay)

    # Mini-batch training loop (total_steps + 1 iterations).
    stats = {'train_loss': []}
    for step in range(args.total_steps + 1):
        batch = torch.randperm(x.shape[0])[:args.batch_size]
        x_train = x[batch].to(device)
        x_next_train = x_next[batch].to(device)

        loss = hnn_ae_loss(x_train, x_next_train, model)
        loss.backward()
        optim.step()
        optim.zero_grad()

        loss_value = loss.item()
        stats['train_loss'].append(loss_value)
        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss_value))

    return model
Ejemplo n.º 3
0
def train(args):
    """Train a PixelHNN autoencoder on consecutive rows of the dataset.

    The array returned by ``get_dataset`` is turned into (current, next)
    pairs by pairing each row with its successor.

    Returns:
        The trained model. Per-step training losses are collected locally
        but are not returned.
    """
    # Same condition as "cuda available and not forced to CPU", inverted.
    if not torch.cuda.is_available() or args.cpu:
        device = torch.device("cpu")
        print("Running on the CPU")
    else:
        device = torch.device("cuda:0")
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")

    # Seed both RNGs.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    print("{} {}".format(args.folder, args.speed))
    if args.scaled:
        print("Training scaled model:")
    else:
        print("Training noisy model:")
    print('{} pairs of coords in latent space '.format(args.latent_dim))

    # Load the data (no test data for now).
    x_m = get_dataset(args.folder,
                      args.speed,
                      scaled=args.scaled,
                      split=args.split_data,
                      experiment_dir=args.experiment_dir)
    x = torch.tensor(x_m[:-1], dtype=torch.float)
    x_next = torch.tensor(x_m[1:], dtype=torch.float)

    latent_size = args.latent_dim * 2
    autoencoder = MLPAutoencoder(args.input_dim,
                                 args.hidden_dim,
                                 latent_size,
                                 dropout_rate=args.dropout_rate_ae)
    model = PixelHNN(latent_size,
                     args.hidden_dim,
                     autoencoder=autoencoder,
                     nonlinearity=args.nonlinearity,
                     baseline=args.baseline,
                     dropout_rate=args.dropout_rate)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(),
                             args.learn_rate,
                             weight_decay=args.weight_decay)

    # Mini-batch training loop (total_steps + 1 iterations).
    stats = {'train_loss': [], 'test_loss': []}
    for step in range(args.total_steps + 1):
        batch = torch.randperm(x.shape[0])[:args.batch_size]
        x_train = x[batch].to(device)
        x_next_train = x_next[batch].to(device)

        loss = hnn_ae_loss(x_train, x_next_train, model)
        loss.backward()
        optim.step()
        optim.zero_grad()

        loss_value = loss.item()
        stats['train_loss'].append(loss_value)
        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss_value))

    return model
Ejemplo n.º 4
0
def _strided_loss_stats(inputs, next_inputs, ctrls, model, device, stride):
    """Return (mean, std) of the single-sample loss over a strided subsample.

    The loss is evaluated one sample at a time on every ``stride``-th index
    plus the last index, so that only one sample's graph is alive at once
    (the full-batch evaluation was being killed for memory use).
    """
    n_samples = inputs.shape[0]
    # A set avoids counting the last sample twice when the stride already
    # lands on it.
    indices = sorted(set(range(0, n_samples, stride)) | {n_samples - 1})
    losses = np.array([
        pixelhnn_loss(inputs[i].unsqueeze(0), next_inputs[i].unsqueeze(0),
                      ctrls[i].unsqueeze(0), model, device).item()
        for i in indices
    ])
    # Normalise by the number of samples actually evaluated.
    return losses.mean(), losses.std()


def train(args):
    """Train a PixelSymODEN_R model on the 'cartpole' pixel dataset.

    Reads ``seed``, ``gpu`` (-1 selects the CPU), ``input_dim``,
    ``hidden_dim``, ``latent_dim``, ``nonlinearity``, ``verbose``,
    ``baseline``, ``learn_rate``, ``save_dir``, ``total_steps``,
    ``batch_size`` and ``print_every`` from ``args``.

    Returns:
        A ``(model, stats)`` pair. ``stats['train_loss']`` has one entry per
        step; ``stats['test_loss']`` has one entry per logged step and is
        only filled when ``args.verbose`` is true.
    """
    # set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    if args.gpu == -1:
        device = 'cpu'
    else:
        device = torch.device(
            'cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')

    # init model and optimizer
    autoencoder = MLPAutoencoder(args.input_dim,
                                 args.hidden_dim,
                                 args.latent_dim,
                                 nonlinearity='relu').to(device)
    model = PixelSymODEN_R(int(args.latent_dim / 2),
                           autoencoder=autoencoder,
                           nonlinearity=args.nonlinearity,
                           dt=1e-3,
                           device=device)
    if args.verbose:
        print("Training baseline model:" if args.
              baseline else "Training HNN model:")

    num_parm = get_model_parm_nums(model)
    print('model contains {} parameters'.format(num_parm))

    optim = torch.optim.Adam(model.parameters(),
                             args.learn_rate,
                             weight_decay=1e-5)

    # get dataset; u is the list of control inputs handed to get_dataset
    u = [[0.0, 0.0], [0.0, 1.0], [0.0, -1.0], [0.0, 2.0], [0.0, -2.0],
         [1.0, 0.0], [-1.0, 0.0], [2.0, 0.0], [-2.0, 0.0]]
    data = get_dataset('cartpole',
                       args.save_dir,
                       u,
                       verbose=True,
                       seed=args.seed)

    def to_device(key):
        return torch.tensor(data[key], dtype=torch.float32).to(device)

    x = to_device('pixels')
    test_x = to_device('test_pixels')
    next_x = to_device('next_pixels')
    test_next_x = to_device('test_next_pixels')
    ctrl = to_device('ctrls')
    test_ctrl = to_device('test_ctrls')

    # vanilla ae train loop
    stats = {'train_loss': [], 'test_loss': []}
    for step in tqdm(range(args.total_steps + 1)):

        # train step
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        loss = pixelhnn_loss(x[ixs], next_x[ixs], ctrl[ixs], model, device)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if args.verbose and step % args.print_every == 0:
            # run validation
            test_ixs = torch.randperm(test_x.shape[0])[:args.batch_size]
            test_loss = pixelhnn_loss(test_x[test_ixs], test_next_x[test_ixs],
                                      test_ctrl[test_ixs], model, device)
            stats['test_loss'].append(test_loss.item())

            print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(
                step, loss.item(), test_loss.item()))

    # Final report on a strided subsample. The previous version divided the
    # subsample sums by the full dataset size and printed an unrooted
    # variance as the "+/-" value.
    train_avg, train_std = _strided_loss_stats(x, next_x, ctrl, model, device,
                                               args.batch_size)
    test_avg, test_std = _strided_loss_stats(test_x, test_next_x, test_ctrl,
                                             model, device, args.batch_size)

    print(
        'Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
        .format(train_avg, train_std, test_avg, test_std))
    return model, stats
Ejemplo n.º 5
0
def train(args):
    """Train an HNN on latent coordinates produced by a pretrained PixelHNN.

    A saved PixelHNN checkpoint is loaded from
    ``{args.save_dir}/saved_models/{args.ae_path}.tar``. Its autoencoder
    encodes the dataset, and its ``time_derivative`` on those encodings gives
    the regression targets. A fresh ``HNN(2, MLP(...))`` is then fitted to
    those targets with an L2 loss.

    Returns:
        The trained HNN. Per-step training losses are collected locally but
        are not returned.
    """
    if torch.cuda.is_available() and not args.cpu:
        device = torch.device("cuda:0")
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")

    # set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    print("{} {}".format(args.folder, args.speed))
    print("Training scaled model:" if args.scaled else "Training noisy model:")
    print('{} pairs of coords in latent space '.format(args.latent_dim))

    # using universal autoencoder, pre-encode the training points
    autoencoder = MLPAutoencoder(args.input_dim_ae,
                                 args.hidden_dim,
                                 args.latent_dim * 2,
                                 nonlinearity='relu')
    full_model = PixelHNN(args.latent_dim * 2,
                          args.hidden_dim,
                          autoencoder=autoencoder,
                          nonlinearity=args.nonlinearity,
                          baseline=args.baseline)
    path = "{}/saved_models/{}.tar".format(args.save_dir, args.ae_path)
    # map_location lets a checkpoint saved on a GPU be loaded on the CPU path.
    full_model.load_state_dict(torch.load(path, map_location=device))
    full_model.eval()
    autoencoder_model = full_model.autoencoder

    # get dataset (no test data for now)
    data = get_dataset(args.folder,
                       args.speed,
                       scaled=args.scaled,
                       split=args.split_data,
                       experiment_dir=args.experiment_dir,
                       tensor=True)

    # Latent coordinates are the inputs; the pretrained model's time
    # derivative at those coordinates is the target. Both go through numpy
    # so they carry no graph from the pretrained model.
    gcoords = autoencoder_model.encode(data).cpu().detach().numpy()
    x = torch.tensor(gcoords, dtype=torch.float, requires_grad=True)
    dx_np = full_model.time_derivative(
        torch.tensor(gcoords, dtype=torch.float,
                     requires_grad=True)).cpu().detach().numpy()
    dx = torch.tensor(dx_np, dtype=torch.float)

    nnmodel = MLP(args.input_dim, args.hidden_dim, args.output_dim)
    model = HNN(2, nnmodel)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(),
                             args.learn_rate,
                             weight_decay=args.weight_decay)

    # vanilla train loop (total_steps + 1 iterations)
    stats = {'train_loss': [], 'test_loss': []}
    for step in range(args.total_steps + 1):

        # train step
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        x_train, dxdt = x[ixs].to(device), dx[ixs].to(device)
        dxdt_hat = model.time_derivative(x_train)

        loss = L2_loss(dxdt, dxdt_hat)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())

        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss.item()))

    return model
Ejemplo n.º 6
0
def train(args):
  """Train an HNN on the 'statrect' arrays and save two diagnostic plots.

  Inputs and targets are read from ``statrectinputs.npy`` and
  ``statrectoutputs.npy`` in the working directory, rescaled with
  ``give_min_and_dist`` / ``scale`` and split 80/20 into train and test
  sets. After training, a scatter of the model's encoding is written to
  ``lrep.png`` and an orbit from ``get_orbit`` to ``orbit.png``.

  Returns:
    A ``(model, stats)`` pair; ``stats`` holds one train and one test
    mini-batch loss per step.
  """
  # set random seed
  torch.manual_seed(args.seed)
  np.random.seed(args.seed)
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  print(device)

  # init model and optimizer
  if args.verbose:
    print("Training baseline model:" if args.baseline else "Training HNN model:")

  nn_model = MLPAutoencoder(args.input_dim, args.hidden_dim, args.latent_dim, args.nonlinearity)
  nn_model.to(device)
  model = HNN(args.input_dim, differentiable_model=nn_model,
              field_type=args.field_type, baseline=args.baseline, device=device)
  model.to(device)
  optim = torch.optim.Adam(model.parameters(), args.learn_rate, weight_decay=0)

  # arrange data
  X = np.load('statrectinputs.npy')
  Y = np.load('statrectoutputs.npy')
  Y[~np.isfinite(Y)] = 0  # zero out inf/nan targets before scaling
  xm, xd = give_min_and_dist(X)
  ym, yd = give_min_and_dist(Y)
  X = scale(X, xm, xd)
  Y = scale(Y, ym, yd)

  # First 80% for training, the remaining rows for testing. The previous
  # test slice X[:-int(0.2*n)] selected the training rows again.
  n_train = int(0.8 * X.shape[0])
  train_x = torch.tensor(X[:n_train], dtype=torch.float32, device=device)
  train_dxdt = torch.tensor(Y[:n_train], dtype=torch.float32, device=device)
  test_x = torch.tensor(X[n_train:], dtype=torch.float32, device=device)
  test_dxdt = torch.tensor(Y[n_train:], dtype=torch.float32, device=device)

  # vanilla train loop
  stats = {'train_loss': [], 'test_loss': []}
  for step in range(args.total_steps+1):

    # train step; the batch gets its own name so the full training set is
    # never overwritten and inputs stay aligned with their targets
    ixs = torch.randperm(train_x.shape[0], device=device)[:args.batch_size]
    x_batch = train_x[ixs].requires_grad_(True)
    dxdt_hat = model.time_derivative(x_batch)
    loss = L2_loss(train_dxdt[ixs], dxdt_hat)
    loss.backward()
    # flattened gradient, used for logging only; parameters that received
    # no gradient are skipped
    grad = torch.cat([p.grad.flatten() for p in model.parameters()
                      if p.grad is not None])
    optim.step()
    optim.zero_grad()

    # run test data
    test_ixs = torch.randperm(test_x.shape[0], device=device)[:args.batch_size]
    test_batch = test_x[test_ixs].requires_grad_(True)
    test_dxdt_hat = model.time_derivative(test_batch)
    test_loss = L2_loss(test_dxdt[test_ixs], test_dxdt_hat)

    # logging
    stats['train_loss'].append(loss.item())
    stats['test_loss'].append(test_loss.item())
    if args.verbose and step % args.print_every == 0:
      # "grad norm" is the squared L2 norm of the flattened gradient
      print("step {}, train_loss {:.4e}, test_loss {:.4e}, grad norm {:.4e}, grad std {:.4e}"
          .format(step, loss.item(), test_loss.item(),
                  grad.dot(grad).item(), grad.std().item()))

  # Encode up to 10000 random training points and plot four encoding
  # columns (three as axes, one as colour).
  ixs = torch.randperm(train_x.shape[0], device=device)[:10000]
  x = train_x[ixs].requires_grad_(True)
  enc = model.encoding(x).detach().cpu().numpy()
  print(x.shape)
  fig = plt.figure()
  ax = fig.add_subplot(111, projection='3d')
  img = ax.scatter(enc[:,0], enc[:,3], enc[:,2], c=enc[:,1], cmap='hot')
  fig.colorbar(img)
  fig.savefig('lrep.png')

  # Integrate one orbit from a fixed initial state and plot its first two
  # columns on a fresh figure, so it is not drawn over the latent plot.
  y0 = torch.tensor([0.4, 0.3, 1/np.sqrt(2), 1/np.sqrt(2)], dtype=torch.float32)

  def update_fn(t, y):
    return model_update(t, y, model)

  orbit, _ = get_orbit(y0, t_points=10, t_span=[0, 10], update_fn=update_fn)
  print(orbit)
  plt.figure()
  plt.scatter(orbit[:,0], orbit[:, 1])
  plt.savefig('orbit.png')

  return model,  stats
Ejemplo n.º 7
0
def train(args):
    """Train an HNN on latent coordinates of one pre-encoded column group.

    ``args.sub_columns`` (0-3) selects one of the four arrays returned by
    ``get_dataset_split``. A saved PixelHNN checkpoint is loaded from
    ``{args.save_dir}/saved_models/{args.ae_path}-{label}.tar``. Its
    autoencoder encodes the selected data, and its ``time_derivative`` on
    those encodings gives the regression targets. A fresh
    ``HNN(2, MLP(...))`` is then fitted to those targets with an L2 loss.

    Returns:
        The trained HNN. Per-step training losses are collected locally but
        are not returned.
    """
    if torch.cuda.is_available() and not args.cpu:
        device = torch.device("cuda:0")
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.empty_cache()
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")

    # set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # get dataset (no test data for now)
    angular_velo, acc_1, acc_2, sound = get_dataset_split(
        args.folder,
        args.speed,
        scaled=args.scaled,
        experiment_dir=args.experiment_dir,
        tensor=True)
    # index -> [data, autoencoder input width, label used in checkpoint name]
    sub_col = {
        0: [angular_velo, 1, 'v'],
        1: [acc_1, 3, 'a1'],
        2: [acc_2, 3, 'a2'],
        3: [sound, 1, 's']
    }
    col2use, col_width, col_label = sub_col[args.sub_columns]

    # using universal autoencoder, pre-encode the training points
    autoencoder = MLPAutoencoder(col_width,
                                 args.hidden_dim,
                                 args.latent_dim * 2,
                                 dropout_rate=args.dropout_rate_ae)
    full_model = PixelHNN(args.latent_dim * 2,
                          args.hidden_dim,
                          autoencoder=autoencoder,
                          nonlinearity=args.nonlinearity,
                          baseline=args.baseline,
                          dropout_rate=args.dropout_rate)
    path = "{}/saved_models/{}-{}.tar".format(args.save_dir, args.ae_path,
                                              col_label)
    # map_location lets a checkpoint saved on a GPU be loaded on the CPU path.
    full_model.load_state_dict(torch.load(path, map_location=device))
    full_model.eval()
    autoencoder_model = full_model.autoencoder

    # Latent coordinates are the inputs; the pretrained model's time
    # derivative at those coordinates is the target. Both go through numpy
    # so they carry no graph from the pretrained model.
    gcoords = autoencoder_model.encode(col2use).cpu().detach().numpy()
    x = torch.tensor(gcoords, dtype=torch.float, requires_grad=True)
    dx_np = full_model.time_derivative(
        torch.tensor(gcoords, dtype=torch.float,
                     requires_grad=True)).cpu().detach().numpy()
    dx = torch.tensor(dx_np, dtype=torch.float)

    nnmodel = MLP(args.input_dim, args.hidden_dim, args.output_dim)
    model = HNN(2, nnmodel)
    model.to(device)
    optim = torch.optim.Adam(model.parameters(),
                             args.learn_rate,
                             weight_decay=args.weight_decay)

    print("Data from {} {}, column: {}".format(args.folder, args.speed,
                                               col_label))

    # vanilla train loop (total_steps + 1 iterations)
    stats = {'train_loss': []}
    for step in range(args.total_steps + 1):
        # train step
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        x_train, dxdt = x[ixs].to(device), dx[ixs].to(device)
        dxdt_hat = model.time_derivative(x_train)

        loss = L2_loss(dxdt, dxdt_hat)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if step % args.print_every == 0:
            print("step {}, train_loss {:.4e}".format(step, loss.item()))

    return model