Example #1
def train(args):
  # set random seed
  torch.manual_seed(args.seed)
  np.random.seed(args.seed)

  # init model and optimizer
  if args.verbose:
    print("Training baseline model:" if args.baseline else "Training HNN model:")
  S_net = MLP(int(args.input_dim/2), 140, int(args.input_dim/2)**2, args.nonlinearity)
  U_net = MLP(int(args.input_dim/2), 140, 1, args.nonlinearity)
  model = Lagrangian(int(args.input_dim/2), S_net, U_net, dt=1e-3)

  num_parm = get_model_parm_nums(model)
  print('model contains {} parameters'.format(num_parm))

  optim = torch.optim.Adam(model.parameters(), args.learn_rate, weight_decay=1e-4)

  # arrange data
  data = get_lag_dataset(seed=args.seed)
  x = torch.tensor(data['x'], requires_grad=False, dtype=torch.float32)
  # append zero control
  u = torch.zeros_like(x[:,0]).unsqueeze(-1)
  x = torch.cat((x, u), -1)

  test_x = torch.tensor(data['test_x'], requires_grad=False, dtype=torch.float32)
  # append zero control (sized to the test set rather than reusing the train-set u)
  test_u = torch.zeros_like(test_x[:,0]).unsqueeze(-1)
  test_x = torch.cat((test_x, test_u), -1)

  dxdt = torch.tensor(data['dx'], dtype=torch.float32)
  test_dxdt = torch.tensor(data['test_dx'], dtype=torch.float32)

  # vanilla train loop
  stats = {'train_loss': [], 'test_loss': []}
  for step in range(args.total_steps+1):
    
    # train step
    dq, dp, du = model.time_derivative(x).split(1, 1)
    dxdt_hat = torch.cat((dq, dp), -1)
    loss = L2_loss(dxdt, dxdt_hat)
    loss.backward()
    optim.step()
    optim.zero_grad()
    
    # run test data
    dq_test, dp_test, du_test = model.time_derivative(test_x).split(1, 1)
    test_dxdt_hat = torch.cat((dq_test, dp_test), -1)
    test_loss = L2_loss(test_dxdt, test_dxdt_hat)

    # logging
    stats['train_loss'].append(loss.item())
    stats['test_loss'].append(test_loss.item())
    if args.verbose and step % args.print_every == 0:
      print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(step, loss.item(), test_loss.item()))

  train_dq, train_dp, train_du = model.time_derivative(x).split(1, 1)
  train_dxdt_hat = torch.cat((train_dq, train_dp), -1)
  train_dist = (dxdt - train_dxdt_hat)**2
  test_dq, test_dp, test_du = model.time_derivative(test_x).split(1, 1)
  test_dxdt_hat = torch.cat((test_dq, test_dp), -1)
  test_dist = (test_dxdt - test_dxdt_hat)**2
  print('Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
    .format(train_dist.mean().item(), train_dist.std().item()/np.sqrt(train_dist.shape[0]),
            test_dist.mean().item(), test_dist.std().item()/np.sqrt(test_dist.shape[0])))

  return model, stats
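
Each snippet reads its hyperparameters from an `args` namespace and relies on project-local helpers (MLP, Lagrangian, get_lag_dataset, L2_loss, get_model_parm_nums) being importable. Below is a minimal sketch of an argparse setup that could drive Example #1; the field names come from the snippet above, while every default value is an illustrative assumption rather than the project's own.

import argparse

def get_args():
    # field names match what train(args) reads above;
    # the defaults are illustrative assumptions only
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_dim', type=int, default=2)   # (q, p), so input_dim/2 coordinates
    parser.add_argument('--learn_rate', type=float, default=1e-3)
    parser.add_argument('--nonlinearity', type=str, default='tanh')
    parser.add_argument('--total_steps', type=int, default=2000)
    parser.add_argument('--print_every', type=int, default=200)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--baseline', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    return parser.parse_args()

if __name__ == '__main__':
    model, stats = train(get_args())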
Example #2
def train(args):
    # set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    device = torch.device(
        'cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')

    # init model and optimizer
    if args.conv:
        autoencoder = ConvAutoencoder().to(device)
    else:
        autoencoder = MLP_VAE(args.input_dim,
                              args.hidden_dim,
                              args.latent_dim,
                              nonlinearity='relu').to(device)

    model = PixelSymODEN_R(int(args.latent_dim / 2),
                           autoencoder=autoencoder,
                           nonlinearity=args.nonlinearity,
                           dt=1e-3,
                           device=device)
    if args.verbose:
        print("Training baseline model:" if args.baseline
              else "Training HNN model:")

    num_parm = get_model_parm_nums(model)
    print('model contains {} parameters'.format(num_parm))

    optim = torch.optim.Adam(model.parameters(),
                             args.learn_rate,
                             weight_decay=1e-5)

    # get dataset
    data = get_dataset('pendulum', args.save_dir, verbose=True, seed=args.seed)

    x = torch.tensor(data['pixels'], dtype=torch.float32).to(device)
    test_x = torch.tensor(data['test_pixels'], dtype=torch.float32).to(device)
    next_x = torch.tensor(data['next_pixels'], dtype=torch.float32).to(device)
    test_next_x = torch.tensor(data['test_next_pixels'],
                               dtype=torch.float32).to(device)

    # vanilla ae train loop
    stats = {'train_loss': [], 'test_loss': []}
    for step in tqdm(range(args.total_steps + 1)):

        # train step
        ixs = torch.randperm(x.shape[0])[:args.batch_size]
        loss = pixelhnn_loss(x[ixs], next_x[ixs], model, device)
        loss.backward()
        optim.step()
        optim.zero_grad()

        stats['train_loss'].append(loss.item())
        if args.verbose and step % args.print_every == 0:
            # run validation
            test_ixs = torch.randperm(test_x.shape[0])[:args.batch_size]
            test_loss = pixelhnn_loss(test_x[test_ixs], test_next_x[test_ixs],
                                      model, device)
            stats['test_loss'].append(test_loss.item())

            print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(
                step, loss.item(), test_loss.item()))

    # the final evaluation streams per-sample losses through generators
    # because the job kept being killed for memory use; the generators
    # seem to keep that from happening
    # TODO: clean up
    train_ind = list(range(0, x.shape[0], args.batch_size))
    train_ind.append(x.shape[0] - 1)

    # tee (itertools.tee) replays the loss generator for the two passes
    train_dist1, train_dist2 = tee(
        pixelhnn_loss(x[i].unsqueeze(0), next_x[i].unsqueeze(0), model,
                      device).detach().cpu().numpy() for i in train_ind)
    train_avg = sum(train_dist1) / len(train_ind)
    train_std = np.sqrt(
        sum((v - train_avg)**2 for v in train_dist2) / len(train_ind))

    test_ind = list(range(0, test_x.shape[0], args.batch_size))
    test_ind.append(test_x.shape[0] - 1)

    test_dist1, test_dist2 = tee(
        pixelhnn_loss(test_x[i].unsqueeze(0), test_next_x[i].unsqueeze(0),
                      model, device).detach().cpu().numpy() for i in test_ind)
    test_avg = sum(test_dist1) / len(test_ind)
    test_std = np.sqrt(
        sum((v - test_avg)**2 for v in test_dist2) / len(test_ind))

    print(
        'Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
        .format(train_avg, train_std, test_avg, test_std))

    return model, stats
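
The tee-based evaluation above works, but itertools.tee buffers every yielded value internally until the second iterator has consumed it, so the two-pass mean/std still holds all sampled losses in memory at once. A single-pass alternative is Welford's algorithm, which keeps only O(1) state; a minimal sketch, assuming losses is any iterable of floats:

import math

def running_mean_std(losses):
    # Welford's single-pass algorithm: running count, running mean,
    # and running sum of squared deviations (m2)
    n, mean, m2 = 0, 0.0, 0.0
    for v in losses:
        n += 1
        delta = v - mean
        mean += delta / n
        m2 += delta * (v - mean)  # uses the updated mean
    return (mean, math.sqrt(m2 / n)) if n else (0.0, 0.0)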
Example #3
def train(args):
    device = torch.device(
        'cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
    # reproducibility: set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # init model and optimizer
    if args.verbose:
        print("Start training with num of points = {} and solver {}.".format(
            args.num_points, args.solver))

    if not args.structure and args.baseline:
        nn_model = MLP(args.input_dim, 600, args.input_dim, args.nonlinearity)
        model = SymODEN_R(args.input_dim,
                          H_net=nn_model,
                          device=device,
                          baseline=True)
    elif not args.structure and not args.baseline:
        H_net = MLP(args.input_dim, 400, 1, args.nonlinearity)
        g_net = MLP(int(args.input_dim / 2), 200, int(args.input_dim / 2))
        model = SymODEN_R(args.input_dim,
                          H_net=H_net,
                          g_net=g_net,
                          device=device,
                          baseline=False)
    elif args.structure and not args.baseline:
        M_net = MLP(int(args.input_dim / 2), 300, int(args.input_dim / 2))
        V_net = MLP(int(args.input_dim / 2), 50, 1)
        g_net = MLP(int(args.input_dim / 2), 200, int(args.input_dim / 2))
        model = SymODEN_R(args.input_dim,
                          M_net=M_net,
                          V_net=V_net,
                          g_net=g_net,
                          device=device,
                          baseline=False,
                          structure=True)
    else:
        raise RuntimeError(
            'arguments *baseline* and *structure* cannot both be true')

    num_parm = get_model_parm_nums(model)
    print('model contains {} parameters'.format(num_parm))

    optim = torch.optim.Adam(model.parameters(),
                             args.learn_rate,
                             weight_decay=1e-4)

    data = get_dataset(seed=args.seed)

    # arrange the data in the same layout as the HNN code
    x = torch.tensor(data['x'], requires_grad=True,
                     dtype=torch.float32)  # [1125, 2] Bx2
    # append zero control
    u = torch.zeros_like(x[:, 0]).unsqueeze(-1)
    x = torch.cat((x, u), -1)

    test_x = torch.tensor(data['test_x'],
                          requires_grad=True,
                          dtype=torch.float32)
    # append zero control (sized to the test set rather than reusing the train-set u)
    test_u = torch.zeros_like(test_x[:, 0]).unsqueeze(-1)
    test_x = torch.cat((test_x, test_u), -1)

    dxdt = torch.tensor(data['dx'], dtype=torch.float32)  # [1125, 2] Bx2
    test_dxdt = torch.tensor(data['test_dx'], dtype=torch.float32)

    # training loop
    stats = {'train_loss': [], 'test_loss': []}
    for step in range(args.total_steps + 1):
        # split the time derivative the same way as the HNN code
        dq, dp, du = model.time_derivative(x).split(1, 1)
        dxdt_hat = torch.cat((dq, dp), -1)

        loss = L2_loss(dxdt, dxdt_hat)
        loss.backward()
        optim.step()
        optim.zero_grad()

        # run test data
        dq_test, dp_test, du_test = model.time_derivative(test_x).split(1, 1)
        test_dxdt_hat = torch.cat((dq_test, dp_test), -1)
        test_loss = L2_loss(test_dxdt, test_dxdt_hat)

        # logging
        stats['train_loss'].append(loss.item())
        stats['test_loss'].append(test_loss.item())
        if args.verbose and step % args.print_every == 0:
            print("step {}, train_loss {:.4e}, test_loss {:.4e}".format(
                step, loss.item(), test_loss.item()))

    train_dq, train_dp, train_du = model.time_derivative(x).split(1, 1)
    train_dxdt_hat = torch.cat((train_dq, train_dp), -1)
    train_dist = (dxdt - train_dxdt_hat)**2
    test_dq, test_dp, test_du = model.time_derivative(test_x).split(1, 1)
    test_dxdt_hat = torch.cat((test_dq, test_dp), -1)
    test_dist = (test_dxdt - test_dxdt_hat)**2
    print(
        'Final train loss {:.4e} +/- {:.4e}\nFinal test loss {:.4e} +/- {:.4e}'
        .format(train_dist.mean().item(),
                train_dist.std().item() / np.sqrt(train_dist.shape[0]),
                test_dist.mean().item(),
                test_dist.std().item() / np.sqrt(test_dist.shape[0])))

    return model, stats
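
Examples #1 and #3 score the model with an L2_loss helper called as L2_loss(target, prediction) and treated as a scalar. A minimal sketch consistent with that usage, assuming it is a plain mean squared error (this mirrors the calls above, not a verified definition from the project):

def L2_loss(u, v):
    # scalar mean squared error between target u and prediction v;
    # works on torch tensors (and numpy arrays) alike
    return ((u - v) ** 2).mean()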