# Build the sequence VRAE and compile its gradient functions.
# Relies on module-level globals: args, num_drivers, x_train, t_train, x_valid, t_valid.
def init_model():
    model = VRAE(args.rnn_size, args.rnn_size, args.n_features, args.latent_size,
                 num_drivers, batch_size=args.batch_size)
    model.create_gradientfunctions(x_train, t_train, x_valid, t_valid)
    return model
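
# Optional variant (sketch, not in the original): the same initializer with its
# dependencies passed in explicitly instead of read from module-level globals,
# which makes it easier to call from other scripts. Only this function's
# signature is hypothetical; the VRAE calls are unchanged.
def init_model_explicit(args, num_drivers, x_train, t_train, x_valid, t_valid):
    model = VRAE(args.rnn_size, args.rnn_size, args.n_features, args.latent_size,
                 num_drivers, batch_size=args.batch_size)
    model.create_gradientfunctions(x_train, t_train, x_valid, t_valid)
    return model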

layers['recog_mean'] = F.Linear(n_hidden_recog[-1], n_z)
layers['recog_log_sigma'] = F.Linear(n_hidden_recog[-1], n_z)

# Generating model.
gen_layer_sizes = [(n_z, n_hidden_gen[0])]
gen_layer_sizes += zip(n_hidden_gen[:-1], n_hidden_gen[1:])
gen_layer_sizes += [(n_hidden_gen[-1], train_x.shape[1])]

layers['z'] = F.Linear(n_z, n_hidden_gen[0])
layers['gen_in_h'] = F.Linear(train_x.shape[1], n_hidden_gen[0], nobias=True)
layers['gen_h_h'] = F.Linear(n_hidden_gen[0], n_hidden_gen[0])
layers['output'] = F.Linear(n_hidden_gen[-1], train_x.shape[1])

if args.init_from == "":
    model = VRAE(**layers)
else:
    model = pickle.load(open(args.init_from))

# state pattern
state_pattern = ['recog_h', 'gen_h']

if args.gpu >= 0:
    cuda.init(args.gpu)
    model.to_gpu()

# use Adam
optimizer = optimizers.Adam()
optimizer.setup(model.collect_parameters())

total_losses = np.zeros(n_epochs, dtype=np.float32)
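
# Sketch of a training epoch for the setup above (addition, not in the original
# snippet). The zero_grads()/backward()/update() sequence is the standard
# Chainer v1 training step; model.forward(...) returning a scalar loss Variable
# and the batchsize variable are assumptions about the surrounding script.
for epoch in xrange(n_epochs):
    perm = np.random.permutation(train_x.shape[0])
    epoch_loss = 0.0
    for i in xrange(0, train_x.shape[0], batchsize):
        x_batch = train_x[perm[i:i + batchsize]]
        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
        optimizer.zero_grads()                  # clear accumulated gradients
        loss = model.forward(x_batch)           # assumed VRAE interface
        loss.backward()                         # backprop through the unrolled graph
        optimizer.update()                      # Adam step
        epoch_loss += float(cuda.to_cpu(loss.data)) * len(x_batch)
    total_losses[epoch] = epoch_loss / train_x.shape[0]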

if __name__ == "__main__":
    args = parse_args()

    # Create a timestamped directory for this run and store the arguments with it.
    save_path = os.path.join(
        "saved_weights",
        datetime.datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S"))
    os.makedirs(save_path)
    with open(os.path.join(save_path, 'args.pkl'), 'wb') as f:  # binary mode for pickle
        pickle.dump(args, f)

    x_train, t_train, r_train, x_valid, t_valid, r_valid = load_data(args)
    num_drivers = np.max(t_train) + 1

    model = VRAE(args.rnn_size, args.rnn_size, args.n_features, args.latent_size,
                 num_drivers, batch_size=args.batch_size,
                 lamda1=args.lamda1, lamda2=args.lamda2)

    batch_order = np.arange(x_train.shape[0] // model.batch_size + 1)
    val_batch_order = np.arange(x_valid.shape[0] // model.batch_size + 1)
    epoch = 0
    LB_list = []

    model.create_gradientfunctions(x_train, t_train, r_train, x_valid, t_valid, r_valid)

    print("iterating")
    while epoch < args.num_epochs:
        epoch += 1
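        # Possible loop body (sketch, not from the original file): model.update(...)
        # and model.valid(...) are assumptions about the functions compiled by
        # create_gradientfunctions above; batch_order, val_batch_order, LB_list and
        # save_path come from the code as shown.
        np.random.shuffle(batch_order)
        train_LB = 0.0
        for batch in batch_order:
            train_LB += model.update(batch, epoch)    # assumed gradient step returning the lower bound
        train_LB /= len(batch_order)

        valid_LB = 0.0
        for batch in val_batch_order:
            valid_LB += model.valid(batch)            # assumed validation-only pass
        valid_LB /= len(val_batch_order)

        LB_list.append((train_LB, valid_LB))
        print("epoch {}: train LB {:.4f}, valid LB {:.4f}".format(epoch, train_LB, valid_LB))
        np.save(os.path.join(save_path, "LB_list.npy"), np.asarray(LB_list))
        model.save_parameters(save_path)              # assumed persistence helper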

# MNIST VRAE training script (Python 2).
# Retrieved from: http://deeplearning.net/data/mnist/mnist.pkl.gz
f = gzip.open('mnist.pkl.gz', 'rb')
(x_train, t_train), (x_valid, t_valid), (x_test, t_test) = cPickle.load(f)
f.close()

path = "./"

print "instantiating model"

# Optimiser and model hyperparameters (hu_encoder, hu_decoder, n_latent and
# n_epochs are presumably defined earlier in the script).
b1 = 0.05
b2 = 0.001
lr = 0.001
batch_size = 100
sigma_init = 0.01
num_inputs = 32000

model = VRAE(hu_encoder, hu_decoder, x_train, n_latent, b1, b2, lr, sigma_init, batch_size)

batch_order = np.arange(int(model.N / model.batch_size))
epoch = 0
LB_list = []

# Resume from previously saved parameters if they exist.
if os.path.isfile(path + "params.pkl"):
    print "Restarting from earlier saved parameters!"
    model.load_parameters(path)
    LB_list = np.load(path + "LB_list.npy")
    epoch = len(LB_list)

if __name__ == "__main__":
    print "iterating"
    while epoch < n_epochs:
        epoch += 1
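
# Side note (addition, not part of the script above): mnist.pkl.gz was pickled
# by Python 2, so under Python 3 it has to be read with an explicit encoding.
# A minimal Python 3 loader:
import gzip
import pickle

def load_mnist_py3(path='mnist.pkl.gz'):
    with gzip.open(path, 'rb') as f:
        (x_train, t_train), (x_valid, t_valid), (x_test, t_test) = \
            pickle.load(f, encoding='latin1')
    return (x_train, t_train), (x_valid, t_valid), (x_test, t_test)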

# PyTorch training driver: builds the dataset, the VRAE and the Adam optimizer,
# then runs the train/evaluate/checkpoint loop.
def train_model():
    args = parse_arg()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_set = init_data(args.data_files, device)
    n_input = max([d.data.shape[1] for d in data_set])
    num_data = len(data_set)
    args.input_dims = n_input
    with open('{}/args.pickle'.format(args.output_dir), 'wb') as f:
        pickle.dump(args, f)

    print('# GPU: {}'.format(device))
    print('# dataset num: {}'.format(num_data))
    print('# input dimensions: {}'.format(args.input_dims))
    print('# latent dimensions: {}'.format(args.latent_dims))
    print('# minibatch-size: {}'.format(args.batch_size))
    print('# epoch: {}'.format(args.epoch))
    print('')

    if args.load_model is not None:
        model = torch.load(args.load_model)
        model.eval()
    else:
        model = VRAE(args.input_dims, args.enc_states, args.latent_dims,
                     args.dec_states, args.enc_layers, args.dec_layers,
                     args.dropout_rate).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # 80/20 train/test split with batched samplers.
    num_train = int(num_data * 0.8)
    num_test = num_data - num_train
    train_dat, test_dat = tud.random_split(data_set, [num_train, num_test])
    train_iter = tud.BatchSampler(tud.RandomSampler(range(len(train_dat))),
                                  batch_size=args.batch_size, drop_last=False)
    test_iter = tud.BatchSampler(tud.SequentialSampler(range(len(test_dat))),
                                 batch_size=args.batch_size, drop_last=False)

    if args.resume_from_checkpoint is not None:
        checkpoint = torch.load(args.resume_from_checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    for epoch in range(args.epoch):
        train_loss = 0
        for indices in train_iter:
            x_data, x_len = make_padded_sequence(
                [train_dat[idx] for idx in indices], device)
            optimizer.zero_grad()
            loss = model.loss(x_data, x_len, k=1)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()  # accumulate as a float so the graph is not retained

        # evaluation
        test_loss = 0
        with torch.no_grad():
            for indices in test_iter:
                x_data, x_len = make_padded_sequence(
                    [test_dat[idx] for idx in indices], device)
                test_loss += model.loss(x_data, x_len, k=10).item()

        output_log(epoch, train_loss / len(train_iter), test_loss / len(test_iter))

        if (epoch + 1) % args.save_interval == 0:
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'test_loss': test_loss
            }
            checkpoint_path = '{}/{}_{}.checkpoint'.format(
                args.output_dir, args.base_file_name, epoch)
            torch.save(checkpoint, checkpoint_path)

    model_path = '{}/{}_final.model'.format(args.output_dir, args.base_file_name)
    torch.save(model, model_path)
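
# Hypothetical sketch of the make_padded_sequence helper used above (its real
# definition is not shown in this excerpt). It assumes each dataset item exposes
# a .data tensor of shape (seq_len, input_dims), pads the batch to a common
# length with torch.nn.utils.rnn.pad_sequence, and returns the padded batch
# together with the original lengths. output_log is likewise assumed to be a
# simple print/logging helper.
import torch
from torch.nn.utils.rnn import pad_sequence

def make_padded_sequence(items, device):
    seqs = [item.data for item in items]                        # each (seq_len, input_dims)
    lengths = torch.tensor([s.shape[0] for s in seqs])          # original sequence lengths
    padded = pad_sequence(seqs, batch_first=True).to(device)    # (batch, max_len, input_dims)
    return padded, lengths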