def eval_temporal(cfg, use_gpu, model_name):
    """Evaluate a trained RNN-VAE on the held-out test sequences (legacy variant).

    Loads the best checkpoint, runs the model in eval mode over the test set,
    and writes reconstruction and loss plots into the project's model folder.

    Parameters
    ----------
    cfg : dict
        Project configuration (parsed config.yaml).
    use_gpu : bool
        If True, run on CUDA; otherwise run on CPU.
    model_name : str
        Name under which the best checkpoint was saved.
    """
    SEED = 19
    ZDIMS = cfg['zdims']
    FUTURE_DECODER = cfg['prediction_decoder']
    TEMPORAL_WINDOW = cfg['time_window'] * 2
    FUTURE_STEPS = cfg['prediction_steps']
    NUM_FEATURES = cfg['num_features']
    TEST_BATCH_SIZE = 64
    PROJECT_PATH = cfg['project_path']
    hidden_size_layer_1 = cfg['hidden_size_layer_1']
    hidden_size_layer_2 = cfg['hidden_size_layer_2']
    hidden_size_rec = cfg['hidden_size_rec']
    hidden_size_pred = cfg['hidden_size_pred']
    dropout_encoder = cfg['dropout_encoder']
    dropout_rec = cfg['dropout_rec']
    dropout_pred = cfg['dropout_pred']

    filepath = PROJECT_PATH + 'model/'
    seq_len_half = int(TEMPORAL_WINDOW / 2)

    checkpoint = (cfg['project_path'] + '/' + 'model/best_model/' + model_name +
                  '_' + cfg['Project'] + '.pkl')

    if use_gpu:
        torch.cuda.manual_seed(SEED)
        model = RNN_VAE(TEMPORAL_WINDOW, ZDIMS, NUM_FEATURES, FUTURE_DECODER,
                        FUTURE_STEPS, hidden_size_layer_1, hidden_size_layer_2,
                        hidden_size_rec, hidden_size_pred, dropout_encoder,
                        dropout_rec, dropout_pred).cuda()
        state_dict = torch.load(checkpoint)
    else:
        # BUG FIX: the CPU branch previously omitted the hidden-size/dropout
        # arguments (inconsistent with the CUDA branch, so the constructed
        # model could not match the checkpoint) and loaded the checkpoint
        # without map_location, which fails for CUDA-saved weights on CPU.
        # A stray no-op `filepath` expression statement was also removed.
        model = RNN_VAE(TEMPORAL_WINDOW, ZDIMS, NUM_FEATURES, FUTURE_DECODER,
                        FUTURE_STEPS, hidden_size_layer_1, hidden_size_layer_2,
                        hidden_size_rec, hidden_size_pred, dropout_encoder,
                        dropout_rec, dropout_pred)
        state_dict = torch.load(checkpoint, map_location=torch.device('cpu'))

    model.load_state_dict(state_dict)
    model.eval()  #toggle evaluation mode

    testset = SEQUENCE_DATASET(cfg['project_path'] + 'data/train/',
                               data='test_seq.npy', train=False,
                               temporal_window=TEMPORAL_WINDOW)
    test_loader = Data.DataLoader(testset, batch_size=TEST_BATCH_SIZE,
                                  shuffle=True, drop_last=True)

    plot_reconstruction(filepath, test_loader, seq_len_half, model, model_name,
                        FUTURE_DECODER, FUTURE_STEPS)
    plot_loss(cfg, filepath, model_name)
    return
def eval_temporal(cfg, use_gpu, model_name, fixed):
    """Evaluate a trained RNN-VAE on the held-out test sequences.

    Loads the best checkpoint (on GPU or CPU), runs the model in eval mode
    over the test set, and writes reconstruction and loss plots into the
    project's model folder.

    Parameters
    ----------
    cfg : dict
        Project configuration (parsed config.yaml).
    use_gpu : bool
        If True, run on CUDA; otherwise run on CPU.
    model_name : str
        Name under which the best checkpoint was saved.
    fixed : bool
        Whether the data is egocentrically aligned. If False, two features
        are dropped — presumably the alignment columns removed during
        preprocessing; TODO confirm against the data pipeline.
    """
    SEED = 19
    ZDIMS = cfg['zdims']
    FUTURE_DECODER = cfg['prediction_decoder']
    TEMPORAL_WINDOW = cfg['time_window'] * 2
    FUTURE_STEPS = cfg['prediction_steps']
    NUM_FEATURES = cfg['num_features']
    if not fixed:
        NUM_FEATURES = NUM_FEATURES - 2
    TEST_BATCH_SIZE = 64
    PROJECT_PATH = cfg['project_path']
    hidden_size_layer_1 = cfg['hidden_size_layer_1']
    hidden_size_layer_2 = cfg['hidden_size_layer_2']
    hidden_size_rec = cfg['hidden_size_rec']
    hidden_size_pred = cfg['hidden_size_pred']
    dropout_encoder = cfg['dropout_encoder']
    dropout_rec = cfg['dropout_rec']
    dropout_pred = cfg['dropout_pred']
    softplus = cfg['softplus']

    filepath = os.path.join(cfg['project_path'], "model")
    seq_len_half = int(TEMPORAL_WINDOW / 2)

    checkpoint = os.path.join(cfg['project_path'], "model", "best_model",
                              model_name + '_' + cfg['Project'] + '.pkl')

    if use_gpu:
        torch.cuda.manual_seed(SEED)
        model = RNN_VAE(TEMPORAL_WINDOW, ZDIMS, NUM_FEATURES, FUTURE_DECODER,
                        FUTURE_STEPS, hidden_size_layer_1, hidden_size_layer_2,
                        hidden_size_rec, hidden_size_pred, dropout_encoder,
                        dropout_rec, dropout_pred, softplus).cuda()
        model.load_state_dict(torch.load(checkpoint))
    else:
        model = RNN_VAE(TEMPORAL_WINDOW, ZDIMS, NUM_FEATURES, FUTURE_DECODER,
                        FUTURE_STEPS, hidden_size_layer_1, hidden_size_layer_2,
                        hidden_size_rec, hidden_size_pred, dropout_encoder,
                        dropout_rec, dropout_pred, softplus).to()
        # map_location lets a CUDA-saved checkpoint load on a CPU-only machine
        model.load_state_dict(torch.load(checkpoint,
                                         map_location=torch.device('cpu')))

    model.eval()  #toggle evaluation mode

    testset = SEQUENCE_DATASET(os.path.join(cfg['project_path'], "data",
                                            "train", ""),
                               data='test_seq.npy', train=False,
                               temporal_window=TEMPORAL_WINDOW)
    test_loader = Data.DataLoader(testset, batch_size=TEST_BATCH_SIZE,
                                  shuffle=True, drop_last=True)

    plot_reconstruction(filepath, test_loader, seq_len_half, model, model_name,
                        FUTURE_DECODER, FUTURE_STEPS)
    # BUG FIX: both branches of the former `if use_gpu:` called plot_loss
    # with identical arguments; the redundant split is collapsed.
    plot_loss(cfg, filepath, model_name)
def rnn_model(config, model_name, pretrained_weights=False, pretrained_model=None):
    """Train the RNN-VAE (legacy entry point; requires a CUDA GPU).

    Reads hyperparameters from the project's config.yaml, trains the model
    epoch by epoch, keeps the best checkpoint by test loss, takes periodic
    snapshots, and saves all logged loss curves as .npy files.

    Parameters
    ----------
    config : str or Path
        Path to the project's config.yaml.
    model_name : str
        Name under which checkpoints and loss logs are saved.
    pretrained_weights : bool, optional
        If True, initialize from `pretrained_model` weights (strict=False).
    pretrained_model : str, optional
        Basename of the pretrained checkpoint in model/pretrained_model/.

    Raises
    ------
    NotImplementedError
        If no CUDA device is available.
    """
    config_file = Path(config).resolve()
    cfg = read_config(config_file)

    print("Train RNN model!")
    if not os.path.exists(cfg['project_path'] + '/' + 'model/best_model'):
        os.mkdir(cfg['project_path'] + '/model/' + 'best_model')
        os.mkdir(cfg['project_path'] + '/model/' + 'best_model/snapshots')
        os.mkdir(cfg['project_path'] + '/model/' + 'model_losses')

    # make sure torch uses cuda for GPU computing
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print("Using CUDA")
        print('GPU active:', torch.cuda.is_available())
        print('GPU used:', torch.cuda.get_device_name(0))
    else:
        print("CUDA is not working!")
        raise NotImplementedError('GPU Computing is required!')

    """ HYPERPARAMTERS """
    # General
    CUDA = use_gpu
    SEED = 19
    TRAIN_BATCH_SIZE = cfg['batch_size']
    TEST_BATCH_SIZE = int(cfg['batch_size'] / 4)
    EPOCHS = cfg['max_epochs']
    ZDIMS = cfg['zdims']
    BETA = cfg['beta']
    SNAPSHOT = cfg['model_snapshot']
    LEARNING_RATE = cfg['learning_rate']
    NUM_FEATURES = cfg['num_features']
    TEMPORAL_WINDOW = cfg['time_window'] * 2
    FUTURE_DECODER = cfg['prediction_decoder']
    FUTURE_STEPS = cfg['prediction_steps']

    # RNN
    hidden_size_layer_1 = cfg['hidden_size_layer_1']
    hidden_size_layer_2 = cfg['hidden_size_layer_2']
    hidden_size_rec = cfg['hidden_size_rec']
    hidden_size_pred = cfg['hidden_size_pred']
    dropout_encoder = cfg['dropout_encoder']
    dropout_rec = cfg['dropout_rec']
    dropout_pred = cfg['dropout_pred']

    # Loss
    MSE_REC_REDUCTION = cfg['mse_reconstruction_reduction']
    MSE_PRED_REDUCTION = cfg['mse_prediction_reduction']
    KMEANS_LOSS = cfg['kmeans_loss']
    KMEANS_LAMBDA = cfg['kmeans_lambda']
    KL_START = cfg['kl_start']
    ANNEALTIME = cfg['annealtime']
    anneal_function = cfg['anneal_function']
    optimizer_scheduler = cfg['scheduler']

    BEST_LOSS = 999999
    convergence = 0
    print('Latent Dimensions: %d, Beta: %d, lr: %.4f' % (ZDIMS, BETA, LEARNING_RATE))

    # simple logging of diverse losses
    train_losses = []
    test_losses = []
    kmeans_losses = []
    kl_losses = []
    weight_values = []
    mse_losses = []
    fut_losses = []

    torch.manual_seed(SEED)
    if CUDA:
        torch.cuda.manual_seed(SEED)

    model = RNN_VAE(TEMPORAL_WINDOW, ZDIMS, NUM_FEATURES, FUTURE_DECODER,
                    FUTURE_STEPS, hidden_size_layer_1, hidden_size_layer_2,
                    hidden_size_rec, hidden_size_pred, dropout_encoder,
                    dropout_rec, dropout_pred).cuda()

    if pretrained_weights:
        if os.path.exists(cfg['project_path'] + '/' + 'model/' +
                          'pretrained_model/' + pretrained_model + '.pkl'):
            print("Loading pretrained Model: %s" % pretrained_model)
            # strict=False tolerates architecture differences (e.g. a
            # checkpoint without the prediction decoder)
            model.load_state_dict(
                torch.load(cfg['project_path'] + '/' + 'model/' +
                           'pretrained_model/' + pretrained_model + '.pkl'),
                strict=False)

    """ DATASET """
    trainset = SEQUENCE_DATASET(cfg['project_path'] + 'data/train/',
                                data='train_seq.npy', train=True,
                                temporal_window=TEMPORAL_WINDOW)
    testset = SEQUENCE_DATASET(cfg['project_path'] + 'data/train/',
                               data='test_seq.npy', train=False,
                               temporal_window=TEMPORAL_WINDOW)

    train_loader = Data.DataLoader(trainset, batch_size=TRAIN_BATCH_SIZE,
                                   shuffle=True, drop_last=True)
    test_loader = Data.DataLoader(testset, batch_size=TEST_BATCH_SIZE,
                                  shuffle=True, drop_last=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                                 amsgrad=True)

    if optimizer_scheduler:
        scheduler = StepLR(optimizer, step_size=100, gamma=0.2, last_epoch=-1)
    else:
        # BUG FIX: gamma was 0, which multiplies the learning rate by zero
        # at the first scheduler step (epoch 100) and silently halts
        # learning. gamma=1 keeps the learning rate constant, i.e. the
        # intended "no scheduling" (matching train_model's behavior).
        scheduler = StepLR(optimizer, step_size=100, gamma=1, last_epoch=-1)

    for epoch in range(1, EPOCHS):
        print('Epoch: %d' % epoch)
        print('Train: ')
        weight, train_loss, km_loss, kl_loss, mse_loss, fut_loss = train(
            train_loader, epoch, model, optimizer, anneal_function, BETA,
            KL_START, ANNEALTIME, TEMPORAL_WINDOW, FUTURE_DECODER,
            FUTURE_STEPS, scheduler, MSE_REC_REDUCTION, MSE_PRED_REDUCTION,
            KMEANS_LOSS, KMEANS_LAMBDA, TRAIN_BATCH_SIZE)

        print('Test: ')
        current_loss, test_loss, test_list = test(
            test_loader, epoch, model, optimizer, BETA, weight,
            TEMPORAL_WINDOW, MSE_REC_REDUCTION, KMEANS_LOSS, KMEANS_LAMBDA,
            FUTURE_DECODER, TEST_BATCH_SIZE)

        for param_group in optimizer.param_groups:
            print('lr: {}'.format(param_group['lr']))

        # logging losses
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        kmeans_losses.append(km_loss)
        kl_losses.append(kl_loss)
        weight_values.append(weight)
        mse_losses.append(mse_loss)
        fut_losses.append(fut_loss)

        # save best model — only once KL annealing is (almost) done, so
        # early low losses with a tiny KL weight don't win
        if weight > 0.99 and current_loss <= BEST_LOSS:
            BEST_LOSS = current_loss
            print("Saving model!\n")
            torch.save(
                model.state_dict(),
                cfg['project_path'] + '/' + 'model/' + 'best_model' + '/' +
                model_name + '_' + cfg['Project'] + '.pkl')
            convergence = 0
        else:
            convergence += 1

        if epoch % SNAPSHOT == 0:
            print("Saving model snapshot!\n")
            torch.save(
                model.state_dict(),
                cfg['project_path'] + '/' + 'model/' + 'best_model' +
                '/snapshots/' + model_name + '_' + cfg['Project'] +
                '_epoch_' + str(epoch) + '.pkl')

        if convergence > cfg['model_convergence']:
            print(
                'Model converged. Please check your model with vame.evaluate_model(). \n'
                'You can also re-run vame.rnn_model() to further improve your model. \n'
                'Hint: Set "model_convergence" in your config.yaml to a higher value. \n'
                '\n'
                'Next: \n'
                'Use vame.behavior_segmentation() to identify behavioral motifs in your dataset!'
            )
            break

    # save logged losses
    # NOTE(review): assumes the loop ran at least once (EPOCHS >= 2);
    # otherwise `current_loss` below is unbound — TODO confirm max_epochs
    # is always > 1 in valid configs.
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/train_losses_' + model_name, train_losses)
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/test_losses_' + model_name, test_losses)
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/kmeans_losses_' + model_name, kmeans_losses)
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/kl_losses_' + model_name, kl_losses)
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/weight_values_' + model_name, weight_values)
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/mse_train_losses_' + model_name, mse_losses)
    # NOTE(review): this stores only the *last* epoch's test MSE scalar,
    # not a per-epoch list like the others — looks inconsistent; verify
    # against downstream plotting before changing.
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/mse_test_losses_' + model_name, current_loss)
    np.save(
        cfg['project_path'] + '/' + 'model/' + 'model_losses' +
        '/fut_losses_' + model_name, fut_losses)

    if convergence < cfg['model_convergence']:
        print(
            'Model seemed to have not reached convergence. You may want to check your model \n'
            'with vame.evaluate_model(). If your satisfied you can continue with \n'
            'Use vame.behavior_segmentation() to identify behavioral motifs!\n\n'
            'OPTIONAL: You can re-run vame.rnn_model() to improve performance.'
        )
def train_model(config):
    """Train the RNN-VAE defined by the project's config.yaml (GPU or CPU).

    Reads all hyperparameters from the config, optionally resumes from
    pretrained weights, trains epoch by epoch, keeps the best checkpoint by
    test loss, takes periodic snapshots, and saves all logged loss curves
    as .npy files under model/model_losses/.

    Parameters
    ----------
    config : str or Path
        Path to the project's config.yaml.
    """
    config_file = Path(config).resolve()
    cfg = read_config(config_file)
    legacy = cfg['legacy']
    model_name = cfg['model_name']
    pretrained_weights = cfg['pretrained_weights']
    pretrained_model = cfg['pretrained_model']
    fixed = cfg['egocentric_data']

    print("Train Variational Autoencoder - model name: %s \n" % model_name)
    if not os.path.exists(
            os.path.join(cfg['project_path'], 'model', 'best_model', "")):
        os.mkdir(os.path.join(cfg['project_path'], 'model', 'best_model', ""))
        os.mkdir(
            os.path.join(cfg['project_path'], 'model', 'best_model',
                         'snapshots', ""))
        os.mkdir(os.path.join(cfg['project_path'], 'model', 'model_losses', ""))

    # make sure torch uses cuda for GPU computing
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print("Using CUDA")
        print('GPU active:', torch.cuda.is_available())
        print('GPU used: ', torch.cuda.get_device_name(0))
    else:
        # cleanup: a discarded `torch.device("cpu")` expression (no-op) was
        # removed here; CPU execution needs no explicit device selection.
        print("warning, a GPU was not found... proceeding with CPU (slow!) \n")

    """ HYPERPARAMTERS """
    # General
    CUDA = use_gpu
    SEED = 19
    TRAIN_BATCH_SIZE = cfg['batch_size']
    TEST_BATCH_SIZE = int(cfg['batch_size'] / 4)
    EPOCHS = cfg['max_epochs']
    ZDIMS = cfg['zdims']
    BETA = cfg['beta']
    SNAPSHOT = cfg['model_snapshot']
    LEARNING_RATE = cfg['learning_rate']
    NUM_FEATURES = cfg['num_features']
    if not fixed:
        # non-egocentric data: two features fewer — presumably the alignment
        # columns dropped during preprocessing; TODO confirm with pipeline
        NUM_FEATURES = NUM_FEATURES - 2
    TEMPORAL_WINDOW = cfg['time_window'] * 2
    FUTURE_DECODER = cfg['prediction_decoder']
    FUTURE_STEPS = cfg['prediction_steps']

    # RNN
    hidden_size_layer_1 = cfg['hidden_size_layer_1']
    hidden_size_layer_2 = cfg['hidden_size_layer_2']
    hidden_size_rec = cfg['hidden_size_rec']
    hidden_size_pred = cfg['hidden_size_pred']
    dropout_encoder = cfg['dropout_encoder']
    dropout_rec = cfg['dropout_rec']
    dropout_pred = cfg['dropout_pred']
    noise = cfg['noise']
    scheduler_step_size = cfg['scheduler_step_size']
    softplus = cfg['softplus']

    # Loss
    MSE_REC_REDUCTION = cfg['mse_reconstruction_reduction']
    MSE_PRED_REDUCTION = cfg['mse_prediction_reduction']
    KMEANS_LOSS = cfg['kmeans_loss']
    KMEANS_LAMBDA = cfg['kmeans_lambda']
    KL_START = cfg['kl_start']
    ANNEALTIME = cfg['annealtime']
    anneal_function = cfg['anneal_function']
    optimizer_scheduler = cfg['scheduler']

    BEST_LOSS = 999999
    convergence = 0
    print(
        'Latent Dimensions: %d, Time window: %d, Batch Size: %d, Beta: %d, lr: %.4f\n'
        % (ZDIMS, cfg['time_window'], TRAIN_BATCH_SIZE, BETA, LEARNING_RATE))

    # simple logging of diverse losses
    train_losses = []
    test_losses = []
    kmeans_losses = []
    kl_losses = []
    weight_values = []
    mse_losses = []
    fut_losses = []

    torch.manual_seed(SEED)

    # legacy flag selects the old architecture for backward compatibility
    RNN = RNN_VAE if not legacy else RNN_VAE_LEGACY

    if CUDA:
        torch.cuda.manual_seed(SEED)
        model = RNN(TEMPORAL_WINDOW, ZDIMS, NUM_FEATURES, FUTURE_DECODER,
                    FUTURE_STEPS, hidden_size_layer_1, hidden_size_layer_2,
                    hidden_size_rec, hidden_size_pred, dropout_encoder,
                    dropout_rec, dropout_pred, softplus).cuda()
    else:
        # cpu support — BUG FIX: the CPU branch previously called
        # torch.cuda.manual_seed(SEED), which is meaningless without a GPU.
        model = RNN(TEMPORAL_WINDOW, ZDIMS, NUM_FEATURES, FUTURE_DECODER,
                    FUTURE_STEPS, hidden_size_layer_1, hidden_size_layer_2,
                    hidden_size_rec, hidden_size_pred, dropout_encoder,
                    dropout_rec, dropout_pred, softplus).to()

    if pretrained_weights:
        if os.path.exists(
                os.path.join(cfg['project_path'], 'model', 'best_model',
                             pretrained_model + '_' + cfg['Project'] + '.pkl')):
            print("Loading pretrained weights from model: %s\n" %
                  pretrained_model)
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        cfg['project_path'], 'model', 'best_model',
                        pretrained_model + '_' + cfg['Project'] + '.pkl')))
            # resuming: skip KL annealing, use full KL weight immediately
            KL_START = 0
            ANNEALTIME = 1

    """ DATASET """
    trainset = SEQUENCE_DATASET(os.path.join(cfg['project_path'], "data",
                                             "train", ""),
                                data='train_seq.npy', train=True,
                                temporal_window=TEMPORAL_WINDOW)
    testset = SEQUENCE_DATASET(os.path.join(cfg['project_path'], "data",
                                            "train", ""),
                               data='test_seq.npy', train=False,
                               temporal_window=TEMPORAL_WINDOW)

    train_loader = Data.DataLoader(trainset, batch_size=TRAIN_BATCH_SIZE,
                                   shuffle=True, drop_last=True)
    test_loader = Data.DataLoader(testset, batch_size=TEST_BATCH_SIZE,
                                  shuffle=True, drop_last=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                                 amsgrad=True)

    if optimizer_scheduler:
        print('Scheduler step size: %d, Scheduler gamma: %.2f\n' %
              (scheduler_step_size, cfg['scheduler_gamma']))
        # Thanks to @alexcwsmith for the optimized scheduler contribution
        scheduler = ReduceLROnPlateau(optimizer, 'min',
                                      factor=cfg['scheduler_gamma'],
                                      patience=cfg['scheduler_step_size'],
                                      threshold=1e-3, threshold_mode='rel',
                                      verbose=True)
    else:
        # gamma=1 keeps the learning rate constant (scheduler disabled)
        scheduler = StepLR(optimizer, step_size=scheduler_step_size, gamma=1,
                           last_epoch=-1)

    print("Start training... ")
    for epoch in range(1, EPOCHS):
        print("Epoch: %d" % epoch)
        weight, train_loss, km_loss, kl_loss, mse_loss, fut_loss = train(
            train_loader, epoch, model, optimizer, anneal_function, BETA,
            KL_START, ANNEALTIME, TEMPORAL_WINDOW, FUTURE_DECODER,
            FUTURE_STEPS, scheduler, MSE_REC_REDUCTION, MSE_PRED_REDUCTION,
            KMEANS_LOSS, KMEANS_LAMBDA, TRAIN_BATCH_SIZE, noise)

        current_loss, test_loss, test_list = test(
            test_loader, epoch, model, optimizer, BETA, weight,
            TEMPORAL_WINDOW, MSE_REC_REDUCTION, KMEANS_LOSS, KMEANS_LAMBDA,
            FUTURE_DECODER, TEST_BATCH_SIZE)

        # logging losses
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        kmeans_losses.append(km_loss)
        kl_losses.append(kl_loss)
        weight_values.append(weight)
        mse_losses.append(mse_loss)
        fut_losses.append(fut_loss)

        # save best model — only once KL annealing is (almost) done, so
        # early low losses with a tiny KL weight don't win
        if weight > 0.99 and current_loss <= BEST_LOSS:
            BEST_LOSS = current_loss
            print("Saving model!")
            # BUG FIX: the former `if use_gpu / else` here ran two
            # byte-identical torch.save calls; a single save suffices.
            torch.save(
                model.state_dict(),
                os.path.join(cfg['project_path'], "model", "best_model",
                             model_name + '_' + cfg['Project'] + '.pkl'))
            convergence = 0
        else:
            convergence += 1

        if epoch % SNAPSHOT == 0:
            print("Saving model snapshot!\n")
            torch.save(
                model.state_dict(),
                os.path.join(
                    cfg['project_path'], 'model', 'best_model', 'snapshots',
                    model_name + '_' + cfg['Project'] + '_epoch_' +
                    str(epoch) + '.pkl'))

        if convergence > cfg['model_convergence']:
            print('Finished training...')
            print(
                'Model converged. Please check your model with vame.evaluate_model(). \n'
                'You can also re-run vame.trainmodel() to further improve your model. \n'
                'Make sure to set _pretrained_weights_ in your config.yaml to "true" \n'
                'and plug your current model name into _pretrained_model_. \n'
                'Hint: Set "model_convergence" in your config.yaml to a higher value. \n'
                '\n'
                'Next: \n'
                'Use vame.pose_segmentation() to identify behavioral motifs in your dataset!'
            )
            break

    # save logged losses
    # NOTE(review): assumes the loop ran at least once (max_epochs >= 2);
    # otherwise `current_loss` below is unbound — TODO confirm configs
    # always set max_epochs > 1.
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'train_losses_' + model_name), train_losses)
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'test_losses_' + model_name), test_losses)
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'kmeans_losses_' + model_name), kmeans_losses)
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'kl_losses_' + model_name), kl_losses)
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'weight_values_' + model_name), weight_values)
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'mse_train_losses_' + model_name), mse_losses)
    # NOTE(review): stores only the *last* epoch's test MSE scalar, not a
    # per-epoch list like the others — looks inconsistent; verify against
    # downstream plotting before changing.
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'mse_test_losses_' + model_name), current_loss)
    np.save(
        os.path.join(cfg['project_path'], 'model', 'model_losses',
                     'fut_losses_' + model_name), fut_losses)

    print("\n")
    if convergence < cfg['model_convergence']:
        print('Finished training...')
        print(
            'Model seems to have not reached convergence. You may want to check your model \n'
            'with vame.evaluate_model(). If your satisfied you can continue. \n'
            'Use vame.pose_segmentation() to identify behavioral motifs! \n'
            'OPTIONAL: You can re-run vame.train_model() to improve performance.'
        )