def train_and_infer_map(
        self,
        train_input: np.ndarray,
        train_output: np.ndarray,
        test_helpers: pd.DataFrame,
        num_features: int,
        args: Any,
) -> None:
    """Train and test the model on different prediction horizons for map-based Nearest Neighbors.

    Args:
        train_input (numpy array): Train input data
        train_output (numpy array): Train ground truth data
        test_helpers (pandas Dataframe): Test map helpers
        num_features: Number of input features
        args: Arguments passed to runNNBaselines.py

    """
    # Create a temporary directory where forecasted trajectories for all the batches will be saved
    temp_save_dir = tempfile.mkdtemp()

    print(
        f"Forecasted trajectories will be saved in {args.traj_save_path} ...")

    # Train and Test inputs for NN
    train_num_tracks = train_input.shape[0]
    train_input = train_input.reshape(
        (train_num_tracks, args.obs_len * num_features), order="F")

    # Get test centerlines and map helpers
    test_centerlines = test_helpers["CANDIDATE_CENTERLINES"].values
    test_nt = test_helpers["CANDIDATE_NT_DISTANCES"].values
    test_references = test_helpers["CANDIDATE_DELTA_REFERENCES"].values
    test_seq_ids = test_helpers["SEQUENCE"].values
    test_num_tracks = test_nt.shape[0]

    # Run experiments over different prediction horizons
    for curr_pred_horizon in PREDICTION_HORIZONS:
        grid_search = baseline_utils.get_model(self, train_input,
                                               train_output, args,
                                               curr_pred_horizon)
        print("Model obtained, now starting inference ...")

        # Run inference in parallel over batches of the test set
        Parallel(n_jobs=-2, verbose=5)(delayed(self.infer_and_save_traj_map)(
            grid_search,
            train_output,
            test_nt[i:min(i + args.joblib_batch_size, test_num_tracks)],
            test_centerlines[i:min(i + args.joblib_batch_size,
                                   test_num_tracks)],
            test_references[i:min(i + args.joblib_batch_size,
                                  test_num_tracks)],
            test_seq_ids[i:min(i + args.joblib_batch_size, test_num_tracks)],
            start_idx=i,
            args=args,
            num_features=num_features,
            horizon=curr_pred_horizon,
            save_dir=temp_save_dir,
        ) for i in range(0, test_num_tracks, args.joblib_batch_size))

    # Merge per-batch trajectory files and clean up the temporary directory
    baseline_utils.merge_saved_traj(temp_save_dir, args.traj_save_path)
    shutil.rmtree(temp_save_dir)
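# NOTE: Illustrative sketch only, not part of the baseline code. It shows the
# chunking pattern used above: the test set is sliced into joblib_batch_size
# pieces and each slice is handed to a worker via Parallel/delayed
# (n_jobs=-2 means "use all CPUs but one"). The helper names `_describe_batch`
# and `_batching_demo`, the toy data, and the batch size are hypothetical.
def _describe_batch(batch: np.ndarray, start_idx: int) -> tuple:
    """Stand-in for infer_and_save_traj_map: report chunk start and size."""
    return start_idx, batch.shape[0]


def _batching_demo() -> None:
    """Demonstrate the slice arithmetic used for the parallel inference calls."""
    data = np.arange(10)  # stand-in for 10 test tracks
    batch_size = 4  # stand-in for args.joblib_batch_size
    results = Parallel(n_jobs=-2, verbose=0)(
        delayed(_describe_batch)(data[i:min(i + batch_size, len(data))], i)
        for i in range(0, len(data), batch_size))
    # Two full batches of 4 tracks and one partial batch of 2
    print(results)  # [(0, 4), (4, 4), (8, 2)]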
def main():
    """Main."""
    args = parse_arguments()

    if not baseline_utils.validate_args(args):
        return

    print(f"Using all ({joblib.cpu_count()}) CPUs....")
    if use_cuda:
        print(f"Using all ({torch.cuda.device_count()}) GPUs...")

    model_utils = ModelUtils()

    # Key for selecting the feature set
    if args.use_map and args.use_social:
        baseline_key = "map_social"
    elif args.use_map:
        baseline_key = "map"
    elif args.use_social:
        baseline_key = "social"
    else:
        baseline_key = "none"

    # Get data
    data_dict = baseline_utils.get_data(args, baseline_key)

    # Get model
    criterion = nn.MSELoss()
    encoder = EncoderRNN(
        input_size=len(baseline_utils.BASELINE_INPUT_FEATURES[baseline_key]))
    decoder = DecoderRNN(output_size=2)
    if use_cuda:
        encoder = nn.DataParallel(encoder)
        decoder = nn.DataParallel(decoder)

    encoder.to(device)
    decoder.to(device)

    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=args.lr)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.lr)

    # If a model_path is provided, resume from the saved checkpoint
    if args.model_path is not None and os.path.isfile(args.model_path):
        epoch, rollout_len, _ = model_utils.load_checkpoint(
            args.model_path, encoder, decoder, encoder_optimizer,
            decoder_optimizer)
        start_epoch = epoch + 1
        start_rollout_idx = ROLLOUT_LENS.index(rollout_len) + 1
    else:
        start_epoch = 0
        start_rollout_idx = 0

    if not args.test:

        # Tensorboard logger
        log_dir = os.path.join(os.getcwd(), "lstm_logs", baseline_key)

        # Get PyTorch Datasets
        train_dataset = LSTMDataset(data_dict, args, "train")
        val_dataset = LSTMDataset(data_dict, args, "val")

        # Set up dataloaders
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.train_batch_size,
            shuffle=True,
            drop_last=False,
            collate_fn=model_utils.my_collate_fn,
        )
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=args.val_batch_size,
            drop_last=False,
            shuffle=False,
            collate_fn=model_utils.my_collate_fn,
        )

        print("Training begins ...")

        decrement_counter = 0

        epoch = start_epoch
        global_start_time = time.time()
        for i in range(start_rollout_idx, len(ROLLOUT_LENS)):
            rollout_len = ROLLOUT_LENS[i]
            logger = Logger(log_dir, name="{}".format(rollout_len))
            best_loss = float("inf")
            prev_loss = best_loss
            while epoch < args.end_epoch:
                start = time.time()
                train(
                    train_loader,
                    epoch,
                    criterion,
                    logger,
                    encoder,
                    decoder,
                    encoder_optimizer,
                    decoder_optimizer,
                    model_utils,
                    rollout_len,
                )
                end = time.time()

                print(
                    f"Training epoch completed in {(end - start) / 60.0} mins, Total time: {(end - global_start_time) / 60.0} mins"
                )

                epoch += 1
                if epoch % 5 == 0:
                    start = time.time()
                    prev_loss, decrement_counter = validate(
                        val_loader,
                        epoch,
                        criterion,
                        logger,
                        encoder,
                        decoder,
                        encoder_optimizer,
                        decoder_optimizer,
                        model_utils,
                        prev_loss,
                        decrement_counter,
                        rollout_len,
                    )
                    end = time.time()
                    print(
                        f"Validation completed in {(end - start) / 60.0} mins, Total time: {(end - global_start_time) / 60.0} mins"
                    )

                    # If val loss increased 3 times consecutively, go to the next rollout length
                    if decrement_counter > 2:
                        break

    else:

        start_time = time.time()

        temp_save_dir = tempfile.mkdtemp()

        test_size = data_dict["test_input"].shape[0]
        test_data_subsets = baseline_utils.get_test_data_dict_subset(
            data_dict, args)

        # test_batch_size should be less than joblib_batch_size
        Parallel(n_jobs=1, verbose=2)(
            delayed(infer_helper)(test_data_subsets[i], i, encoder, decoder,
                                  model_utils, temp_save_dir)
            for i in range(0, test_size, args.joblib_batch_size))

        baseline_utils.merge_saved_traj(temp_save_dir, args.traj_save_path)
        shutil.rmtree(temp_save_dir)

        end = time.time()
        print(f"Test completed in {(end - start_time) / 60.0} mins")
        print(f"Forecasted Trajectories saved at {args.traj_save_path}")
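# NOTE: Illustrative sketch only, not part of the baseline code. It mirrors the
# curriculum loop in main(): train on each rollout length in turn, validate
# every 5 epochs, and move to the next rollout length once the validation loss
# has increased three times in a row (decrement_counter > 2). The function name
# `_curriculum_demo`, the default rollout lengths, and the dummy loss sequence
# are hypothetical stand-ins for ROLLOUT_LENS and validate().
def _curriculum_demo(rollout_lens=(1, 10, 30), end_epoch=40) -> None:
    """Trace the early-stopping logic with a made-up validation loss sequence."""
    dummy_losses = iter([2.0, 1.5, 1.6, 1.7, 1.8, 1.2, 1.0])
    epoch = 0
    for rollout_len in rollout_lens:
        decrement_counter = 0
        prev_loss = float("inf")
        while epoch < end_epoch:
            epoch += 1  # a real training epoch would run here
            if epoch % 5 == 0:
                val_loss = next(dummy_losses, 0.0)
                decrement_counter = (decrement_counter + 1
                                     if val_loss > prev_loss else 0)
                prev_loss = val_loss
                print(f"rollout={rollout_len} epoch={epoch} val={val_loss}")
                if decrement_counter > 2:
                    break  # three consecutive increases: next rollout length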
def train_and_infer_absolute(
        self,
        train_input: np.ndarray,
        train_output: np.ndarray,
        test_input: np.ndarray,
        test_helpers: pd.DataFrame,
        num_features: int,
        args: Any,
) -> None:
    """Train and test the model on different prediction horizons for non-map baselines.

    Args:
        train_input (numpy array): Train input data
        train_output (numpy array): Train ground truth data
        test_input (numpy array): Test input data
        test_helpers (pandas Dataframe): Test helpers
        num_features: Number of input features
        args (Argparse): Config parameters

    """
    # Create a temporary directory where forecasted trajectories for all the batches will be saved
    temp_save_dir = tempfile.mkdtemp()

    print(
        f"Forecasted trajectories will be saved in {args.traj_save_path} ...")

    # Train and Test inputs for NN
    test_num_tracks = test_input.shape[0]
    test_input = test_input.reshape(
        (test_num_tracks, args.obs_len * num_features), order="F")

    train_num_tracks = train_input.shape[0]
    train_input = train_input.reshape(
        (train_num_tracks, args.obs_len * num_features), order="F")

    test_references = test_helpers["DELTA_REFERENCE"].values
    test_translation = (test_helpers["TRANSLATION"].values
                        if args.normalize else None)
    test_rotation = (test_helpers["ROTATION"].values
                     if args.normalize else None)
    test_seq_ids = test_helpers["SEQUENCE"].values

    # Run experiments over different prediction horizons
    for curr_pred_horizon in PREDICTION_HORIZONS:
        grid_search = baseline_utils.get_model(self, train_input,
                                               train_output, args,
                                               curr_pred_horizon)
        print("Model obtained, now starting inference ...")

        # Run inference in parallel over batches of the test set
        Parallel(n_jobs=-2, verbose=5)(
            delayed(self.infer_and_save_traj_absolute)(
                grid_search,
                train_output,
                test_input[i:min(i + args.joblib_batch_size,
                                 test_num_tracks)],
                test_references[i:min(i + args.joblib_batch_size,
                                      test_num_tracks)]
                if args.use_delta else None,
                test_seq_ids[i:min(i + args.joblib_batch_size,
                                   test_num_tracks)],
                test_translation[i:min(i + args.joblib_batch_size,
                                       test_num_tracks)]
                if args.normalize else None,
                test_rotation[i:min(i + args.joblib_batch_size,
                                    test_num_tracks)]
                if args.normalize else None,
                start_idx=i,
                args=args,
                num_features=num_features,
                horizon=curr_pred_horizon,
                save_dir=temp_save_dir,
            ) for i in range(0, test_num_tracks, args.joblib_batch_size))

    # Merge per-batch trajectory files and clean up the temporary directory
    baseline_utils.merge_saved_traj(temp_save_dir, args.traj_save_path)
    shutil.rmtree(temp_save_dir)
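# NOTE: Illustrative sketch only, not part of the baseline code. It shows what
# the reshape(..., order="F") calls above do, assuming the inputs have shape
# (num_tracks, obs_len, num_features): each track is flattened so that all
# values of one feature (e.g. all x coordinates) come before the next feature,
# rather than interleaving (x, y) pairs per timestep. The helper name
# `_reshape_demo` and the toy sizes are hypothetical.
def _reshape_demo() -> None:
    """Show the column-major flattening used for the Nearest Neighbor inputs."""
    num_tracks, obs_len, num_features = 2, 3, 2
    tracks = np.arange(num_tracks * obs_len * num_features).reshape(
        num_tracks, obs_len, num_features)
    flat = tracks.reshape((num_tracks, obs_len * num_features), order="F")
    # tracks[0] is [[0, 1], [2, 3], [4, 5]]: (x, y) pairs per timestep.
    # flat[0] is [0, 2, 4, 1, 3, 5]: all x values first, then all y values.
    print(flat)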