def main(): """Main.""" args = parse_arguments() if not baseline_utils.validate_args(args): return print(f"Using all ({joblib.cpu_count()}) CPUs....") if use_cuda: print(f"Using all ({torch.cuda.device_count()}) GPUs...") model_utils = ModelUtils() # key for getting feature set # Get features if args.use_map and args.use_social: baseline_key = "map_social" elif args.use_map: baseline_key = "map" elif args.use_social: baseline_key = "social" else: baseline_key = "none" # Get data data_dict = baseline_utils.get_data(args, baseline_key) # Get model criterion = nn.MSELoss() encoder = EncoderRNN( input_size=len(baseline_utils.BASELINE_INPUT_FEATURES[baseline_key])) decoder = DecoderRNN(output_size=2) if use_cuda: encoder = nn.DataParallel(encoder) decoder = nn.DataParallel(decoder) encoder.to(device) decoder.to(device) encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=args.lr) decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.lr) # If model_path provided, resume from saved checkpoint if args.model_path is not None and os.path.isfile(args.model_path): epoch, rollout_len, _ = model_utils.load_checkpoint( args.model_path, encoder, decoder, encoder_optimizer, decoder_optimizer) start_epoch = epoch + 1 start_rollout_idx = ROLLOUT_LENS.index(rollout_len) + 1 else: start_epoch = 0 start_rollout_idx = 0 if not args.test: # Tensorboard logger log_dir = os.path.join(os.getcwd(), "lstm_logs", baseline_key) # Get PyTorch Dataset train_dataset = LSTMDataset(data_dict, args, "train") val_dataset = LSTMDataset(data_dict, args, "val") # Setting Dataloaders train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.train_batch_size, shuffle=True, drop_last=False, collate_fn=model_utils.my_collate_fn, ) val_loader = torch.utils.data.DataLoader( val_dataset, batch_size=args.val_batch_size, drop_last=False, shuffle=False, collate_fn=model_utils.my_collate_fn, ) print("Training begins ...") decrement_counter = 0 epoch = start_epoch global_start_time = time.time() for i in range(start_rollout_idx, len(ROLLOUT_LENS)): rollout_len = ROLLOUT_LENS[i] logger = Logger(log_dir, name="{}".format(rollout_len)) best_loss = float("inf") prev_loss = best_loss while epoch < args.end_epoch: start = time.time() train( train_loader, epoch, criterion, logger, encoder, decoder, encoder_optimizer, decoder_optimizer, model_utils, rollout_len, ) end = time.time() print( f"Training epoch completed in {(end - start) / 60.0} mins, Total time: {(end - global_start_time) / 60.0} mins" ) epoch += 1 if epoch % 5 == 0: start = time.time() prev_loss, decrement_counter = validate( val_loader, epoch, criterion, logger, encoder, decoder, encoder_optimizer, decoder_optimizer, model_utils, prev_loss, decrement_counter, rollout_len, ) end = time.time() print( f"Validation completed in {(end - start) / 60.0} mins, Total time: {(end - global_start_time) / 60.0} mins" ) # If val loss increased 3 times consecutively, go to next rollout length if decrement_counter > 2: break else: start_time = time.time() temp_save_dir = tempfile.mkdtemp() test_size = data_dict["test_input"].shape[0] test_data_subsets = baseline_utils.get_test_data_dict_subset( data_dict, args) # test_batch_size should be lesser than joblib_batch_size Parallel(n_jobs=1, verbose=2)( delayed(infer_helper)(test_data_subsets[i], i, encoder, decoder, model_utils, temp_save_dir) for i in range(0, test_size, args.joblib_batch_size)) baseline_utils.merge_saved_traj(temp_save_dir, args.traj_save_path) shutil.rmtree(temp_save_dir) end = time.time() 
print(f"Test completed in {(end - start_time) / 60.0} mins") print(f"Forecasted Trajectories saved at {args.traj_save_path}")
def infer_map(
        test_loader: torch.utils.data.DataLoader,
        encoder: EncoderRNN,
        decoder: DecoderRNN,
        start_idx: int,
        forecasted_save_dir: str,
        model_utils: ModelUtils,
):
    """Infer function for map-based LSTM baselines and save the forecasted trajectories.

    Args:
        test_loader: DataLoader for the test set
        encoder: Encoder network instance
        decoder: Decoder network instance
        start_idx: start index for the current joblib batch
        forecasted_save_dir: Directory where forecasted trajectories are to be saved
        model_utils: ModelUtils instance

    """
    args = parse_arguments()
    global best_loss
    forecasted_trajectories = {}
    for i, (_input, target, helpers) in enumerate(test_loader):

        _input = _input.to(device)

        batch_helpers = list(zip(*helpers))

        helpers_dict = {}
        for k, v in config.LSTM_HELPER_DICT_IDX.items():
            helpers_dict[k] = batch_helpers[v]

        # Set to eval mode
        encoder.eval()
        decoder.eval()

        # Encoder
        batch_size = _input.shape[0]
        input_length = _input.shape[1]

        # Iterate over every element in the batch
        for batch_idx in range(batch_size):
            num_candidates = len(
                helpers_dict["CANDIDATE_CENTERLINES"][batch_idx])
            curr_centroids = helpers_dict["CENTROIDS"][batch_idx]
            seq_id = int(helpers_dict["SEQ_PATHS"][batch_idx])
            abs_outputs = []

            # Predict using every centerline candidate for the current trajectory
            for candidate_idx in range(num_candidates):
                curr_centerline = helpers_dict["CANDIDATE_CENTERLINES"][
                    batch_idx][candidate_idx]
                curr_nt_dist = helpers_dict["CANDIDATE_NT_DISTANCES"][
                    batch_idx][candidate_idx]

                _input = torch.FloatTensor(
                    np.expand_dims(curr_nt_dist[:args.obs_len].astype(float),
                                   0)).to(device)

                # Initialize encoder hidden state
                encoder_hidden = model_utils.init_hidden(
                    1, encoder.module.hidden_size
                    if use_cuda else encoder.hidden_size)

                # Encode observed trajectory
                for ei in range(input_length):
                    encoder_input = _input[:, ei, :]
                    encoder_hidden = encoder(encoder_input, encoder_hidden)

                # Initialize decoder input with last coordinate in encoder
                decoder_input = encoder_input[:, :2]

                # Initialize decoder hidden state as encoder hidden state
                decoder_hidden = encoder_hidden

                decoder_outputs = torch.zeros(
                    (1, args.pred_len, 2)).to(device)

                # Decode hidden state in future trajectory
                for di in range(args.pred_len):
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden)
                    decoder_outputs[:, di, :] = decoder_output

                    # Use own predictions as inputs at next step
                    decoder_input = decoder_output

                # Get absolute trajectory
                abs_helpers = {}
                abs_helpers["REFERENCE"] = np.expand_dims(
                    np.array(helpers_dict["CANDIDATE_DELTA_REFERENCES"]
                             [batch_idx][candidate_idx]),
                    0,
                )
                abs_helpers["CENTERLINE"] = np.expand_dims(curr_centerline, 0)
                abs_input, abs_output = baseline_utils.get_abs_traj(
                    _input.clone().cpu().numpy(),
                    decoder_outputs.detach().clone().cpu().numpy(),
                    args,
                    abs_helpers,
                )

                # array of shape (1,30,2) to list of (30,2)
                abs_outputs.append(abs_output[0])

            forecasted_trajectories[seq_id] = abs_outputs

    os.makedirs(forecasted_save_dir, exist_ok=True)
    with open(os.path.join(forecasted_save_dir, f"{start_idx}.pkl"),
              "wb") as f:
        pkl.dump(forecasted_trajectories, f)
def infer_absolute(
        test_loader: torch.utils.data.DataLoader,
        encoder: EncoderRNN,
        decoder: DecoderRNN,
        start_idx: int,
        forecasted_save_dir: str,
        model_utils: ModelUtils,
):
    """Infer function for non-map LSTM baselines and save the forecasted trajectories.

    Args:
        test_loader: DataLoader for the test set
        encoder: Encoder network instance
        decoder: Decoder network instance
        start_idx: start index for the current joblib batch
        forecasted_save_dir: Directory where forecasted trajectories are to be saved
        model_utils: ModelUtils instance

    """
    args = parse_arguments()
    forecasted_trajectories = {}

    for i, (_input, target, helpers) in enumerate(test_loader):

        _input = _input.to(device)

        batch_helpers = list(zip(*helpers))

        helpers_dict = {}
        for k, v in config.LSTM_HELPER_DICT_IDX.items():
            helpers_dict[k] = batch_helpers[v]

        # Set to eval mode
        encoder.eval()
        decoder.eval()

        # Encoder
        batch_size = _input.shape[0]
        input_length = _input.shape[1]
        input_shape = _input.shape[2]

        # Initialize encoder hidden state
        encoder_hidden = model_utils.init_hidden(
            batch_size,
            encoder.module.hidden_size if use_cuda else encoder.hidden_size)

        # Encode observed trajectory
        for ei in range(input_length):
            encoder_input = _input[:, ei, :]
            encoder_hidden = encoder(encoder_input, encoder_hidden)

        # Initialize decoder input with last coordinate in encoder
        decoder_input = encoder_input[:, :2]

        # Initialize decoder hidden state as encoder hidden state
        decoder_hidden = encoder_hidden

        decoder_outputs = torch.zeros(
            (batch_size, args.pred_len, 2)).to(device)

        # Decode hidden state in future trajectory
        for di in range(args.pred_len):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            decoder_outputs[:, di, :] = decoder_output

            # Use own predictions as inputs at next step
            decoder_input = decoder_output

        # Get absolute trajectory
        abs_helpers = {}
        abs_helpers["REFERENCE"] = np.array(helpers_dict["DELTA_REFERENCE"])
        abs_helpers["TRANSLATION"] = np.array(helpers_dict["TRANSLATION"])
        abs_helpers["ROTATION"] = np.array(helpers_dict["ROTATION"])
        abs_inputs, abs_outputs = baseline_utils.get_abs_traj(
            _input.clone().cpu().numpy(),
            decoder_outputs.detach().clone().cpu().numpy(),
            args,
            abs_helpers,
        )

        for i in range(abs_outputs.shape[0]):
            seq_id = int(helpers_dict["SEQ_PATHS"][i])
            forecasted_trajectories[seq_id] = [abs_outputs[i]]

    with open(os.path.join(forecasted_save_dir, f"{start_idx}.pkl"),
              "wb") as f:
        pkl.dump(forecasted_trajectories, f)
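# Both infer functions above write one pickle "shard" per joblib batch: a dict
# mapping seq_id to a list of (pred_len, 2) numpy arrays (one array per
# candidate centerline in the map case, a single array otherwise). These
# shards are later merged by baseline_utils.merge_saved_traj. A hypothetical
# helper (not part of the original pipeline) to sanity-check one shard:
def _inspect_forecast_shard(shard_path: str) -> None:
    """Print a one-line summary of a saved forecast shard (illustrative only)."""
    with open(shard_path, "rb") as f:
        forecasts = pkl.load(f)
    seq_id, trajs = next(iter(forecasts.items()))
    print(f"{len(forecasts)} sequences; e.g. seq {seq_id}: "
          f"{len(trajs)} candidate(s), each of shape {trajs[0].shape}")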
def validate(
        val_loader: Any,
        epoch: int,
        criterion: Any,
        logger: Logger,
        encoder: Any,
        decoder: Any,
        encoder_optimizer: Any,
        decoder_optimizer: Any,
        model_utils: ModelUtils,
        prev_loss: float,
        decrement_counter: int,
        rollout_len: int = 30,
) -> Tuple[float, int]:
    """Validate the lstm network.

    Args:
        val_loader: DataLoader for the validation set
        epoch: epoch number
        criterion: Loss criterion
        logger: Tensorboard logger
        encoder: Encoder network instance
        decoder: Decoder network instance
        encoder_optimizer: optimizer for the encoder network
        decoder_optimizer: optimizer for the decoder network
        model_utils: instance of ModelUtils class
        prev_loss: Loss in the previous validation run
        decrement_counter: number of consecutive validation runs in which the
            loss increased for the current rollout
        rollout_len: current prediction horizon

    Returns:
        val_loss: average validation loss
        decrement_counter: updated counter

    """
    args = parse_arguments()
    global best_loss
    total_loss = []

    for i, (_input, target, helpers) in enumerate(val_loader):

        _input = _input.to(device)
        target = target.to(device)

        # Set to eval mode
        encoder.eval()
        decoder.eval()

        # Encoder
        batch_size = _input.shape[0]
        input_length = _input.shape[1]
        output_length = target.shape[1]
        input_shape = _input.shape[2]

        # Initialize encoder hidden state
        encoder_hidden = model_utils.init_hidden(
            batch_size,
            encoder.module.hidden_size if use_cuda else encoder.hidden_size)

        # Initialize loss
        loss = 0

        # Encode observed trajectory
        for ei in range(input_length):
            encoder_input = _input[:, ei, :]
            encoder_hidden = encoder(encoder_input, encoder_hidden)

        # Initialize decoder input with last coordinate in encoder
        decoder_input = encoder_input[:, :2]

        # Initialize decoder hidden state as encoder hidden state
        decoder_hidden = encoder_hidden

        decoder_outputs = torch.zeros(target.shape).to(device)

        # Decode hidden state in future trajectory
        for di in range(output_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            decoder_outputs[:, di, :] = decoder_output

            # Update losses for all benchmarks
            loss += criterion(decoder_output[:, :2], target[:, di, :2])

            # Use own predictions as inputs at next step
            decoder_input = decoder_output

        # Get average loss for pred_len
        loss = loss / output_length
        total_loss.append(loss)

        if i % 10 == 0:
            cprint(
                f"Val -- Epoch:{epoch}, loss:{loss}, Rollout: {rollout_len}",
                color="green",
            )

    # Save checkpoint if validation loss improved on the best so far
    val_loss = sum(total_loss) / len(total_loss)

    if val_loss <= best_loss:
        best_loss = val_loss
        if args.use_map:
            save_dir = "saved_models/lstm_map"
        elif args.use_social:
            save_dir = "saved_models/lstm_social"
        else:
            save_dir = "saved_models/lstm"

        os.makedirs(save_dir, exist_ok=True)
        model_utils.save_checkpoint(
            save_dir,
            {
                "epoch": epoch + 1,
                "rollout_len": rollout_len,
                "encoder_state_dict": encoder.state_dict(),
                "decoder_state_dict": decoder.state_dict(),
                "best_loss": val_loss,
                "encoder_optimizer": encoder_optimizer.state_dict(),
                "decoder_optimizer": decoder_optimizer.state_dict(),
            },
        )

    logger.scalar_summary(tag="Val/loss", value=val_loss.item(), step=epoch)

    # Keep track of the loss to decide when to change the prediction horizon
    if val_loss <= prev_loss:
        decrement_counter = 0
    else:
        decrement_counter += 1

    return val_loss, decrement_counter
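# The (prev_loss, decrement_counter) contract used by validate() can be
# distilled as follows (an illustrative restatement, not called anywhere):
# the counter resets whenever validation loss does not increase and is
# incremented otherwise, so main() advances to the next rollout length after
# three consecutive increases (decrement_counter > 2).
def _early_stopping_update(val_loss: float, prev_loss: float,
                           decrement_counter: int) -> Tuple[float, int]:
    """Illustrative restatement of validate()'s early-stopping bookkeeping."""
    decrement_counter = 0 if val_loss <= prev_loss else decrement_counter + 1
    return val_loss, decrement_counter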
def train(
        train_loader: Any,
        epoch: int,
        criterion: Any,
        logger: Logger,
        encoder: Any,
        decoder: Any,
        encoder_optimizer: Any,
        decoder_optimizer: Any,
        model_utils: ModelUtils,
        rollout_len: int = 30,
) -> None:
    """Train the lstm network.

    Args:
        train_loader: DataLoader for the train set
        epoch: epoch number
        criterion: Loss criterion
        logger: Tensorboard logger
        encoder: Encoder network instance
        decoder: Decoder network instance
        encoder_optimizer: optimizer for the encoder network
        decoder_optimizer: optimizer for the decoder network
        model_utils: instance of ModelUtils class
        rollout_len: current prediction horizon

    """
    args = parse_arguments()
    global global_step

    for i, (_input, target, helpers) in enumerate(train_loader):
        _input = _input.to(device)
        target = target.to(device)

        # Set to train mode
        encoder.train()
        decoder.train()

        # Zero the gradients
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        # Encoder
        batch_size = _input.shape[0]
        input_length = _input.shape[1]
        output_length = target.shape[1]
        input_shape = _input.shape[2]

        # Initialize encoder hidden state
        encoder_hidden = model_utils.init_hidden(
            batch_size,
            encoder.module.hidden_size if use_cuda else encoder.hidden_size)

        # Initialize loss
        loss = 0

        # Encode observed trajectory
        for ei in range(input_length):
            encoder_input = _input[:, ei, :]
            encoder_hidden = encoder(encoder_input, encoder_hidden)

        # Initialize decoder input with last coordinate in encoder
        decoder_input = encoder_input[:, :2]

        # Initialize decoder hidden state as encoder hidden state
        decoder_hidden = encoder_hidden

        decoder_outputs = torch.zeros(target.shape).to(device)

        # Decode hidden state in future trajectory
        for di in range(rollout_len):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            decoder_outputs[:, di, :] = decoder_output

            # Update loss
            loss += criterion(decoder_output[:, :2], target[:, di, :2])

            # Use own predictions as inputs at next step
            decoder_input = decoder_output

        # Get average loss for pred_len
        loss = loss / rollout_len

        # Backpropagate
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

        if global_step % 1000 == 0:

            # Log results
            print(
                f"Train -- Epoch:{epoch}, loss:{loss}, Rollout:{rollout_len}")
            logger.scalar_summary(tag="Train/loss",
                                  value=loss.item(),
                                  step=epoch)

        global_step += 1
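# Presumably this module is meant to be run directly (to train, or to dump
# test-set forecasts in test mode); the standard entry-point guard:
if __name__ == "__main__":
    main()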