def train(args):
    datasets = list(range(4))
    # Remove the leaveDataset from datasets
    datasets.remove(args.leaveDataset)

    # Create the SocialDataLoader object
    data_loader = SocialDataLoader(args.batch_size, args.seq_length,
                                   args.maxNumPeds, datasets,
                                   forcePreProcess=True)

    with open(os.path.join('save', 'social_config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Create a SocialModel object with the arguments
    model = SocialModel(args)

    # Initialize a TensorFlow session
    with tf.Session() as sess:
        # Initialize all variables in the graph
        sess.run(tf.initialize_all_variables())
        # Initialize a saver that saves all the variables in the graph
        saver = tf.train.Saver(tf.all_variables())
        # summary_writer = tf.train.SummaryWriter('/tmp/lstm/logs', graph_def=sess.graph_def)

        # For each epoch
        for e in range(args.num_epochs):
            # Assign the learning rate value for this epoch
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            # Reset the data pointers in the data_loader
            data_loader.reset_batch_pointer()

            # For each batch
            for b in range(data_loader.num_batches):
                # Tic
                start = time.time()

                # Get the source, target and dataset data for the next batch
                # x, y are input and target data: lists containing numpy arrays of size seq_length x maxNumPeds x 3
                # d is the list of dataset indices from which each sequence is generated (used to differentiate between datasets)
                x, y, d = data_loader.next_batch()

                # Variable to store the loss for this batch
                loss_batch = 0

                # For each sequence in the batch
                for batch in range(data_loader.batch_size):
                    # x_batch, y_batch and d_batch contain the source, target and dataset index data for
                    # seq_length long consecutive frames in the dataset
                    # x_batch, y_batch are numpy arrays of size seq_length x maxNumPeds x 3
                    # d_batch is a scalar identifying the dataset from which this sequence is extracted
                    x_batch, y_batch, d_batch = x[batch], y[batch], d[batch]

                    if d_batch == 0 and datasets[0] == 0:
                        dataset_data = [640, 480]
                    else:
                        dataset_data = [720, 576]

                    grid_batch = getSequenceGridMask(x_batch, dataset_data,
                                                     args.neighborhood_size,
                                                     args.grid_size)

                    # Feed the source, target data
                    feed = {
                        model.input_data: x_batch,
                        model.target_data: y_batch,
                        model.grid_data: grid_batch
                    }

                    train_loss, _ = sess.run([model.cost, model.train_op], feed)
                    loss_batch += train_loss

                end = time.time()
                loss_batch = loss_batch / data_loader.batch_size
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, loss_batch, end - start))

                # Save the model if the current epoch and batch number match the frequency
                if (e * data_loader.num_batches + b) % args.save_every == 0 and \
                        (e * data_loader.num_batches + b) > 0:
                    checkpoint_path = os.path.join('save', 'social_model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
def main():
    parser = argparse.ArgumentParser()
    # Observed length of the trajectory parameter
    parser.add_argument('--obs_length', type=int, default=8,
                        help='Observed length of the trajectory')
    # Predicted length of the trajectory parameter
    parser.add_argument('--pred_length', type=int, default=12,
                        help='Predicted length of the trajectory')
    # Test dataset
    parser.add_argument('--test_dataset', type=int, default=0,
                        help='Dataset to be tested on')
    # Parse the parameters
    sample_args = parser.parse_args()

    # Load the config file for the saved args
    with open(os.path.join('save', 'social_config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    # Create a SocialModel object with the saved_args and infer set to true
    model = SocialModel(saved_args, True)
    # Initialize a TensorFlow session
    sess = tf.InteractiveSession()
    # Initialize a saver
    saver = tf.train.Saver()

    # Get the checkpoint state for the model
    ckpt = tf.train.get_checkpoint_state('save')
    print('loading model: ', ckpt.model_checkpoint_path)

    # Restore the model at the checkpoint
    saver.restore(sess, ckpt.model_checkpoint_path)
    # saver.restore(sess, 'save/social_model.ckpt-800')

    # Dataset to get data from
    dataset = [sample_args.test_dataset]

    # Create a SocialDataLoader object with batch_size 1 and seq_length equal to obs_length + pred_length
    data_loader = SocialDataLoader(1, sample_args.pred_length + sample_args.obs_length,
                                   saved_args.maxNumPeds, dataset, True)

    # Reset all pointers of the data_loader
    data_loader.reset_batch_pointer()

    # Variable to maintain total error
    total_error = 0
    # For each batch
    for b in range(data_loader.num_batches):
        # Get the source, target and dataset data for the next batch
        x, y, d = data_loader.next_batch()

        # Batch size is 1
        x_batch, y_batch, d_batch = x[0], y[0], d[0]

        if d_batch == 0 and dataset[0] == 0:
            dimensions = [640, 480]
        else:
            dimensions = [720, 576]

        grid_batch = getSequenceGridMask(x_batch, dimensions,
                                         saved_args.neighborhood_size,
                                         saved_args.grid_size)

        obs_traj = x_batch[:sample_args.obs_length]
        obs_grid = grid_batch[:sample_args.obs_length]
        # obs_traj is an array of shape obs_length x maxNumPeds x 3

        complete_traj = model.sample(sess, obs_traj, obs_grid, dimensions,
                                     x_batch, sample_args.pred_length)

        # ipdb.set_trace()
        # complete_traj is an array of shape (obs_length + pred_length) x maxNumPeds x 3
        total_error += get_mean_error(complete_traj, x[0],
                                      sample_args.obs_length,
                                      saved_args.maxNumPeds)

        print("Processed trajectory number :", b, "out of",
              data_loader.num_batches, "trajectories")

    # Print the mean error across all the batches
    print("Total mean error of the model is",
          total_error / data_loader.num_batches)
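# NOTE (added sketch, not part of the original repo dump): the sampling scripts above
# call get_mean_error() without showing it. Below is a minimal sketch of what such an
# average displacement error could look like, assuming numpy is imported as np,
# trajectories of shape (obs_length + pred_length) x maxNumPeds x 3 with rows
# (ped_id, x, y), and ped_id == 0 meaning the pedestrian is absent in that frame.
# The name and the exact masking convention are assumptions, not the repo's code.
def get_mean_error_sketch(predicted_traj, true_traj, observed_length, maxNumPeds):
    # Euclidean error accumulated over the predicted part of the trajectory
    error = np.zeros(len(true_traj) - observed_length)
    for i in range(observed_length, len(true_traj)):
        pred_pos, true_pos = predicted_traj[i], true_traj[i]
        timestep_error, counter = 0.0, 0
        for j in range(maxNumPeds):
            # Skip pedestrians that are not present in this frame
            if true_pos[j, 0] == 0:
                continue
            timestep_error += np.linalg.norm(true_pos[j, 1:3] - pred_pos[j, 1:3])
            counter += 1
        if counter != 0:
            error[i - observed_length] = timestep_error / counter
    # Mean error over all predicted timesteps
    return np.mean(error)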
def train(args):
    datasets = list(range(5))
    # Remove the leaveDataset from datasets
    datasets.remove(args.leaveDataset)

    # Create the SocialDataLoader object
    data_loader = SocialDataLoader(args.batch_size, args.seq_length,
                                   args.maxNumPeds, datasets,
                                   forcePreProcess=True, infer=False)

    # Log directory
    log_directory = 'log/'
    log_directory += str(args.leaveDataset) + '/'

    # Logging files
    log_file_curve = open(os.path.join(log_directory, 'log_curve.txt'), 'w')
    log_file = open(os.path.join(log_directory, 'val.txt'), 'w')

    # Save directory
    save_directory = 'save/'
    save_directory += str(args.leaveDataset) + '/'

    with open(os.path.join(save_directory, 'social_config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Create a SocialModel object with the arguments
    model = SocialModel(args)

    # Session configuration: show device placement (useful with multiple GPUs),
    # let memory grow on demand and cap each GPU at 80% of its memory
    config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8

    # Initialize a TensorFlow session
    with tf.Session(config=config) as sess:
        # Initialize all variables in the graph
        sess.run(tf.global_variables_initializer())
        # Initialize a saver that saves all the variables in the graph
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
        # summary_writer = tf.train.SummaryWriter('/tmp/lstm/logs', graph_def=sess.graph_def)

        print('Training begin')
        best_val_loss = 100
        best_epoch = 0

        # For each epoch
        for e in range(args.num_epochs):
            # Assign the learning rate value for this epoch
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            # Reset the data pointers in the data_loader
            data_loader.reset_batch_pointer(valid=False)

            loss_epoch = 0

            # For each batch
            for b in range(data_loader.num_batches):
                # Tic
                start = time.time()

                # Get the source, target and dataset data for the next batch
                # x, y are input and target data: lists containing numpy arrays of size seq_length x maxNumPeds x 3
                # d is the list of dataset indices from which each sequence is generated (used to differentiate between datasets)
                x, y, d = data_loader.next_batch()

                # Variable to store the loss for this batch
                loss_batch = 0

                # For each sequence in the batch
                for batch in range(data_loader.batch_size):
                    # x_batch, y_batch and d_batch contain the source, target and dataset index data for
                    # seq_length long consecutive frames in the dataset
                    # x_batch, y_batch are numpy arrays of size seq_length x maxNumPeds x 3
                    # d_batch is a scalar identifying the dataset from which this sequence is extracted
                    x_batch, y_batch, d_batch = x[batch], y[batch], d[batch]

                    if d_batch == 0 and datasets[0] == 0:
                        dataset_data = [640, 480]
                    else:
                        dataset_data = [720, 576]

                    grid_batch = getSequenceGridMask(x_batch, dataset_data,
                                                     args.neighborhood_size,
                                                     args.grid_size)

                    # Feed the source, target data
                    feed = {
                        model.input_data: x_batch,
                        model.target_data: y_batch,
                        model.grid_data: grid_batch
                    }

                    train_loss, _ = sess.run([model.cost, model.train_op], feed)
                    loss_batch += train_loss

                end = time.time()
                loss_batch = loss_batch / data_loader.batch_size
                loss_epoch += loss_batch
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, loss_batch, end - start))

                # Save the model if the current epoch and batch number match the frequency
                '''
                if (e * data_loader.num_batches + b) % args.save_every == 0 and ((e * data_loader.num_batches + b) > 0):
                    checkpoint_path = os.path.join('save', 'social_model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                '''

            loss_epoch /= data_loader.num_batches
            log_file_curve.write(str(e) + ',' + str(loss_epoch) + ',')
            print('*****************')

            # Validation
            data_loader.reset_batch_pointer(valid=True)
            loss_epoch = 0

            for b in range(data_loader.valid_num_batches):
                # Get the source, target and dataset data for the next validation batch
                x, y, d = data_loader.next_valid_batch()

                # Variable to store the loss for this batch
                loss_batch = 0

                # For each sequence in the batch
                for batch in range(data_loader.batch_size):
                    x_batch, y_batch, d_batch = x[batch], y[batch], d[batch]

                    if d_batch == 0 and datasets[0] == 0:
                        dataset_data = [640, 480]
                    else:
                        dataset_data = [720, 576]

                    grid_batch = getSequenceGridMask(x_batch, dataset_data,
                                                     args.neighborhood_size,
                                                     args.grid_size)

                    # Feed the source, target data
                    feed = {
                        model.input_data: x_batch,
                        model.target_data: y_batch,
                        model.grid_data: grid_batch
                    }

                    train_loss = sess.run(model.cost, feed)
                    loss_batch += train_loss

                loss_batch = loss_batch / data_loader.batch_size
                loss_epoch += loss_batch

            loss_epoch /= data_loader.valid_num_batches

            # Update best validation loss so far
            if loss_epoch < best_val_loss:
                best_val_loss = loss_epoch
                best_epoch = e

            print('(epoch {}), valid_loss = {:.3f}'.format(e, loss_epoch))
            print('Best epoch', best_epoch, 'Best validation loss', best_val_loss)
            log_file_curve.write(str(loss_epoch) + '\n')
            print('*****************')

            # Save the model after each epoch
            print('Saving model')
            checkpoint_path = os.path.join(save_directory, 'social_model.ckpt')
            saver.save(sess, checkpoint_path, global_step=e)
            print("model saved to {}".format(checkpoint_path))

        print('Best epoch', best_epoch, 'Best validation loss', best_val_loss)
        log_file.write(str(best_epoch) + ',' + str(best_val_loss))

        # Close logging files
        log_file.close()
        log_file_curve.close()
def main():
    # Set random seed
    np.random.seed(1)

    parser = argparse.ArgumentParser()
    # Observed length of the trajectory parameter
    parser.add_argument('--obs_length', type=int, default=6,
                        help='Observed length of the trajectory')
    # Predicted length of the trajectory parameter
    parser.add_argument('--pred_length', type=int, default=6,
                        help='Predicted length of the trajectory')
    # Test dataset
    parser.add_argument('--test_dataset', type=int, default=3,
                        help='Dataset to be tested on')
    # Model to be loaded
    parser.add_argument('--epoch', type=int, default=0,
                        help='Epoch of model to be loaded')
    # Parse the parameters
    # sample_args = parser.parse_args()
    args = parser.parse_args()

    # Save directory
    save_directory = 'save/' + str(args.test_dataset) + '/'

    # Load the config file for the saved args
    with open(os.path.join(save_directory, 'social_config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    # Create a SocialModel object with the saved_args and infer set to true
    model = SocialModel(saved_args, True)
    # Initialize a TensorFlow session
    sess = tf.InteractiveSession()
    # Initialize a saver
    saver = tf.train.Saver()

    # Get the checkpoint state for the model
    ckpt = tf.train.get_checkpoint_state(save_directory)
    # print('loading model: ', ckpt.model_checkpoint_path)
    # print('number of checkpoints: ', len(ckpt.all_model_checkpoint_paths))
    print('loading model: ', ckpt.all_model_checkpoint_paths[args.epoch])

    # Restore the model at the checkpoint
    saver.restore(sess, ckpt.all_model_checkpoint_paths[args.epoch])

    # Dataset to get data from
    dataset = [0]

    # Create a SocialDataLoader object with batch_size 1 and seq_length equal to obs_length + pred_length
    data_loader = SocialDataLoader(1, args.pred_length + args.obs_length,
                                   saved_args.maxNumPeds, dataset, True, infer=True)

    # Reset all pointers of the data_loader
    data_loader.reset_batch_pointer()

    results = []
    # Variable to maintain total error
    total_error = 0
    # For each batch
    for b in range(data_loader.num_batches):
        # Get the source, target and dataset data for the next batch
        x, y, d = data_loader.next_batch(randomUpdate=False)

        # Batch size is 1
        x_batch, y_batch, d_batch = x[0], y[0], d[0]

        # if d_batch == 0 and dataset[0] == 0:
        #     dimensions = [640, 480]
        # else:
        #     dimensions = [720, 576]
        dimensions = [1640, 78]

        grid_batch = getSequenceGridMask(x_batch, dimensions,
                                         saved_args.neighborhood_size,
                                         saved_args.grid_size)

        obs_traj = x_batch[:args.obs_length]
        obs_grid = grid_batch[:args.obs_length]
        # obs_traj is an array of shape obs_length x maxNumPeds x 3

        print("********************** SAMPLING A NEW TRAJECTORY", b,
              "******************************")
        complete_traj = model.sample(sess, obs_traj, obs_grid, dimensions,
                                     x_batch, args.pred_length)

        # ipdb.set_trace()
        # complete_traj is an array of shape (obs_length + pred_length) x maxNumPeds x 3
        print('Length of sampled trajectory:', len(complete_traj))
        total_error += get_mean_error(complete_traj, x[0], args.obs_length,
                                      saved_args.maxNumPeds)

        print("Processed trajectory number :", b, "out of",
              data_loader.num_batches, "trajectories")
        # plot_trajectories(x[0], complete_traj, sample_args.obs_length)
        # return

        results.append((x[0], complete_traj, args.obs_length))

    # Print the mean error across all the batches
    print("Total mean error of the model is",
          total_error / data_loader.num_batches)

    print("Saving results")
    with open(os.path.join(save_directory, 'social_results.pkl'), 'wb') as f:
        pickle.dump(results, f)
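# NOTE (added sketch, not part of the original repo dump): every script above feeds the
# output of getSequenceGridMask() into model.grid_data, but the function itself is not
# shown. A minimal sketch of the underlying social-grid idea, assuming numpy is imported
# as np, frames of shape maxNumPeds x 3 with rows (ped_id, x, y), ped_id == 0 meaning
# absent, and a neighborhood extent normalised by `dimensions`; all of these conventions
# are assumptions about the repo's grid utilities, not a copy of them.
def get_sequence_grid_mask_sketch(sequence, dimensions, neighborhood_size, grid_size):
    seq_length, max_num_peds, _ = sequence.shape
    width, height = dimensions
    mask = np.zeros((seq_length, max_num_peds, max_num_peds, grid_size ** 2))
    # Neighborhood extent expressed in the (assumed) normalised coordinate system
    width_bound = neighborhood_size / (width * 1.0)
    height_bound = neighborhood_size / (height * 1.0)
    for t in range(seq_length):
        frame = sequence[t]
        for i in range(max_num_peds):
            if frame[i, 0] == 0:  # pedestrian i not present in this frame
                continue
            cx, cy = frame[i, 1], frame[i, 2]
            for j in range(max_num_peds):
                if j == i or frame[j, 0] == 0:
                    continue
                dx, dy = frame[j, 1] - cx, frame[j, 2] - cy
                # Neighbour j falls outside the neighborhood of pedestrian i
                if abs(dx) >= width_bound / 2 or abs(dy) >= height_bound / 2:
                    continue
                # Which grid cell of pedestrian i's neighborhood does j occupy?
                cell_x = int(np.floor((dx + width_bound / 2) / (width_bound / grid_size)))
                cell_y = int(np.floor((dy + height_bound / 2) / (height_bound / grid_size)))
                cell_x = min(cell_x, grid_size - 1)
                cell_y = min(cell_y, grid_size - 1)
                mask[t, i, j, cell_y * grid_size + cell_x] = 1
    return mask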
def train(args):
    datasets = list(range(5))
    # Remove the test dataset from the training datasets
    datasets.remove(args.test_dataset)

    # Create the data loader object. This object preprocesses the data into
    # batches of size args.batch_size, each of length args.seq_length
    data_loader = DataLoader(args.batch_size, args.obs_length, args.obs_length,
                             maxNumPeds=args.maxNumPeds, datasets=datasets,
                             forcePreProcess=True)

    # https://stackoverflow.com/a/41146954/2049763
    import pathlib

    # Log directory
    log_directory = os.path.join(args.train_logs, 'log', str(args.test_dataset))
    path = pathlib.Path(log_directory)
    path.mkdir(parents=True, exist_ok=True)

    # Logging files
    log_file_curve = open(os.path.join(log_directory, 'log_curve.txt'), 'w')
    log_file = open(os.path.join(log_directory, 'val.txt'), 'w')

    # Save directory
    save_directory = os.path.join(args.train_logs, 'save', str(args.test_dataset))
    path = pathlib.Path(save_directory)
    path.mkdir(parents=True, exist_ok=True)

    # Model directory
    model_directory = os.path.join(args.train_logs, 'model', str(args.test_dataset))
    path = pathlib.Path(model_directory)
    path.mkdir(parents=True, exist_ok=True)

    # Save the arguments in the config file
    with open(os.path.join(save_directory, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    checkpoint_path = os.path.join(model_directory, 'model.ckpt')

    # Create a Vanilla LSTM model with the arguments
    model = Model(args)

    # Session configuration: show device placement (useful with multiple GPUs),
    # let memory grow on demand and cap each GPU at 70% of its memory
    config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Initialize a TensorFlow session
    with tf.Session(config=config) as sess:
        # Summaries to be displayed in TensorBoard
        # Feed the mean training loss of an epoch to this placeholder
        tf_loss_ph = tf.placeholder(tf.float32, shape=None, name='loss_summary')
        # Create a scalar summary object for the loss so it can be displayed
        tf_loss_summary = tf.summary.scalar('loss', tf_loss_ph)

        # Feed the summed absolute embedding weights to this placeholder
        tf_embedding_w_ph = tf.placeholder(tf.float32, shape=None, name='embedding_w_summary')
        tf_embedding_w_summary = tf.summary.scalar('embedding_w', tf_embedding_w_ph)

        # Feed the summed absolute output weights to this placeholder
        tf_output_w_ph = tf.placeholder(tf.float32, shape=None, name='output_w_summary')
        tf_output_w_summary = tf.summary.scalar('output_w', tf_output_w_ph)

        # Feed the current learning rate to this placeholder
        tf_lr_ph = tf.placeholder(tf.float32, shape=None, name='learning_rate_summary')
        tf_lr_ph_summary = tf.summary.scalar('learning_rate', tf_lr_ph)

        # tf_val_loss_ph = tf.placeholder(tf.float32, shape=None, name='val_loss_summary')
        # tf_val_loss_summary = tf.summary.scalar('val_loss', tf_val_loss_ph)

        # Feed the mean validation error of an epoch to this placeholder
        tf_val_error_ph = tf.placeholder(tf.float32, shape=None, name='val_error_summary')
        tf_val_error_summary = tf.summary.scalar('val_error', tf_val_error_ph)

        # Merge the training and validation summaries once, so they can simply be
        # evaluated with a feed_dict at the end of each epoch
        training_summaries = tf.summary.merge([
            tf_loss_summary, tf_embedding_w_summary, tf_output_w_summary, tf_lr_ph_summary
        ])
        performance_summaries = tf.summary.merge([tf_loss_summary, tf_val_error_summary])

        # https://stackoverflow.com/a/40148954/2049763
        train_writer = tf.summary.FileWriter(model_directory, sess.graph)
        val_writer = tf.summary.FileWriter(os.path.join(model_directory, 'eval'))

        # Initialize all the variables in the graph
        sess.run(tf.global_variables_initializer())
        # Add all the variables to the list of variables to be saved
        saver = tf.train.Saver(tf.global_variables())

        best_val_loss = 100
        best_epoch = 0
        print("**** Training is starting !")

        # For each epoch
        for epoch in range(args.num_epochs):
            # Assign the learning rate (decayed according to the epoch number)
            # learning_rate = args.learning_rate * (args.decay_rate ** epoch)
            # Reset the pointers in the data loader object
            data_loader.reset_batch_pointer()

            loss_per_epoch = []
            total_steps = args.num_epochs * data_loader.num_batches

            # For each batch in this epoch
            for batch in range(data_loader.num_batches):
                # Tic
                start = time.time()

                # Get the source and target data of the current batch
                # x has the source data, y has the target data
                x, y, d = data_loader.next_batch(randomUpdate=True)

                # Variables to store the losses and gradients for this batch
                loss_per_batch = []
                gradients = {}

                # For each sequence in the batch
                for sequence in range(data_loader.batch_size):
                    # x_batch and y_batch contain the source and target data for
                    # seq_length long consecutive frames in the dataset:
                    # numpy arrays of size seq_length x maxNumPeds x 3
                    x_batch, y_batch = x[sequence], y[sequence]

                    # Feed the source, target data
                    feed = {
                        model.input_data: x_batch,
                        model.target_data: y_batch,
                        model.keep_prob: args.keep_prob,
                        model.lr: args.learning_rate,
                        model.training_epoch: epoch
                    }

                    # train_loss, gradient, _, lr = sess.run(
                    #     [model.cost, model.clipped_gradients, model.train_op, model.final_lr], feed)
                    train_loss, gradient, lr = sess.run(
                        [model.cost, model.clipped_gradients, model.final_lr], feed)

                    # Accumulate the clipped gradients of this sequence, per variable
                    trainable_vars = tf.trainable_variables()
                    for var, val in zip(trainable_vars, gradient):
                        # print(var.name, np.shape(val))
                        if var.name in gradients:
                            gradients[var.name].append(val)
                        else:
                            gradients[var.name] = [val]

                    if not np.isnan(train_loss):
                        loss_per_batch.append(train_loss)
                        # break
                    else:
                        print("epoch#{} batch#{} sequence#{} train_loss is NaN".format(
                            epoch + 1, batch + 1, sequence))

                avg_loss_per_batch = np.mean(loss_per_batch)
                loss_per_epoch.append(avg_loss_per_batch)
                my_global_step = (epoch + 1) * data_loader.num_batches + batch

                # Print epoch, batch, loss and time taken
                print(
                    "## {}/{} (Epoch: {}/{}), (Batch: {}/{}) train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(my_global_step, total_steps, epoch + 1, args.num_epochs,
                            batch + 1, data_loader.num_batches,
                            avg_loss_per_batch, time.time() - start))

                # Save the model if the current epoch and batch number match the frequency
                if my_global_step % args.save_every == 0 and (my_global_step > 0):
                    saver.save(sess, checkpoint_path, global_step=my_global_step)
                    print("model saved to {}".format(checkpoint_path))
                # break

                # ######## **** Mini batch optimization starts **** ########
                feed = {
                    model.lr: args.learning_rate,
                    model.training_epoch: epoch
                }
                grad_ph_key = 0
                for var, val_ in gradients.items():
                    # Average the accumulated gradients over the sequences of this batch
                    val = np.mean(val_, axis=0)
                    feed[model.grad_placeholders[grad_ph_key]] = val
                    grad_ph_key += 1
                    # print(var, np.shape(val_), np.shape(val))
                    val = np.sum(np.absolute(val))
                    # print("var: {}, value: {}".format(var, val))
                    if 'embedding_w' in var:
                        embedding_w_summary = val
                    elif 'output_w' in var:
                        output_w_summary = val

                # Apply the averaged gradients
                _ = sess.run([model.train_op_2], feed)
                # ######## **** Mini batch optimization ends **** ########
            # ######## **** Training batch iteration ends **** ########

            avg_loss_per_epoch = np.mean(loss_per_epoch)
            print('# (Epoch {}/{}), Learning rate = {} Training Loss = {:.3f}'.format(
                epoch + 1, args.num_epochs, lr, avg_loss_per_epoch))
            log_file_curve.write(str(epoch) + ',' + str(avg_loss_per_epoch) + ',')

            # Evaluate the training summaries defined above
            training_summaries_tensor = sess.run(
                training_summaries,
                feed_dict={
                    tf_loss_ph: avg_loss_per_epoch,
                    tf_embedding_w_ph: embedding_w_summary,
                    tf_output_w_ph: output_w_summary,
                    tf_lr_ph: lr
                })

            # ######## **** Validation starts **** ########
            data_loader.reset_batch_pointer(valid=True)
            val_loss_per_epoch, val_error_per_epoch = [], []

            for batch in range(data_loader.num_batches):
                # Get the source, target and dataset data for the next batch
                # x, y are input and target data: lists containing numpy arrays of size seq_length x maxNumPeds x 3
                # d is the list of dataset indices from which each sequence is generated
                x, y, d = data_loader.next_batch(valid=True, randomUpdate=True)

                # Variables to store the loss and error for this batch
                val_loss_per_batch, val_error_per_batch = [], []

                # For each sequence in the batch
                for sequence in range(data_loader.batch_size):
                    x_batch, y_batch = x[sequence], y[sequence]

                    # Feed the source, target data
                    feed = {
                        model.input_data: x_batch,
                        model.target_data: y_batch,
                        model.keep_prob: 1.
                    }

                    output, val_loss = sess.run([model.final_result, model.cost], feed)
                    val_loss_per_batch.append(val_loss)
                    val_error_per_batch.append(
                        model.training_mean_error(x_batch, y_batch, output))

                val_loss_per_epoch.append(np.mean(val_loss_per_batch))
                val_error_per_epoch.append(np.mean(val_error_per_batch))
                # break

            avg_val_loss_per_epoch = np.mean(val_loss_per_epoch)
            avg_val_error_per_epoch = np.mean(val_error_per_epoch)

            # Update best validation loss so far
            if avg_val_loss_per_epoch < best_val_loss:
                best_val_loss = avg_val_loss_per_epoch
                best_epoch = epoch

            print('# (Epoch {}/{}), Validation loss = {:.3f}, error = {:.3f}'.format(
                epoch + 1, args.num_epochs,
                avg_val_loss_per_epoch, avg_val_error_per_epoch))
            log_file_curve.write(str(avg_val_loss_per_epoch) + '\n')

            # Evaluate the validation summaries defined above
            # https://stackoverflow.com/a/51784126/2049763
            performance_summaries_tensor = sess.run(
                performance_summaries,
                feed_dict={
                    tf_loss_ph: avg_val_loss_per_epoch,
                    tf_val_error_ph: avg_val_error_per_epoch
                })

            # Write the obtained summaries to file, so they can be displayed in TensorBoard
            train_writer.add_summary(training_summaries_tensor, epoch)
            val_writer.add_summary(performance_summaries_tensor, epoch)

        print('Best epoch', best_epoch, 'Best validation loss', best_val_loss)
        log_file.write(str(best_epoch) + ',' + str(best_val_loss))

        my_global_step += 1
        saver.save(sess, checkpoint_path, global_step=my_global_step)
        print("model saved to {}".format(checkpoint_path))

        # Close logging files
        log_file.close()
        log_file_curve.close()
        train_writer.close()
        val_writer.close()
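# NOTE (added sketch, not part of the original repo dump): a minimal command-line entry
# point for the train() function directly above, listing only the arguments that train()
# actually references (batch_size, obs_length, maxNumPeds, test_dataset, train_logs,
# num_epochs, learning_rate, keep_prob, save_every). The default values are assumptions,
# and Model(args) will typically expect additional hyper-parameters not shown here.
def train_main_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--obs_length', type=int, default=8)
    parser.add_argument('--maxNumPeds', type=int, default=40)
    parser.add_argument('--test_dataset', type=int, default=0)
    parser.add_argument('--train_logs', type=str, default='train_logs')
    parser.add_argument('--num_epochs', type=int, default=50)
    parser.add_argument('--learning_rate', type=float, default=0.003)
    parser.add_argument('--keep_prob', type=float, default=0.8)
    parser.add_argument('--save_every', type=int, default=400)
    args = parser.parse_args()
    # Run the training loop defined above
    train(args)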
def train(args):
    if args.visible:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.visible

    save_path = make_save_path(args)
    dataset_path = args.dataset_path

    log_path = os.path.join(save_path, 'log')
    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # Create the SocialDataLoader object
    data_loader = SocialDataLoader(args.batch_size, args.seq_length,
                                   args.maxNumPeds, dataset_path,
                                   forcePreProcess=True)

    with open(os.path.join(save_path, 'social_config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Create a SocialModel object with the arguments
    model = SocialModel(args)

    all_loss = []
    # Initialize a TensorFlow session
    with tf.Session() as sess:
        # Initialize all variables in the graph
        sess.run(tf.initialize_all_variables())
        # Initialize a saver that saves all the variables in the graph
        saver = tf.train.Saver(tf.all_variables())
        summary_writer = tf.summary.FileWriter(log_path, sess.graph)

        # For each epoch
        for e in range(args.num_epochs):
            # Assign the learning rate value for this epoch
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            # Reset the data pointers in the data_loader
            data_loader.reset_batch_pointer()

            # For each batch
            for b in range(data_loader.num_batches):
                # Tic
                start = time.time()

                # Get the source, target and dataset data for the next batch
                # s_batch, t_batch are input and target data: lists containing numpy arrays of size seq_length x maxNumPeds x 3
                # d is the list of dataset indices from which each sequence is generated (used to differentiate between datasets)
                s_batch, t_batch, d = data_loader.next_batch()

                # Variable to store the loss for this batch
                loss_batch = 0

                # For each sequence in the batch
                for seq_num in range(data_loader.batch_size):
                    # s_seq, t_seq and d_seq contain the source, target and dataset index data for
                    # seq_length long consecutive frames in the dataset
                    # s_seq, t_seq are numpy arrays of size seq_length x maxNumPeds x 3
                    # d_seq is a scalar identifying the dataset from which this sequence is extracted
                    s_seq, t_seq, d_seq = s_batch[seq_num], t_batch[seq_num], d[seq_num]

                    '''
                    if d_seq == 0 and datasets[0] == 0:
                        dataset_data = [640, 480]
                    else:
                        dataset_data = [720, 576]
                    '''

                    grid_batch = getSequenceGridMask(s_seq, [0, 0],
                                                     args.neighborhood_size,
                                                     args.grid_size)

                    # Feed the source, target data
                    feed = {
                        model.input_data: s_seq,
                        model.target_data: t_seq,
                        model.grid_data: grid_batch
                    }

                    train_loss, _ = sess.run([model.cost, model.train_op], feed)
                    loss_batch += train_loss

                end = time.time()
                loss_batch = loss_batch / data_loader.batch_size
                all_loss.append(loss_batch)
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/seq_num = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, loss_batch, end - start))

                # Save the model if the current epoch and batch number match the frequency
                if (e * data_loader.num_batches + b) % args.save_every == 0 and \
                        (e * data_loader.num_batches + b) > 0:
                    checkpoint_path = os.path.join(save_path, 'social_model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

            # Write the per-batch losses accumulated so far to the log directory
            np.savetxt(os.path.join(log_path, 'loss.txt'), np.asarray(all_loss))
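# NOTE (added sketch, not part of the original repo dump): the train() function above
# dumps the per-batch training loss to 'loss.txt' in its log directory. A quick way to
# inspect the resulting loss curve; the use of matplotlib and the output file name are
# assumptions, not part of the original scripts.
def plot_loss_curve_sketch(log_path):
    import matplotlib.pyplot as plt
    # Load the losses written by np.savetxt in train()
    losses = np.loadtxt(os.path.join(log_path, 'loss.txt'))
    plt.plot(losses)
    plt.xlabel('training batch')
    plt.ylabel('loss')
    plt.title('Social LSTM training loss')
    plt.savefig(os.path.join(log_path, 'loss_curve.png'))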
def main():
    # Set random seed
    np.random.seed(1)

    parser = argparse.ArgumentParser()
    # Observed length of the trajectory parameter
    parser.add_argument('--obs_length', type=int, default=8,
                        help='Observed length of the trajectory')
    # Predicted length of the trajectory parameter
    parser.add_argument('--pred_length', type=int, default=12,
                        help='Predicted length of the trajectory')
    # Test dataset
    parser.add_argument('--test_dataset', type=str,
                        help='Dataset to be tested on')
    parser.add_argument('--visible', type=str, required=False, default=None,
                        help='GPU to run on')
    parser.add_argument('--model_path', type=str)
    # Parse the parameters
    sample_args = parser.parse_args()

    if sample_args.visible:
        os.environ["CUDA_VISIBLE_DEVICES"] = sample_args.visible

    save_path = sample_args.model_path

    # Load the config file for the saved args
    with open(os.path.join(save_path, 'social_config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    # Create a SocialModel object with the saved_args and infer set to true
    model = SocialModel(saved_args, True)
    # Initialize a TensorFlow session
    sess = tf.InteractiveSession()
    # Initialize a saver
    saver = tf.train.Saver()

    # Get the checkpoint state for the model
    ckpt = tf.train.get_checkpoint_state(save_path)
    print('loading model: ', ckpt.model_checkpoint_path)

    # Restore the model at the checkpoint
    saver.restore(sess, ckpt.model_checkpoint_path)

    # Create a SocialDataLoader object with batch_size 1 and seq_length equal to obs_length + pred_length
    data_loader = SocialDataLoader(1, sample_args.pred_length + sample_args.obs_length,
                                   saved_args.maxNumPeds, sample_args.test_dataset, True)

    # Reset all pointers of the data_loader
    data_loader.reset_batch_pointer()

    results = []
    # Variable to maintain total error
    total_error = 0
    # For each batch
    for b in range(data_loader.num_batches):
        # Get the source, target and dataset data for the next batch
        x, y, d = data_loader.next_batch(randomUpdate=False)

        # Batch size is 1
        x_batch, y_batch, d_batch = x[0], y[0], d[0]

        '''
        if d_batch == 0 and dataset[0] == 0:
            dimensions = [640, 480]
        else:
            dimensions = [720, 576]
        '''

        grid_batch = getSequenceGridMask(x_batch, [0, 0],
                                         saved_args.neighborhood_size,
                                         saved_args.grid_size)

        obs_traj = x_batch[:sample_args.obs_length]
        obs_grid = grid_batch[:sample_args.obs_length]
        # obs_traj is an array of shape obs_length x maxNumPeds x 3

        print("********************** SAMPLING A NEW TRAJECTORY", b,
              "******************************")
        complete_traj = model.sample(sess, obs_traj, obs_grid, [0, 0],
                                     x_batch, sample_args.pred_length)

        # ipdb.set_trace()
        # complete_traj is an array of shape (obs_length + pred_length) x maxNumPeds x 3
        total_error += get_mean_error(complete_traj, x[0],
                                      sample_args.obs_length,
                                      saved_args.maxNumPeds)

        print("Processed trajectory number :", b, "out of",
              data_loader.num_batches, "trajectories")
        # plot_trajectories(x[0], complete_traj, sample_args.obs_length)
        # return

        results.append((x[0], complete_traj, sample_args.obs_length))

    # Print the mean error across all the batches
    print("Total mean error of the model is",
          total_error / data_loader.num_batches)

    print("Saving results")
    with open(os.path.join(save_path, 'social_results.pkl'), 'wb') as f:
        pickle.dump(results, f)
with open(os.path.join(save_directory, 'social_config.pkl'), 'rb') as f:
    saved_args = pickle.load(f)

# f = open('/home/hesl/PycharmProjects/social-lstm-tf-HW/ResultofTrainingKITTI-13NTestonKITTI-17/save/social_results.pkl', 'rb')
# f = open('/home/hesl/PycharmProjects/social-lstm-tf-HW/ResultofTrainingETH1TestETH0/save/social_results.pkl', 'rb')
f = open('/home/hesl/PycharmProjects/social-lstm-tf-HW/save/social_results.pkl', 'rb')
results = pickle.load(f)

dataset = [sample_args.visual_dataset]
data_loader = SocialDataLoader(1, sample_args.pred_length + sample_args.obs_length,
                               saved_args.maxNumPeds, dataset, True, infer=True)
# [7, 16]
# print(data_loader.data[0][0].shape)

# '''Visualize Ground Truth (u, v)'''
# for j in range(len(data_loader.frameList[0])):
#
#     # sourceFileName = "/home/hesl/PycharmProjects/social-lstm-tf-HW/data/KITTI-17/img1/" + str(j + 1).zfill(6) + ".jpg"
#     # Visualize ETH/hotel
#     # sourceFileName = "/media/hesl/OS/Documents and Settings/N1701420F/Desktop/video/eth/hotel/frame-" + str(int(data_loader.frameList[0][j])).zfill(3) + ".jpg"
#     # ETH/eth
#     # sourceFileName = "/media/hesl/OS/Documents and Settings/N1701420F/Desktop/video/eth/eth/frame-" + str(int(data_loader.frameList[0][j])).zfill(3) + ".jpeg"
def sample(args, save_location, model_directory):
    results_pkl = os.path.join(save_location, 'results.pkl')

    with open(os.path.join(save_location, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    # Create a SocialModel object with the saved_args and infer set to true
    model = Model(saved_args, True)

    # Initialize a TensorFlow session
    # Show device placement (useful with multiple GPUs) and cap each GPU at 40% of its memory
    config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.InteractiveSession(config=config)

    ckpt = tf.train.get_checkpoint_state(model_directory)
    # Initialize a saver
    saver = tf.train.Saver()
    # Restore the model at the checkpoint
    saver.restore(sess, ckpt.model_checkpoint_path)

    # Dataset to get data from
    dataset = [args.test_dataset]

    # Create a SocialDataLoader object with batch_size 1, the observed length and the predicted length
    data_loader = SocialDataLoader(1, args.obs_length, args.pred_length,
                                   saved_args.maxNumPeds, dataset,
                                   forcePreProcess=True, infer=True)

    # Reset all pointers of the data_loader
    data_loader.reset_batch_pointer()

    results = []
    # Variables to maintain total error and final displacement error
    total_error = 0
    final_displacement_error = []

    # For each batch
    for b in range(data_loader.num_batches):
        # Get the source, target and dataset data for the next batch
        x, y, d = data_loader.next_batch(randomUpdate=False)

        # Batch size is 1
        x_batch, y_batch = x[0], y[0]
        true_traj = np.concatenate((x_batch, y_batch[-args.pred_length:]), axis=0)

        # complete_traj is an array of shape (obs_length + pred_length) x maxNumPeds x 3
        complete_traj = model.sample(sess, x_batch, true_traj, args.pred_length)

        total_error += model.get_mean_error(complete_traj, true_traj,
                                            args.obs_length, saved_args.maxNumPeds)
        final_error = model.get_final_displacement_error(complete_traj, true_traj,
                                                         saved_args.maxNumPeds)
        if final_error is not None:
            final_displacement_error.append(final_error)

        print("Processed trajectory number : ", b, "out of ",
              data_loader.num_batches, " trajectories")

        results.append((true_traj, complete_traj, args.obs_length))

    print("Saving results")
    with open(results_pkl, 'wb') as f:
        pickle.dump(results, f)

    # Print the mean error across all the batches
    print("Total mean error of the model is {:.3f}".format(
        total_error / data_loader.num_batches))
    print("Total final error of the model is {:.3f}".format(
        np.mean(final_displacement_error)))
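# NOTE (added sketch, not part of the original repo dump): one possible way to invoke
# sample() above, reusing the directory layout created by the train() variant that writes
# 'config.pkl' to <train_logs>/save/<test_dataset> and checkpoints to
# <train_logs>/model/<test_dataset>. Argument names and defaults are assumptions.
def sample_main_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('--obs_length', type=int, default=8)
    parser.add_argument('--pred_length', type=int, default=12)
    parser.add_argument('--test_dataset', type=int, default=0)
    parser.add_argument('--train_logs', type=str, default='train_logs')
    args = parser.parse_args()

    # Locations matching the training script's save and model directories
    save_location = os.path.join(args.train_logs, 'save', str(args.test_dataset))
    model_directory = os.path.join(args.train_logs, 'model', str(args.test_dataset))
    sample(args, save_location, model_directory)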
def main():
    np.random.seed(1)

    parser = argparse.ArgumentParser()
    # Observed length of the trajectory
    parser.add_argument('--obs_length', type=int, default=7,
                        help='Observed length of the trajectory')
    # Predicted length of the trajectory
    parser.add_argument('--pred_length', type=int, default=5,
                        help='Predicted length of the trajectory')
    # Test dataset
    parser.add_argument('--test_dataset', type=int, default=2,
                        help='Dataset to be tested on')
    # Model (epoch) to be loaded
    parser.add_argument('--epoch', type=int, default=8,
                        help='Epoch of model to be loaded')
    sample_args = parser.parse_args(args=[])

    # Save directory
    save_directory = 'save/' + str(sample_args.test_dataset) + '/'

    # Load the config file for the saved args
    with open(os.path.join(save_directory, 'social_config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    # Create a SocialModel object with the saved_args and infer set to true
    model = SocialModel(saved_args, True)

    # Initialize a TensorFlow session
    # Show device placement (useful with multiple GPUs) and cap each GPU at 80% of its memory
    config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.InteractiveSession(config=config)

    # Initialize a saver
    saver = tf.train.Saver()

    # Get the checkpoint state for the model
    ckpt = tf.train.get_checkpoint_state(save_directory)
    # print('loading model: ', ckpt.model_checkpoint_path)
    print('loading model: ', ckpt.all_model_checkpoint_paths[sample_args.epoch])

    # Restore the model at the checkpoint
    saver.restore(sess, ckpt.all_model_checkpoint_paths[sample_args.epoch])

    # Dataset to get data from
    dataset = [sample_args.test_dataset]

    # Create a SocialDataLoader object with batch_size 1 and seq_length equal to obs_length + pred_length
    data_loader = SocialDataLoader(1, sample_args.pred_length + sample_args.obs_length,
                                   saved_args.maxNumPeds, dataset, True, infer=True)

    # Reset all pointers of the data_loader
    data_loader.reset_batch_pointer()

    results = []
    # Variable to maintain total error
    total_error = 0
    # For each batch
    for b in range(data_loader.num_batches):
        # Get the source, target and dataset data for the next batch
        x, y, d = data_loader.next_batch(randomUpdate=False)

        # Batch size is 1
        x_batch, y_batch, d_batch = x[0], y[0], d[0]

        if d_batch == 0 and dataset[0] == 0:
            dimensions = [640, 480]
        else:
            dimensions = [720, 576]

        grid_batch = getSequenceGridMask(x_batch, dimensions,
                                         saved_args.neighborhood_size,
                                         saved_args.grid_size)

        obs_traj = x_batch[:sample_args.obs_length]
        obs_grid = grid_batch[:sample_args.obs_length]
        # obs_traj is an array of shape obs_length x maxNumPeds x 3

        print("********************** SAMPLING A NEW TRAJECTORY", b,
              "******************************")
        complete_traj = model.sample(sess, obs_traj, obs_grid, dimensions,
                                     x_batch, sample_args.pred_length)

        # ipdb.set_trace()
        # complete_traj is an array of shape (obs_length + pred_length) x maxNumPeds x 3
        total_error += get_mean_error(complete_traj, x[0],
                                      sample_args.obs_length,
                                      saved_args.maxNumPeds)

        print("Processed trajectory number : ", b, "out of ",
              data_loader.num_batches, " trajectories")
        print('Model loaded: ', ckpt.all_model_checkpoint_paths[sample_args.epoch])
        # plot_trajectories(x[0], complete_traj, sample_args.obs_length)
        # return

        results.append((x[0], complete_traj, sample_args.obs_length))

    # Print the mean error across all the batches
    print("Total mean error of the model is",
          total_error / data_loader.num_batches)

    print("Saving results")
    with open(os.path.join(save_directory, 'social_results.pkl'), 'wb') as f:
        pickle.dump(results, f)