# NOTE(review): the two prints below look like the tail of a parameter-printing
# helper (probably print_params) whose header is outside this view; if so,
# re-indent them under that function.
print('Precrop: {}'.format(PRECROP))
print('Total Epochs: {}'.format(NUM_EPOCHS))


if __name__ == '__main__':
    # Script entry point. The part visible here builds the generator, loads
    # previously saved weights into it, moves it to the selected device, and
    # makes sure the output video directory exists.
    if PRINT_PARAMS:
        print_params()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # generator
    model = FullNetwork(output_shape=(BATCH_SIZE, CHANNELS, FRAMES, HEIGHT, WIDTH))
    # map_location remaps the stored tensors onto the device chosen above, so
    # a checkpoint written on a GPU machine still loads on a CPU-only host.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model = model.to(device)

    print(model)

    if device == 'cuda':
        # NOTE(review): `net` is only bound on the CUDA path; any later code
        # (outside this view) that uses it must not run on CPU.
        net = torch.nn.DataParallel(model)
        cudnn.benchmark = True

    criterion = nn.MSELoss()

    # exist_ok avoids the check-then-create race and also creates any missing
    # parent directories.
    os.makedirs(output_video_dir, exist_ok=True)
print('Total Epochs: {}'.format(NUM_EPOCHS)) print('Learning Rate: {}'.format(LR)) if __name__ == '__main__': """ Main function to carry out the training loop. This function creates the generator and data loaders. Then, it trains the generator. """ if PRINT_PARAMS: print_params() device = 'cuda' if torch.cuda.is_available() else 'cpu' # generator model = FullNetwork(output_shape=(BATCH_SIZE, CHANNELS, FRAMES, HEIGHT, WIDTH)) model = model.to(device) print(model) if device == 'cuda': net = torch.nn.DataParallel(model) cudnn.benchmark = True criterion = nn.MSELoss() optimizer = optim.Adam(model.parameters(), lr=LR) # data trainset = NTUDataset(root_dir=data_root_dir, data_file=train_splits, resize_height=HEIGHT,
def train_network(gpu_config):
    """Build the network and train it for ``config.n_epochs`` epochs.

    Optionally initialises both encoders from the ResNet checkpoint
    (``config.use_resnet_weights``) and/or resumes from a previously saved
    model (``config.use_trained_weights``). After every epoch the network is
    saved periodically and whenever the epoch loss improves on the best loss
    seen so far. The default graph is reset once the session has closed.

    Args:
        gpu_config: session configuration passed to ``tf.Session(config=...)``.
    """
    net = FullNetwork(input_shape=(config.img_height, config.img_width))

    with tf.Session(graph=net.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()

        if config.use_resnet_weights:
            # Both encoders are initialised from the same checkpoint file.
            for encoder_scope in ('mem_encoder/', 'curr_encoder/'):
                _restore_encoder_from_resnet(
                    sess, net.variables_to_restore, encoder_scope)

        writer = tf.summary.FileWriter(
            '{0}model_{1}'.format(config.tf_logs_dir, config.model_num),
            sess.graph)
        try:
            prev_batch_num = 0
            get_num_params()

            if config.use_trained_weights:
                net.load(sess, config.save_file_best_name % config.epoch_save)
                print('Loaded in old weights')

            best_loss = 1000000
            print('Training on %s' % config.data_dir)
            for ep in range(1, config.n_epochs + 1):
                print(20 * '*', 'epoch', ep, 20 * '*')
                sys.stdout.flush()

                # Trains network for 1 epoch (a fresh data generator per epoch)
                data_gen = TrainDataGen(
                    config.wait_for_data,
                    crop_size=(config.img_height, config.img_width),
                    n_frames=config.n_frames,
                    rand_frame_skip=config.rand_frame_skip,
                    use_all=config.use_all_frames)
                seg_loss, prev_batch_num = train_one_epoch(
                    sess, net, data_gen, writer, prev_batch_num)

                # Periodic save every config.save_every_n_epochs epochs; the
                # file name is always formatted with 1, so each periodic save
                # overwrites the previous one.
                if ep % config.save_every_n_epochs == 0:
                    _save_network(net, sess, config.save_file_name % 1)

                # Save whenever the loss returned by train_one_epoch improves
                # on the best value seen so far.
                if seg_loss < best_loss:
                    best_loss = seg_loss
                    _save_network(net, sess, config.save_file_best_name % 0)
        finally:
            # Close the summary writer even when training is interrupted.
            writer.close()

    tf.reset_default_graph()


def _restore_encoder_from_resnet(sess, variables, scope, old_scope=''):
    """Restore the variables under ``scope`` from ``config.resnet_file_name``.

    Checkpoint names are obtained by stripping the ``scope`` prefix and the
    ``:<index>`` suffix from each variable name; only names that start with
    ``old_scope`` are kept.
    """
    name_map = {}
    for variable in variables:
        if not variable.name.startswith(scope):
            continue
        stripped = variable.name[len(scope):]
        if stripped.startswith(old_scope):
            name_map[stripped.split(":")[0]] = variable
    tf.train.Saver(name_map).restore(sess, config.resnet_file_name)


def _save_network(net, sess, path):
    """Save ``net`` to ``path``; report a failure instead of aborting training."""
    try:
        net.save(sess, path)
    except Exception as err:  # best-effort: a failed save must not stop training
        print('Failed to save network!!!', err)
        sys.stdout.flush()