def save_model_predictions(p, val_loader, model):
    """ Save model predictions for all tasks.

    Runs the model over ``val_loader`` and writes one result file per
    (task, image) under p['save_dir']/<task>/. Depth predictions are saved
    as .mat files; every other task is written as a uint8 .png image.
    """
    print('Save model predictions to {}'.format(p['save_dir']))
    model.eval()
    tasks = p.TASKS.NAMES
    save_dirs = {task: os.path.join(p['save_dir'], task) for task in tasks}
    for save_dir in save_dirs.values():
        mkdir_if_missing(save_dir)

    for sample in val_loader:
        inputs, meta = sample['image'].cuda(non_blocking=True), sample['meta']
        output = model(inputs)

        # Reuse the `tasks` local instead of re-reading p.TASKS.NAMES.
        for task in tasks:
            output_task = get_output(output[task], task).cpu().data.numpy()
            for jj in range(int(inputs.size()[0])):
                # Skip samples whose ground truth is entirely the ignore
                # value (255). Compute unique() once instead of twice.
                unique_vals = sample[task][jj].unique()
                if len(unique_vals) == 1 and unique_vals == 255:
                    continue
                fname = meta['image'][jj]
                # Resize the prediction back to the original image size
                # using the task-specific interpolation flag.
                result = cv2.resize(
                    output_task[jj],
                    dsize=(meta['im_size'][1][jj], meta['im_size'][0][jj]),
                    interpolation=p.TASKS.INFER_FLAGVALS[task])
                if task == 'depth':
                    sio.savemat(
                        os.path.join(save_dirs[task], fname + '.mat'),
                        {'depth': result})
                else:
                    imageio.imwrite(
                        os.path.join(save_dirs[task], fname + '.png'),
                        result.astype(np.uint8))
def get_config(self):
    """ Return the resolution and traversible of the SBPD building.

    When the params request it (or meshes are not loaded), a precomputed
    traversible is read from data.pkl. Otherwise it is computed from the
    loaded building and cached to data.pkl (protocol 2 so a python 2.7
    runtime can also read it, as some loading libraries do not match
    across versions).
    """
    traversible_dir = self.p.traversible_dir
    traversible_dir = os.path.join(traversible_dir,
                                   self.p.building_params.building_name)

    if self.p.building_params.load_traversible_from_pickle_file or not self.p.building_params.load_meshes:
        # Load the precomputed traversible from disk.
        filename = os.path.join(traversible_dir, 'data.pkl')
        with open(filename, 'rb') as f:
            data = pickle.load(f)
        resolution = data['resolution']
        traversible = data['traversible']
    else:
        # Computing from meshes requires python 3. Use version_info
        # (robust integer compare, consistent with _data_file_name)
        # instead of the fragile string compare on sys.version.
        assert sys.version_info[0] == 3
        resolution, traversible = self.building.env.resolution, self.building.traversible

        mkdir_if_missing(traversible_dir)
        filenames = os.listdir(traversible_dir)
        if 'data.pkl' not in filenames:
            data = {'resolution': resolution, 'traversible': traversible}
            with open(os.path.join(traversible_dir, 'data.pkl'), 'wb') as f:
                # Save with protocol = 2 for python2.7
                pickle.dump(data, f, protocol=2)

    return resolution, traversible
def create_session_dir(self, args):
    """ Create the job and the session directories. """
    # When testing, store the test data next to the checkpoint of the
    # trained network being evaluated; otherwise use the job dir as given.
    if args.command == 'test':
        ckpt_path = self.p.trainer.ckpt_path
        trainer_dir = ckpt_path.split('checkpoints')[0]
        ckpt_number = int(ckpt_path.split('checkpoints')[1].split('-')[1])
        job_dir = os.path.join(trainer_dir, 'test',
                               'checkpoint_{:d}'.format(ckpt_number),
                               args.job_dir)
    else:
        job_dir = args.job_dir

    # Create the job directory if required
    utils.mkdir_if_missing(job_dir)
    self.p.job_dir = job_dir

    # Create a timestamped session directory inside the job directory.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    self.p.session_dir = os.path.join(self.p.job_dir,
                                      'session_%s' % timestamp)
    os.mkdir(self.p.session_dir)
def _data_file_name(self, file_format='.pkl', v0=None, incorrectly_binned=True):
    """Returns the unique file name given either a starting velocity or
    incorrectly binned=True."""
    # One of these must be True
    assert (v0 is not None or incorrectly_binned)

    p = self.params
    # Assemble the directory hierarchy that uniquely identifies this
    # control pipeline configuration.
    base_dir = os.path.join(
        p.dir,
        'control_pipeline_v0',
        'planning_horizon_{:d}_dt_{:.2f}'.format(
            p.planning_horizon, p.system_dynamics_params.dt),
        self.system_dynamics.name,
        self.waypoint_grid.descriptor_string,
        '{:d}_velocity_bins'.format(p.binning_parameters.num_bins))

    # If using python 2.7 on the real robot the control pipeline will need
    # to be converted to a python 2.7 friendly pickle format and will be
    # stored in the subfolder py27.
    if sys.version_info[0] == 2:  # If using python 2.7 on real robot
        base_dir = os.path.join(base_dir, 'py27')

    utils.mkdir_if_missing(base_dir)

    if v0 is not None:
        leaf = 'velocity_{:.3f}{:s}'.format(v0, file_format)
    elif incorrectly_binned:
        leaf = 'incorrectly_binned{:s}'.format(file_format)
    else:
        assert (False)
    return os.path.join(base_dir, leaf)
def create_config(args):
    ## NOTE : edit temporary hyperparams here
    description = args.description

    # Config for environment path
    with open(args.config_env, 'r') as stream:
        root_dir = yaml.safe_load(stream)['root_dir']

    with open(args.config_exp, 'r') as stream:
        config = yaml.safe_load(stream)

    # Copy every experiment setting into an EasyDict.
    cfg = EasyDict()
    for key, value in config.items():
        cfg[key] = value

    # Set paths for pretext task (These directories are needed in every stage)
    base_dir = os.path.join(root_dir, cfg['train_db_name'])
    if description:
        base_dir = base_dir + "-" + description
    pretext_dir = os.path.join(base_dir, 'pretext')
    mkdir_if_missing(base_dir)
    mkdir_if_missing(pretext_dir)
    cfg['pretext_dir'] = pretext_dir
    cfg['pretext_checkpoint'] = os.path.join(pretext_dir, 'checkpoint.pth.tar')
    cfg['pretext_model'] = os.path.join(pretext_dir, 'model.pth.tar')
    cfg['topk_neighbors_train_path'] = os.path.join(pretext_dir,
                                                    'topk-train-neighbors.npy')
    cfg['topk_neighbors_val_path'] = os.path.join(pretext_dir,
                                                  'topk-val-neighbors.npy')

    # If we perform clustering or self-labeling step we need additional paths.
    # We also include a run identifier to support multiple runs w/ same hyperparams.
    if cfg['setup'] in ['scan', 'selflabel']:
        # base_dir is identical to the one computed above.
        scan_dir = os.path.join(base_dir, 'scan')
        selflabel_dir = os.path.join(base_dir, 'selflabel')
        mkdir_if_missing(base_dir)
        mkdir_if_missing(scan_dir)
        mkdir_if_missing(selflabel_dir)
        cfg['scan_dir'] = scan_dir
        cfg['scan_checkpoint'] = os.path.join(scan_dir, 'checkpoint.pth.tar')
        cfg['scan_model'] = os.path.join(scan_dir, 'model.pth.tar')
        cfg['selflabel_dir'] = selflabel_dir
        cfg['selflabel_checkpoint'] = os.path.join(selflabel_dir,
                                                   'checkpoint.pth.tar')
        cfg['selflabel_model'] = os.path.join(selflabel_dir, 'model.pth.tar')

    return cfg
def _create_image_dir(self, parent_dir):
    """ Create a new directory where image data can be saved. """
    from utils import utils
    img_dir = os.path.join(parent_dir, self._get_image_dir_name())
    utils.mkdir_if_missing(img_dir)
    return img_dir
def create_config(config_file_env, config_file_exp):
    # Config for environment path: the root dir is keyed by hostname.
    computer_name = socket.gethostname()
    with open(config_file_env, 'r') as stream:
        root_dir = yaml.safe_load(stream)[computer_name]['root_dir']

    with open(config_file_exp, 'r') as stream:
        config = yaml.safe_load(stream)

    # Copy every experiment setting into an EasyDict.
    cfg = EasyDict()
    for key, value in config.items():
        cfg[key] = value

    # Set paths for pretext task (These directories are needed in every stage)
    base_dir = os.path.join(root_dir, cfg['train_db_name'])
    pretext_dir = os.path.join(base_dir, 'pretext')
    mkdir_if_missing(base_dir)
    mkdir_if_missing(pretext_dir)
    cfg['pretext_dir'] = pretext_dir
    cfg['pretext_checkpoint'] = os.path.join(pretext_dir, 'checkpoint.pth.tar')
    cfg['pretext_model'] = os.path.join(pretext_dir, 'model.pth.tar')
    fine_tune_dir = os.path.join(pretext_dir, 'fine-tune')
    cfg['pretext_fine_tune_checkpoint'] = os.path.join(fine_tune_dir,
                                                       'checkpoint.pth.tar')
    cfg['pretext_fine_tune_model'] = os.path.join(fine_tune_dir,
                                                  'model.pth.tar')
    cfg['topk_neighbors_train_path'] = os.path.join(pretext_dir,
                                                    'topk-train-neighbors.npy')
    cfg['topk_neighbors_val_path'] = os.path.join(pretext_dir,
                                                  'topk-val-neighbors.npy')

    # If we perform clustering or self-labeling step we need additional paths.
    # We also include a run identifier to support multiple runs w/ same hyperparams.
    if cfg['setup'] in ['scan', 'selflabel']:
        # base_dir is identical to the one computed above.
        scan_dir = os.path.join(base_dir, 'scan')
        selflabel_dir = os.path.join(base_dir, 'selflabel')
        mkdir_if_missing(base_dir)
        mkdir_if_missing(scan_dir)
        mkdir_if_missing(selflabel_dir)
        cfg['scan_dir'] = scan_dir
        cfg['scan_checkpoint'] = os.path.join(scan_dir, 'checkpoint.pth.tar')
        cfg['scan_model'] = os.path.join(scan_dir, 'model.pth.tar')
        cfg['selflabel_dir'] = selflabel_dir
        cfg['selflabel_checkpoint'] = os.path.join(selflabel_dir,
                                                   'checkpoint.pth.tar')
        cfg['selflabel_model'] = os.path.join(selflabel_dir, 'model.pth.tar')

    return cfg
def create_config(config_file_env, config_file_exp, run_idx=None):
    # Config for environment path
    with open(config_file_env, 'r') as stream:
        root_dir = yaml.safe_load(stream)['root_dir']

    with open(config_file_exp, 'r') as stream:
        config = yaml.safe_load(stream)

    # Copy every experiment setting into an EasyDict.
    cfg = EasyDict()
    for key, value in config.items():
        cfg[key] = value

    # Num classes
    if cfg['train_db_name'] == 'VOCSegmentation':
        cfg['num_classes'] = 20
        cfg['has_bg'] = True
    else:
        raise ValueError('Invalid train db name {}'.format(
            cfg['train_db_name']))

    # Paths: everything lives under a directory named after the exp file.
    exp_name = os.path.basename(config_file_exp).split('.')[0]
    output_dir = os.path.join(root_dir, exp_name)
    mkdir_if_missing(output_dir)
    cfg['output_dir'] = output_dir
    cfg['checkpoint'] = os.path.join(output_dir, 'checkpoint.pth.tar')
    cfg['best_model'] = os.path.join(output_dir, 'best_model.pth.tar')
    cfg['save_dir'] = os.path.join(output_dir, 'predictions')
    mkdir_if_missing(cfg['save_dir'])
    cfg['log_file'] = os.path.join(output_dir, 'logger.txt')

    # Special directories for K-Means -> Which happens off-line
    cfg['embedding_dir'] = os.path.join(output_dir, 'embeddings')
    cfg['sal_dir'] = os.path.join(output_dir, 'saliency')
    mkdir_if_missing(cfg['embedding_dir'])
    mkdir_if_missing(cfg['sal_dir'])

    # Special directories for retrieval
    cfg['retrieval_dir'] = os.path.join(output_dir, 'retrieval')
    mkdir_if_missing(cfg['retrieval_dir'])

    if 'kmeans_eval' not in cfg.keys():
        cfg['kmeans_eval'] = False

    return cfg
def _maybe_stop_recording_video(self, i, data): """ If simulator.params.record_video=True then call simulator.stop_recording_video with a file name to save to. """ simulator = data['simulator'] dirname = data['dir'] base_dir = data['base_dir'] if simulator.params.record_video: video_dir = os.path.join(base_dir, dirname, 'videos') utils.mkdir_if_missing(video_dir) video_name = os.path.join(video_dir, '{:d}.mp4'.format(i)) simulator.stop_recording_video(i, video_name)
def _download_data(self): if osp.exists(self.root): print("This dataset has been downloaded.") return mkdir_if_missing(self.root) fpath = osp.join(self.root, osp.basename(self.dataset_url)) print("Downloading iLIDS-VID dataset") url_opener = urllib.URLopener() url_opener.retrieve(self.dataset_url, fpath) print("Extracting files") tar = tarfile.open(fpath) tar.extractall(path=self.root) tar.close()
def _init_callback_instance_variables(self):
    """Initialize instance variables needed for the callback function."""
    # Summary writer used for tensorboard summaries.
    self.nn_summary_writer = tf.contrib.summary.create_file_writer(
        self._summary_dir(), flush_millis=int(20e3))

    # Directory where callback outputs are stored.
    self.callback_dir = os.path.join(self.p.session_dir, 'callbacks')
    utils.mkdir_if_missing(self.callback_dir)

    # Simulator data dictionary consumed by the callbacks.
    sim_params = self._nn_simulator_params()
    self.simulator_data = self._init_simulator_data(
        sim_params,
        self.p.trainer.callback_number_tests,
        self.p.trainer.callback_seed,
        dirname='callbacks')
def _download(self):
    _fpath = os.path.join(MyPath.db_root_dir(), self.FILE)

    # Guard clause: nothing to do if the archive is already on disk.
    if os.path.isfile(_fpath):
        print('Files already downloaded')
        return

    print('Downloading from google drive')
    mkdir_if_missing(os.path.dirname(_fpath))
    download_file_from_google_drive(self.GOOGLE_DRIVE_ID, _fpath)

    # Extract from the dataset root, then restore the working directory.
    previous_cwd = os.getcwd()
    print('\nExtracting tar file')
    tar = tarfile.open(_fpath)
    os.chdir(MyPath.db_root_dir())
    tar.extractall()
    tar.close()
    os.chdir(previous_cwd)
    print('Done!')
def create_config(config_file_env, config_file_exp):
    # Config for environment path
    with open(config_file_env, 'r') as stream:
        root_dir = yaml.safe_load(stream)['root_dir']

    with open(config_file_exp, 'r') as stream:
        config = yaml.safe_load(stream)

    # Copy every experiment setting into an EasyDict.
    cfg = EasyDict()
    for key, value in config.items():
        cfg[key] = value

    # All outputs live under a directory named after the experiment file.
    exp_name = os.path.basename(config_file_exp).split('.')[0]
    output_dir = os.path.join(root_dir, exp_name)
    mkdir_if_missing(output_dir)
    cfg['output_dir'] = output_dir
    cfg['checkpoint'] = os.path.join(output_dir, 'checkpoint.pth.tar')
    cfg['best_model'] = os.path.join(output_dir, 'best_model.pth.tar')

    return cfg
def _plot_episode_images(self, i, data):
    """ Plot the images the robot saw during a particular episode. Useful
    for debugging at test time. """
    simulator = data['simulator']
    imgs_nmkd = simulator.vehicle_data['img_nmkd']

    # One subplot per image, stacked vertically.
    fig, _, axes = utils.subplot2(plt, (len(imgs_nmkd), 1), (8, 8), (.4, .4))
    axes = axes[::-1]  # reverse axis ordering before plotting
    for idx, img_mkd in enumerate(imgs_nmkd):
        ax = axes[idx]
        extent = img_mkd.shape[0] * simulator.params.obstacle_map_params.dx
        plot_image_observation(ax, img_mkd, extent)
        ax.set_title('Img: {:d}'.format(idx))

    # Save the figure as imgs/<episode>.pdf under the episode directory.
    figdir = os.path.join(data['base_dir'], data['dir'], 'imgs')
    utils.mkdir_if_missing(figdir)
    figname = os.path.join(figdir, '{:d}.pdf'.format(i))
    fig.savefig(figname, bbox_inches='tight')
    plt.close(fig)
torch.manual_seed(args.seed) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu cuda = torch.cuda.is_available() cudnn.benchmark = False log_name = os.path.join( args.log_root, 'log_%s-%s.txt' % (args.phase, time.strftime("%Y-%m-%d-%H-%M-%S"))) sys.stdout = Logger(log_name) print("==========\nArgs:{}\n==========".format(args)) torch.cuda.manual_seed_all(args.seed) np.random.seed(5) config.display() """""" """""~~~ dataset loader ~~~""" """""" mkdir_if_missing(args.snapshot_root) mkdir_if_missing(args.output_root) mkdir_if_missing(args.log_root) mkdir_if_missing(args.val_root) train_sub = MyData(args.train_dataroot, DF=['BinRushed', 'MESSIDOR'], transform=True) train_loader = DataLoader(train_sub, batch_size=args.b_size, shuffle=True, num_workers=4, pin_memory=True) val_sub = MyData(args.test_dataroot, DF=[parameters["dataset"]]) val_loader = DataLoader(val_sub, batch_size=1,
def _save_trajectory_data(self, i, data):
    """Optionally log all trajectory data for episode ``i`` to a pickle file.

    When simulator.params.save_trajectory_data is set, writes
    trajectories/traj_<i>.pkl containing the episode's trajectory, vehicle
    data and outcome, and appends this episode's number/type/validity to a
    shared trajectories/metadata.pkl index (created on first use).
    """
    simulator = data['simulator']
    dirname = data['dir']
    base_dir = data['base_dir']
    if simulator.params.save_trajectory_data:
        trajectory_data_dir = os.path.join(base_dir, dirname, 'trajectories')
        utils.mkdir_if_missing(trajectory_data_dir)
        # NOTE(review): `data` is rebound here, shadowing the input argument
        # (its 'simulator'/'dir'/'base_dir' fields were already extracted).
        data = {}
        vehicle_trajectory, vehicle_data, vehicle_data_last_step, vehicle_commanded_actions_1kf = simulator.get_simulator_data_numpy_repr(
        )
        data['vehicle_trajectory'] = vehicle_trajectory
        data['vehicle_data'] = vehicle_data
        data['vehicle_data_last_step'] = vehicle_data_last_step
        data['commanded_actions_1kf'] = vehicle_commanded_actions_1kf
        data['goal_config'] = simulator.goal_config.to_numpy_repr()
        data['episode_number'] = i
        data['episode_type_int'] = simulator.episode_type
        data[
            'episode_type_string'] = simulator.params.episode_termination_reasons[
                simulator.episode_type]
        data['valid_episode'] = simulator.valid_episode

        # Current Occupancy Grid- Useful for plotting these trajectories later
        if hasattr(simulator.obstacle_map, 'occupancy_grid_map'):
            data[
                'occupancy_grid'] = simulator.obstacle_map.occupancy_grid_map
            # Flatten column-major so the extent is [xmin, xmax, ymin, ymax].
            # NOTE(review): assumed from the Fortran-order flatten — confirm.
            data['map_bounds_extent'] = np.array(
                simulator.obstacle_map.map_bounds).flatten(order='F')

        trajectory_file = os.path.join(trajectory_data_dir,
                                       'traj_{:d}.pkl'.format(i))
        with open(trajectory_file, 'wb') as f:
            pickle.dump(data, f)

        # Add Trajectory Metadata
        metadata_file = os.path.join(trajectory_data_dir, 'metadata.pkl')
        if os.path.exists(metadata_file):
            # Append this episode to the existing metadata index.
            with open(metadata_file, 'rb') as f:
                metadata = pickle.load(f)
            metadata['episode_number'].append(i)
            metadata['episode_type_int'].append(data['episode_type_int'])
            metadata['episode_type_string'].append(
                data['episode_type_string'])
            metadata['valid_episode'].append(data['valid_episode'])
        else:
            # First episode: start a fresh metadata index.
            metadata = {}
            metadata['episode_number'] = [i]
            metadata['episode_type_int'] = [data['episode_type_int']]
            metadata['episode_type_string'] = [data['episode_type_string']]
            metadata['valid_episode'] = [data['valid_episode']]
        with open(metadata_file, 'wb') as f:
            pickle.dump(metadata, f)
def eval_edge_predictions(p, database, save_dir):
    """The edges are evaluated through seism (an external MATLAB toolbox).

    Steps: (1) sanity-check that a prediction exists for every database
    image, (2) rsync the predictions into the seism tree, (3) generate a
    MATLAB evaluation script from a template, (4) run it via `matlab`,
    (5) parse the resulting text file into a dict, dump it to JSON, print
    it, and clean up the files created inside seism.

    Returns the dict of metric name -> score.
    """
    print(
        'Evaluate the edge prediction using seism ... This can take a while ...'
    )

    # DataLoaders
    if database == 'PASCALContext':
        from data.pascal_context import PASCALContext
        split = 'val'
        db = PASCALContext(split=split,
                           do_edge=True,
                           do_human_parts=False,
                           do_semseg=False,
                           do_normals=False,
                           do_sal=True,
                           overfit=False)
    else:
        raise NotImplementedError

    # First check if all files are there
    files = glob.glob(os.path.join(save_dir, 'edge/*png'))
    assert (len(files) == len(db))

    # rsync the results to the seism root
    print('Rsync the results to the seism root ...')
    exp_name = database + '_' + p['setup'] + '_' + p['model']
    seism_root = MyPath.seism_root()
    result_dir = os.path.join(seism_root,
                              'datasets/%s/%s/' % (database, exp_name))
    mkdir_if_missing(result_dir)
    os.system('rsync -a %s %s' %
              (os.path.join(save_dir, 'edge/*'), result_dir))
    print('Done ...')

    # Write the list of thresholds (0.01 .. 0.99) seism should evaluate at.
    v = list(np.arange(0.01, 1.00, 0.01))
    parameters_location = os.path.join(seism_root,
                                       'parameters/%s.txt' % (exp_name))
    with open(parameters_location, 'w') as f:
        for l in v:
            f.write('%.2f\n' % (l))

    # generate a seism script that we will run.
    print('Generate seism script to perform the evaluation ...')
    seism_base = os.path.join(PROJECT_ROOT_DIR,
                              'evaluation/seism/pr_curves_base.m')
    with open(seism_base) as f:
        seism_file = f.readlines()
    seism_file = [line.strip() for line in seism_file]
    output_file = [seism_file[0]]

    ## Add experiments parameters (TODO)
    # NOTE(review): the slice indices below (1:18, 19:61, 62:) are tied to
    # the exact line layout of pr_curves_base.m — keep them in sync.
    output_file += [
        'addpath(\'%s\')' % (os.path.join(seism_root, 'src/scripts/'))
    ]
    output_file += [
        'addpath(\'%s\')' % (os.path.join(seism_root, 'src/misc/'))
    ]
    output_file += [
        'addpath(\'%s\')' % (os.path.join(seism_root, 'src/tests/'))
    ]
    output_file += [
        'addpath(\'%s\')' % (os.path.join(seism_root, 'src/gt_wrappers/'))
    ]
    output_file += ['addpath(\'%s\')' % (os.path.join(seism_root, 'src/io/'))]
    output_file += [
        'addpath(\'%s\')' % (os.path.join(seism_root, 'src/measures/'))
    ]
    output_file += [
        'addpath(\'%s\')' % (os.path.join(seism_root, 'src/piotr_edges/'))
    ]
    output_file += [
        'addpath(\'%s\')' % (os.path.join(seism_root, 'src/segbench/'))
    ]
    output_file.extend(seism_file[1:18])

    ## Add method (TODO)
    output_file += [
        'methods(end+1).name = \'%s\'; methods(end).io_func = @read_one_png; methods(end).legend = methods(end).name; methods(end).type = \'contour\';'
        % (exp_name)
    ]
    output_file.extend(seism_file[19:61])

    ## Add path to save output
    output_file += [
        'filename = \'%s\'' %
        (os.path.join(save_dir, database + '_' + 'test' + '_edge.txt'))
    ]
    output_file += seism_file[62:]

    # save the file to the seism dir
    output_file_path = os.path.join(seism_root, exp_name + '.m')
    with open(output_file_path, 'w') as f:
        for line in output_file:
            f.write(line + '\n')

    # go to the seism dir and perform evaluation
    print(
        'Go to seism root dir and run the evaluation ... This takes time ...')
    cwd = os.getcwd()
    os.chdir(seism_root)
    os.system(
        "matlab -nodisplay -nosplash -nodesktop -r \"addpath(\'%s\');%s;exit\""
        % (seism_root, exp_name))
    os.chdir(cwd)

    # write to json
    print('Finished evaluation in seism ... Write results to JSON ...')
    with open(os.path.join(save_dir, database + '_' + 'test' + '_edge.txt'),
              'r') as f:
        seism_result = [line.strip() for line in f.readlines()]
    eval_dict = {}
    for line in seism_result:
        metric, score = line.split(':')
        eval_dict[metric] = float(score)
    with open(os.path.join(save_dir, database + '_' + 'test' + '_edge.json'),
              'w') as f:
        json.dump(eval_dict, f)

    # print
    print('Edge Detection Evaluation')
    for k, v in eval_dict.items():
        spaces = ''
        for j in range(0, 10 - len(k)):
            spaces += ' '
        print('{0:s}{1:s}{2:.4f}'.format(k, spaces, 100 * v))

    # cleanup - Important. Else Matlab will reuse the files.
    print('Cleanup files in seism ...')
    result_rm = os.path.join(seism_root,
                             'results/%s/%s/' % (database, exp_name))
    data_rm = os.path.join(seism_root,
                           'datasets/%s/%s/' % (database, exp_name))
    os.system("rm -rf %s" % (result_rm))
    os.system("rm -rf %s" % (data_rm))
    print('Finished cleanup ...')
    return eval_dict
import tensorflow as tf
import matplotlib.pyplot as plt
import os
from utils.image_utils import plot_image_observation
import numpy as np
import pickle
import sys
# BUGFIX: `utils.subplot2` / `utils.mkdir_if_missing` are used below but the
# name `utils` was never bound (NameError at runtime); import it the same way
# the rest of the project does.
from utils import utils
""" Plot images from pkl file """

# Hard-coded input pickle containing the episode's image data.
filename = '/home/anjianl/Desktop/project/WayPtNav/data/successful_data/v2_filter_obstacle_0.25/area5a/success_v2_44k/img_data_rgb_1024_1024_3_90.00_90.00_0.01_20.00_0.22_18_10_100_80_-45_1.000/file1.pkl'
with open(filename, 'rb') as handle:
    data = pickle.load(handle)

# Only plot the first 20 images.
imgs_nmkd = data['img_nmkd']
imgs_nmkd = imgs_nmkd[0:20, :, :, :]

# One subplot per image, stacked vertically.
fig, _, axs = utils.subplot2(plt, (len(imgs_nmkd), 1), (8, 8), (.4, .4))
axs = axs[::-1]
for idx, img_mkd in enumerate(imgs_nmkd):
    ax = axs[idx]
    # 0.05 presumably converts pixels to meters — TODO confirm scale.
    size = img_mkd.shape[0] * 0.05
    plot_image_observation(ax, img_mkd, size)
    ax.set_title('Img: {:d}'.format(idx))

# Save the figure to a fixed output path.
figdir = os.path.join('/home/anjianl/Desktop/', 'imgs')
utils.mkdir_if_missing(figdir)
figname = os.path.join(figdir, '{:d}.pdf'.format(2))
fig.savefig(figname, bbox_inches='tight')
plt.close(fig)
def train(
        save_path,
        save_every,
        img_size,
        resume,
        epochs,
        batch_size,
        accumulated_batches,
        opt=None
):
    """Train the Jde_RCNN joint detection / re-identification model.

    Creates <save_path>/<model_name>/ with model.yaml, latest.pt, periodic
    weight snapshots and loss.json. Evaluates (test_emb + test) on the
    validation set every 3 epochs. Gradients are accumulated over
    `accumulated_batches` mini-batches before each optimizer step.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    # Run directory is keyed by backbone and input resolution.
    model_name = opt.backbone_name + '_img_size' + str(img_size[0]) + '_' + str(img_size[1])
    weights_path = osp.join(save_path, model_name)
    loss_log_path = osp.join(weights_path, 'loss.json')
    mkdir_if_missing(weights_path)

    # Config written to model.yaml on a fresh run (not when resuming).
    cfg = {}
    cfg['width'] = img_size[0]
    cfg['height'] = img_size[1]
    cfg['backbone_name'] = opt.backbone_name
    cfg['lr'] = opt.lr

    if resume:
        latest_resume = osp.join(weights_path, 'latest.pt')

    torch.backends.cudnn.benchmark = True
    # root = '/home/hunter/Document/torch'
    root = '/data/dgw'

    # Dataset file lists: MOT16 tracking sequences, plus extra detection
    # datasets when --all_datasets is set.
    if opt.all_datasets:
        paths_trainset = {'02': './data/track/train/MOT16-02.txt',
                          '04': './data/track/train/MOT16-04.txt',
                          '05': './data/track/train/MOT16-05.txt',
                          '09': './data/track/train/MOT16-09.txt',
                          '10': './data/track/train/MOT16-10.txt',
                          '11': './data/track/train/MOT16-11.txt',
                          '13': './data/track/train/MOT16-13.txt',
                          'CT': './data/detect/CT_train.txt',
                          'ETH': './data/detect/ETH.txt',
                          'PRW': './data/detect/PRW_train.txt',
                          'CP': './data/detect/cp_train.txt',
                          'CS': './data/detect/CUHK_train.txt'}
        paths_valset = {'02': './data/track/val/MOT16-02.txt',
                        '04': './data/track/val/MOT16-04.txt',
                        '05': './data/track/val/MOT16-05.txt',
                        '09': './data/track/val/MOT16-09.txt',
                        '10': './data/track/val/MOT16-10.txt',
                        '11': './data/track/val/MOT16-11.txt',
                        '13': './data/track/val/MOT16-13.txt',
                        'CP': './data/detect/cp_val.txt',
                        'PRW': './data/detect/PRW_val.txt',
                        'CT': './data/detect/CT_val.txt',
                        'CS': './data/detect/CUHK_val.txt'}
    else:
        paths_trainset = {'02': './data/track/train/MOT16-02.txt',
                          '04': './data/track/train/MOT16-04.txt',
                          '05': './data/track/train/MOT16-05.txt',
                          '09': './data/track/train/MOT16-09.txt',
                          '10': './data/track/train/MOT16-10.txt',
                          '11': './data/track/train/MOT16-11.txt',
                          '13': './data/track/train/MOT16-13.txt'}
        paths_valset = {'02': './data/track/val/MOT16-02.txt',
                        '04': './data/track/val/MOT16-04.txt',
                        '05': './data/track/val/MOT16-05.txt',
                        '09': './data/track/val/MOT16-09.txt',
                        '10': './data/track/val/MOT16-10.txt',
                        '11': './data/track/val/MOT16-11.txt',
                        '13': './data/track/val/MOT16-13.txt'}

    transforms = T.Compose([T.ToTensor()])
    trainset = JointDataset(root=root, paths=paths_trainset,
                            img_size=img_size, augment=True,
                            transforms=transforms)
    valset = JointDataset(root=root, paths=paths_valset,
                          img_size=img_size, augment=False,
                          transforms=transforms)
    dataloader_trainset = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=8,
        pin_memory=True, drop_last=True, collate_fn=collate_fn)
    dataloader_valset = torch.utils.data.DataLoader(
        valset, batch_size=batch_size, shuffle=True, num_workers=8,
        pin_memory=True, drop_last=True, collate_fn=collate_fn)
    cfg['num_ID'] = trainset.nID

    # Model: FPN backbone + joint detection/embedding head.
    backbone = resnet_fpn_backbone(opt.backbone_name, True)
    backbone.out_channels = 256
    model = Jde_RCNN(backbone, num_ID=trainset.nID, min_size=img_size[1],
                     max_size=img_size[0], version=opt.model_version,
                     len_embeddings=opt.len_embed)
    model.cuda().train()
    # model = torch.nn.DataParallel(model)
    start_epoch = 0

    # SGD with warm-up followed by a step decay schedule.
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=opt.lr, momentum=.9, weight_decay=5e-4)
    after_scheduler = StepLR(optimizer, 10, 0.1)
    scheduler = GradualWarmupScheduler(optimizer, multiplier=10,
                                       total_epoch=10,
                                       after_scheduler=after_scheduler)

    if resume:
        checkpoint = torch.load(latest_resume, map_location='cpu')
        # Load weights to resume from
        print(model.load_state_dict(checkpoint['model'], strict=False))
        start_epoch = checkpoint['epoch_det']
        del checkpoint  # current, saved
    else:
        with open(osp.join(weights_path, 'model.yaml'), 'w+') as f:
            yaml.dump(cfg, f)

    for epoch in range(epochs):
        # Validation every 3rd epoch before training this epoch.
        model.cuda().eval()
        with torch.no_grad():
            if epoch % 3 == 0:
                # NOTE(review): the [-1] result is discarded — confirm this
                # call is kept only for its printed output / side effects.
                test_emb(model, dataloader_valset, print_interval=50)[-1]
                test(model, dataloader_valset, conf_thres=0.5,
                     iou_thres=0.2, print_interval=50)
        scheduler.step(epoch + start_epoch)
        model.cuda().train()
        print('lr: ', optimizer.param_groups[0]['lr'])

        loss_epoch_log = dict(loss_total=0, loss_classifier=0,
                              loss_box_reg=0, loss_reid=0,
                              loss_objectness=0, loss_rpn_box_reg=0)
        for i, (imgs, labels, _, _, targets_len) in enumerate(
                tqdm(dataloader_trainset)):
            targets = []
            imgs = imgs.cuda()
            labels = labels.cuda()
            flag = False
            for target_len, label in zip(targets_len.view(-1,), labels):
                ## convert the input to demanded format
                target = {}
                # Skip the whole batch if any sample is empty or has only
                # id == -1 annotations.
                if target_len == 0:
                    flag = True
                if torch.all(label[0:int(target_len), 1] == -1):
                    flag = True
                target['boxes'] = label[0:int(target_len), 2:6]
                target['ids'] = (label[0:int(target_len), 1]).long()
                target['labels'] = torch.ones_like(target['ids'])
                targets.append(target)
            if flag:
                continue

            losses = model(imgs, targets)
            # Weighted sum of the detection losses plus 0.4 * re-id loss.
            loss = losses['loss_classifier'] + losses['loss_box_reg'] + losses['loss_objectness'] + losses['loss_rpn_box_reg'] + 0.4 * losses['loss_reid']
            loss.backward()

            # Step only every `accumulated_batches` batches (or at the end).
            if ((i + 1) % accumulated_batches == 0) or (i == len(dataloader_trainset) - 1):
                optimizer.step()
                optimizer.zero_grad()

            ## print and log the loss
            for key, val in losses.items():
                loss_epoch_log[key] = float(val) + loss_epoch_log[key]

        # NOTE(review): divides by the last batch index `i`, not the batch
        # count (i + 1) — off-by-one; also fails if the loader yields a
        # single batch. Confirm before relying on these averages.
        for key, val in loss_epoch_log.items():
            loss_epoch_log[key] = loss_epoch_log[key] / i
        print("loss in epoch %d: " % (epoch))
        print(loss_epoch_log)

        # Checkpointing: always refresh latest.pt, snapshot every
        # `save_every` epochs, and append the epoch losses to loss.json.
        epoch_det = epoch + start_epoch
        epoch_reid = epoch + start_epoch
        checkpoint = {'epoch_det': epoch_det,
                      'epoch_reid': epoch_reid,
                      'model': model.state_dict()
                      }
        latest = osp.join(weights_path, 'latest.pt')
        torch.save(checkpoint, latest)
        if epoch % save_every == 0 and epoch != 0:
            torch.save(checkpoint,
                       osp.join(weights_path,
                                "weights_epoch_" + str(epoch_det) + '_' + str(epoch_reid) + ".pt"))
        with open(loss_log_path, 'a+') as f:
            f.write('epoch_det:' + str(epoch_det) + ',epoch_reid:' + str(epoch_reid) + '\n')
            json.dump(loss_epoch_log, f)
            f.write('\n')
def train(save_path, save_every, img_size, resume, epochs, opt=None):
    """Train the optical-flow tracker (flowNet).

    Creates <save_path>/flowNet/ with model.yaml, latest.pt, periodic
    weight snapshots and loss.json (one appended line per epoch).
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    model_name = 'flowNet'
    weights_path = osp.join(save_path, model_name)
    loss_log_path = osp.join(weights_path, 'loss.json')
    mkdir_if_missing(weights_path)

    # Config written to model.yaml on a fresh run (not when resuming).
    cfg = {}
    cfg['lr'] = opt.lr
    cfg['height'] = img_size[1]
    cfg['width'] = img_size[0]

    if resume:
        latest_resume = osp.join(weights_path, 'latest.pt')

    torch.backends.cudnn.benchmark = True
    # root = '/home/hunter/Document/torch'
    root = '/data/dgw'
    paths_trainset = './data/flow/MOT16.txt'
    transforms = T.Compose([T.ToTensor()])
    trainset = LoadImagesAndLabels_2(root=root, path=paths_trainset,
                                     img_size=img_size, augment=False,
                                     transforms=transforms)
    dataloader_trainset = torch.utils.data.DataLoader(trainset,
                                                      batch_size=1,
                                                      shuffle=True)

    model = flowTracker(img_size)
    # model.train()
    model.cuda().train()
    start_epoch = 0

    # SGD with warm-up followed by a step decay schedule.
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=opt.lr, momentum=.9, weight_decay=5e-4)
    after_scheduler = StepLR(optimizer, 10, 0.1)
    scheduler = GradualWarmupScheduler(optimizer, multiplier=10,
                                       total_epoch=10,
                                       after_scheduler=after_scheduler)

    if resume:
        checkpoint = torch.load(latest_resume, map_location='cpu')
        # Load weights to resume from
        print(model.load_state_dict(checkpoint['model'], strict=False))
        start_epoch = checkpoint['epoch'] + 1
        del checkpoint  # current, saved
    else:
        with open(osp.join(weights_path, 'model.yaml'), 'w+') as f:
            yaml.dump(cfg, f)

    for epoch in range(epochs):
        epoch = epoch + start_epoch
        print('lr: ', optimizer.param_groups[0]['lr'])
        scheduler.step(epoch)
        loss_epoch_log = 0
        for i, (imgs, labels, img_path, _) in enumerate(
                tqdm(dataloader_trainset)):
            # Stack the image pair into a single (1, C, T, H, W)-style input.
            imgs = torch.cat(imgs, dim=0)
            imgs = imgs.permute(1, 0, 2, 3).unsqueeze(0).cuda()
            boxes, target = labels[0][0].cuda(), labels[1][0].cuda()
            loss = model(imgs, boxes, target, img_path)
            if loss is None:
                continue
            # BUGFIX: gradients were never cleared, so every batch's
            # gradients accumulated indefinitely (the sibling Jde_RCNN
            # train() zeroes its grads after each optimizer step).
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            ## print and log the loss
            if i % 50 == 0:
                print(loss)
            loss_epoch_log += loss

        # NOTE(review): divides by the last batch index `i`, not the batch
        # count (i + 1) — off-by-one preserved from the original.
        loss_epoch_log = loss_epoch_log / i
        print("loss in epoch %d: " % (epoch))
        print(loss_epoch_log)

        checkpoint = {'epoch': epoch, 'model': model.state_dict()}
        latest = osp.join(weights_path, 'latest.pt')
        torch.save(checkpoint, latest)
        if epoch % save_every == 0 and epoch != 0:
            torch.save(
                checkpoint,
                osp.join(weights_path,
                         "weights_epoch_" + str(epoch) + ".pt"))
        with open(loss_log_path, 'a+') as f:
            f.write('epoch:' + str(epoch) + '\n')
            json.dump(float(loss_epoch_log), f)
            f.write('\n')
def create_config(config_file_env, config_file_exp, tb_run, make_dirs=True):
    # Config for environment path
    with open(config_file_env, 'r') as stream:
        root_dir = yaml.safe_load(stream)['root_dir']

    with open(config_file_exp, 'r') as stream:
        config = yaml.safe_load(stream)

    # Copy every experiment setting into an EasyDict.
    cfg = EasyDict()
    for key, value in config.items():
        cfg[key] = value

    # Set paths for pretext task (These directories are needed in every stage)
    base_dir = os.path.join(root_dir, tb_run)
    pretext_dir = os.path.join(base_dir, 'pretext')
    if make_dirs:
        mkdir_if_missing(base_dir)
        mkdir_if_missing(pretext_dir)
    cfg['pretext_dir'] = pretext_dir
    cfg['pretext_checkpoint'] = os.path.join(pretext_dir, 'checkpoint.pth.tar')
    cfg['pretext_model'] = os.path.join(pretext_dir, 'model.pth.tar')
    cfg['topk_neighbors_train_path'] = os.path.join(pretext_dir,
                                                    'topk-train-neighbors.npy')
    cfg['topk_neighbors_val_path'] = os.path.join(pretext_dir,
                                                  'topk-val-neighbors.npy')
    cfg['topk_furthest_train_path'] = os.path.join(pretext_dir,
                                                   'topk-train-furthest.npy')
    cfg['topk_furthest_val_path'] = os.path.join(pretext_dir,
                                                 'topk-val-furthest.npy')

    # If we perform clustering or self-labeling step we need additional paths.
    # We also include a run identifier to support multiple runs w/ same hyperparams.
    if cfg['setup'] in ['scan', 'selflabel', 'simpred']:
        # base_dir is identical to the one computed above.
        scan_dir = os.path.join(base_dir, 'scan')
        simpred_dir = os.path.join(base_dir, 'simpred')
        selflabel_dir = os.path.join(base_dir, 'selflabel')
        if make_dirs:
            mkdir_if_missing(base_dir)
            mkdir_if_missing(scan_dir)
            mkdir_if_missing(simpred_dir)
            mkdir_if_missing(selflabel_dir)
        cfg['scan_dir'] = scan_dir
        cfg['scan_checkpoint'] = os.path.join(scan_dir, 'checkpoint.pth.tar')
        cfg['scan_model'] = os.path.join(scan_dir, 'model.pth.tar')
        cfg['simpred_dir'] = simpred_dir
        cfg['simpred_checkpoint'] = os.path.join(simpred_dir,
                                                 'checkpoint.pth.tar')
        cfg['simpred_model'] = os.path.join(simpred_dir, 'model.pth.tar')
        cfg['selflabel_dir'] = selflabel_dir
        cfg['selflabel_checkpoint'] = os.path.join(selflabel_dir,
                                                   'checkpoint.pth.tar')
        cfg['selflabel_model'] = os.path.join(selflabel_dir, 'model.pth.tar')
        cfg['scan_tb_dir'] = os.path.join(base_dir, 'tb_scan')
        cfg['simpred_tb_dir'] = os.path.join(base_dir, 'tb_simpred')
        cfg['selflabel_tb_dir'] = os.path.join(base_dir, 'tb_selflabel')

    return cfg
def create_config(env_file, exp_file):
    """Build the multi-task experiment config (edict) from env + exp YAMLs.

    Args:
        env_file: YAML file containing the 'root_dir' output root path.
        exp_file: YAML file with the experiment hyperparameters, including
            the (main and optional auxiliary) task dictionaries.

    Returns:
        edict with all options, parsed task settings, dataset-dependent
        train/test scales, and derived output paths. The output directories
        are created on disk as a side effect.

    Raises:
        NotImplementedError: for an unsupported 'train_db_name' or 'setup'.
    """
    # Read the files
    with open(env_file, 'r') as stream:
        root_dir = yaml.safe_load(stream)['root_dir']

    with open(exp_file, 'r') as stream:
        config = yaml.safe_load(stream)

    # Copy all the arguments
    cfg = edict()
    for k, v in config.items():
        cfg[k] = v

    # Parse the task dictionary separately
    cfg.TASKS, extra_args = parse_task_dictionary(cfg['train_db_name'],
                                                  cfg['task_dictionary'])
    for k, v in extra_args.items():
        cfg[k] = v

    # All tasks = main tasks (auxiliary tasks are merged in below)
    cfg.ALL_TASKS = edict()
    cfg.ALL_TASKS.NAMES = []
    cfg.ALL_TASKS.NUM_OUTPUT = {}
    cfg.ALL_TASKS.FLAGVALS = {'image': cv2.INTER_CUBIC}
    cfg.ALL_TASKS.INFER_FLAGVALS = {}

    for k in cfg.TASKS.NAMES:
        cfg.ALL_TASKS.NAMES.append(k)
        cfg.ALL_TASKS.NUM_OUTPUT[k] = cfg.TASKS.NUM_OUTPUT[k]
        cfg.ALL_TASKS.FLAGVALS[k] = cfg.TASKS.FLAGVALS[k]
        cfg.ALL_TASKS.INFER_FLAGVALS[k] = cfg.TASKS.INFER_FLAGVALS[k]

    # Parse the auxiliary dictionary separately. NOTE: the 'auxilary' key
    # spelling is part of the config schema and must not be changed here.
    if 'auxilary_task_dictionary' in cfg:
        cfg.AUXILARY_TASKS, extra_args = parse_task_dictionary(
            cfg['train_db_name'], cfg['auxilary_task_dictionary'])
        for k, v in extra_args.items():
            cfg[k] = v

        # Add auxiliary tasks to all tasks; the auxiliary settings are
        # written for every auxiliary task name.
        for k in cfg.AUXILARY_TASKS.NAMES:
            if k not in cfg.ALL_TASKS.NAMES:
                cfg.ALL_TASKS.NAMES.append(k)
            cfg.ALL_TASKS.NUM_OUTPUT[k] = cfg.AUXILARY_TASKS.NUM_OUTPUT[k]
            cfg.ALL_TASKS.FLAGVALS[k] = cfg.AUXILARY_TASKS.FLAGVALS[k]
            cfg.ALL_TASKS.INFER_FLAGVALS[k] = \
                cfg.AUXILARY_TASKS.INFER_FLAGVALS[k]

    # Dataset-dependent train/test resolutions
    if cfg['train_db_name'] == 'PASCALContext':
        cfg.TRAIN = edict()
        cfg.TRAIN.SCALE = (512, 512)
        cfg.TEST = edict()
        cfg.TEST.SCALE = (512, 512)
    elif cfg['train_db_name'] == 'NYUD':
        cfg.TRAIN = edict()
        cfg.TRAIN.SCALE = (480, 640)
        cfg.TEST = edict()
        cfg.TEST.SCALE = (480, 640)
    else:
        raise NotImplementedError

    # Location of single-task performance dictionaries
    # (for multi-task learning evaluation)
    if cfg['setup'] == 'multi_task':
        cfg.TASKS.SINGLE_TASK_TEST_DICT = edict()
        cfg.TASKS.SINGLE_TASK_VAL_DICT = edict()
        for task in cfg.TASKS.NAMES:
            task_dir = os.path.join(root_dir, cfg['train_db_name'],
                                    cfg['backbone'], 'single_task', task)
            val_dict = os.path.join(
                task_dir, 'results',
                '%s_val_%s.json' % (cfg['val_db_name'], task))
            test_dict = os.path.join(
                task_dir, 'results',
                '%s_test_%s.json' % (cfg['val_db_name'], task))
            cfg.TASKS.SINGLE_TASK_TEST_DICT[task] = test_dict
            cfg.TASKS.SINGLE_TASK_VAL_DICT[task] = val_dict

    # Overfitting (useful for debugging -> overfit on a small data partition)
    if 'overfit' not in cfg:
        cfg['overfit'] = False

    # Determine output directory
    if cfg['setup'] == 'single_task':
        output_dir = os.path.join(root_dir, cfg['train_db_name'],
                                  cfg['backbone'], cfg['setup'])
        output_dir = os.path.join(output_dir, cfg.TASKS.NAMES[0])
    elif cfg['setup'] == 'multi_task':
        if cfg['model'] == 'baseline':
            output_dir = os.path.join(root_dir, cfg['train_db_name'],
                                      cfg['backbone'], 'multi_task_baseline')
        else:
            output_dir = os.path.join(root_dir, cfg['train_db_name'],
                                      cfg['backbone'], cfg['model'])
    else:
        raise NotImplementedError

    cfg['root_dir'] = root_dir
    cfg['output_dir'] = output_dir
    cfg['save_dir'] = os.path.join(output_dir, 'results')
    cfg['checkpoint'] = os.path.join(output_dir, 'checkpoint.pth.tar')
    cfg['best_model'] = os.path.join(output_dir, 'best_model.pth.tar')
    mkdir_if_missing(cfg['output_dir'])
    mkdir_if_missing(cfg['save_dir'])

    return cfg
def train(
        cfg,
        data_cfg,
        weights_from="",
        weights_to="",
        save_every=10,
        img_size=(1088, 608),
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
        freeze_backbone=False,
        opt=None,
):
    """Train the joint detection/embedding (Darknet) model.

    Args:
        cfg: path to the Darknet .cfg model definition file.
        data_cfg: path to the JSON data config with 'train' paths and 'root'.
        weights_from: directory holding pretrained/resume weights.
        weights_to: base directory; a timestamped 'run...' subdir is created
            under it and checkpoints + configs are saved there.
        save_every: save an epoch-numbered (optimizer-stripped) checkpoint
            every this many epochs.
        img_size: (width, height) network input size.
        resume: resume from '<weights_from>/latest.pt' instead of pretraining.
        epochs: number of training epochs.
        batch_size: dataloader batch size.
        accumulated_batches: number of batches to accumulate gradients over
            before each optimizer step.
        freeze_backbone: freeze backbone layers (below `cutoff`) on epoch 0.
        opt: parsed CLI options; fields used: num_workers, lr, epochs,
            unfreeze_bn, print_interval.
    """
    NUM_WORKERS = opt.num_workers

    # Build a filesystem-safe timestamp for the run directory name.
    timme = strftime("%Y-%d-%m %H:%M:%S", gmtime())
    timme = timme[5:-3].replace('-', '_')
    timme = timme.replace(' ', '_')
    timme = timme.replace(':', '_')
    weights_to = osp.join(weights_to, 'run' + timme)
    mkdir_if_missing(weights_to)
    mkdir_if_missing(weights_to + '/cfg/')
    if resume:
        latest_resume = osp.join(weights_from, 'latest.pt')

    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale

    # Configure run. Use a context manager so the file handle is released
    # even if json.load raises (the original open()/close() pair leaked it
    # on error).
    with open(data_cfg) as f:
        data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']

    transforms = T.Compose([T.ToTensor()])

    # Get dataloader
    dataset = JointDataset(dataset_root, trainset_paths, img_size,
                           augment=True, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=NUM_WORKERS,
                                             pin_memory=True,
                                             drop_last=True,
                                             collate_fn=collate_fn)

    # Initialize model
    model = Darknet(cfg, dataset.nID)

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    if resume:
        checkpoint = torch.load(latest_resume, map_location='cpu')

        # Load weights to resume from
        model.load_state_dict(checkpoint['model'])
        model.cuda().train()

        # Set optimizer
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr, momentum=.9)

        start_epoch = checkpoint['epoch'] + 1
        if checkpoint['optimizer'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer'])

        del checkpoint  # current, saved
    else:
        # Initialize model with backbone (optional)
        if cfg.endswith('yolov3.cfg'):
            load_darknet_weights(model,
                                 osp.join(weights_from, 'darknet53.conv.74'))
            cutoff = 75
        elif cfg.endswith('yolov3-tiny.cfg'):
            load_darknet_weights(model,
                                 osp.join(weights_from,
                                          'yolov3-tiny.conv.15'))
            cutoff = 15

        model.cuda().train()

        # Set optimizer
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr, momentum=.9, weight_decay=1e-4)

    model = torch.nn.DataParallel(model)

    # Set scheduler: decay LR at 50% and 75% of the configured epoch budget.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(0.5 * opt.epochs), int(0.75 * opt.epochs)],
        gamma=0.1)

    # An important trick for detection: freeze bn during fine-tuning
    if not opt.unfreeze_bn:
        for i, (name, p) in enumerate(model.named_parameters()):
            p.requires_grad = False if 'batch_norm' in name else True

    # model_info(model)
    t0 = time.time()
    for epoch in range(epochs):
        epoch += start_epoch

        logger.info(('%8s%12s' + '%10s' * 6) %
                    ('Epoch', 'Batch', 'box', 'conf', 'id', 'total',
                     'nTargets', 'time'))

        # Freeze darknet53.conv.74 for first epoch
        if freeze_backbone and (epoch < 2):
            for i, (name, p) in enumerate(model.named_parameters()):
                if int(name.split('.')[2]) < cutoff:  # if layer < 75
                    p.requires_grad = False if (epoch == 0) else True

        ui = -1
        rloss = defaultdict(float)  # running loss

        ## training schedule
        optimizer.zero_grad()
        for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
            if sum([len(x) for x in targets]) < 1:  # if no targets continue
                continue

            # SGD burn-in: ramp the LR up over the first `burnin` batches
            # of epoch 0 with a quartic schedule.
            burnin = min(1000, len(dataloader))
            if (epoch == 0) and (i <= burnin):
                lr = opt.lr * (i / burnin) ** 4
                for g in optimizer.param_groups:
                    g['lr'] = lr

            # Compute loss, compute gradient, update parameters
            loss, components = model(imgs.cuda(), targets.cuda(),
                                     targets_len.cuda())
            components = torch.mean(components.view(-1, 5), dim=0)
            loss = torch.mean(loss)
            loss.backward()

            # accumulate gradient for x batches before optimizing
            if ((i + 1) % accumulated_batches == 0) or \
                    (i == len(dataloader) - 1):
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            ui += 1
            for ii, key in enumerate(model.module.loss_names):
                # rloss indicates running loss values with mean updated at
                # every batch of the epoch
                rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)

            s = ('%8s%12s' + '%10.3g' * 6) % (
                '%g/%g' % (epoch, epochs - 1),
                '%g/%g' % (i, len(dataloader) - 1),
                rloss['box'], rloss['conf'], rloss['id'], rloss['loss'],
                rloss['nT'], time.time() - t0)
            t0 = time.time()
            if i % opt.print_interval == 0:
                logger.info(s)

        # Save latest checkpoint
        checkpoint = {'epoch': epoch,
                      'model': model.module.state_dict(),
                      'optimizer': optimizer.state_dict()}

        copyfile(cfg, weights_to + '/cfg/yolo3.cfg')
        copyfile(data_cfg, weights_to + '/cfg/ccmcpe.json')

        latest = osp.join(weights_to, 'latest.pt')
        torch.save(checkpoint, latest)
        if epoch % save_every == 0 and epoch != 0:
            # making the checkpoint lite (drop the optimizer state)
            checkpoint["optimizer"] = []
            torch.save(checkpoint,
                       osp.join(weights_to,
                                "weights_epoch_" + str(epoch) + ".pt"))

        # Calculate mAP
        '''
        if epoch % opt.test_interval == 0:
            with torch.no_grad():
                mAP, R, P = test.test(cfg, data_cfg, weights=latest,
                                      batch_size=batch_size,
                                      img_size=img_size,
                                      print_interval=40, nID=dataset.nID)
                test.test_emb(cfg, data_cfg, weights=latest,
                              batch_size=batch_size, img_size=img_size,
                              print_interval=40, nID=dataset.nID)
        '''

        # Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
        scheduler.step()