def main():
    """Command-line entry point: parse the arguments and dispatch one mode.

    Modes are checked in this order and the first match wins:

    1. ``--update-sim``: call ``ensure_sim(update=True)`` and exit.
    2. ``--hdf5-2-tfrecord``: convert the recordings in
       ``args.recording_dir`` via ``hdf5_to_tfrecord.encode`` and exit.
    3. ``--server`` / ``--json-server``: start the Deepdrive API server.
    4. Otherwise run an agent: the path follower when requested or when
       ``check_tensorflow_gpu()`` is falsy, else the Tensorflow based models.

    Side effects: stores the parsed arguments in ``c.MAIN_ARGS``, may raise
    the log level to DEBUG, and may prompt on stdin to confirm ``--public``.
    """
    args, _agent_args = get_args()
    c.MAIN_ARGS = vars(args)  # For documenting runs

    if args.verbose:
        logs.set_level(logging.DEBUG)

    if args.update_sim:
        ensure_sim(update=True)
        return

    if args.public and not c.PUBLIC:
        # Publishing results needs an explicit confirmation; anything other
        # than "y" / "yes" (case-insensitive) turns the flag back off.
        reply = input('Please confirm you want to make the results '
                      'of this evaluation public? ')
        args.public = reply.lower() in ('y', 'yes')
        if not args.public:
            print('Answer was not "y" or "yes", not making public')

    if args.recording_dir.startswith('~'):
        args.recording_dir = os.path.expanduser(args.recording_dir)

    if args.hdf5_2_tfrecord:
        from agents.dagger.train import hdf5_to_tfrecord
        hdf5_to_tfrecord.encode(hdf5_path=args.recording_dir,
                                experiment=args.experiment)
        return

    if args.server or args.json_server:
        from deepdrive_api import server
        log.info('Starting Deepdrive server')
        sim_args = None
        if len(sys.argv) > 2:
            # More than just --server was passed,
            # so sim will be configured purely on the server side,
            # vs purely from the client in order to prevent
            # cheating / clients that change their environment in evals.
            sim_args = get_sim_args_from_command_args(args)
            if sim_args is not None:
                sim_args = sim_args.to_dict()
        ensure_sim()
        server.start(sim, json_mode=args.json_server,
                     sim_path=get_sim_path(), sim_args=sim_args)
        return

    # No special mode requested: run an agent.
    camera_rigs = get_camera_rigs(args)
    driving_style = DrivingStyle.from_str(args.driving_style)
    from install import check_tensorflow_gpu

    if args.path_follower:
        run_path_follower(args, camera_rigs)
        return

    if check_tensorflow_gpu():
        run_tf_based_models(args, camera_rigs, driving_style)
        return

    log.info('Tensorflow not installed, falling back to PID path '
             'follower agent as mnet2 baseline agent requires '
             'Tensorflow')
    run_path_follower(args, camera_rigs)
def run_tf_based_models(args, camera_rigs, driving_style):
    """Train or run a Tensorflow based agent, as selected by ``args``.

    Args:
        args: Parsed command-line arguments; ``args.train`` and
            ``args.agent`` choose between training and running.
        camera_rigs: Passed through to ``run_agent``.
        driving_style: Passed through to ``train_agent``.

    Raises:
        RuntimeError: If ``check_tensorflow_gpu()`` returns a falsy value.
    """
    from install import check_tensorflow_gpu

    tensorflow_ok = check_tensorflow_gpu()
    if not tensorflow_ok:
        raise RuntimeError('Tensorflow not installed, cannot run or '
                           'trained tensorflow agents')

    configure_net_args(args)

    # Training and running are more coupled in RL in our
    # implementation (and generally), so we
    # call train_agent even for eval_only.
    goes_through_training = args.train or args.agent == c.BOOTSTRAPPED_PPO2
    if not goes_through_training:
        run_agent(args, camera_rigs)
        return

    train_agent(args, driving_style)
def train_mobile_net(data_dir, resume_dir=None,
                     train_args_collection_name=None):
    """
    Train the mobile net model on the tfrecords found in ``data_dir``.

    Should see eval steering error of about 0.1135
    Original Deepdrive 2.0 baseline steering error eval was ~0.2,
    train steering error: ~0.08

    Each training / eval stage runs through ``isolate_in_process``.

    Args:
        data_dir: Directory that must contain ``*.tfrecord`` files.
        resume_dir: When ``None``, a new timestamped directory under
            ``c.TENSORFLOW_OUT_DIR`` is used and ``fine_tune_new_layers``
            runs (followed by an eval) first. Otherwise this directory is
            used as the train dir and that first stage is skipped.
        train_args_collection_name: Key into ``TRAIN_ARG_COLLECTIONS``; an
            unknown key (or ``None``) yields no extra training arguments.

    Raises:
        RuntimeError: If ``check_tensorflow_gpu()`` is falsy, or if
            ``data_dir`` contains no tfrecords.
    """
    if not check_tensorflow_gpu():
        raise RuntimeError(
            'Invalid Tensorflow version detected. See above for details.')

    train_args = TRAIN_ARG_COLLECTIONS.get(train_args_collection_name, {})

    # Fetch the pretrained weights when the checkpoint's .meta file is absent.
    if not os.path.exists(c.MNET2_PRETRAINED_PATH + '.meta'):
        util.download.download(c.MNET2_PRETRAINED_URL + '?cache_bust=1',
                               c.WEIGHTS_DIR, warn_existing=False,
                               overwrite=True)

    if not glob.glob(data_dir + '/*.tfrecord'):
        if glob.glob(data_dir + '/*/*.hdf5'):
            # Raw recordings exist but were never converted: say how to fix.
            raise RuntimeError(
                'No tfrecords in %s - '
                'Run main.py --hdf5-2-tfrecord --recording-dir="%s" '
                'to convert hdf5 records' % (data_dir, data_dir))
        raise RuntimeError('No tfrecords found in %s - aborting' % data_dir)

    # Execute sessions in separate processes to ensure Tensorflow
    # cleans up nicely.
    # Without this, fine_tune_all_layers would crash towards the end with
    #  Error polling for event status: failed to query event:
    #  CUDA_ERROR_LAUNCH_FAILED:
    if resume_dir is None:
        # Format only the timestamp and then join it onto the output dir.
        # Passing the whole path through strftime would treat any '%' in
        # c.TENSORFLOW_OUT_DIR as a format directive.
        timestamp = datetime.now().strftime('%Y-%m-%d__%I-%M-%S%p')
        train_dir = os.path.join(c.TENSORFLOW_OUT_DIR, timestamp)
        print('train_dir is ', train_dir)
        isolate_in_process(fine_tune_new_layers,
                           args=(data_dir, train_dir,
                                 train_args.get('fine_tune_new_layers',
                                                None)))
        isolate_in_process(eval_mobile_net, args=(data_dir,))
    else:
        # TODO(post v3): Fix MNET2/tf-slim issue resuming with train_dir
        train_dir = resume_dir
        print('resume_dir is ', resume_dir)

    # NOTE(review): reconstructed as running for both fresh and resumed
    # runs (train_dir is set in both branches) - confirm against the
    # original indentation.
    isolate_in_process(fine_tune_all_layers,
                       args=(data_dir, train_dir,
                             train_args.get('fine_tune_all_layers', None)))
    isolate_in_process(eval_mobile_net, args=(data_dir,))
    log.info('Finished training')