Esempio n. 1
0
def main():
    """Command-line entry point: parse the arguments and dispatch one mode.

    Exactly one of the following happens, checked in this order:

    * ``--update-sim``: update the sim and exit.
    * ``--hdf5-2-tfrecord``: convert the recordings in
      ``args.recording_dir`` and exit.
    * ``--server`` / ``--json-server``: start the Deepdrive server and exit.
    * otherwise: run an agent, either the path follower or a
      Tensorflow-based model.
    """
    args, agent_args = get_args()
    c.MAIN_ARGS = vars(args)  # Stored on the config module to document runs
    if args.verbose:
        logs.set_level(logging.DEBUG)

    # Updating the sim is a standalone operation.
    if args.update_sim:
        ensure_sim(update=True)
        return

    # Publishing results needs interactive confirmation unless c.PUBLIC
    # is already set.
    if args.public and not c.PUBLIC:
        reply = input('Please confirm you want to make the results '
                      'of this evaluation public? ')
        args.public = reply.lower() in ('y', 'yes')
        if not args.public:
            print('Answer was not "y" or "yes", not making public')

    if args.recording_dir.startswith('~'):
        args.recording_dir = os.path.expanduser(args.recording_dir)

    if args.hdf5_2_tfrecord:
        # Imported lazily so the other modes do not pay for this import.
        from agents.dagger.train import hdf5_to_tfrecord
        hdf5_to_tfrecord.encode(hdf5_path=args.recording_dir,
                                experiment=args.experiment)
        return

    if args.server or args.json_server:
        from deepdrive_api import server
        log.info('Starting Deepdrive server')
        server_sim_args = None
        if len(sys.argv) > 2:
            # More than just --server was passed,
            # so sim will be configured purely on the server side,
            # vs purely from the client in order to prevent
            # cheating / clients that change their environment in evals.
            parsed_sim_args = get_sim_args_from_command_args(args)
            if parsed_sim_args is not None:
                server_sim_args = parsed_sim_args.to_dict()
        ensure_sim()
        server.start(sim, json_mode=args.json_server,
                     sim_path=get_sim_path(), sim_args=server_sim_args)
        return

    # No special mode requested: run an agent.
    camera_rigs = get_camera_rigs(args)
    driving_style = DrivingStyle.from_str(args.driving_style)
    from install import check_tensorflow_gpu

    use_path_follower = args.path_follower
    if not use_path_follower and not check_tensorflow_gpu():
        log.info('Tensorflow not installed, falling back to PID path '
                 'follower agent as mnet2 baseline agent requires '
                 'Tensorflow')
        use_path_follower = True

    if use_path_follower:
        run_path_follower(args, camera_rigs)
    else:
        run_tf_based_models(args, camera_rigs, driving_style)
Esempio n. 2
0
def run_tf_based_models(args, camera_rigs, driving_style):
    """Train or run a Tensorflow-based agent, as selected by ``args``.

    ``train_agent`` is called when ``args.train`` is set or when
    ``args.agent`` is ``c.BOOTSTRAPPED_PPO2``; otherwise ``run_agent``
    is called with the camera rigs.

    :param args: Parsed command-line arguments.
    :param camera_rigs: Camera rigs passed through to ``run_agent``.
    :param driving_style: Driving style passed through to ``train_agent``.
    :raises RuntimeError: If ``check_tensorflow_gpu()`` returns a falsy
        value.
    """
    from install import check_tensorflow_gpu
    if not check_tensorflow_gpu():
        raise RuntimeError('Tensorflow not installed, cannot run or '
                           'train tensorflow agents')
    configure_net_args(args)
    if args.train or args.agent == c.BOOTSTRAPPED_PPO2:
        # Training and running are more coupled in RL in our
        # implementation (and generally), so we
        # call train_agent even for eval_only.
        train_agent(args, driving_style)
    else:
        run_agent(args, camera_rigs)
def train_mobile_net(data_dir, resume_dir=None,
                     train_args_collection_name=None):
    """
    Fine-tune the MNET2 network on the tfrecords found in ``data_dir``.

    Should see eval steering error of about 0.1135
    Original Deepdrive 2.0 baseline steering error eval was ~0.2,
    train steering error: ~0.08

    Without ``resume_dir`` a new timestamped directory under
    ``c.TENSORFLOW_OUT_DIR`` is used; the new layers are fine-tuned and
    evaluated first, then all layers are fine-tuned and evaluated.
    With ``resume_dir`` only the all-layers stage (and its eval) runs,
    using that directory as the train dir.

    :param data_dir: Directory expected to contain ``*.tfrecord`` files.
    :param resume_dir: Existing train directory to continue in, or
        ``None`` to start a fresh run.
    :param train_args_collection_name: Key into ``TRAIN_ARG_COLLECTIONS``;
        an unknown key or ``None`` yields no extra training args.
    :raises RuntimeError: If ``check_tensorflow_gpu()`` fails, or if
        ``data_dir`` contains no tfrecords.
    """

    if not check_tensorflow_gpu():
        raise RuntimeError(
            'Invalid Tensorflow version detected. See above for details.')

    train_args = TRAIN_ARG_COLLECTIONS.get(train_args_collection_name, {})

    # Fetch the pretrained weights when the checkpoint meta file is missing.
    if not os.path.exists(c.MNET2_PRETRAINED_PATH + '.meta'):
        util.download.download(c.MNET2_PRETRAINED_URL + '?cache_bust=1',
                               c.WEIGHTS_DIR,
                               warn_existing=False, overwrite=True)

    if not glob.glob(data_dir + '/*.tfrecord'):
        if glob.glob(data_dir + '/*/*.hdf5'):
            # Raw recordings exist but were never converted; tell the
            # user how to convert them.
            raise RuntimeError(
                'No tfrecords in %s - '
                'Run main.py --hdf5-2-tfrecord --recording-dir="%s" '
                'to convert hdf5 records' %
                (data_dir, data_dir))
        else:
            raise RuntimeError(
                'No tfrecords found in %s - aborting' % data_dir)

    # Execute sessions in separate processes to ensure Tensorflow
    # cleans up nicely.
    # Without this, fine_tune_all_layers would crash towards the end with
    #  Error polling for event status: failed to query event:
    #  CUDA_ERROR_LAUNCH_FAILED:

    if resume_dir is None:
        # Format only the timestamp, then join it to the output dir.
        # Passing the joined path through strftime would treat any '%'
        # in the directory name as a format directive.
        run_name = datetime.now().strftime('%Y-%m-%d__%I-%M-%S%p')
        train_dir = os.path.join(c.TENSORFLOW_OUT_DIR, run_name)
        print('train_dir is ', train_dir)
        isolate_in_process(
            fine_tune_new_layers,
            args=(data_dir, train_dir,
                  train_args.get('fine_tune_new_layers', None)))
        isolate_in_process(eval_mobile_net, args=(data_dir,))
    else:
        # TODO(post v3): Fix MNET2/tf-slim issue resuming with train_dir
        train_dir = resume_dir
        print('resume_dir is ', resume_dir)

    isolate_in_process(
        fine_tune_all_layers,
        args=(data_dir, train_dir,
              train_args.get('fine_tune_all_layers', None)))
    isolate_in_process(eval_mobile_net, args=(data_dir,))
    log.info('Finished training')