Example #1
def train_agent(args, driving_style):
    # Imported inside the function so agent dependencies load lazily.
    from agents.dagger.agent import ensure_mnet2_baseline_weights
    if args.agent in (c.DAGGER, c.DAGGER_MNET2):
        train_dagger(args)
    elif args.agent == c.BOOTSTRAPPED_PPO2:
        from agents.bootstrap_rl.train import train
        net_path = args.net_path
        if not net_path:
            # No weights supplied; fall back to the MNet2 baseline.
            log.info('Bootstrapping from baseline agent')
            net_path = ensure_mnet2_baseline_weights(args.net_path)
        if not args.sync and not args.eval_only:
            # Async stepping yields unequal time deltas between frames,
            # so force synchronous mode for RL training.
            args.sync = True
            log.warning('Detected training RL in async mode which '
                        'can cause unequal time deltas. '
                        'Switching to synchronous mode. '
                        'Use --sync to avoid this warning.')
        sim_args = get_sim_args_from_command_args(args)
        train.run(args.env_id, resume_dir=args.resume_train,
                  bootstrap_net_path=net_path, agent_name=args.agent,
                  render=args.render, camera_rigs=[c.DEFAULT_CAM],
                  is_sync=args.sync, driving_style=driving_style,
                  is_remote_client=args.remote, eval_only=args.eval_only,
                  sim_args=sim_args)
    else:
        raise ValueError('Agent type not recognized: %s' % args.agent)
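For context, here is a minimal sketch of how train_agent might be wired to an argparse-based entry point. The flag names, the Deepdrive-v0 env id, the DrivingStyle import path, and the default driving style are assumptions for illustration, not the project's actual CLI; the real parser would also define everything get_sim_args_from_command_args reads.

# Illustrative entry point; flag names and import paths are assumptions.
import argparse
from sim.driving_style import DrivingStyle  # assumed location of the enum

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent', default=c.DAGGER_MNET2)
    parser.add_argument('--net-path', dest='net_path', default=None)
    parser.add_argument('--sync', action='store_true')
    parser.add_argument('--eval-only', dest='eval_only', action='store_true')
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--remote', action='store_true')
    parser.add_argument('--resume-train', dest='resume_train', default=None)
    parser.add_argument('--env-id', dest='env_id', default='Deepdrive-v0')
    args = parser.parse_args()
    train_agent(args, driving_style=DrivingStyle.NORMAL)  # assumed enum value

if __name__ == '__main__':
    main()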
Example #2
def train_dagger(args):
    """
    Run the first iteration of DAgger where our policy is random.
    """
    from agents.dagger.train import train
    train.run(resume_dir=args.resume_train, data_dir=args.recording_dir,
              agent_name=args.agent,
              overfit=args.overfit, eval_only=args.eval_only,
              tf_debug=args.tf_debug,
              freeze_pretrained=args.freeze_pretrained,
              train_args_collection_name=args.train_args_collection)
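Since train_dagger only reads attributes off args, it can also be exercised from a plain namespace, e.g. in a quick test. A minimal sketch follows; the attribute values, including the recordings path, are placeholder assumptions rather than recommended settings.

# Illustrative only: a Namespace standing in for parsed CLI args.
from argparse import Namespace

dagger_args = Namespace(
    resume_train=None,                 # start a fresh run
    recording_dir='/tmp/recordings',   # assumed path to recorded demos
    agent=c.DAGGER_MNET2,
    overfit=False,
    eval_only=False,
    tf_debug=False,
    freeze_pretrained=False,
    train_args_collection=None,
)
train_dagger(dagger_args)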