def train_agent(args, driving_style):
    from agents.dagger.agent import ensure_mnet2_baseline_weights
    if args.agent == c.DAGGER or args.agent == c.DAGGER_MNET2:
        train_dagger(args)
    elif args.agent == c.BOOTSTRAPPED_PPO2:
        from agents.bootstrap_rl.train import train
        net_path = args.net_path
        if not net_path:
            # No starting weights were given, so fetch the MNet2 baseline
            # and bootstrap RL training from it.
            log.info('Bootstrapping from baseline agent')
            net_path = ensure_mnet2_baseline_weights(args.net_path)
        if not args.sync and not args.eval_only:
            # RL training expects consistent time deltas between steps,
            # so force synchronous stepping unless we are only evaluating.
            args.sync = True
            log.warning('Detected training RL in async mode which '
                        'can cause unequal time deltas. '
                        'Switching to synchronous mode. '
                        'Use --sync to avoid this warning.')
        sim_args = get_sim_args_from_command_args(args)
        train.run(args.env_id,
                  resume_dir=args.resume_train,
                  bootstrap_net_path=net_path,
                  agent_name=args.agent,
                  render=args.render,
                  camera_rigs=[c.DEFAULT_CAM],
                  is_sync=args.sync,
                  driving_style=driving_style,
                  is_remote_client=args.remote,
                  eval_only=args.eval_only,
                  sim_args=sim_args)
    else:
        raise Exception('Agent type not recognized')
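
# A minimal sketch (not part of the original module) of the attributes
# train_agent reads from `args` for the bootstrapped PPO2 branch above.
# Field names mirror the accesses in train_agent; the values, including
# the env id, are illustrative placeholders, not the project's defaults.
def _example_train_agent_args():
    from types import SimpleNamespace
    return SimpleNamespace(
        agent=c.BOOTSTRAPPED_PPO2,  # selects the RL branch above
        net_path=None,              # falsy => bootstrap from baseline weights
        sync=False,                 # forced to True for RL training (see warning)
        eval_only=False,
        env_id='Deepdrive-v0',      # hypothetical env id
        resume_train=None,          # checkpoint dir to resume from, or None
        render=False,
        remote=False,
    )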
def train_dagger(args):
    """
    Run the first iteration of DAgger where our policy is random.
    """
    from agents.dagger.train import train
    train.run(resume_dir=args.resume_train,
              data_dir=args.recording_dir,
              agent_name=args.agent,
              overfit=args.overfit,
              eval_only=args.eval_only,
              tf_debug=args.tf_debug,
              freeze_pretrained=args.freeze_pretrained,
              train_args_collection_name=args.train_args_collection)
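
# Likewise, a hedged sketch of the attributes train_dagger forwards to
# agents.dagger.train.run. The recording path and agent constant are
# hypothetical examples, not values taken from the original source.
def _example_train_dagger_args():
    from types import SimpleNamespace
    return SimpleNamespace(
        resume_train=None,             # checkpoint dir to resume from, or None
        recording_dir='./recordings',  # hypothetical path to recorded driving data
        agent=c.DAGGER_MNET2,          # one of the DAgger agent constants
        overfit=False,
        eval_only=False,
        tf_debug=False,
        freeze_pretrained=False,
        train_args_collection=None,
    )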