def learn_graph(args):
    """Build and train a GraphPlanner for the navigation task.

    Creates the environment and a motion policy, optionally warm-starts the
    graph from a pickled parameter file, then runs the requested training
    mode(s) and prints the learned priors.

    Args:
        args: dict of configuration values (house, task, input modalities,
            warmstart paths, training_mode in {'mle', 'evolution', 'joint'},
            graph_eps, n_trials, max_exp_steps, save_dir, seed, render_gpu).
            NOTE: ``args['warmstart']`` is temporarily overwritten with
            ``args['motion_warmstart']`` while the motion is created, then
            restored — callers should not rely on it mid-call.

    Returns:
        The trained GraphPlanner instance.
    """
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    common.ensure_object_targets(True)

    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             cacheAllTarget=True,
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=True,
                             include_outdoor_target=True,
                             discrete_angle=True)

    # create motion: swap in the motion checkpoint so create_motion() loads
    # the motion policy, then restore the graph warmstart path afterwards
    __graph_warmstart = args['warmstart']
    args['warmstart'] = args['motion_warmstart']
    motion = create_motion(args, task)
    # create graph
    args['warmstart'] = __graph_warmstart
    graph = GraphPlanner(motion)

    # logger
    logger = utils.MyLogger(args['save_dir'], True)
    logger.print("> Training Mode = {}".format(args['training_mode']))
    logger.print("> Graph Eps = {}".format(args['graph_eps']))
    logger.print("> N_Trials = {}".format(args['n_trials']))
    logger.print("> Max Exploration Steps = {}".format(args['max_exp_steps']))

    # Graph Building
    logger.print('Start Graph Building ...')
    if args['warmstart'] is not None:
        filename = args['warmstart']
        logger.print(' >>> Loading Pre-Trained Graph from {}'.format(filename))
        with open(filename, 'rb') as f:  # renamed: do not shadow builtin <file>
            g_params = pickle.load(f)
        graph.set_parameters(g_params)
    train_mode = args['training_mode']
    if train_mode in ['mle', 'joint']:
        graph.learn(n_trial=args['n_trials'],
                    max_allowed_steps=args['max_exp_steps'],
                    eps=args['graph_eps'],
                    logger=logger)
    if train_mode in ['evolution', 'joint']:
        graph.evolve()  # TODO: not implemented yet
    logger.print('######## Final Stats ###########')
    graph._show_prior_room(logger=logger)
    graph._show_prior_object(logger=logger)
    return graph
def create_motion(args, task, oracle_func=None):
    """Construct the motion policy selected by ``args['motion']``.

    Supported values: 'rnn' (loads an A3C/RNN trainer, optionally using the
    hyper-parameter dict stored next to the warmstart model), 'random',
    'fake', and anything else → a mixture of per-target RNN motions loaded
    from ``args['mixture_motion_dict']``.

    Args:
        args: configuration dict (see keys used below).
        task: the environment/task instance the motion will act in.
        oracle_func: optional semantic-oracle callable forwarded to the motion.

    Returns:
        An RNNMotion / RandomMotion / FakeMotion / MixMotion instance.
    """
    if args['motion'] == 'rnn':
        # Prefer the hyper-parameter dict saved next to the warmstart model,
        # if one exists; otherwise fall back to the current args.
        if (args['warmstart_dict'] is not None) and os.path.isfile(
                args['warmstart_dict']):
            with open(args['warmstart_dict'], 'r') as f:
                trainer_args = json.load(f)
        else:
            trainer_args = args
        common.process_observation_shape(
            'rnn',
            trainer_args['resolution_level'],
            segmentation_input=trainer_args['segment_input'],
            depth_input=trainer_args['depth_input'],
            history_frame_len=1,
            target_mask_input=trainer_args['target_mask_input'])
        import zmq_train
        trainer = zmq_train.create_zmq_trainer('a3c', 'rnn', trainer_args)
        model_file = args['warmstart']
        if model_file is not None:
            trainer.load(model_file)
        trainer.eval()
        motion = RNNMotion(task, trainer,
                           pass_target=args['multi_target'],
                           term_measure=args['terminate_measure'],
                           oracle_func=oracle_func)
    elif args['motion'] == 'random':
        motion = RandomMotion(task, None,
                              term_measure=args['terminate_measure'],
                              oracle_func=oracle_func)
    elif args['motion'] == 'fake':
        motion = FakeMotion(task, None,
                            term_measure=args['terminate_measure'],
                            oracle_func=oracle_func)
    else:  # mixture motion
        mixture_dict_file = args['mixture_motion_dict']
        try:
            with open(mixture_dict_file, 'r') as f:
                arg_dict = json.load(f)
        except Exception:
            print('Invalid Mixture Motion Dict!! file = <{}>'.format(
                mixture_dict_file))
            raise  # bare re-raise keeps the original traceback
        trainer_dict, pass_tar_dict, obs_mode_dict = create_mixture_motion_trainer_dict(
            arg_dict)
        motion = MixMotion(task, trainer_dict, pass_tar_dict,
                           term_measure=args['terminate_measure'],
                           obs_mode=obs_mode_dict,
                           oracle_func=oracle_func)
        # re-ensure the global target config after the per-target CFG
        # save/restore done inside create_mixture_motion_trainer_dict()
        common.ensure_object_targets(args['object_target'])
    # idiomatic equivalent of ('force_oracle_done' in args) and args[...]
    if args.get('force_oracle_done'):
        motion.set_force_oracle_done(True)
    return motion
def create_mixture_motion_trainer_dict(arg_dict):
    """Load one RNN trainer per target for a mixture motion policy.

    For every target in ``all_allowed_targets``, reads that target's args from
    ``arg_dict``, loads its warmstart model, and records (a) the trainer,
    (b) whether the target instruction is passed to the policy, and (c) the
    observation-mode flags. Trainers are shared between targets that point at
    the same model file.

    Returns:
        (trainer_dict, pass_tar_dict, obs_mode_dict), each keyed by target.
    """
    import zmq_train
    trainer_dict = dict()
    pass_tar_dict = dict()
    # per-target observation flags:
    # segment_input, depth_signal=True, target_mask_signal=False, joint_visual_signal=False
    obs_mode_dict = dict()
    loaded_model = dict()  # model_file -> first target loaded from it (share cache)
    for target in all_allowed_targets:
        assert target in arg_dict, '[MixtureMotion] Invalid <arg_dict>! Key=<{}> does not exist!'.format(
            target)
        args = arg_dict[target]
        model_file = args['warmstart']
        assert (model_file is not None) and os.path.exists(model_file), \
            '[MixtureMotion] model file <{}> for target <{}> does not exist!!'.format(model_file, target)
        if model_file in loaded_model:
            # model already loaded for an earlier target -> reuse its entries
            trainer_dict[target] = trainer_dict[loaded_model[model_file]]
            pass_tar_dict[target] = pass_tar_dict[loaded_model[model_file]]
            obs_mode_dict[target] = obs_mode_dict[loaded_model[model_file]]
            continue
        common.process_observation_shape(
            'rnn',
            args['resolution_level'],
            segmentation_input=args['segment_input'],
            depth_input=args['depth_input'],
            history_frame_len=1,
            target_mask_input=args['target_mask_input'])
        # ensure object target (mutates the global config, so snapshot it first)
        __backup_CFG = common.CFG.copy()
        common.ensure_object_targets(args['object_target'])
        trainer = zmq_train.create_zmq_trainer('a3c', 'rnn', args)
        common.CFG = __backup_CFG  # restore the global config snapshotted above
        # load model
        trainer.load(model_file)
        trainer.eval()
        loaded_model[model_file] = target
        trainer_dict[target] = trainer
        pass_tar_dict[target] = args['multi_target']
        obs_mode_dict[target] = dict(
            segment_input=(args['segment_input'] != 'none'),
            depth_signal=args['depth_input'],
            target_mask_signal=args['target_mask_input'],
            joint_visual_signal=(args['segment_input'] == 'joint'))
    return trainer_dict, pass_tar_dict, obs_mode_dict
action="store_false", dest="debug", help="turn off debug logs") parser.set_defaults(debug=False) return parser.parse_args() if __name__ == '__main__': cmd_args = parse_args() common.set_house_IDs(cmd_args.env_set, ensure_kitchen=(not cmd_args.multi_target)) print('>> Environment Set = <%s>, Total %d Houses!' % (cmd_args.env_set, len(common.all_houseIDs))) common.ensure_object_targets(cmd_args.object_target) if cmd_args.seed is not None: np.random.seed(cmd_args.seed) random.seed(cmd_args.seed) torch.manual_seed(cmd_args.seed) #optional if cmd_args.action_dim is not None: print('Degree of freedom set to be <{}>!'.format(cmd_args.action_dim)) common.action_shape = (cmd_args.action_dim, 2) if cmd_args.linear_reward: print( '--linearReward option is now *Deprecated*!!! Use --reward-type option instead! Now force <reward_type == \'linear\'>' ) cmd_args.reward_type = 'linear'
def evaluate(house, seed=0, render_device=None, iters=1000, max_episode_len=1000,
             task_name='roomnav', false_rate=0.0, hardness=None, max_birthplace_steps=None,
             success_measure='center', multi_target=False, fixed_target=None,
             algo='nop', model_name='cnn', model_file=None, log_dir='./log/eval',
             store_history=False, use_batch_norm=True,
             rnn_units=None, rnn_layers=None, rnn_cell=None,
             use_action_gating=False, use_residual_critic=False, use_target_gating=False,
             segmentation_input='none', depth_input=False, target_mask_input=False,
             resolution='normal', history_len=4,
             include_object_target=False, include_outdoor_target=True,
             aux_task=False, no_skip_connect=False, feed_forward=False,
             greedy_execution=False, greedy_aux_pred=False):
    """Evaluate a trained policy on the navigation task for ``iters`` episodes.

    Builds args from the keyword parameters, creates the environment and a
    trainer (zmq RNN trainer for model_name == 'rnn', otherwise a generic
    trainer), then steps each episode up to ``max_episode_len`` steps,
    logging per-episode and aggregate success / target-reach statistics.

    Returns:
        episode_stats: list of per-episode stat dicts (best_dist, success,
        good, target, meters, optstep, length, optional 'infos').
    """
    assert not aux_task, 'Do not support Aux-Task now!'
    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    args = common.create_default_args(algo, model=model_name, use_batch_norm=use_batch_norm,
                                      replay_buffer_size=50,
                                      episode_len=max_episode_len,
                                      rnn_units=rnn_units, rnn_layers=rnn_layers, rnn_cell=rnn_cell,
                                      segmentation_input=segmentation_input,
                                      resolution_level=resolution,
                                      depth_input=depth_input,
                                      target_mask_input=target_mask_input,
                                      history_frame_len=history_len)
    args['action_gating'] = use_action_gating
    args['residual_critic'] = use_residual_critic
    args['multi_target'] = multi_target
    args['object_target'] = include_object_target
    args['target_gating'] = use_target_gating
    args['aux_task'] = aux_task
    args['no_skip_connect'] = no_skip_connect
    args['feed_forward'] = feed_forward
    if (fixed_target is not None) and (fixed_target not in ['any-room', 'any-object']):
        # NOTE(review): membership test against n_target_instructions — the sibling
        # evaluate() uses all_target_instructions here; verify which is intended.
        assert fixed_target in common.n_target_instructions, 'invalid fixed target <{}>'.format(
            fixed_target)
    # snapshot the global config; 'any-room' temporarily disables object targets
    # and the snapshot is restored after env creation below
    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if hardness is not None:
        print('>>>> Hardness = {}'.format(hardness))
    if max_birthplace_steps is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(max_birthplace_steps))
    set_seed(seed)
    env = common.create_env(house, task_name=task_name, false_rate=false_rate,
                            hardness=hardness, max_birthplace_steps=max_birthplace_steps,
                            success_measure=success_measure,
                            depth_input=depth_input,
                            target_mask_input=target_mask_input,
                            segment_input=args['segment_input'],
                            genRoomTypeMap=aux_task,
                            cacheAllTarget=multi_target,
                            render_device=render_device,
                            use_discrete_action=('dpg' not in algo),
                            include_object_target=include_object_target and (fixed_target != 'any-room'),
                            include_outdoor_target=include_outdoor_target,
                            discrete_angle=True)
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        env.reset_target(fixed_target)
    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # create model
    if model_name == 'rnn':
        import zmq_train
        trainer = zmq_train.create_zmq_trainer(algo, model_name, args)
    else:
        trainer = common.create_trainer(algo, model_name, args)
    if model_file is not None:
        trainer.load(model_file)
    trainer.eval()  # evaluation mode
    # greedy flags are downgraded to False when the trainer lacks support
    if greedy_execution and hasattr(trainer, 'set_greedy_execution'):
        trainer.set_greedy_execution()
    else:
        print('[Eval] WARNING!!! Greedy Policy Execution NOT Available!!!')
        greedy_execution = False
    if greedy_aux_pred and hasattr(trainer, 'set_greedy_aux_prediction'):
        trainer.set_greedy_aux_prediction()
    else:
        print(
            '[Eval] WARNING!!! Greedy Execution of Auxiliary Task NOT Available!!!'
        )
        greedy_aux_pred = False
    if aux_task:
        assert trainer.is_rnn()  # only rnn support aux_task

    #flag_random_reset_target = multi_target and (fixed_target is None)
    logger = utils.MyLogger(log_dir, True)
    logger.print('Start Evaluating ...')
    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0  # total environment steps taken across all episodes
    for it in range(iters):
        cur_infos = []
        trainer.reset_agent()
        set_seed(seed + it + 1)  # reset seed
        obs = env.reset(target=fixed_target)
        #if multi_target and (fixed_target is not None) and (fixed_target != 'kitchen'):
        #    # TODO: Currently a hacky solution
        #    env.reset(target=fixed_target)
        #    if house < 0:  # multi-house env
        #        obs = env.reset(reset_target=False, keep_world=True)
        #    else:
        #        obs = env.reset(reset_target=False)
        #else:
        #    # TODO: Only support multi-target + fixed kitchen; or fixed-target (kitchen)
        #    obs = env.reset(reset_target=flag_random_reset_target)
        target_id = common.target_instruction_dict[env.get_current_target()]
        if multi_target and hasattr(trainer, 'set_target'):
            trainer.set_target(env.get_current_target())
        if store_history:
            cur_infos.append(proc_info(env.info))
            #cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
        if model_name != 'rnn':
            # non-RNN models expect channel-first frames — TODO confirm layout
            obs = obs.transpose([1, 0, 2])
        episode_success.append(0)
        episode_good.append(0)
        cur_stats = dict(best_dist=1e50,
                         success=0, good=0, reward=0,
                         target=env.get_current_target(),
                         meters=env.info['meters'],
                         optstep=env.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if aux_task:
            cur_stats['aux_pred_rew'] = 0
            cur_stats['aux_pred_err'] = 0
        if hasattr(env.house, "_id"):
            cur_stats['world_id'] = env.house._id
        episode_step = 0
        for _st in range(max_episode_len):
            # get action
            if trainer.is_rnn():
                idx = 0
                if multi_target:
                    if aux_task:
                        action, _, aux_pred = trainer.action(
                            obs, return_numpy=True, target=[[target_id]],
                            return_aux_pred=True)
                    else:
                        action, _ = trainer.action(obs, return_numpy=True,
                                                   target=[[target_id]])
                else:
                    if aux_task:
                        action, _, aux_pred = trainer.action(
                            obs, return_numpy=True, return_aux_pred=True)
                    else:
                        action, _ = trainer.action(obs, return_numpy=True)
                action = action.squeeze()
                if greedy_execution:
                    action = int(np.argmax(action))
                else:
                    action = int(action)
                if aux_task:
                    aux_pred = aux_pred.squeeze()
                    if greedy_aux_pred:
                        aux_pred = int(np.argmax(aux_pred))
                    else:
                        aux_pred = int(aux_pred)
                    # negative aux reward counts as a prediction error
                    aux_rew = trainer.get_aux_task_reward(
                        aux_pred, env.get_current_room_pred_mask())
                    cur_stats['aux_pred_rew'] += aux_rew
                    if aux_rew < 0:
                        cur_stats['aux_pred_err'] += 1
            else:
                idx = trainer.process_observation(obs)
                action = trainer.action(
                    None if greedy_execution else 1.0)  # use gumbel noise
            # environment step
            obs, rew, done, info = env.step(action)
            if store_history:
                cur_infos.append(proc_info(info))
                #cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
            if model_name != 'rnn':
                obs = obs.transpose([1, 0, 2])
            cur_dist = info['dist']
            if cur_dist == 0:
                # distance 0 == agent is in the target room
                cur_stats['good'] += 1
                episode_good[-1] = 1
            t += 1
            if cur_dist < cur_stats['best_dist']:
                cur_stats['best_dist'] = cur_dist
            episode_step += 1
            # collect experience
            trainer.process_experience(idx, action, rew, done,
                                       (_st + 1 >= max_episode_len), info)
            if done:
                if rew > 5:  # magic number: reward > 5 is treated as task success
                    episode_success[-1] = 1
                    cur_stats['success'] = 1
                cur_stats['length'] = episode_step
                if aux_task:
                    cur_stats['aux_pred_err'] /= episode_step
                    cur_stats['aux_pred_rew'] /= episode_step
                break
        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)
        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        if multi_target:
            logger.print(' ---> Target Room = {}'.format(cur_stats['target']))
        logger.print(' ---> Total Samples = {}'.format(t))
        logger.print(' ---> Success = %d (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            ' ---> Times of Reaching Target Room = %d (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print(' ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print(' ---> Birth-place Distance = %d' % cur_stats['optstep'])
        if aux_task:
            logger.print(
                ' >>>>>> Aux-Task: Avg Rew = %.4f, Avg Err = %.4f' %
                (cur_stats['aux_pred_rew'], cur_stats['aux_pred_err']))
    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length'] for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters'] for s in episode_stats if s['good'] > 0]))
    if multi_target:
        # per-target breakdown of the same aggregate statistics
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))
    if aux_task:
        logger.print(
            ' -->>> Auxiliary-Task: Mean Episode Avg Rew = %.6f, Mean Episode Avg Err = %.6f'
            % (np.mean([float(s['aux_pred_rew']) for s in episode_stats]),
               np.mean([float(s['aux_pred_err']) for s in episode_stats])))
    return episode_stats
if __name__ == '__main__': args = parse_args() assert (args.warmstart is None) or (os.path.exists( args.warmstart)), 'Model File Not Exists!' if args.aux_task: assert args.algo == 'a3c', 'Auxiliary Task is only supprted for <--algo a3c>' common.set_house_IDs(args.env_set, ensure_kitchen=(not args.multi_target)) print('>> Environment Set = <%s>, Total %d Houses!' % (args.env_set, len(common.all_houseIDs))) if args.object_target: common.ensure_object_targets() if not os.path.exists(args.log_dir): print('Directory <{}> does not exist! Creating directory ...'.format( args.log_dir)) os.makedirs(args.log_dir) if args.action_dim is not None: common.action_shape = (args.action_dim, 2) print('degree of freedom of the action set to <{}>'.format( args.action_dim)) if args.warmstart is None: model_name = 'random' elif args.algo in ['a2c', 'a3c']: model_name = 'rnn'
def evaluate(args):
    """Evaluate a motion policy (optionally with a semantic oracle) for
    ``args['max_iters']`` episodes and log success statistics.

    Args:
        args: configuration dict. Keys used include environment settings
            (house, task_name, hardness, ...), 'fixed_target',
            'semantic_dir'/'semantic_gpu'/'semantic_threshold'/
            'semantic_filter_steps' for the oracle, 'temperature',
            'max_episode_len', 'max_iters', 'store_history', 'log_dir',
            and optional 'plan_dist_iters' (a {plan_length: budget} map used
            to resample episodes until their optimal-plan length has budget).

    Returns:
        episode_stats: list of per-episode stat dicts.
    """
    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    # ensure observation shape
    common.process_observation_shape(
        'rnn',
        args['resolution'],
        segmentation_input=args['segmentation_input'],
        depth_input=args['depth_input'],
        history_frame_len=1,
        target_mask_input=args['target_mask_input'])
    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        assert fixed_target in common.all_target_instructions, 'invalid fixed target <{}>'.format(
            fixed_target)
    # snapshot the global config; 'any-room' temporarily disables object
    # targets and the snapshot is restored right after env creation
    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)
    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(
            args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target'] and
                             (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        task.reset_target(fixed_target)
    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(
            args['semantic_dir']
        ), '[Error] Semantic Dir <{}> not exists!'.format(args['semantic_dir'])
        assert not args[
            'object_target'], '[ERROR] currently do not support --object-target!'
        print('Loading Semantic Oracle from dir <{}>...'.format(
            args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(
            oracle,
            threshold=args['semantic_threshold'],
            filter_steps=args['semantic_filter_steps'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func)

    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')
    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0  # total environment steps taken across all episodes
    seed = args['seed']
    max_episode_len = args['max_episode_len']
    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None
    for it in range(args['max_iters']):
        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            # resample until this episode's optimal-plan length still has budget
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info
        episode_success.append(0)
        episode_good.append(0)
        cur_stats = dict(best_dist=info['dist'],
                         success=0, good=0, reward=0,
                         target=task.get_current_target(),
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id
        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))
        if args['temperature'] is not None:
            ep_data = motion.run(task.get_current_target(),
                                 max_episode_len,
                                 temperature=args['temperature'])
        else:
            ep_data = motion.run(task.get_current_target(), max_episode_len)
        for dat in ep_data:
            info = dat[4]  # per-step info dict
            if store_history:
                cur_infos.append(proc_info(info))
            cur_dist = info['dist']
            if cur_dist == 0:
                # distance 0 == agent is in the target room
                cur_stats['good'] += 1
                episode_good[-1] = 1
            if cur_dist < cur_stats['best_dist']:
                cur_stats['best_dist'] = cur_dist
        # FIX: accumulate sample counter (previously never updated, so
        # 'Total Samples' always logged 0; sibling evaluate() counts per step)
        t += len(ep_data)
        episode_step = len(ep_data)
        if ep_data[-1][3]:  # done flag of the final step
            if ep_data[-1][2] > 5:  # magic number: reward > 5 means success
                episode_success[-1] = 1
                cur_stats['success'] = 1
        cur_stats['length'] = episode_step  # store length
        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)
        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        if args['multi_target']:
            logger.print(' ---> Target Room = {}'.format(cur_stats['target']))
        logger.print(' ---> Total Samples = {}'.format(t))
        logger.print(' ---> Success = %d (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            ' ---> Times of Reaching Target Room = %d (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print(' ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print(' ---> Birth-place Distance = %d' % cur_stats['optstep'])
    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length'] for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters'] for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        # per-target breakdown of the same aggregate statistics
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))
    return episode_stats
def learn_controller(args):
    """Train an RNNPlanner (controller) on top of a motion policy.

    Creates the environment and motion, runs ``planner.learn(...)`` with the
    hyper-parameters from ``args``, and pickles the train/eval stats to
    ``<save_dir>/train_stats.pkl``.

    Args:
        args: configuration dict. NOTE: ``args['warmstart']`` is temporarily
            overwritten with ``args['motion_warmstart']`` while the motion is
            created, then restored.

    Returns:
        The trained RNNPlanner instance.
    """
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    if args['object_target']:
        common.ensure_object_targets()

    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             cacheAllTarget=True,
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target'],
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True)

    # create motion: swap in the motion checkpoint so create_motion() loads
    # the motion policy, then restore the controller warmstart path
    __controller_warmstart = args['warmstart']
    args['warmstart'] = args['motion_warmstart']
    motion = create_motion(args, task)
    args['warmstart'] = __controller_warmstart

    # logger
    logger = utils.MyLogger(args['save_dir'], True)
    logger.print("> Planner Units = {}".format(args['units']))
    logger.print("> Max Planner Steps = {}".format(args['max_planner_steps']))
    logger.print("> Max Exploration Steps = {}".format(args['max_exp_steps']))
    logger.print("> Reward = {} & {}".format(args['time_penalty'],
                                             args['success_reward']))

    # Planner Learning
    logger.print('Start RNN Planner Learning ...')
    planner = RNNPlanner(motion, args['units'], args['warmstart'])
    fixed_target = None
    if args['only_eval_room']:
        fixed_target = 'any-room'
    elif args['only_eval_object']:
        fixed_target = 'any-object'
    train_stats, eval_stats = \
        planner.learn(args['iters'], args['max_episode_len'],
                      target=fixed_target,
                      motion_steps=args['max_exp_steps'],
                      planner_steps=args['max_planner_steps'],
                      batch_size=args['batch_size'],
                      lrate=args['lrate'],
                      grad_clip=args['grad_clip'],
                      weight_decay=args['weight_decay'],
                      gamma=args['gamma'],
                      entropy_penalty=args['entropy_penalty'],
                      save_dir=args['save_dir'],
                      report_rate=5, eval_rate=20, save_rate=100,
                      logger=logger, seed=args['seed'])
    logger.print('######## Done ###########')
    # os.path.join replaces the manual trailing-'/' handling
    filename = os.path.join(args['save_dir'], 'train_stats.pkl')
    with open(filename, 'wb') as f:
        pickle.dump([train_stats, eval_stats], f)
    logger.print(' --> Training Stats Saved to <{}>!'.format(filename))
    return planner
def evaluate(args, data_saver=None):
    """Hierarchical evaluation: a planner picks sub-targets, a motion executes.

    Builds the environment, an optional semantic oracle, a motion policy and a
    planner (void / oracle / rnn / graph), then for each episode alternates
    planner.plan() and motion.run() until the episode ends or the step budget
    is exhausted, logging per-episode and aggregate statistics.

    Args:
        args: configuration dict (see keys used below).
        data_saver: optional object with ``save(episode_stats, ep_id=...)``,
            called every ``args['backup_rate']`` episodes to checkpoint stats.

    Returns:
        episode_stats: list of per-episode stat dicts (incl. a 'plan' list of
        (sub-target, steps, reached?) triples).
    """
    args['segment_input'] = args['segmentation_input']
    backup_rate = args['backup_rate']

    elap = time.time()
    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()
    # ensure observation shape
    common.process_observation_shape(
        'rnn',
        args['resolution'],
        args['segmentation_input'],
        args['depth_input'],
        target_mask_input=args['target_mask_input'])
    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        # NOTE(review): membership test against n_target_instructions — the
        # evaluate(args) sibling uses all_target_instructions; verify intent.
        assert fixed_target in common.n_target_instructions, 'invalid fixed target <{}>'.format(
            fixed_target)
    # snapshot the global config; 'any-room' temporarily disables object
    # targets and the snapshot is restored right after env creation
    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)
    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(
            args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target'] and
                             (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        task.reset_target(fixed_target)
    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # logger
    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(
            args['semantic_dir']
        ), '[Error] Semantic Dir <{}> not exists!'.format(args['semantic_dir'])
        assert not args[
            'object_target'], '[ERROR] currently do not support --object-target!'
        print('Loading Semantic Oracle from dir <{}>...'.format(
            args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(
            oracle,
            threshold=args['semantic_threshold'],
            filter_steps=args['semantic_filter_steps'],
            batched_size=args['semantic_batch_size'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func=oracle_func)
    if args['motion'] == 'random':
        motion.set_skilled_rate(args['random_motion_skill'])
    flag_interrupt = args['interruptive_motion']

    # create planner
    graph = None
    max_motion_steps = args['n_exp_steps']
    if (args['planner'] == None) or (args['planner'] == 'void'):
        graph = VoidPlanner(motion)
    elif args['planner'] == 'oracle':
        graph = OraclePlanner(motion)
    elif args['planner'] == 'rnn':
        #assert False, 'Currently only support Graph-planner'
        graph = RNNPlanner(motion, args['planner_units'],
                           args['planner_filename'],
                           oracle_func=oracle_func)
    else:
        graph = GraphPlanner(motion)
        if not args['outdoor_target']:
            graph.add_excluded_target('outdoor')
        filename = args['planner_filename']
        if filename == 'None':  # CLI may pass the literal string 'None'
            filename = None
        if filename is not None:
            logger.print(' > Loading Graph from file = <{}>'.format(filename))
            with open(filename, 'rb') as f:
                _params = pickle.load(f)
            graph.set_parameters(_params)
        # hack
        if args['planner_obs_noise'] is not None:
            graph.set_param(-1, args['planner_obs_noise'])  # default 0.95

    episode_success = []
    episode_good = []
    episode_stats = []
    # NOTE(review): t is logged as 'Total Samples' below but never incremented
    # in this function (the step-level evaluate() increments it per step) —
    # it will always print 0; confirm whether this is intended.
    t = 0
    seed = args['seed']
    max_episode_len = args['max_episode_len']
    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None

    #################### timing accumulators (seconds)
    accu_plan_time = 0
    accu_exe_time = 0
    accu_mask_time = 0
    ####################

    for it in range(args['max_iters']):
        # periodically checkpoint collected stats via the data_saver
        if (it > 0) and (backup_rate > 0) and (it % backup_rate == 0) and (
                data_saver is not None):
            data_saver.save(episode_stats, ep_id=it)
        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            # resample until this episode's optimal-plan length still has budget
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info
        episode_success.append(0)
        episode_good.append(0)
        task_target = task.get_current_target()
        cur_stats = dict(best_dist=info['dist'],
                         success=0, good=0, reward=0,
                         target=task_target,
                         plan=[],
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id
        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))
        episode_step = 0
        # reset planner
        if graph is not None:
            graph.reset()
        while episode_step < max_episode_len:
            if flag_interrupt and motion.is_interrupt():
                # interrupted motion falls back to the final task target
                graph_target = task.get_current_target()
            else:
                # TODO #####################
                tt = time.time()
                mask_feat = oracle_func.get(
                    task) if oracle_func is not None else task.get_feature_mask()
                accu_mask_time += time.time() - tt
                tt = time.time()
                graph_target = graph.plan(mask_feat, task_target)
                accu_plan_time += time.time() - tt
                ################################
            graph_target_id = common.target_instruction_dict[graph_target]
            # never let the motion overshoot the remaining episode budget
            allowed_steps = min(max_episode_len - episode_step,
                                max_motion_steps)
            ###############
            # TODO
            tt = time.time()
            motion_data = motion.run(graph_target, allowed_steps)
            accu_exe_time += time.time() - tt
            # record (sub-target, #steps, reached?) — final mask entry > 0 is
            # treated as "sub-target reached"
            cur_stats['plan'].append(
                (graph_target, len(motion_data),
                 (motion_data[-1][0][graph_target_id] > 0)))
            # store stats
            for dat in motion_data:
                info = dat[4]  # per-step info dict
                if store_history:
                    cur_infos.append(proc_info(info))
                cur_dist = info['dist']
                if cur_dist == 0:
                    # distance 0 == agent is in the target room
                    cur_stats['good'] += 1
                    episode_good[-1] = 1
                if cur_dist < cur_stats['best_dist']:
                    cur_stats['best_dist'] = cur_dist
            # update graph
            ## TODO ############
            tt = time.time()
            graph.observe(motion_data, graph_target)
            accu_plan_time += time.time() - tt
            episode_step += len(motion_data)
            # check done
            if motion_data[-1][3]:  # done flag of the final step
                if motion_data[-1][2] > 5:  # magic number: reward > 5 == success
                    episode_success[-1] = 1
                    cur_stats['success'] = 1
                break
        cur_stats['length'] = episode_step  # store length
        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)
        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        #TODO #################
        logger.print(' >>> Mask Time = %.4f min' % (accu_mask_time / 60))
        logger.print(' >>> Plan Time = %.4f min' % (accu_plan_time / 60))
        logger.print(' >>> Motion Time = %.4f min' % (accu_exe_time / 60))
        if args['multi_target']:
            logger.print(' ---> Target Room = {}'.format(cur_stats['target']))
        logger.print(' ---> Total Samples = {}'.format(t))
        logger.print(' ---> Success = %d (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            ' ---> Times of Reaching Target Room = %d (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print(' ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print(' ---> Birth-place Meters = %.4f (optstep = %d)' %
                     (cur_stats['meters'], cur_stats['optstep']))
        logger.print(' ---> Planner Results = {}'.format(cur_stats['plan']))
    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length'] for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters'] for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        # per-target breakdown of the same aggregate statistics
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))
    return episode_stats