Example #1
def learn_graph(args):

    elap = time.time()

    # No need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    common.ensure_object_targets(True)

    set_seed(args['seed'])
    task = common.create_env(args['house'], task_name=args['task_name'], false_rate=args['false_rate'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             cacheAllTarget=True,
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=True,
                             include_outdoor_target=True,
                             discrete_angle=True)

    # create motion
    __graph_warmstart = args['warmstart']
    args['warmstart'] = args['motion_warmstart']
    motion = create_motion(args, task)

    # create graph
    args['warmstart'] = __graph_warmstart
    graph = GraphPlanner(motion)

    # logger
    logger = utils.MyLogger(args['save_dir'], True)

    logger.print("> Training Mode = {}".format(args['training_mode']))
    logger.print("> Graph Eps = {}".format(args['graph_eps']))
    logger.print("> N_Trials = {}".format(args['n_trials']))
    logger.print("> Max Exploration Steps = {}".format(args['max_exp_steps']))

    # Graph Building
    logger.print('Start Graph Building ...')

    if args['warmstart'] is not None:
        filename = args['warmstart']
        logger.print(' >>> Loading Pre-Trained Graph from {}'.format(filename))
        with open(filename, 'rb') as file:
            g_params = pickle.load(file)
        graph.set_parameters(g_params)

    train_mode = args['training_mode']
    if train_mode in ['mle', 'joint']:
        graph.learn(n_trial=args['n_trials'], max_allowed_steps=args['max_exp_steps'], eps=args['graph_eps'], logger=logger)

    if train_mode in ['evolution', 'joint']:
        graph.evolve()   # TODO: not implemented yet

    logger.print('######## Final Stats ###########')
    graph._show_prior_room(logger=logger)
    graph._show_prior_object(logger=logger)
    return graph
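
A minimal invocation sketch for learn_graph (all values hypothetical; assumes the module-level imports used above: common, utils, create_motion, GraphPlanner, set_seed):

graph_args = {
    'seed': 0,
    'house': 0,                    # house index forwarded to common.create_env
    'task_name': 'roomnav',
    'false_rate': 0.0,
    'success_measure': 'center',
    'depth_input': False,
    'target_mask_input': False,
    'segmentation_input': 'none',
    'render_gpu': 0,
    'warmstart': None,             # optional pickle of pre-trained graph parameters
    'motion_warmstart': None,      # checkpoint for the low-level motion policy
    'motion': 'random',            # selects RandomMotion inside create_motion()
    'terminate_measure': 'mask',   # read by create_motion()
    'save_dir': './log/graph',
    'training_mode': 'mle',        # 'mle', 'evolution' or 'joint'
    'graph_eps': 0.0,
    'n_trials': 10,
    'max_exp_steps': 30,
}
graph = learn_graph(graph_args)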
Example #2
def create_motion(args, task, oracle_func=None):
    if args['motion'] == 'rnn':
        if (args['warmstart_dict'] is not None) and os.path.isfile(
                args['warmstart_dict']):
            with open(args['warmstart_dict'], 'r') as f:
                trainer_args = json.load(f)
        else:
            trainer_args = args
        common.process_observation_shape(
            'rnn',
            trainer_args['resolution_level'],
            segmentation_input=trainer_args['segment_input'],
            depth_input=trainer_args['depth_input'],
            history_frame_len=1,
            target_mask_input=trainer_args['target_mask_input'])
        import zmq_train
        trainer = zmq_train.create_zmq_trainer('a3c', 'rnn', trainer_args)
        model_file = args['warmstart']
        if model_file is not None:
            trainer.load(model_file)
        trainer.eval()
        motion = RNNMotion(task,
                           trainer,
                           pass_target=args['multi_target'],
                           term_measure=args['terminate_measure'],
                           oracle_func=oracle_func)
    elif args['motion'] == 'random':
        motion = RandomMotion(task,
                              None,
                              term_measure=args['terminate_measure'],
                              oracle_func=oracle_func)
    elif args['motion'] == 'fake':
        motion = FakeMotion(task,
                            None,
                            term_measure=args['terminate_measure'],
                            oracle_func=oracle_func)
    else:  # mixture motion
        mixture_dict_file = args['mixture_motion_dict']
        try:
            with open(mixture_dict_file, 'r') as f:
                arg_dict = json.load(f)
        except Exception as e:
            print('Invalid Mixture Motion Dict!! file = <{}>'.format(
                mixture_dict_file))
            raise e
        trainer_dict, pass_tar_dict, obs_mode_dict = create_mixture_motion_trainer_dict(
            arg_dict)
        motion = MixMotion(task,
                           trainer_dict,
                           pass_tar_dict,
                           term_measure=args['terminate_measure'],
                           obs_mode=obs_mode_dict,
                           oracle_func=oracle_func)
        common.ensure_object_targets(args['object_target'])

    if ('force_oracle_done' in args) and args['force_oracle_done']:
        motion.set_force_oracle_done(True)

    return motion
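
A usage sketch for the 'rnn' branch of create_motion (paths and values hypothetical; `task` is an environment returned by common.create_env, and create_zmq_trainer may read further hyper-parameter keys from the same dict):

motion_args = {
    'motion': 'rnn',
    'warmstart_dict': None,                 # None => use motion_args itself as trainer args
    'resolution_level': 'normal',
    'segment_input': 'none',
    'depth_input': False,
    'target_mask_input': False,
    'warmstart': './model/motion_rnn.pkl',  # hypothetical checkpoint loaded into the trainer
    'multi_target': True,                   # pass the target instruction to the policy
    'terminate_measure': 'mask',
}
motion = create_motion(motion_args, task)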
Example #3
def create_mixture_motion_trainer_dict(arg_dict):
    import zmq_train
    trainer_dict = dict()
    pass_tar_dict = dict()
    obs_mode_dict = dict()  # per target: segment_input, depth_signal, target_mask_signal, joint_visual_signal
    loaded_model = dict()
    for target in all_allowed_targets:
        assert target in arg_dict, '[MixtureMotion] Invalid <arg_dict>! Key=<{}> does not exist!'.format(
            target)
        args = arg_dict[target]
        model_file = args['warmstart']
        assert (model_file is not None) and os.path.exists(model_file), \
            '[MixtureMotion] model file <{}> for target <{}> does not exist!!'.format(model_file, target)
        if model_file in loaded_model:
            trainer_dict[target] = trainer_dict[loaded_model[model_file]]
            pass_tar_dict[target] = pass_tar_dict[loaded_model[model_file]]
            obs_mode_dict[target] = obs_mode_dict[loaded_model[model_file]]
            continue
        common.process_observation_shape(
            'rnn',
            args['resolution_level'],
            segmentation_input=args['segment_input'],
            depth_input=args['depth_input'],
            history_frame_len=1,
            target_mask_input=args['target_mask_input'])
        # ensure object target
        __backup_CFG = common.CFG.copy()
        common.ensure_object_targets(args['object_target'])
        trainer = zmq_train.create_zmq_trainer('a3c', 'rnn', args)
        common.CFG = __backup_CFG  # restore the config saved above
        # load model
        trainer.load(model_file)
        trainer.eval()
        loaded_model[model_file] = target
        trainer_dict[target] = trainer
        pass_tar_dict[target] = args['multi_target']
        obs_mode_dict[target] = dict(
            segment_input=(args['segment_input'] != 'none'),
            depth_signal=args['depth_input'],
            target_mask_signal=args['target_mask_input'],
            joint_visual_signal=(args['segment_input'] == 'joint'))
    return trainer_dict, pass_tar_dict, obs_mode_dict
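
A sketch of the <arg_dict> layout this helper expects: one trainer-args dict per entry of all_allowed_targets (values hypothetical). Since every target below shares one model file, only a single trainer is created and the remaining targets reuse it through the loaded_model cache:

per_target = {
    'warmstart': './model/shared_motion.pkl',  # must exist (asserted above)
    'resolution_level': 'normal',
    'segment_input': 'none',
    'depth_input': False,
    'target_mask_input': False,
    'object_target': False,
    'multi_target': True,
}
arg_dict = {t: dict(per_target) for t in all_allowed_targets}
trainer_dict, pass_tar_dict, obs_mode_dict = create_mixture_motion_trainer_dict(arg_dict)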
Example #4
                        action="store_false",
                        dest="debug",
                        help="turn off debug logs")
    parser.set_defaults(debug=False)
    return parser.parse_args()


if __name__ == '__main__':
    cmd_args = parse_args()

    common.set_house_IDs(cmd_args.env_set,
                         ensure_kitchen=(not cmd_args.multi_target))
    print('>> Environment Set = <%s>, Total %d Houses!' %
          (cmd_args.env_set, len(common.all_houseIDs)))

    common.ensure_object_targets(cmd_args.object_target)

    if cmd_args.seed is not None:
        np.random.seed(cmd_args.seed)
        random.seed(cmd_args.seed)
        torch.manual_seed(cmd_args.seed)  #optional

    if cmd_args.action_dim is not None:
        print('Degree of freedom set to <{}>!'.format(cmd_args.action_dim))
        common.action_shape = (cmd_args.action_dim, 2)

    if cmd_args.linear_reward:
        print(
            '--linearReward option is now *Deprecated*!!! Use --reward-type option instead! Now force <reward_type == \'linear\'>'
        )
        cmd_args.reward_type = 'linear'
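
For context, a self-contained sketch of the parse_args pattern that the fragment above ends (flag name hypothetical). Note that action="store_false" sets debug to False when the flag is given, while set_defaults(debug=False) also makes False the default, so as written debug is always False; a default of True is presumably what would make such a flag meaningful:

import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--no-debug",        # hypothetical flag name
                        action="store_false",
                        dest="debug",
                        help="turn off debug logs")
    parser.set_defaults(debug=False)
    return parser.parse_args()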
Example #5
def evaluate(house,
             seed=0,
             render_device=None,
             iters=1000,
             max_episode_len=1000,
             task_name='roomnav',
             false_rate=0.0,
             hardness=None,
             max_birthplace_steps=None,
             success_measure='center',
             multi_target=False,
             fixed_target=None,
             algo='nop',
             model_name='cnn',
             model_file=None,
             log_dir='./log/eval',
             store_history=False,
             use_batch_norm=True,
             rnn_units=None,
             rnn_layers=None,
             rnn_cell=None,
             use_action_gating=False,
             use_residual_critic=False,
             use_target_gating=False,
             segmentation_input='none',
             depth_input=False,
             target_mask_input=False,
             resolution='normal',
             history_len=4,
             include_object_target=False,
             include_outdoor_target=True,
             aux_task=False,
             no_skip_connect=False,
             feed_forward=False,
             greedy_execution=False,
             greedy_aux_pred=False):

    assert not aux_task, 'Aux-Task is currently not supported!'

    elap = time.time()

    # No need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    args = common.create_default_args(algo,
                                      model=model_name,
                                      use_batch_norm=use_batch_norm,
                                      replay_buffer_size=50,
                                      episode_len=max_episode_len,
                                      rnn_units=rnn_units,
                                      rnn_layers=rnn_layers,
                                      rnn_cell=rnn_cell,
                                      segmentation_input=segmentation_input,
                                      resolution_level=resolution,
                                      depth_input=depth_input,
                                      target_mask_input=target_mask_input,
                                      history_frame_len=history_len)
    args['action_gating'] = use_action_gating
    args['residual_critic'] = use_residual_critic
    args['multi_target'] = multi_target
    args['object_target'] = include_object_target
    args['target_gating'] = use_target_gating
    args['aux_task'] = aux_task
    args['no_skip_connect'] = no_skip_connect
    args['feed_forward'] = feed_forward
    if (fixed_target is not None) and (fixed_target not in ['any-room', 'any-object']):
        assert fixed_target in common.all_target_instructions, \
            'invalid fixed target <{}>'.format(fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if hardness is not None:
        print('>>>> Hardness = {}'.format(hardness))
    if max_birthplace_steps is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(max_birthplace_steps))
    set_seed(seed)
    env = common.create_env(house,
                            task_name=task_name,
                            false_rate=false_rate,
                            hardness=hardness,
                            max_birthplace_steps=max_birthplace_steps,
                            success_measure=success_measure,
                            depth_input=depth_input,
                            target_mask_input=target_mask_input,
                            segment_input=args['segment_input'],
                            genRoomTypeMap=aux_task,
                            cacheAllTarget=multi_target,
                            render_device=render_device,
                            use_discrete_action=('dpg' not in algo),
                            include_object_target=include_object_target
                            and (fixed_target != 'any-room'),
                            include_outdoor_target=include_outdoor_target,
                            discrete_angle=True)

    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        env.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # create model
    if model_name == 'rnn':
        import zmq_train
        trainer = zmq_train.create_zmq_trainer(algo, model_name, args)
    else:
        trainer = common.create_trainer(algo, model_name, args)
    if model_file is not None:
        trainer.load(model_file)
    trainer.eval()  # evaluation mode
    if greedy_execution:
        if hasattr(trainer, 'set_greedy_execution'):
            trainer.set_greedy_execution()
        else:
            print('[Eval] WARNING!!! Greedy Policy Execution NOT Available!!!')
            greedy_execution = False
    if greedy_aux_pred:
        if hasattr(trainer, 'set_greedy_aux_prediction'):
            trainer.set_greedy_aux_prediction()
        else:
            print('[Eval] WARNING!!! Greedy Execution of Auxiliary Task NOT Available!!!')
            greedy_aux_pred = False

    if aux_task: assert trainer.is_rnn()  # only RNN models support aux_task

    #flag_random_reset_target = multi_target and (fixed_target is None)

    logger = utils.MyLogger(log_dir, True)
    logger.print('Start Evaluating ...')

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    for it in range(iters):
        cur_infos = []
        trainer.reset_agent()
        set_seed(seed + it + 1)  # reset seed
        obs = env.reset(target=fixed_target)
        #if multi_target and (fixed_target is not None) and (fixed_target != 'kitchen'):
        #    # TODO: Currently a hacky solution
        #    env.reset(target=fixed_target)
        #    if house < 0:  # multi-house env
        #        obs = env.reset(reset_target=False, keep_world=True)
        #    else:
        #        obs = env.reset(reset_target=False)
        #else:
        #    # TODO: Only support multi-target + fixed kitchen; or fixed-target (kitchen)
        #    obs = env.reset(reset_target=flag_random_reset_target)
        target_id = common.target_instruction_dict[env.get_current_target()]
        if multi_target and hasattr(trainer, 'set_target'):
            trainer.set_target(env.get_current_target())
        if store_history:
            cur_infos.append(proc_info(env.info))
            #cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
        if model_name != 'rnn': obs = obs.transpose([1, 0, 2])
        episode_success.append(0)
        episode_good.append(0)
        cur_stats = dict(best_dist=1e50,
                         success=0,
                         good=0,
                         reward=0,
                         target=env.get_current_target(),
                         meters=env.info['meters'],
                         optstep=env.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if aux_task:
            cur_stats['aux_pred_rew'] = 0
            cur_stats['aux_pred_err'] = 0
        if hasattr(env.house, "_id"):
            cur_stats['world_id'] = env.house._id
        episode_step = 0
        for _st in range(max_episode_len):
            # get action
            if trainer.is_rnn():
                idx = 0
                if multi_target:
                    if aux_task:
                        action, _, aux_pred = trainer.action(
                            obs,
                            return_numpy=True,
                            target=[[target_id]],
                            return_aux_pred=True)
                    else:
                        action, _ = trainer.action(obs,
                                                   return_numpy=True,
                                                   target=[[target_id]])
                else:
                    if aux_task:
                        action, _, aux_pred = trainer.action(
                            obs, return_numpy=True, return_aux_pred=True)
                    else:
                        action, _ = trainer.action(obs, return_numpy=True)
                action = action.squeeze()
                if greedy_execution:
                    action = int(np.argmax(action))
                else:
                    action = int(action)
                if aux_task:
                    aux_pred = aux_pred.squeeze()
                    if greedy_aux_pred:
                        aux_pred = int(np.argmax(aux_pred))
                    else:
                        aux_pred = int(aux_pred)
                    aux_rew = trainer.get_aux_task_reward(
                        aux_pred, env.get_current_room_pred_mask())
                    cur_stats['aux_pred_rew'] += aux_rew
                    if aux_rew < 0: cur_stats['aux_pred_err'] += 1
            else:
                idx = trainer.process_observation(obs)
                action = trainer.action(
                    None if greedy_execution else 1.0)  # use gumbel noise
            # environment step
            obs, rew, done, info = env.step(action)
            if store_history:
                cur_infos.append(proc_info(info))
                #cur_images.append(env.render(renderMapLoc=env.cam_info['loc'], display=False))
            if model_name != 'rnn': obs = obs.transpose([1, 0, 2])
            cur_dist = info['dist']
            if cur_dist == 0:
                cur_stats['good'] += 1
                episode_good[-1] = 1
            t += 1
            if cur_dist < cur_stats['best_dist']:
                cur_stats['best_dist'] = cur_dist
            episode_step += 1
            # collect experience
            trainer.process_experience(idx, action, rew, done,
                                       (_st + 1 >= max_episode_len), info)
            if done:
                if rew > 5:  # magic number: reward threshold indicating success
                    episode_success[-1] = 1
                    cur_stats['success'] = 1
                cur_stats['length'] = episode_step
                if aux_task:
                    cur_stats['aux_pred_err'] /= episode_step
                    cur_stats['aux_pred_rew'] /= episode_step
                break
        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        if multi_target:
            logger.print('  ---> Target Room = {}'.format(cur_stats['target']))
        logger.print('  ---> Total Samples = {}'.format(t))
        logger.print('  ---> Success = %d  (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            '  ---> Times of Reaching Target Room = %d  (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print('  ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print('  ---> Birth-place Distance = %d' % cur_stats['optstep'])
        if aux_task:
            logger.print(
                '    >>>>>> Aux-Task: Avg Rew = %.4f, Avg Err = %.4f' %
                (cur_stats['aux_pred_rew'], cur_stats['aux_pred_err']))

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length']
                          for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters']
                          for s in episode_stats if s['good'] > 0]))
    if multi_target:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))

    if aux_task:
        logger.print(
            ' -->>> Auxiliary-Task: Mean Episode Avg Rew = %.6f, Mean Episode Avg Err = %.6f'
            % (np.mean([float(s['aux_pred_rew']) for s in episode_stats]),
               np.mean([float(s['aux_pred_err']) for s in episode_stats])))

    return episode_stats
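
A minimal call sketch for this evaluate (values hypothetical; with model_file=None the policy stays randomly initialized):

stats = evaluate(house=0,
                 seed=0,
                 iters=5,
                 max_episode_len=100,
                 algo='a3c',
                 model_name='rnn',
                 model_file=None,
                 multi_target=True,
                 log_dir='./log/eval')
print('success rate = %.3f' % np.mean([s['success'] for s in stats]))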
Example #6

if __name__ == '__main__':
    args = parse_args()
    assert (args.warmstart is None) or os.path.exists(args.warmstart), \
        'Model file does not exist!'

    if args.aux_task:
        assert args.algo == 'a3c', 'Auxiliary Task is only supported for <--algo a3c>'

    common.set_house_IDs(args.env_set, ensure_kitchen=(not args.multi_target))
    print('>> Environment Set = <%s>, Total %d Houses!' %
          (args.env_set, len(common.all_houseIDs)))

    if args.object_target:
        common.ensure_object_targets()

    if not os.path.exists(args.log_dir):
        print('Directory <{}> does not exist! Creating directory ...'.format(
            args.log_dir))
        os.makedirs(args.log_dir)

    if args.action_dim is not None:
        common.action_shape = (args.action_dim, 2)
        print('Degrees of freedom of the action set to <{}>'.format(
            args.action_dim))

    if args.warmstart is None:
        model_name = 'random'
    elif args.algo in ['a2c', 'a3c']:
        model_name = 'rnn'
Example #7
def evaluate(args):

    elap = time.time()

    # No need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    # ensure observation shape
    common.process_observation_shape(
        'rnn',
        args['resolution'],
        segmentation_input=args['segmentation_input'],
        depth_input=args['depth_input'],
        history_frame_len=1,
        target_mask_input=args['target_mask_input'])

    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        assert fixed_target in common.all_target_instructions, 'invalid fixed target <{}>'.format(
            fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(
            args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target']
                             and (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])

    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        task.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(args['semantic_dir']), \
            '[Error] Semantic Dir <{}> does not exist!'.format(args['semantic_dir'])
        assert not args['object_target'], \
            '[ERROR] --object-target is currently not supported!'
        print('Loading Semantic Oracle from dir <{}>...'.format(
            args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(
            oracle,
            threshold=args['semantic_threshold'],
            filter_steps=args['semantic_filter_steps'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func)

    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    seed = args['seed']
    max_episode_len = args['max_episode_len']

    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None

    for it in range(args['max_iters']):
        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info

        episode_success.append(0)
        episode_good.append(0)
        cur_stats = dict(best_dist=info['dist'],
                         success=0,
                         good=0,
                         reward=0,
                         target=task.get_current_target(),
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id

        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))

        if args['temperature'] is not None:
            ep_data = motion.run(task.get_current_target(),
                                 max_episode_len,
                                 temperature=args['temperature'])
        else:
            ep_data = motion.run(task.get_current_target(), max_episode_len)

        for dat in ep_data:
            info = dat[4]
            if store_history:
                cur_infos.append(proc_info(info))
            cur_dist = info['dist']
            if cur_dist == 0:
                cur_stats['good'] += 1
                episode_good[-1] = 1
            if cur_dist < cur_stats['best_dist']:
                cur_stats['best_dist'] = cur_dist

        episode_step = len(ep_data)
        if ep_data[-1][3]:  # done
            if ep_data[-1][2] > 5:  # magic number: reward threshold indicating success
                episode_success[-1] = 1
                cur_stats['success'] = 1
        cur_stats['length'] = episode_step  # store length

        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        if args['multi_target']:
            logger.print('  ---> Target Room = {}'.format(cur_stats['target']))
        logger.print('  ---> Total Samples = {}'.format(t))
        logger.print('  ---> Success = %d  (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            '  ---> Times of Reaching Target Room = %d  (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print('  ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print('  ---> Birth-place Distance = %d' % cur_stats['optstep'])

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length']
                          for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters']
                          for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))

    return episode_stats
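
A usage sketch for this dict-driven evaluate (values hypothetical; the 'random' motion keeps the motion-related keys minimal, and the semantic oracle is disabled):

eval_args = {
    # environment
    'seed': 0, 'house': 0, 'task_name': 'roomnav', 'false_rate': 0.0,
    'hardness': None, 'max_birthplace_steps': None, 'min_birthplace_grids': 0,
    'success_measure': 'center', 'multi_target': True, 'object_target': False,
    'outdoor_target': True, 'fixed_target': None, 'render_gpu': 0,
    # observation shape
    'resolution': 'normal', 'segmentation_input': 'none',
    'depth_input': False, 'target_mask_input': False,
    # motion (keys read by create_motion)
    'motion': 'random', 'terminate_measure': 'mask', 'warmstart': None,
    # semantic oracle (disabled)
    'semantic_dir': None,
    # evaluation loop
    'log_dir': './log/eval', 'max_iters': 10, 'max_episode_len': 100,
    'store_history': False, 'temperature': None,
}
episode_stats = evaluate(eval_args)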
Example #8
def learn_controller(args):

    elap = time.time()

    # No need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    if args['object_target']:
        common.ensure_object_targets()

    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             cacheAllTarget=True,
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target'],
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True)

    # create motion
    __controller_warmstart = args['warmstart']
    args['warmstart'] = args['motion_warmstart']
    motion = create_motion(args, task)
    args['warmstart'] = __controller_warmstart

    # logger
    logger = utils.MyLogger(args['save_dir'], True)

    logger.print("> Planner Units = {}".format(args['units']))
    logger.print("> Max Planner Steps = {}".format(args['max_planner_steps']))
    logger.print("> Max Exploration Steps = {}".format(args['max_exp_steps']))
    logger.print("> Reward = {} & {}".format(args['time_penalty'],
                                             args['success_reward']))

    # Planner Learning
    logger.print('Start RNN Planner Learning ...')

    planner = RNNPlanner(motion, args['units'], args['warmstart'])

    fixed_target = None
    if args['only_eval_room']:
        fixed_target = 'any-room'
    elif args['only_eval_object']:
        fixed_target = 'any-object'
    train_stats, eval_stats = \
        planner.learn(args['iters'], args['max_episode_len'],
                      target=fixed_target,
                      motion_steps=args['max_exp_steps'],
                      planner_steps=args['max_planner_steps'],
                      batch_size=args['batch_size'],
                      lrate=args['lrate'], grad_clip=args['grad_clip'],
                      weight_decay=args['weight_decay'], gamma=args['gamma'],
                      entropy_penalty=args['entropy_penalty'],
                      save_dir=args['save_dir'],
                      report_rate=5, eval_rate=20, save_rate=100,
                      logger=logger, seed=args['seed'])

    logger.print('######## Done ###########')
    filename = os.path.join(args['save_dir'], 'train_stats.pkl')
    with open(filename, 'wb') as f:
        pickle.dump([train_stats, eval_stats], f)
    logger.print('  --> Training Stats Saved to <{}>!'.format(filename))
    return planner
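
The stats pickle written above can be reloaded later, e.g. (path hypothetical, following the save_dir handling above):

import pickle

with open('./log/controller/train_stats.pkl', 'rb') as f:
    train_stats, eval_stats = pickle.load(f)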
Example #9
def evaluate(args, data_saver=None):

    args['segment_input'] = args['segmentation_input']

    backup_rate = args['backup_rate']

    elap = time.time()

    # No need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    # ensure observation shape
    common.process_observation_shape(
        'rnn',
        args['resolution'],
        args['segmentation_input'],
        args['depth_input'],
        target_mask_input=args['target_mask_input'])

    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        assert fixed_target in common.all_target_instructions, \
            'invalid fixed target <{}>'.format(fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(
            args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target']
                             and (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])

    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        task.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # logger
    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(args['semantic_dir']), \
            '[Error] Semantic Dir <{}> does not exist!'.format(args['semantic_dir'])
        assert not args['object_target'], \
            '[ERROR] --object-target is currently not supported!'
        print('Loading Semantic Oracle from dir <{}>...'.format(
            args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(
            oracle,
            threshold=args['semantic_threshold'],
            filter_steps=args['semantic_filter_steps'],
            batched_size=args['semantic_batch_size'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func=oracle_func)
    if args['motion'] == 'random':
        motion.set_skilled_rate(args['random_motion_skill'])
    flag_interrupt = args['interruptive_motion']

    # create planner
    graph = None
    max_motion_steps = args['n_exp_steps']
    if (args['planner'] is None) or (args['planner'] == 'void'):
        graph = VoidPlanner(motion)
    elif args['planner'] == 'oracle':
        graph = OraclePlanner(motion)
    elif args['planner'] == 'rnn':
        #assert False, 'Currently only support Graph-planner'
        graph = RNNPlanner(motion,
                           args['planner_units'],
                           args['planner_filename'],
                           oracle_func=oracle_func)
    else:
        graph = GraphPlanner(motion)
        if not args['outdoor_target']:
            graph.add_excluded_target('outdoor')
        filename = args['planner_filename']
        if filename == 'None': filename = None
        if filename is not None:
            logger.print(' > Loading Graph from file = <{}>'.format(filename))
            with open(filename, 'rb') as f:
                _params = pickle.load(f)
            graph.set_parameters(_params)
        # hack
        if args['planner_obs_noise'] is not None:
            graph.set_param(-1, args['planner_obs_noise'])  # default 0.95

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    seed = args['seed']
    max_episode_len = args['max_episode_len']

    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None

    ####################
    accu_plan_time = 0
    accu_exe_time = 0
    accu_mask_time = 0
    ####################

    for it in range(args['max_iters']):

        if (it > 0) and (backup_rate > 0) and (it % backup_rate == 0) \
                and (data_saver is not None):
            data_saver.save(episode_stats, ep_id=it)

        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info

        episode_success.append(0)
        episode_good.append(0)
        task_target = task.get_current_target()
        cur_stats = dict(best_dist=info['dist'],
                         success=0,
                         good=0,
                         reward=0,
                         target=task_target,
                         plan=[],
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id

        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))

        episode_step = 0

        # reset planner
        if graph is not None:
            graph.reset()

        while episode_step < max_episode_len:
            if flag_interrupt and motion.is_interrupt():
                graph_target = task.get_current_target()
            else:
                # TODO #####################
                tt = time.time()
                mask_feat = (oracle_func.get(task) if oracle_func is not None
                             else task.get_feature_mask())
                accu_mask_time += time.time() - tt
                tt = time.time()
                graph_target = graph.plan(mask_feat, task_target)
                accu_plan_time += time.time() - tt
                ################################
            graph_target_id = common.target_instruction_dict[graph_target]
            allowed_steps = min(max_episode_len - episode_step,
                                max_motion_steps)

            ###############
            # TODO
            tt = time.time()
            motion_data = motion.run(graph_target, allowed_steps)
            accu_exe_time += time.time() - tt

            cur_stats['plan'].append(
                (graph_target, len(motion_data),
                 (motion_data[-1][0][graph_target_id] > 0)))

            # store stats
            for dat in motion_data:
                info = dat[4]
                if store_history:
                    cur_infos.append(proc_info(info))
                cur_dist = info['dist']
                if cur_dist == 0:
                    cur_stats['good'] += 1
                    episode_good[-1] = 1
                if cur_dist < cur_stats['best_dist']:
                    cur_stats['best_dist'] = cur_dist

            # update graph
            ## TODO ############
            tt = time.time()
            graph.observe(motion_data, graph_target)
            accu_plan_time += time.time() - tt

            episode_step += len(motion_data)

            # check done
            if motion_data[-1][3]:
                if motion_data[-1][2] > 5:  # magic number: reward threshold indicating success
                    episode_success[-1] = 1
                    cur_stats['success'] = 1
                break

        cur_stats['length'] = episode_step  # store length

        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        #TODO #################
        logger.print(' >>> Mask Time = %.4f min' % (accu_mask_time / 60))
        logger.print(' >>> Plan Time = %.4f min' % (accu_plan_time / 60))
        logger.print(' >>> Motion Time = %.4f min' % (accu_exe_time / 60))
        if args['multi_target']:
            logger.print('  ---> Target Room = {}'.format(cur_stats['target']))
        logger.print('  ---> Total Samples = {}'.format(t))
        logger.print('  ---> Success = %d  (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            '  ---> Times of Reaching Target Room = %d  (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print('  ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print('  ---> Birth-place Meters = %.4f (optstep = %d)' %
                     (cur_stats['meters'], cur_stats['optstep']))
        logger.print('  ---> Planner Results = {}'.format(cur_stats['plan']))

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length']
                          for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters']
                          for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))

    return episode_stats
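
A sketch of a minimal data_saver compatible with the periodic backup hook above; the only interface evaluate() relies on is .save(stats, ep_id=...) (class name and path scheme hypothetical):

import pickle

class PickleSaver:
    def __init__(self, prefix):
        self.prefix = prefix

    def save(self, stats, ep_id=None):
        # dump the running list of per-episode stats to <prefix>_ep<ep_id>.pkl
        with open('{}_ep{}.pkl'.format(self.prefix, ep_id), 'wb') as f:
            pickle.dump(stats, f)

# evaluate(args, data_saver=PickleSaver('./log/eval/backup'))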