Code Example #1
def create_motion(args, task, oracle_func=None):
    if args['motion'] == 'rnn':
        if (args['warmstart_dict'] is not None) and os.path.isfile(
                args['warmstart_dict']):
            with open(args['warmstart_dict'], 'r') as f:
                trainer_args = json.load(f)
        else:
            trainer_args = args
        common.process_observation_shape(
            'rnn',
            trainer_args['resolution_level'],
            segmentation_input=trainer_args['segment_input'],
            depth_input=trainer_args['depth_input'],
            history_frame_len=1,
            target_mask_input=trainer_args['target_mask_input'])
        import zmq_train
        trainer = zmq_train.create_zmq_trainer('a3c', 'rnn', trainer_args)
        model_file = args['warmstart']
        if model_file is not None:
            trainer.load(model_file)
        trainer.eval()
        motion = RNNMotion(task,
                           trainer,
                           pass_target=args['multi_target'],
                           term_measure=args['terminate_measure'],
                           oracle_func=oracle_func)
    elif args['motion'] == 'random':
        motion = RandomMotion(task,
                              None,
                              term_measure=args['terminate_measure'],
                              oracle_func=oracle_func)
    elif args['motion'] == 'fake':
        motion = FakeMotion(task,
                            None,
                            term_measure=args['terminate_measure'],
                            oracle_func=oracle_func)
    else:  # mixture motion
        mixture_dict_file = args['mixture_motion_dict']
        try:
            with open(mixture_dict_file, 'r') as f:
                arg_dict = json.load(f)
        except Exception as e:
            print('Invalid Mixture Motion Dict!! file = <{}>'.format(
                mixture_dict_file))
            raise e
        trainer_dict, pass_tar_dict, obs_mode_dict = create_mixture_motion_trainer_dict(
            arg_dict)
        motion = MixMotion(task,
                           trainer_dict,
                           pass_tar_dict,
                           term_measure=args['terminate_measure'],
                           obs_mode=obs_mode_dict,
                           oracle_func=oracle_func)
        common.ensure_object_targets(args['object_target'])

    if ('force_oracle_done' in args) and args['force_oracle_done']:
        motion.set_force_oracle_done(True)

    return motion
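
For reference, a minimal, hypothetical sketch of the argument dictionary that create_motion() reads above. The key names mirror the lookups in the code; the values (and the commented-out call) are illustrative only and assume a task environment has already been created.

eval_args = {
    'motion': 'random',             # 'rnn' | 'random' | 'fake' | anything else -> mixture motion
    'terminate_measure': 'mask',    # forwarded as term_measure
    'multi_target': True,
    'warmstart': None,              # checkpoint path used by the 'rnn' branch
    'warmstart_dict': None,         # optional JSON file with trainer arguments
    'mixture_motion_dict': None,    # JSON file read by the mixture branch
    'object_target': False,
    'force_oracle_done': False,
}
# motion = create_motion(eval_args, task, oracle_func=None)  # needs a constructed task/env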
Code Example #2
File: zmq_train.py  Project: qiming-zou/HouseNavAgent
def train(args=None, warmstart=None):

    # Process Observation Shape
    common.process_observation_shape(model='rnn',
                                     resolution_level=args['resolution_level'],
                                     segmentation_input=args['segment_input'],
                                     depth_input=args['depth_input'],
                                     target_mask_input=args['target_mask_input'],
                                     history_frame_len=1)

    args['logger'] = utils.MyLogger(args['log_dir'], True, keep_file_handler=not args['append_file'])

    name = 'ipc://@whatever' + args['job_name']
    name2 = 'ipc://@whatever' + args['job_name'] + '2'
    n_proc = args['n_proc']
    config = create_zmq_config(args)
    procs = [ZMQSimulator(k, name, name2, config) for k in range(n_proc)]
    for k in procs:
        k.start()
    ensure_proc_terminate(procs)

    trainer = create_zmq_trainer(args['algo'], model='rnn', args=args)
    if warmstart is not None:
        if os.path.exists(warmstart):
            print('Warmstarting from <{}> ...'.format(warmstart))
            trainer.load(warmstart)
        else:
            save_dir = args['save_dir']
            print('Warmstarting from save_dir <{}> with version <{}> ...'.format(save_dir, warmstart))
            trainer.load(save_dir, warmstart)


    master = ZMQMaster(name, name2, trainer=trainer, config=args)

    try:
        # both loops must be running
        print('Start Iterations ....')
        send_thread = threading.Thread(target=master.send_loop, daemon=True)
        send_thread.start()
        master.recv_loop()
        print('Done!')
        trainer.save(args['save_dir'], version='final')
    except KeyboardInterrupt:
        trainer.save_all(args['save_dir'], version='interrupt')
        raise
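
The warmstart handling above dispatches on whether the argument is an existing checkpoint path or a version tag inside save_dir. A minimal sketch of that branch as a standalone helper (the helper name is hypothetical):

import os

def resolve_warmstart(trainer, warmstart, save_dir):
    # Treat the argument as a checkpoint path if it exists on disk,
    # otherwise interpret it as a version tag to load from save_dir.
    if warmstart is None:
        return
    if os.path.exists(warmstart):
        trainer.load(warmstart)
    else:
        trainer.load(save_dir, warmstart)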
Code Example #3
def create_mixture_motion_trainer_dict(arg_dict):
    import zmq_train
    trainer_dict = dict()
    pass_tar_dict = dict()
    # segment_input, depth_signal=True, target_mask_signal=False, joint_visual_signal=False
    obs_mode_dict = dict()
    loaded_model = dict()
    for target in all_allowed_targets:
        assert target in arg_dict, '[MixtureMotion] Invalid <arg_dict>! Key=<{}> does not exist!'.format(
            target)
        args = arg_dict[target]
        model_file = args['warmstart']
        assert (model_file is not None) and os.path.exists(model_file), \
            '[MixtureMotion] model file <{}> for target <{}> does not exist!!'.format(model_file, target)
        if model_file in loaded_model:
            trainer_dict[target] = trainer_dict[loaded_model[model_file]]
            pass_tar_dict[target] = pass_tar_dict[loaded_model[model_file]]
            obs_mode_dict[target] = obs_mode_dict[loaded_model[model_file]]
            continue
        common.process_observation_shape(
            'rnn',
            args['resolution_level'],
            segmentation_input=args['segment_input'],
            depth_input=args['depth_input'],
            history_frame_len=1,
            target_mask_input=args['target_mask_input'])
        # ensure object target
        __backup_CFG = common.CFG.copy()
        common.ensure_object_targets(args['object_target'])
        trainer = zmq_train.create_zmq_trainer('a3c', 'rnn', args)
        common.CFG = __backup_CFG  # restore the global config
        # load model
        trainer.load(model_file)
        trainer.eval()
        loaded_model[model_file] = target
        trainer_dict[target] = trainer
        pass_tar_dict[target] = args['multi_target']
        obs_mode_dict[target] = dict(
            segment_input=(args['segment_input'] != 'none'),
            depth_signal=args['depth_input'],
            target_mask_signal=args['target_mask_input'],
            joint_visual_signal=(args['segment_input'] == 'joint'))
    return trainer_dict, pass_tar_dict, obs_mode_dict
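
The loaded_model bookkeeping above lets targets that share the same checkpoint file reuse a single trainer instance. A self-contained sketch of that de-duplication pattern (targets and file names are made up):

loaded_model = {}   # model_file -> first target that loaded it
trainer_dict = {}   # target -> trainer (placeholder strings here)

def load_trainer(model_file):
    return 'trainer<{}>'.format(model_file)   # stands in for create_zmq_trainer + trainer.load

for target, model_file in [('kitchen', 'a.ckpt'), ('bedroom', 'a.ckpt'), ('outdoor', 'b.ckpt')]:
    if model_file in loaded_model:
        trainer_dict[target] = trainer_dict[loaded_model[model_file]]
        continue
    trainer_dict[target] = load_trainer(model_file)
    loaded_model[model_file] = target

print(trainer_dict)   # 'kitchen' and 'bedroom' map to the same trainer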
Code Example #4
def evaluate(args):

    elap = time.time()

    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    # ensure observation shape
    common.process_observation_shape(
        'rnn',
        args['resolution'],
        segmentation_input=args['segmentation_input'],
        depth_input=args['depth_input'],
        history_frame_len=1,
        target_mask_input=args['target_mask_input'])

    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        assert fixed_target in common.all_target_instructions, 'invalid fixed target <{}>'.format(
            fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(
            args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target']
                             and (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])

    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        task.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(
            args['semantic_dir']
        ), '[Error] Semantic Dir <{}> not exists!'.format(args['semantic_dir'])
        assert not args[
            'object_target'], '[ERROR] currently do not support --object-target!'
        print('Loading Semantic Oracle from dir <{}>...'.format(
            args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(
            oracle,
            threshold=args['semantic_threshold'],
            filter_steps=args['semantic_filter_steps'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func)

    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    seed = args['seed']
    max_episode_len = args['max_episode_len']

    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None

    for it in range(args['max_iters']):
        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info

        episode_success.append(0)
        episode_good.append(0)
        cur_stats = dict(best_dist=info['dist'],
                         success=0,
                         good=0,
                         reward=0,
                         target=task.get_current_target(),
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id

        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))

        if args['temperature'] is not None:
            ep_data = motion.run(task.get_current_target(),
                                 max_episode_len,
                                 temperature=args['temperature'])
        else:
            ep_data = motion.run(task.get_current_target(), max_episode_len)

        for dat in ep_data:
            info = dat[4]
            if store_history:
                cur_infos.append(proc_info(info))
            cur_dist = info['dist']
            if cur_dist == 0:
                cur_stats['good'] += 1
                episode_good[-1] = 1
            if cur_dist < cur_stats['best_dist']:
                cur_stats['best_dist'] = cur_dist

        episode_step = len(ep_data)
        if ep_data[-1][3]:  # done
            if ep_data[-1][2] > 5:  # magic number
                episode_success[-1] = 1
                cur_stats['success'] = 1
        cur_stats['length'] = episode_step  # store length

        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        if args['multi_target']:
            logger.print('  ---> Target Room = {}'.format(cur_stats['target']))
        logger.print('  ---> Total Samples = {}'.format(t))
        logger.print('  ---> Success = %d  (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            '  ---> Times of Reaching Target Room = %d  (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print('  ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print('  ---> Birth-place Distance = %d' % cur_stats['optstep'])

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length']
                          for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters']
                          for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))

    return episode_stats
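
The per-target summary at the end of evaluate() groups episode_stats by target and averages the success/reach indicators. A small, self-contained sketch of that aggregation, using illustrative placeholder entries (not real results):

import numpy as np

episode_stats = [    # placeholder entries with the fields the summary relies on
    dict(target='kitchen', success=1, good=1, length=40, meters=6.2),
    dict(target='kitchen', success=0, good=1, length=100, meters=9.0),
    dict(target='bedroom', success=0, good=0, length=100, meters=12.5),
]

for tar in sorted({s['target'] for s in episode_stats}):
    sub = [s for s in episode_stats if s['target'] == tar]
    succ_rate = np.mean([float(s['success'] > 0) for s in sub])
    good_rate = np.mean([float(s['good'] > 0) for s in sub])
    print('%s: n=%d, succ_rate=%.3f, good_rate=%.3f' % (tar, len(sub), succ_rate, good_rate))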
Code Example #5
File: eval_HRL.py  Project: qiming-zou/HouseNavAgent
def evaluate(args, data_saver=None):

    args['segment_input'] = args['segmentation_input']

    backup_rate = args['backup_rate']

    elap = time.time()

    # Do not need to log detailed computation stats
    common.debugger = utils.FakeLogger()

    # ensure observation shape
    common.process_observation_shape(
        'rnn',
        args['resolution'],
        args['segmentation_input'],
        args['depth_input'],
        target_mask_input=args['target_mask_input'])

    fixed_target = args['fixed_target']
    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        assert fixed_target in common.all_target_instructions, 'invalid fixed target <{}>'.format(
            fixed_target)

    __backup_CFG = common.CFG.copy()
    if fixed_target == 'any-room':
        common.ensure_object_targets(False)

    if args['hardness'] is not None:
        print('>>>> Hardness = {}'.format(args['hardness']))
    if args['max_birthplace_steps'] is not None:
        print('>>>> Max BirthPlace Steps = {}'.format(
            args['max_birthplace_steps']))
    set_seed(args['seed'])
    task = common.create_env(args['house'],
                             task_name=args['task_name'],
                             false_rate=args['false_rate'],
                             hardness=args['hardness'],
                             max_birthplace_steps=args['max_birthplace_steps'],
                             success_measure=args['success_measure'],
                             depth_input=args['depth_input'],
                             target_mask_input=args['target_mask_input'],
                             segment_input=args['segmentation_input'],
                             genRoomTypeMap=False,
                             cacheAllTarget=args['multi_target'],
                             render_device=args['render_gpu'],
                             use_discrete_action=True,
                             include_object_target=args['object_target']
                             and (fixed_target != 'any-room'),
                             include_outdoor_target=args['outdoor_target'],
                             discrete_angle=True,
                             min_birthplace_grids=args['min_birthplace_grids'])

    if (fixed_target is not None) and (fixed_target != 'any-room') and (
            fixed_target != 'any-object'):
        task.reset_target(fixed_target)

    if fixed_target == 'any-room':
        common.CFG = __backup_CFG
        common.ensure_object_targets(True)

    # logger
    logger = utils.MyLogger(args['log_dir'], True)
    logger.print('Start Evaluating ...')

    # create semantic classifier
    if args['semantic_dir'] is not None:
        assert os.path.exists(
            args['semantic_dir']
        ), '[Error] Semantic Dir <{}> not exists!'.format(args['semantic_dir'])
        assert not args[
            'object_target'], '[ERROR] currently do not support --object-target!'
        print('Loading Semantic Oracle from dir <{}>...'.format(
            args['semantic_dir']))
        if args['semantic_gpu'] is None:
            args['semantic_gpu'] = common.get_gpus_for_rendering()[0]
        oracle = SemanticOracle(model_dir=args['semantic_dir'],
                                model_device=args['semantic_gpu'],
                                include_object=args['object_target'])
        oracle_func = OracleFunction(
            oracle,
            threshold=args['semantic_threshold'],
            filter_steps=args['semantic_filter_steps'],
            batched_size=args['semantic_batch_size'])
    else:
        oracle_func = None

    # create motion
    motion = create_motion(args, task, oracle_func=oracle_func)
    if args['motion'] == 'random':
        motion.set_skilled_rate(args['random_motion_skill'])
    flag_interrupt = args['interruptive_motion']

    # create planner
    graph = None
    max_motion_steps = args['n_exp_steps']
    if (args['planner'] is None) or (args['planner'] == 'void'):
        graph = VoidPlanner(motion)
    elif args['planner'] == 'oracle':
        graph = OraclePlanner(motion)
    elif args['planner'] == 'rnn':
        #assert False, 'Currently only support Graph-planner'
        graph = RNNPlanner(motion,
                           args['planner_units'],
                           args['planner_filename'],
                           oracle_func=oracle_func)
    else:
        graph = GraphPlanner(motion)
        if not args['outdoor_target']:
            graph.add_excluded_target('outdoor')
        filename = args['planner_filename']
        if filename == 'None': filename = None
        if filename is not None:
            logger.print(' > Loading Graph from file = <{}>'.format(filename))
            with open(filename, 'rb') as f:
                _params = pickle.load(f)
            graph.set_parameters(_params)
        # hack
        if args['planner_obs_noise'] is not None:
            graph.set_param(-1, args['planner_obs_noise'])  # default 0.95

    episode_success = []
    episode_good = []
    episode_stats = []
    t = 0
    seed = args['seed']
    max_episode_len = args['max_episode_len']

    plan_req = args['plan_dist_iters'] if 'plan_dist_iters' in args else None

    ####################
    accu_plan_time = 0
    accu_exe_time = 0
    accu_mask_time = 0
    ####################

    for it in range(args['max_iters']):

        if (it > 0) and (backup_rate > 0) and (it % backup_rate
                                               == 0) and (data_saver
                                                          is not None):
            data_saver.save(episode_stats, ep_id=it)

        cur_infos = []
        motion.reset()
        set_seed(seed + it + 1)  # reset seed
        if plan_req is not None:
            while True:
                task.reset(target=fixed_target)
                m = len(task.get_optimal_plan())
                if (m in plan_req) and plan_req[m] > 0:
                    break
            plan_req[m] -= 1
        else:
            task.reset(target=fixed_target)
        info = task.info

        episode_success.append(0)
        episode_good.append(0)
        task_target = task.get_current_target()
        cur_stats = dict(best_dist=info['dist'],
                         success=0,
                         good=0,
                         reward=0,
                         target=task_target,
                         plan=[],
                         meters=task.info['meters'],
                         optstep=task.info['optsteps'],
                         length=max_episode_len,
                         images=None)
        if hasattr(task.house, "_id"):
            cur_stats['world_id'] = task.house._id

        store_history = args['store_history']
        if store_history:
            cur_infos.append(proc_info(task.info))

        episode_step = 0

        # reset planner
        if graph is not None:
            graph.reset()

        while episode_step < max_episode_len:
            if flag_interrupt and motion.is_interrupt():
                graph_target = task.get_current_target()
            else:
                # TODO #####################
                tt = time.time()
                mask_feat = (oracle_func.get(task) if oracle_func is not None
                             else task.get_feature_mask())
                accu_mask_time += time.time() - tt
                tt = time.time()
                graph_target = graph.plan(mask_feat, task_target)
                accu_plan_time += time.time() - tt
                ################################
            graph_target_id = common.target_instruction_dict[graph_target]
            allowed_steps = min(max_episode_len - episode_step,
                                max_motion_steps)

            ###############
            # TODO
            tt = time.time()
            motion_data = motion.run(graph_target, allowed_steps)
            accu_exe_time += time.time() - tt

            cur_stats['plan'].append(
                (graph_target, len(motion_data),
                 (motion_data[-1][0][graph_target_id] > 0)))

            # store stats
            for dat in motion_data:
                info = dat[4]
                if store_history:
                    cur_infos.append(proc_info(info))
                cur_dist = info['dist']
                if cur_dist == 0:
                    cur_stats['good'] += 1
                    episode_good[-1] = 1
                if cur_dist < cur_stats['best_dist']:
                    cur_stats['best_dist'] = cur_dist

            # update graph
            ## TODO ############
            tt = time.time()
            graph.observe(motion_data, graph_target)
            accu_plan_time += time.time() - tt

            episode_step += len(motion_data)

            # check done
            if motion_data[-1][3]:
                if motion_data[-1][2] > 5:  # magic number
                    episode_success[-1] = 1
                    cur_stats['success'] = 1
                break

        cur_stats['length'] = episode_step  # store length

        if store_history:
            cur_stats['infos'] = cur_infos
        episode_stats.append(cur_stats)

        dur = time.time() - elap
        logger.print('Episode#%d, Elapsed = %.3f min' % (it + 1, dur / 60))
        #TODO #################
        logger.print(' >>> Mask Time = %.4f min' % (accu_mask_time / 60))
        logger.print(' >>> Plan Time = %.4f min' % (accu_plan_time / 60))
        logger.print(' >>> Motion Time = %.4f min' % (accu_exe_time / 60))
        if args['multi_target']:
            logger.print('  ---> Target Room = {}'.format(cur_stats['target']))
        logger.print('  ---> Total Samples = {}'.format(t))
        logger.print('  ---> Success = %d  (rate = %.3f)' %
                     (cur_stats['success'], np.mean(episode_success)))
        logger.print(
            '  ---> Times of Reaching Target Room = %d  (rate = %.3f)' %
            (cur_stats['good'], np.mean(episode_good)))
        logger.print('  ---> Best Distance = %d' % cur_stats['best_dist'])
        logger.print('  ---> Birth-place Meters = %.4f (optstep = %d)' %
                     (cur_stats['meters'], cur_stats['optstep']))
        logger.print('  ---> Planner Results = {}'.format(cur_stats['plan']))

    logger.print('######## Final Stats ###########')
    logger.print('Success Rate = %.3f' % np.mean(episode_success))
    logger.print(
        '> Avg Ep-Length per Success = %.3f' %
        np.mean([s['length'] for s in episode_stats if s['success'] > 0]))
    logger.print(
        '> Avg Birth-Meters per Success = %.3f' %
        np.mean([s['meters'] for s in episode_stats if s['success'] > 0]))
    logger.print('Reaching Target Rate = %.3f' % np.mean(episode_good))
    logger.print('> Avg Ep-Length per Target Reach = %.3f' %
                 np.mean([s['length']
                          for s in episode_stats if s['good'] > 0]))
    logger.print('> Avg Birth-Meters per Target Reach = %.3f' %
                 np.mean([s['meters']
                          for s in episode_stats if s['good'] > 0]))
    if args['multi_target']:
        all_targets = list(set([s['target'] for s in episode_stats]))
        for tar in all_targets:
            n = sum([1.0 for s in episode_stats if s['target'] == tar])
            succ = [
                float(s['success'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            good = [
                float(s['good'] > 0) for s in episode_stats
                if s['target'] == tar
            ]
            length = [s['length'] for s in episode_stats if s['target'] == tar]
            meters = [s['meters'] for s in episode_stats if s['target'] == tar]
            good_len = np.mean([l for l, g in zip(length, good) if g > 0.5])
            succ_len = np.mean([l for l, s in zip(length, succ) if s > 0.5])
            good_mts = np.mean([l for l, g in zip(meters, good) if g > 0.5])
            succ_mts = np.mean([l for l, s in zip(meters, succ) if s > 0.5])
            logger.print(
                '>>>>> Multi-Target <%s>: Rate = %.3f (n=%d), Good = %.3f (AvgLen=%.3f; Mts=%.3f), Succ = %.3f (AvgLen=%.3f; Mts=%.3f)'
                % (tar, n / len(episode_stats), n, np.mean(good), good_len,
                   good_mts, np.mean(succ), succ_len, succ_mts))

    return episode_stats
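
The HRL loop above hands the motion module a step budget of at most max_motion_steps per sub-goal, capped by whatever remains of max_episode_len. A tiny sketch of that budgeting logic in isolation (the constants are illustrative):

max_episode_len = 300
max_motion_steps = 50
episode_step = 0
while episode_step < max_episode_len:
    allowed_steps = min(max_episode_len - episode_step, max_motion_steps)
    steps_used = allowed_steps      # stand-in for len(motion.run(graph_target, allowed_steps))
    episode_step += steps_used
print(episode_step)                 # never exceeds max_episode_len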