Example 1
def train():

    logger.configure()
    set_global_seeds(args.seed)

    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env,
                  datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists: %s" % directory)
    with open(os.path.join(directory, 'learning_prop.json'), 'w') as f:
        json.dump(vars(args), f)

    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)

    nb_test_steps = args.nb_test_steps if args.nb_test_steps > 0 else None
    if args.record == 1:
        env = Monitor(env, directory=args.log_dir)
    with tf.device(args.device):
        model = deepq.models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )

        act, records = deepq.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            train_freq=4,
            print_freq=1000,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            target_network_update_freq=args.target_update_freq,
            gamma=0.99,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            epoch_steps=args.nb_epoch_steps,
            gpu_memory=args.gpu_memory,
            double_q=args.double_q,
            save_dir=directory,
            nb_test_steps=nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
    env.close()
    plot(records, directory)
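
All of these training snippets reference a module-level argparse namespace called `args`. The parser itself is not shown in the examples; the sketch below is only an assumption about what it might look like, using a few of the flag names that appear above (the defaults are illustrative, not the original project's values):

import argparse

# Hypothetical parser: flag names mirror the attributes used in the examples
# (args.env, args.seed, args.log_dir, args.nb_train_steps, ...); the defaults
# are placeholders.
parser = argparse.ArgumentParser()
parser.add_argument('--env', type=str, default='BreakoutNoFrameskip-v4')
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--log_dir', type=str, default='./logs')
parser.add_argument('--nb_train_steps', type=int, default=1000000)
parser.add_argument('--learning_rate', type=float, default=1e-4)
args = parser.parse_args()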
Example 2
def train(seed, save_dir):
    logger.configure()
    set_global_seeds(seed)

    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)

    env = envs.make(args.env,
                    'atari',
                    record=bool(args.record),
                    directory=save_dir_0)

    nb_test_steps = args.nb_test_steps if args.nb_test_steps > 0 else None
    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            model = deepq.models.cnn_to_mlp(
                convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                hiddens=[256],
                dueling=bool(args.dueling),
            )

            act = deepq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                train_freq=4,
                print_freq=1000,
                checkpoint_freq=int(args.nb_train_steps / 10),
                learning_starts=args.nb_warmup_steps,
                target_network_update_freq=args.target_update_freq,
                gamma=0.99,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                scope=args.scope,
                double_q=args.double_q,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env,
                                   'atari',
                                   nb_test_steps=nb_test_steps,
                                   save_dir=save_dir_0,
                                   render=bool(args.render)),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=bool(args.render),
            )
            print("Saving model to model.pkl")
            act.save(os.path.join(save_dir_0, "model.pkl"))
    env.close()
    if args.record == 1:
        env.moviewriter.finish()
Example 3
def train():
    set_global_seeds(args.seed)
    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env,
                  datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists: %s" % directory)
    with open(os.path.join(directory, 'learning_prop.json'), 'w') as f:
        json.dump(vars(args), f)

    env = envs.make(args.env,
                    render=bool(args.render),
                    record=bool(args.record),
                    dirname=directory)

    with tf.device(args.device):
        model = deepq.models.mlp([args.num_units] * args.num_layers)
        act, records = deepq.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            batch_size=args.batch_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            target_network_update_freq=args.target_update_freq,
            print_freq=10,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            gamma=args.gamma,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            callback=None,  #callback,
            epoch_steps=args.nb_epoch_steps,
            gpu_memory=args.gpu_memory,
            save_dir=directory,
            double_q=args.double_q,
            nb_test_steps=args.nb_test_steps,
            test_eps=args.test_eps,
            render=bool(args.render),
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
    plot(records, directory)
    memo = input("Memo for this experiment?: ")
    with open(os.path.join(directory, "memo.txt"), 'w') as f:
        f.write(memo)
    if args.record == 1:
        env.moviewriter.finish()
Example 4
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
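
A possible way to call make_mujoco_env is sketched below; the environment id and seed are placeholders, and it assumes logger.configure() has been called so that logger.get_dir() points at a writable directory (the function also relies on MPI being initialized, with rank 0 in a single-process run):

# Illustrative usage only; 'Hopper-v2' and the seed are arbitrary choices.
logger.configure()
env = make_mujoco_env('Hopper-v2', seed=0)
obs = env.reset()
env.close()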
Example 5
def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = FlattenDictWrapper(env, ['observation', 'desired_goal'])
    env = Monitor(
        env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',))
    env.seed(seed)
    return env
Example 6
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.
    """
    if wrapper_kwargs is None: wrapper_kwargs = {}
    def make_env(rank): # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk
    set_global_seeds(seed)
    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
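
Similarly, a hedged usage sketch for make_atari_env; the environment id and number of workers are placeholders, and logger.configure() is assumed to have been called beforehand:

# Illustrative usage only; the env id and num_env are arbitrary choices.
logger.configure()
vec_env = make_atari_env('PongNoFrameskip-v4', num_env=4, seed=0)
obs = vec_env.reset()  # one stacked observation per worker process
vec_env.close()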
Example 7
def train(seed, save_dir):
    set_global_seeds(seed)
    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)

    env = envs.make(args.env, 'classic_control')
    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            model = models.mlp([args.num_units] * args.num_layers,
                               init_mean=args.init_mean,
                               init_sd=args.init_sd)
            act = deepadfq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                batch_size=args.batch_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                target_network_update_freq=args.target_update_freq,
                print_freq=args.nb_epoch_steps,
                checkpoint_freq=int(args.nb_train_steps / 5),
                learning_starts=args.nb_warmup_steps,
                gamma=args.gamma,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                callback=None,  #callback,
                alg=args.alg,
                scope=args.scope,
                sdMin=np.sqrt(args.varth),
                noise=args.noise,
                act_policy=args.act_policy,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env,
                                   'classic_control',
                                   save_dir=save_dir_0,
                                   render=bool(args.render)),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=bool(args.render),
            )
    if args.record == 1:
        env.moviewriter.finish()
Example 8
def test():
    set_global_seeds(args.seed)
    import json
    if args.env == 'TargetTracking-v5':
        import simple_imtracking as simple
    else:
        import simple_tracking as simple

    learning_prop = json.load(
        open(os.path.join(args.log_dir, 'learning_prop.json'), 'r'))
    env = envs.make(
        args.env,
        render=bool(args.render),
        record=bool(args.record),
        ros=bool(args.ros),
        map_name=args.map,
        num_targets=learning_prop['nb_targets'],
        dirname=args.log_dir,
        is_training=True,
        im_size=args.im_size,
    )
    act_params = {'scope': learning_prop['scope'], 'eps': args.test_eps}
    act = simple.load(os.path.join(args.log_dir, args.log_fname), act_params)

    if args.ros_log:
        from envs.target_tracking.ros_wrapper import RosLog
        log = RosLog(num_targets=args.nb_targets,
                     wrapped_num=args.ros + args.render + args.record + 1)
    t = 0
    while (t < args.nb_test_steps):  # test episode
        t += 1
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if args.render:
                env.render()
            if args.ros_log:
                log.log(env)
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
    if args.record:
        env.moviewriter.finish()
    if args.ros_log:
        log.save(args.log_dir)
Example 9
def train(seed, save_dir):
    set_global_seeds(seed)
    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)
    env = envs.make(args.env,
                    'target_tracking',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=save_dir_0,
                    ros=bool(args.ros),
                    map_name=args.map,
                    num_targets=args.nb_targets,
                    im_size=args.im_size,
                    )
    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            hiddens = args.hiddens.split(':')
            hiddens = [int(h) for h in hiddens]
            if args.env == 'TargetTracking-v5':
                model = models.cnn_plus_mlp(
                    convs=[(4, 8, 4), (8, 4, 2)],
                    hiddens=hiddens,
                    dueling=bool(args.dueling),
                    init_mean=args.init_mean,
                    init_sd=args.init_sd,
                    inpt_dim=(args.im_size, args.im_size),
                )
            else:
                model = models.mlp(hiddens, init_mean=args.init_mean, init_sd=args.init_sd)
            act = deepadfq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                batch_size=args.batch_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                target_network_update_freq=args.target_update_freq,
                checkpoint_freq=args.checkpoint_freq,
                learning_starts=args.nb_warmup_steps,
                gamma=args.gamma,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                callback=None,  #callback,
                alg=args.alg,
                scope=args.scope,
                sdMin=np.sqrt(args.varth),
                noise=args.noise,
                act_policy=args.act_policy,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env,
                                   env_type='target_tracking',
                                   save_dir=save_dir_0,
                                   render=bool(args.render),
                                   figID=1,
                                   ros=bool(args.ros),
                                   map_name=args.map,
                                   num_targets=args.nb_targets,
                                   im_size=args.im_size,
                                   eval_type=args.eval_type,
                                   init_file_path=args.init_file_path,
                                   ),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=(bool(args.render) or bool(args.ros)),
            )
            print("Saving model to model.pkl")
            act.save(os.path.join(save_dir_0, "model.pkl"))
    if args.record == 1:
        env.moviewriter.finish()
Example 10
    def test(self, args, env, act):
        seed = args.seed
        env.seed(seed)
        set_global_seeds(seed)

        if args.eval_type == 'random':
            params_set = [{}]
        elif args.eval_type == 'fixed_nb':
            if args.env == 'setTracking-v1':
                params_set = [{}]
            elif args.env == 'setTracking-v2':
                params_set = SET_EVAL_v4
            elif args.env == 'setTracking-v3':
                params_set = SET_EVAL_v3
                # params_set = SET_EVAL_8a
            elif args.env == 'setTracking-v4':
                params_set = SET_EVAL_v4
            elif args.env == 'setTracking-v5':
                params_set = SET_EVAL_v4
            elif args.env == 'maTracking-v4':
                params_set = MA_EVAL
            elif args.env == 'setTracking-v6':
                params_set = SET_EVAL_v3
            elif args.env == 'setTracking-v7':
                params_set = SET_EVAL_v3
                # params_set = SET_EVAL_8a
            else:
                raise ValueError("Eval set not created for this env.")
        elif args.eval_type == 'fixed_2':
            params_set = EVAL_BEHAVIOR_2
            tot_eplen = 60
        elif args.eval_type == 'fixed_4':
            params_set = EVAL_BEHAVIOR_4
            tot_eplen = 100

        else:
            raise ValueError("Wrong evaluation type for ttenv.")

        timelimit_env = env
        while (not hasattr(timelimit_env, '_elapsed_steps')):
            timelimit_env = timelimit_env.env

        if args.ros_log:
            from envs.target_tracking.ros_wrapper import RosLog
            ros_log = RosLog(num_targets=args.nb_targets,
                             wrapped_num=args.ros + args.render + args.record +
                             1)

        init_pose_list = get_init_pose_list(args.nb_test_steps, args.eval_type)
        total_nlogdetcov = []
        for params in params_set:
            ep = 0
            ep_nlogdetcov = []  #'Episode nLogDetCov'
            time_elapsed = ['Elapsed Time (sec)']
            test_observations = np.zeros(args.nb_test_steps)

            while (ep < args.nb_test_steps):  # test episode
                ep += 1
                episode_rew, nlogdetcov, ep_len = 0, 0, 0
                done = {}
                obs = env.reset(init_pose_list=init_pose_list, **params)

                s_time = time.time()

                all_observations = np.zeros(env.nb_targets, dtype=bool)
                action_dict = {}
                bigq0 = []
                bigq1 = []
                # while type(done) is dict:
                while ep_len < tot_eplen:
                    if args.render:
                        env.render()
                    if args.ros_log:
                        ros_log.log(env)
                    for agent_id, a_obs in obs.items():
                        action_dict[agent_id] = act(np.array(a_obs)[None])[0]
                        # record target observations
                        all_observations = np.logical_or(
                            all_observations, a_obs[:, 5].astype(bool))
                    if all(all_observations):
                        test_observations[ep - 1] = 1
                    obs, rew, done, info = env.step(action_dict)
                    episode_rew += rew['__all__']
                    nlogdetcov += info['mean_nlogdetcov']

                    rearrange = [0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11]
                    qs0 = np.zeros((12))
                    qs1 = np.zeros((12))
                    q0 = np.zeros((12))
                    q1 = np.zeros((12))
                    qs0[action_dict['agent-0']] = 1
                    qs1[action_dict['agent-1']] = 1
                    for ii, val in enumerate(rearrange):
                        q0[ii] = qs0[val]
                        q1[ii] = qs1[val]

                    bigq0.append(q0)
                    bigq1.append(q1)
                    ep_len += 1
                bigq0 = np.asarray(bigq0)
                bigq1 = np.asarray(bigq1)

                time_elapsed.append(time.time() - s_time)
                ep_nlogdetcov.append(nlogdetcov)
                if args.render:
                    print(
                        "Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                        % (ep, episode_rew, nlogdetcov))
                if ep % 50 == 0:
                    print(
                        "Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                        % (ep, episode_rew, nlogdetcov))

            if args.record:
                env.moviewriter.finish()
            if args.ros_log:
                ros_log.save(args.log_dir)

            # Stats
            # meanofeps = np.mean(ep_nlogdetcov)
            # total_nlogdetcov.append(meanofeps)
            # # Eval plots and saves
            # if args.env == 'setTracking-v7':
            #     eval_dir = os.path.join(os.path.split(args.log_dir)[0], 'v7_eval_seed%d_'%(seed)+args.map)
            # else:
            #     eval_dir = os.path.join(os.path.split(args.log_dir)[0], 'eval_seed%d_'%(seed)+args.map)
            # model_seed = os.path.split(args.log_dir)[-1]
            # # eval_dir = os.path.join(args.log_dir, 'eval_seed%d_'%(seed)+args.map)
            # # model_seed = os.path.split(args.log_fname)[0]
            # if not os.path.exists(eval_dir):
            #     os.makedirs(eval_dir)
            # # matplotlib.use('Agg')
            # f0, ax0 = plt.subplots()
            # _ = ax0.plot(ep_nlogdetcov, '.')
            # _ = ax0.set_title(args.env)
            # _ = ax0.set_xlabel('episode number')
            # _ = ax0.set_ylabel('mean nlogdetcov')
            # _ = ax0.axhline(y=meanofeps, color='r', linestyle='-', label='mean over episodes: %.2f'%(meanofeps))
            # _ = ax0.legend()
            # _ = ax0.grid()
            # _ = f0.savefig(os.path.join(eval_dir, "%da%dt_%d_eval_"%(env.nb_agents, env.nb_targets, args.nb_test_steps)
            #                                         +model_seed+".png"))
            # plt.close()
            # pickle.dump(ep_nlogdetcov, open(os.path.join(eval_dir,"%da%dt_%d_eval_"%(env.nb_agents, env.nb_targets, args.nb_test_steps))
            #                                                         +model_seed+".pkl", 'wb'))

            f2 = plt.figure()
            ax2 = f2.add_subplot(121, projection='3d')
            ax3 = f2.add_subplot(122, projection='3d')

            lx = len(bigq0[0])
            ly = len(bigq0[:, 0])
            xpos = np.arange(0, lx, 1)
            ypos = np.arange(0, ly, 1)
            xpos, ypos = np.meshgrid(xpos + 0.25, ypos + 0.25)

            xpos = xpos.flatten()
            ypos = ypos.flatten()
            zpos = np.zeros(lx * ly)

            dx = 0.5 * np.ones_like(zpos)
            dy = dx.copy()
            dz0 = bigq0.flatten()
            dz1 = bigq1.flatten()

            cs = (['r'] * 4 + ['g'] * 4 + ['b'] * 4) * ly

            ax2.bar3d(xpos, ypos, zpos, dx, dy, dz0, color=cs)
            ax3.bar3d(xpos, ypos, zpos, dx, dy, dz1, color=cs)

            print(test_observations)
            print("Cooperation ratio over total evals: %.2f" %
                  (np.sum(test_observations) / args.nb_test_steps))
            plt.show()
Example 11
def train():
    set_global_seeds(args.seed)
    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env,
                  datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists: %s" % directory)
    with open(os.path.join(directory, 'learning_prop.json'), 'w') as f:
        json.dump(vars(args), f)

    env = envs.make(
        args.env,
        render=bool(args.render),
        record=bool(args.record),
        ros=bool(args.ros),
        dirname=directory,
        map_name=args.map,
        num_targets=args.nb_targets,
        im_size=args.im_size,
    )
    hiddens = args.hiddens.split(':')
    hiddens = [int(h) for h in hiddens]
    with tf.device(args.device):
        if args.env == 'TargetTracking-v5':
            import simple_imtracking as simple
            model = models.cnn_plus_mlp(
                convs=[(8, 4, 2), (16, 3, 1)],
                hiddens=hiddens,
                dueling=bool(args.dueling),
                init_mean=args.init_mean,
                init_sd=args.init_sd,
            )
        else:
            import simple_tracking as simple
            model = models.mlp(hiddens,
                               init_mean=args.init_mean,
                               init_sd=args.init_sd)

        act, records = simple.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            batch_size=args.batch_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            target_network_update_freq=args.target_update_freq,
            print_freq=10,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            gamma=args.gamma,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            callback=None,  #callback,
            epoch_steps=args.nb_epoch_steps,
            noise=args.noise,
            varTH=args.varth,
            alg=args.alg,
            gpu_memory=args.gpu_memory,
            act_policy=args.act_policy,
            save_dir=directory,
            nb_test_steps=args.nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
            render=(bool(args.render) or bool(args.ros)),
            map_name=args.map,
            num_targets=args.nb_targets,
            im_size=args.im_size,
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
        plot(records, directory)
    memo = input("Memo for this experiment?: ")
    with open(os.path.join(directory, "memo.txt"), 'w') as f:
        f.write(memo)
    if args.record == 1:
        env.moviewriter.finish()
Example 12
def train(seed, save_dir):
    set_global_seeds(seed)
    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)

    env = envs.make(args.env,
                    'target_tracking',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=save_dir_0,
                    ros=bool(args.ros),
                    map_name=args.map,
                    num_targets=args.nb_targets,
                    im_size=args.im_size,
                    )

    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            hiddens = args.hiddens.split(':')
            hiddens = [int(h) for h in hiddens]
            model = deepq.models.mlp(hiddens)
            act = deepq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                batch_size=args.batch_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                target_network_update_freq=args.target_update_freq,
                print_freq=10,
                checkpoint_freq=int(args.nb_train_steps / 10),
                learning_starts=args.nb_warmup_steps,
                gamma=args.gamma,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                callback=None,  #callback,
                double_q=args.double_q,
                scope=args.scope,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env,
                                   env_type='target_tracking',
                                   save_dir=save_dir_0,
                                   render=bool(args.render),
                                   figID=1,
                                   ros=bool(args.ros),
                                   map_name=args.map,
                                   num_targets=args.nb_targets,
                                   eval_type=args.eval_type,
                                   init_file_path=args.init_file_path,
                                   ),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=(bool(args.render) or bool(args.ros)),
            )
            print("Saving model to model.pkl")
            act.save(os.path.join(save_dir_0, "model.pkl"))
    if args.record == 1:
        env.moviewriter.finish()
Example 13
    def test(self, args, env, act):
        seed = args.seed
        env.seed(seed)
        set_global_seeds(seed)

        if args.eval_type == 'random':
            params_set = [{}]
        elif args.eval_type == 'fixed_nb':
            if args.env == 'setTracking-v1':
                params_set = [{}]
            elif args.env == 'setTracking-v2':
                params_set = SET_EVAL_v4
            elif args.env == 'setTracking-v3':
                params_set = SET_EVAL_v3
                # params_set = SET_EVAL_8a
            elif args.env == 'setTracking-v4':
                params_set = SET_EVAL_v4
            elif args.env == 'setTracking-v5':
                params_set = SET_EVAL_v4
            elif args.env == 'maTracking-v4':
                params_set = MA_EVAL
            elif args.env == 'setTracking-v6':
                params_set = SET_EVAL_v3
            elif args.env == 'setTracking-v7':
                params_set = SET_EVAL_v3
                # params_set = SET_EVAL_8a
            else:
                raise ValueError("Eval set not created for this env.")
        else:
            raise ValueError("Wrong evaluation type for ttenv.")

        timelimit_env = env
        while (not hasattr(timelimit_env, '_elapsed_steps')):
            timelimit_env = timelimit_env.env

        if args.ros_log:
            from envs.target_tracking.ros_wrapper import RosLog
            ros_log = RosLog(num_targets=args.nb_targets,
                             wrapped_num=args.ros + args.render + args.record +
                             1)

        total_nlogdetcov = []
        for params in params_set:
            ep = 0
            ep_nlogdetcov = []  #'Episode nLogDetCov'
            time_elapsed = ['Elapsed Time (sec)']

            while (ep < args.nb_test_steps):  # test episode
                ep += 1
                episode_rew, nlogdetcov = 0, 0
                done = {}
                obs = env.reset(**params)

                s_time = time.time()

                action_dict = {}
                while type(done) is dict:
                    if args.render:
                        env.render()
                    if args.ros_log:
                        ros_log.log(env)
                    for agent_id, a_obs in obs.items():
                        action_dict[agent_id] = act(np.array(a_obs)[None])[0]
                    obs, rew, done, info = env.step(action_dict)
                    episode_rew += rew['__all__']
                    nlogdetcov += info['mean_nlogdetcov']

                time_elapsed.append(time.time() - s_time)
                ep_nlogdetcov.append(nlogdetcov)
                if args.render:
                    print(
                        "Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                        % (ep, episode_rew, nlogdetcov))
                if ep % 50 == 0:
                    print(
                        "Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                        % (ep, episode_rew, nlogdetcov))

            if args.record:
                env.moviewriter.finish()
            if args.ros_log:
                ros_log.save(args.log_dir)

            # Stats
            meanofeps = np.mean(ep_nlogdetcov)
            total_nlogdetcov.append(meanofeps)
            # Eval plots and saves
            if args.env == 'setTracking-v7':
                eval_dir = os.path.join(
                    os.path.split(args.log_dir)[0],
                    'v7_eval_seed%d_' % (seed) + args.map)
            else:
                eval_dir = os.path.join(
                    os.path.split(args.log_dir)[0],
                    'eval_seed%d_' % (seed) + args.map)
            model_seed = os.path.split(args.log_dir)[-1]
            # eval_dir = os.path.join(args.log_dir, 'eval_seed%d_'%(seed)+args.map)
            # model_seed = os.path.split(args.log_fname)[0]
            if not os.path.exists(eval_dir):
                os.makedirs(eval_dir)
            matplotlib.use('Agg')
            f0, ax0 = plt.subplots()
            _ = ax0.plot(ep_nlogdetcov, '.')
            _ = ax0.set_title(args.env)
            _ = ax0.set_xlabel('episode number')
            _ = ax0.set_ylabel('mean nlogdetcov')
            _ = ax0.axhline(y=meanofeps,
                            color='r',
                            linestyle='-',
                            label='mean over episodes: %.2f' % (meanofeps))
            _ = ax0.legend()
            _ = ax0.grid()
            _ = f0.savefig(
                os.path.join(
                    eval_dir, "%da%dt_%d_eval_" %
                    (env.nb_agents, env.nb_targets, args.nb_test_steps) +
                    model_seed + ".png"))
            plt.close()
            pickle.dump(
                ep_nlogdetcov,
                open(
                    os.path.join(
                        eval_dir, "%da%dt_%d_eval_" %
                        (env.nb_agents, env.nb_targets, args.nb_test_steps)) +
                    model_seed + ".pkl", 'wb'))
        #Plot over all example episode sets
        f1, ax1 = plt.subplots()
        _ = ax1.plot(total_nlogdetcov, '.')
        _ = ax1.set_title(args.env)
        _ = ax1.set_xlabel('example episode set number')
        _ = ax1.set_ylabel('mean nlogdetcov over episodes')
        _ = ax1.grid()
        _ = f1.savefig(
            os.path.join(
                eval_dir,
                'all_%d_eval' % (args.nb_test_steps) + model_seed + '.png'))
        plt.close()
        pickle.dump(
            total_nlogdetcov,
            open(
                os.path.join(eval_dir, 'all_%d_eval' %
                             (args.nb_test_steps)) + model_seed + '%da%dt' %
                (args.nb_agents, args.nb_targets) + '.pkl', 'wb'))