Ejemplo n.º 1
0
    def load(path, num_cpu=16):
        """Rebuild an act function from the file at ``path`` and wrap it.

        The file is read with ``dill`` and must unpack into a
        ``(model_data, act_params)`` pair.  ``act_params`` is forwarded as
        keyword arguments to ``distdeepq.build_act``; ``model_data`` is written
        out as a zip archive, extracted into a temporary directory, and the
        extracted ``model`` entry is passed to ``U.load_state``.

        Args:
            path: Location of the dill-serialized file to read.
            num_cpu: Forwarded to ``U.make_session`` as ``num_cpu``.

        Returns:
            ``ActWrapper(act, act_params)`` for the rebuilt act function.
        """
        # NOTE(security): dill.load can execute arbitrary code embedded in the
        # file; only call this on files from a trusted source.
        with open(path, "rb") as f:
            model_data, act_params = dill.load(f)
        act = distdeepq.build_act(**act_params)
        sess = U.make_session(num_cpu=num_cpu)
        # The session is entered and never exited here.
        # NOTE(review): presumably intentional so it stays active for the
        # returned act function -- confirm against callers.
        sess.__enter__()
        with tempfile.TemporaryDirectory() as td:
            arc_path = os.path.join(td, "packed.zip")
            with open(arc_path, "wb") as f:
                f.write(model_data)

            # Close the archive explicitly: an open handle on packed.zip leaks
            # a file descriptor and can prevent the temporary directory from
            # being removed on platforms that lock open files.
            with zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED) as archive:
                archive.extractall(td)
            U.load_state(os.path.join(td, "model"))

        return ActWrapper(act, act_params)
Ejemplo n.º 2
0
def main():
    """Rebuild a saved act function and evaluate it with ``wang2015_eval``.

    Seeds the global RNGs with 1, parses the command-line arguments, creates
    the environment, reads ``args.json`` from the parent path of
    ``args.model_dir`` to recover the ``vmin`` / ``vmax`` / ``nb_atoms`` values,
    builds the act function with them, loads the state stored under
    ``<model_dir>/saved`` and runs the evaluation.
    """
    set_global_seeds(1)
    args = parse_args()

    with U.make_session(4) as sess:  # noqa
        _, env = make_env(args.env)
        model_parent_path = distdeepq.parent_path(args.model_dir)
        # Use a context manager so the args file is closed deterministically
        # instead of leaking the handle until garbage collection.
        with open(model_parent_path + '/args.json') as args_file:
            old_args = json.load(args_file)

        # Distribution parameters recovered from the saved arguments.
        dist_params = {
            'Vmin': old_args['vmin'],
            'Vmax': old_args['vmax'],
            'nb_atoms': old_args['nb_atoms']
        }
        act = distdeepq.build_act(
            make_obs_ph=lambda name: U.Uint8Input(
                env.observation_space.shape, name=name),
            p_dist_func=distdeepq.models.atari_model(),
            num_actions=env.action_space.n,
            dist_params=dist_params)
        U.load_state(os.path.join(args.model_dir, "saved"))
        wang2015_eval(args.env, act, stochastic=args.stochastic)
Ejemplo n.º 3
0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])


if __name__ == '__main__':
    # Script entry point: rebuild the act function from the saved training
    # arguments, load the state stored under <model_dir>/saved, and hand the
    # environment and act function to play().
    with U.make_session(4) as sess:
        args = parse_args()
        env = make_env(args.env)

        model_parent_path = distdeepq.parent_path(args.model_dir)
        # Use a context manager so the args file is closed deterministically
        # instead of leaking the handle until garbage collection.
        with open(model_parent_path + '/args.json') as args_file:
            old_args = json.load(args_file)

        act = distdeepq.build_act(
            make_obs_ph=lambda name: U.Uint8Input(
                env.observation_space.shape, name=name),
            p_dist_func=distdeepq.models.atari_model(),
            num_actions=env.action_space.n,
            dist_params={
                'Vmin': old_args['vmin'],
                'Vmax': old_args['vmax'],
                'nb_atoms': old_args['nb_atoms']
            })
        U.load_state(os.path.join(args.model_dir, "saved"))
        play(env, act, args.stochastic, args.video)
Ejemplo n.º 4
0
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])


if __name__ == '__main__':
    # Script entry point: rebuild the act function from the saved training
    # arguments, load the state stored under <model_dir>/saved, and hand the
    # environment and act function to play().
    with U.make_session(4) as sess:
        args = parse_args()
        env, _ = distdeepq.make_env(args.env)

        model_parent_path = distdeepq.parent_path(args.model_dir)
        # Use a context manager so the args file is closed deterministically
        # instead of leaking the handle until garbage collection.
        with open(model_parent_path + '/args.json') as args_file:
            old_args = json.load(args_file)

        # Distribution parameters recovered from the saved arguments.
        dist_params = {
            'Vmin': old_args['vmin'],
            'Vmax': old_args['vmax'],
            'nb_atoms': old_args['nb_atoms']
        }
        act = distdeepq.build_act(
            make_obs_ph=lambda name: U.Uint8Input(
                env.observation_space.shape, name=name),
            p_dist_func=distdeepq.models.atari_model(),
            num_actions=env.action_space.n,
            dist_params=dist_params)
        U.load_state(os.path.join(args.model_dir, "saved"))
        play(env, act, args.stochastic, args.video)