# Store the test environment's discrete action count under "n_actions";
# params is later unpacked into both SAC and Logger.
params.update({"n_actions": test_env.action_space.n})

    print(f"Number of actions: {params['n_actions']}")

    # Optional step: call intro_env() only when the "do_intro_env" flag is set.
    if params["do_intro_env"]:
        intro_env()

    # Environment used for the rest of this script; episodic_life is
    # explicitly turned off here.
    env = make_atari(params["env_name"], episodic_life=False)

    # The agent and its logger are both configured from the full params dict.
    agent = SAC(**params)
    logger = Logger(agent, **params)

    if params["do_train"]:

        if not params["train_from_scratch"]:
            # Resume path: the value returned by load_weights() is used as the
            # episode to continue from (it becomes min_episode below).
            episode = logger.load_weights()
            # Presumably syncs the target network with the restored weights
            # -- confirm against SAC.hard_update_target_network.
            agent.hard_update_target_network()
            # Recompute alpha from the agent's log_alpha value.
            agent.alpha = agent.log_alpha.exp()
            min_episode = episode
            print("Keep training from previous run.")

        else:
            min_episode = 0
            print("Train from scratch.")

        # Zero-filled uint8 array of shape params["state_shape"], then updated
        # from the first observation returned by env.reset().
        stacked_states = np.zeros(shape=params["state_shape"], dtype=np.uint8)
        state = env.reset()
        # NOTE(review): the third argument (True) presumably marks the start of
        # a new episode -- confirm against stack_states.
        stacked_states = stack_states(stacked_states, state, True)
        # Running totals, all starting at zero.
        episode_reward = 0
        alpha_loss, q_loss, policy_loss = 0, 0, 0
        # Episode numbering continues right after min_episode.
        episode = min_episode + 1
예제 #2
0
# Script entry point: build params, environment, agent and logger, optionally
# restore a checkpoint, then prepare the initial stacked state for training.
if __name__ == '__main__':
    # Initialize parameters.
    params = get_params()
    # test_env is used here only to read the size of the action space.
    test_env = make_atari(params["env_name"])
    params.update({"n_actions": test_env.action_space.n})
    print(f"Environment: {params['env_name']}\n"
          f"Number of actions:{params['n_actions']}")
    # Create the training environment.
    env = make_atari(params["env_name"])
    # Seed from the current wall-clock time, so the seed differs between runs.
    env.seed(int(time.time()))

    # The agent and its logger are both configured from the full params dict.
    agent = Agent(**params)
    logger = Logger(agent, **params)
    # Resume from a previously saved model.
    if not params["train_from_scratch"]:
        # load_weights() returns a mapping; the keys read below are
        # "online_model_state_dict", "beta" and "episode".
        chekpoint = logger.load_weights()
        agent.online_model.load_state_dict(chekpoint["online_model_state_dict"])
        # Presumably copies the restored online weights into the target
        # network -- confirm against Agent.hard_update_of_target_network.
        agent.hard_update_of_target_network()
        # NOTE(review): the agent was already constructed from params above, so
        # this update only affects code that reads params["beta"] afterwards --
        # confirm the restored beta actually reaches the agent.
        params.update({"beta": chekpoint["beta"]})
        min_episode = chekpoint["episode"]
        print("Keep training from previous run.")
    # Train the model from scratch.
    else:
        min_episode = 0
        print("Train from scratch.")

    # Run training.
    if params["do_train"]:
        # Zero-filled uint8 array of shape params["state_shape"], then updated
        # from the first observation returned by env.reset().
        stacked_states = np.zeros(shape=params["state_shape"], dtype=np.uint8)
        state = env.reset()
        # NOTE(review): the third argument (True) presumably marks the start of
        # a new episode -- confirm against stack_states.
        stacked_states = stack_states(stacked_states, state, True)