```python
params.update({"n_actions": test_env.action_space.n})
print(f"Number of actions: {params['n_actions']}")

if params["do_intro_env"]:
    intro_env()

env = make_atari(params["env_name"], episodic_life=False)
agent = SAC(**params)
logger = Logger(agent, **params)

if params["do_train"]:
    if not params["train_from_scratch"]:
        # Resume: restore weights, sync the target network, and recover alpha.
        episode = logger.load_weights()
        agent.hard_update_target_network()
        agent.alpha = agent.log_alpha.exp()
        min_episode = episode
        print("Keep training from previous run.")
    else:
        min_episode = 0
        print("Train from scratch.")

    # Initialize the frame stack from the first observation.
    stacked_states = np.zeros(shape=params["state_shape"], dtype=np.uint8)
    state = env.reset()
    stacked_states = stack_states(stacked_states, state, True)
    episode_reward = 0
    alpha_loss, q_loss, policy_loss = 0, 0, 0
    episode = min_episode + 1
```
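Both variants rely on a `stack_states` helper that is not shown here. As a point of reference, here is a minimal sketch of one common implementation, assuming an 84×84 grayscale observation and a 4-frame stack (the `preprocess` helper, the use of OpenCV, and the exact shapes are assumptions; the repo's actual preprocessing may differ):

```python
import numpy as np
import cv2  # opencv-python; assumed here for grayscale conversion and resizing


def preprocess(frame):
    # Convert an RGB Atari frame to 84x84 grayscale (DQN-style pipeline).
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)


def stack_states(stacked_states, raw_frame, is_new_episode):
    # stacked_states is assumed to have shape (4, 84, 84) and dtype uint8,
    # matching params["state_shape"] in the main script.
    frame = preprocess(raw_frame)
    if is_new_episode:
        # Fill every slot with the first frame so the stack is well-defined.
        stacked_states = np.stack([frame] * 4, axis=0)
    else:
        # Drop the oldest frame and append the newest one.
        stacked_states = np.concatenate([stacked_states[1:], frame[None]], axis=0)
    return stacked_states.astype(np.uint8)
```

Stacking the last few frames gives the otherwise memoryless convolutional network access to short-term motion information (e.g. the ball's velocity in Breakout).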
```python
if __name__ == '__main__':
    # Initialize parameters.
    params = get_params()

    test_env = make_atari(params["env_name"])
    params.update({"n_actions": test_env.action_space.n})
    print(f"Environment: {params['env_name']}\n"
          f"Number of actions: {params['n_actions']}")

    # Create the training environment.
    env = make_atari(params["env_name"])
    env.seed(int(time.time()))

    agent = Agent(**params)
    logger = Logger(agent, **params)

    # Resume from a pretrained checkpoint.
    if not params["train_from_scratch"]:
        checkpoint = logger.load_weights()
        agent.online_model.load_state_dict(checkpoint["online_model_state_dict"])
        agent.hard_update_of_target_network()
        params.update({"beta": checkpoint["beta"]})
        min_episode = checkpoint["episode"]
        print("Keep training from previous run.")
    # Otherwise, train from scratch.
    else:
        min_episode = 0
        print("Train from scratch.")

    # Run training.
    if params["do_train"]:
        # Initialize the frame stack from the first observation.
        stacked_states = np.zeros(shape=params["state_shape"], dtype=np.uint8)
        state = env.reset()
        stacked_states = stack_states(stacked_states, state, True)
```
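The call to `agent.hard_update_of_target_network()` after loading a checkpoint copies the online network's weights into the target network, so both start from the same point when training resumes. A minimal sketch of what such a method typically looks like in PyTorch (the `target_model` attribute and the surrounding class skeleton are assumptions, not the repo's actual code):

```python
import torch.nn as nn


class Agent:
    def __init__(self, online_model: nn.Module, target_model: nn.Module):
        self.online_model = online_model
        self.target_model = target_model

    def hard_update_of_target_network(self):
        # Overwrite the target network with the online weights, then put it in
        # eval mode; the target only changes at these explicit sync points,
        # which keeps the bootstrapped Q-targets stable between updates.
        self.target_model.load_state_dict(self.online_model.state_dict())
        self.target_model.eval()
```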