Example #1
import numpy as np
import paddle

# flappyBird, Model, preprocess, resize_shape and save_model_path are
# project-level definitions (see Example #3).
def main():
    # Initialize the game
    env = flappyBird.GameState()
    # Input image channels and action dimension
    obs_dim = resize_shape[0]
    action_dim = env.action_dim

    # Create the model and load the trained weights
    model = Model(obs_dim, action_dim)
    model.set_state_dict(paddle.load(save_model_path))
    model.eval()

    # Start the game
    obs = env.reset()
    episode_reward = 0
    done = False
    # Keep stepping until the episode ends
    while not done:
        obs = preprocess(obs)
        obs = np.expand_dims(obs, axis=0)
        obs = paddle.to_tensor(obs, dtype='float32')
        action = model(obs)
        # Greedy action: index of the largest predicted Q-value
        action = int(paddle.argmax(action))
        obs, reward, done, info = env.step(action, is_train=False)
        episode_reward += reward
    print("Final score: {:.2f}".format(episode_reward))
Example #2
def main():
    # Initialize the game
    env = flappyBird.GameState()
    # Start the game
    obs = env.reset()

    # Step forever, resetting the environment whenever an episode ends
    while True:
        # Random action generated by the game, as an int value
        action = env.action_space()
        # Step the game
        obs, reward, done, info = env.step(action, is_train=False)
        print("=" * 50)
        print("action:", action)
        print("obs shape:", obs.shape)
        print("reward:", reward)
        print("terminal:", done)
        print("info:", info)
        if done:
            obs = env.reset()
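Example #2 is a quick sanity check of the environment API: it drives the game with random actions and prints each Gym-style (obs, reward, done, info) transition. Note that obs here is the raw frame, before the preprocess downscaling that Example #1 applies.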
Example #3
import numpy as np
import paddle
from replay_memory import ReplayMemory
# flappyBird and Model are project modules: the game wrapper and the Q-network.

# Training hyperparameters
batch_size = 256  # batch size
num_episodes = 10000  # number of training episodes
memory_size = 20000  # replay memory capacity
n_step = 3  # number of look-ahead steps for the n-step return
learning_rate = 1e-4  # learning rate
e_greed = 0.1  # initial exploration probability
gamma = 0.99  # reward discount factor
e_greed_decrement = 1e-6  # per-step decrease of the exploration probability during training
update_num = 0  # counter used to schedule target-model updates
resize_shape = (1, 36, 52)  # training input size (channels, height, width); frames are downscaled from the original (288, 512) to reduce computation
save_model_path = "models/model.pdparams"  # path for saving the model

# Create the game environment
env = flappyBird.GameState()
obs_dim = resize_shape[0]  # number of input channels
action_dim = env.action_dim  # number of available actions
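
# The e_greed / e_greed_decrement settings above imply epsilon-greedy
# exploration. A hypothetical sketch of training-time action selection
# (the 0.01 floor on epsilon is an assumption):
def sample_action(model, obs, epsilon):
    if np.random.rand() < epsilon:
        action = np.random.randint(action_dim)  # explore: random action
    else:
        obs = paddle.to_tensor(obs[np.newaxis], dtype='float32')
        action = int(paddle.argmax(model(obs)))  # exploit: greedy action
    return action, max(0.01, epsilon - e_greed_decrement)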

# Policy network (trained) and target network (kept in eval mode,
# periodically synced from the policy network)
policyQ = Model(obs_dim, action_dim)
targetQ = Model(obs_dim, action_dim)
targetQ.eval()
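
# The update_num counter above suggests a periodic hard update of targetQ.
# A hypothetical sketch (the 200-step interval is an assumption, not the
# project's actual schedule):
def sync_target(interval=200):
    global update_num
    update_num += 1
    if update_num % interval == 0:
        targetQ.set_state_dict(policyQ.state_dict())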

# Buffer for assembling n-step transitions, plus the experience replay memory
n_step_buffer = []
rpm = ReplayMemory(memory_size)
# Adam optimizer over the policy network's parameters
optimizer = paddle.optimizer.Adam(parameters=policyQ.parameters(),
                                  learning_rate=learning_rate)
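
# How n_step_buffer is typically used with n_step = 3: fold the last n_step
# transitions into one replay entry that carries the discounted n-step return.
# A hypothetical sketch; the real ReplayMemory.append signature may differ:
def store_transition(obs, action, reward, next_obs, done):
    n_step_buffer.append((obs, action, reward, next_obs, done))
    if len(n_step_buffer) < n_step:
        return
    # Discounted return R = r_0 + gamma * r_1 + ... + gamma^(n-1) * r_(n-1)
    R = 0.0
    for _, _, r, _, _ in reversed(n_step_buffer):
        R = r + gamma * R
    first_obs, first_action = n_step_buffer[0][0], n_step_buffer[0][1]
    rpm.append((first_obs, first_action, R, next_obs, done))
    n_step_buffer.pop(0)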


def preprocess(observation):
    # Drop the last 100 pixels along the first axis (the static ground strip)
    observation = observation[:observation.shape[0] - 100, :]
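    # The snippet is cut off here. A hypothetical continuation, assuming
    # OpenCV (cv2) is used for grayscale conversion, resizing to
    # resize_shape and normalization:
    #   observation = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
    #   observation = cv2.resize(observation, (resize_shape[2], resize_shape[1]))
    #   observation = observation.astype('float32') / 255.0
    #   return np.expand_dims(observation, axis=0)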