Ejemplo n.º 1
0
    # Training-loop excerpt: `env`, `main_model`, N_EPOCHS, S_BATCH and GAMMA
    # are defined outside the visible code.
    target_model = Model()

    memory = Memory() # stores (state, action, reward, next_state) tuples
    agent  = Agent()  # agent that selects actions

    learned_flag = False # whether learning has finished

    for epoch in range(N_EPOCHS):
        # The flag is only checked here, so an episode that is already running
        # is not interrupted by this check.
        if learned_flag:
            break

        print('Epoch: {}'.format(epoch + 1))

        # Set up the initial state: reset the environment and take one random
        # step to obtain the first (state, reward, finished) triple.
        env.reset()
        state, reward, finished = env.random_step() 
        # Start the epoch with the target model holding a copy of the main
        # model's weights.
        target_model.model.set_weights(main_model.model.get_weights())

        while not finished:
            # The agent picks an action from the current state, the epoch
            # index and the main model.
            # NOTE(review): epoch is presumably used for an exploration
            # schedule -- confirm in Agent.get_action.
            action = agent.get_action(state, epoch, main_model)
            # Refreshed before the step is taken; it takes effect at the top
            # of the next epoch iteration.
            learned_flag = env.is_learned()
            # The environment receives the index of the largest entry of
            # `action`, while the full `action` value is what gets stored.
            next_state, reward, finished = env.step(action.argmax())

            memory.add((state, action, reward, next_state))

            state = next_state

            # Train the main model only once the buffer holds more than
            # S_BATCH entries.
            if len(memory.buffer) > S_BATCH:
                main_model.replay(memory, S_BATCH, GAMMA, target_model)

            # NOTE(review): this sync runs on every step of the episode, so
            # the target model equals the main model after each step --
            # confirm this is intended rather than a periodic/per-epoch sync.
            target_model.model.set_weights(main_model.model.get_weights())