Ejemplo n.º 1
0
# Training driver: runs `Epochs` episodes against `env`, storing every
# transition in the agent and periodically training / checkpointing it.
batch_size = args.BatchSize

# Checkpoint file, resolved relative to the current working directory.
Model_Name = "Acrobot-dqn.h5"
agent = AI(action_size, input_shape, batch_size)
# Resume from an existing checkpoint. The check uses Model_Name so the
# existence test and the load call can never refer to different files.
if os.path.exists(Model_Name):
    agent.load(Model_Name)

Epochs = args.Epochs
temp = []
for e in range(Epochs):
    # Start a new episode; the observation is reshaped to a batch of one
    # with the (input_shape[0], input_shape[1]) layout passed to the agent.
    state = env.reset()
    state = np.reshape(state, [1, input_shape[0], input_shape[1]])
    logger.info("Creating Observation ")
    # Steps are counted from 1 so the modulo checks below do not fire on
    # the very first step; an episode is capped at 999 steps.
    for state_count in range(1, 1000):
        env.render()
        logger.info("Sate no {}".format(state_count))
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        next_state = np.reshape(
            next_state, [1, input_shape[0], input_shape[1]]
        )
        # Record the transition before advancing, so the terminal step is
        # stored as well.
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
        # Every `batch_size` steps, let the agent replay
        # (presumably a training pass over remembered transitions --
        # confirm against AI.replay).
        if state_count % batch_size == 0:
            agent.replay()
        # Checkpoint every 100 steps. Logged through the same `logger` as
        # the rest of the script rather than the root `logging` module.
        if state_count % 100 == 0:
            logger.info("Saving Model")
            agent.save(Model_Name)

# Largest value the agent's model predicts for the last state reached.
# NOTE(review): `state` is only bound inside the loop above, so this line
# relies on Epochs >= 1 unless `state` is defined earlier in the file.
a = np.amax(agent.getModel().predict(state))