print('lr:', lr)
        print('df:', df)
        print('nhl:', nhl)
        print('ef:', ef)
        # Q-Network, set parameters
        QN = DQNAgent(state_size, action_size, discount_factor=df, learning_rate=lr, expl_decay=ef, nhl=nhl, sl_f=1)
        batch_size = 16
        loss_overall = []
        error_avg = []
        error = []
        states = sim_env.state
        for e in range(episodes):
            sim_env.reset()
            for k in range(training):
                states = np.reshape(states, [1, state_size])
                action = QN.act(states)
                next_state, rewards, overall_err = sim_env.Assign_Cores(action)
                next_state = np.reshape(next_state, [1, state_size])
                QN.remember(states, action, rewards, next_state)
                states = next_state
                if len(QN.memory) > batch_size:
                    QN.replay(batch_size)

            loss_overall = np.append(loss_overall, QN.loss_avg/training)
            QN.loss_avg = 0

            sim_env.reset()
            for u in range(testing):
                states = np.reshape(states, [1, state_size])
                action = QN.act_test(states)
                # print('SNR:', sim_env.SNR[-1])
# Example #2
0
# Train a DQN agent on 'PongNoFrameskip-v4', then run TRIALS rendered
# evaluation episodes and report how many ended with a positive return.
env = wrap_dqn(gym.make('PongNoFrameskip-v4'))

# Everything that uses the environment runs under try/finally so the
# environment is closed even when training or evaluation raises (or the
# run is interrupted from the keyboard).
try:
    agent = DQNAgent(env=env, num_actions=NUM_ACTIONS, lr=LR, discount=GAMMA)

    # Load model (uncomment to start from previously stored weights)
    # agent.load_model(weights_file="snaps/model")

    # Train agent
    # NOTE(review): weights_file is presumably where train() stores its
    # snapshots -- confirm against DQNAgent.train.
    agent.train(TRAIN_STEPS, weights_file="snaps/model")

    # Evaluate: a trial counts as a success when its accumulated reward
    # is strictly positive.
    success = 0
    for tr in range(TRIALS):
        state = env.reset()
        t = 0       # timesteps taken in this trial
        acc_r = 0   # reward accumulated over this trial
        done = False
        while not done:
            env.render()
            action = agent.act(state)
            # Uses the 4-tuple step() result; the info value is ignored.
            state, reward, done, _ = env.step(action)
            acc_r += reward
            t += 1
        print("Trial {} finished after {} timesteps".format(tr, t))
        if acc_r > 0:
            success += 1
    print("Success: %d/%d" % (success, TRIALS))
finally:
    env.close()
# Example #3
0
        br = tetris.detect_figure(colors)

        num += 1
        dont_burn_my_cpu.tick(maxfps)

        if br == None:
            app.gameover = True

        if br != None:

            if episodes == EPISODES:
                app.gameover = True
                br = None
                continue

            action = agent.act(numpy.reshape(tetris.state, [20, 10]), tetris,
                               dict, br)

            state = tetris.generate_state_based_on_action_and_figure(
                dict, action, br)

            reward1 = app.score

            for i in range(action[2]):
                app.rotate_stone()

            if action[1] == 0:
                app.move(-1 * action[0])
            elif action[1] == 1:
                app.move(1 * action[0])

            app.insta_drop()