import numpy as np

# Hyperparameters defined elsewhere: lr (learning rate), df (discount factor),
# nhl (number of hidden layers), ef (exploration decay factor)
print('lr:', lr)
print('df:', df)
print('nhl:', nhl)
print('ef:', ef)

# Q-Network, set parameters
QN = DQNAgent(state_size, action_size, discount_factor=df, learning_rate=lr,
              expl_decay=ef, nhl=nhl, sl_f=1)
batch_size = 16

loss_overall = []
error_avg = []
error = []

states = sim_env.state
for e in range(episodes):
    sim_env.reset()

    # Training phase: interact with the environment, store transitions,
    # and replay minibatches once enough experience has been collected
    for k in range(training):
        states = np.reshape(states, [1, state_size])
        action = QN.act(states)
        next_state, rewards, overall_err = sim_env.Assign_Cores(action)
        next_state = np.reshape(next_state, [1, state_size])
        QN.remember(states, action, rewards, next_state)
        states = next_state
        if len(QN.memory) > batch_size:
            QN.replay(batch_size)
    loss_overall = np.append(loss_overall, QN.loss_avg / training)
    QN.loss_avg = 0

    # Testing phase: act greedily, without exploration
    sim_env.reset()
    for u in range(testing):
        states = np.reshape(states, [1, state_size])
        action = QN.act_test(states)
        # print('SNR:', sim_env.SNR[-1])
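# The DQNAgent class itself is not shown above. The following is a minimal
# sketch of an agent exposing the same interface used in the loop (act,
# act_test, remember, replay, memory, loss_avg) and the same constructor
# parameters; the network width, optimizer, and epsilon schedule are
# assumptions, not the original implementation.
import random
from collections import deque

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


class SimpleDQNAgent:
    def __init__(self, state_size, action_size, discount_factor=0.95,
                 learning_rate=1e-3, expl_decay=0.995, nhl=2, sl_f=1):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = discount_factor
        self.epsilon = 1.0              # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = expl_decay
        self.memory = deque(maxlen=2000)
        self.loss_avg = 0
        # nhl hidden layers of sl_f * state_size units each (assumed meaning of sl_f)
        self.model = Sequential()
        self.model.add(Dense(sl_f * state_size, input_dim=state_size, activation='relu'))
        for _ in range(nhl - 1):
            self.model.add(Dense(sl_f * state_size, activation='relu'))
        self.model.add(Dense(action_size, activation='linear'))
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))

    def act(self, state):
        # epsilon-greedy action selection during training
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def act_test(self, state):
        # greedy action selection during evaluation
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))

    def replay(self, batch_size):
        # sample a minibatch and take one Q-learning step per transition
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state in minibatch:
            target = reward + self.gamma * np.amax(
                self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            history = self.model.fit(state, target_f, epochs=1, verbose=0)
            self.loss_avg += history.history['loss'][0]
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay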
import gym

env = wrap_dqn(gym.make('PongNoFrameskip-v4'))
agent = DQNAgent(env=env, num_actions=NUM_ACTIONS, lr=LR, discount=GAMMA)

# Load model
# agent.load_model(weights_file="snaps/model")

# Train agent
agent.train(TRAIN_STEPS, weights_file="snaps/model")

# Evaluate: a trial counts as a success if the accumulated reward is positive
success = 0
for tr in range(TRIALS):
    state = env.reset()
    t = 0
    acc_r = 0
    while True:
        env.render()
        action = agent.act(state)
        state, reward, done, _ = env.step(action)
        acc_r += reward
        t += 1
        if done:
            print("Trial {} finished after {} timesteps".format(tr, t))
            if acc_r > 0:
                success += 1
            break

print("Success: %d/%d" % (success, TRIALS))
env.close()
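# wrap_dqn is not defined in this snippet. A common choice for Atari DQN
# training is the standard preprocessing stack sketched below (grayscale,
# frame skipping, frame stacking); this is an assumed stand-in, not
# necessarily the original wrapper.
from gym.wrappers import AtariPreprocessing, FrameStack


def wrap_dqn(env):
    # PongNoFrameskip-v4 has frame skipping disabled, so the wrapper handles it
    env = AtariPreprocessing(env, frame_skip=4, grayscale_obs=True, scale_obs=True)
    env = FrameStack(env, num_stack=4)
    return env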
br = tetris.detect_figure(colors)
num += 1
dont_burn_my_cpu.tick(maxfps)

if br is None:
    app.gameover = True
if br is not None:
    if episodes == EPISODES:
        app.gameover = True
        br = None
        continue
    # action is a tuple: action[0] = horizontal offset, action[1] = direction
    # (0 = left, 1 = right), action[2] = number of rotations
    action = agent.act(numpy.reshape(tetris.state, [20, 10]), tetris, dict, br)
    state = tetris.generate_state_based_on_action_and_figure(dict, action, br)
    reward1 = app.score
    for i in range(action[2]):
        app.rotate_stone()
    if action[1] == 0:
        app.move(-1 * action[0])
    elif action[1] == 1:
        app.move(1 * action[0])
    app.insta_drop()
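# A self-contained restatement of how the action tuple above is applied to the
# board; the helper name is hypothetical, and only the app.rotate_stone,
# app.move and app.insta_drop calls already used in the loop are assumed.
def apply_action(app, action):
    offset, direction, rotations = action
    for _ in range(rotations):
        app.rotate_stone()
    # direction 0 moves the stone left, direction 1 moves it right
    app.move(-offset if direction == 0 else offset)
    app.insta_drop()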