def testAgent(RL, test, frames=2000):
    """Evaluate a trained agent on one test configuration.

    A logging ``tetrisML.TetrisGame`` is created from ``test`` and driven by
    the greedy action of ``RL`` until the step counter exceeds ``frames``.

    Args:
        RL: Agent exposing ``get_reading(observation, kp=1)``; element ``[0]``
            of the result is used as the per-action value vector.
        test: Indexable configuration; ``test[0]`` is the display name and
            ``test[1]``..``test[3]`` are forwarded to ``TetrisGame``.
        frames: Step threshold. The loop stops on the first iteration whose
            counter is greater than ``frames``, so for a non-negative integer
            value ``nextFrame`` is called ``frames + 2`` times.

    Returns:
        Tuple of the averages of the environment's ``scores``,
        ``gamelengths``, ``scoreChanges`` and ``heuristicChanges``.
    """
    title = "Testing " + test[0]
    game = tetrisML.TetrisGame(title, test[1], test[2], test[3], log=True)

    steps_done = 0
    state = game.reset()
    finished = False
    while not finished:
        # kp=1 is presumably a dropout keep-probability (no dropout) -- confirm.
        q_values = RL.get_reading(state, kp=1)[0]
        choice = np.argmax(q_values)

        # Action 40 is the hold move; when hold is unavailable fall back to
        # the second-highest valued action instead.
        if choice == 40 and not game.canUseHold:
            ranking = np.argsort(q_values)
            choice = ranking[-2]

        next_state, _reward = game.nextFrame(choice)

        # The frame is always played before the stop condition is evaluated.
        finished = steps_done > frames
        if not finished:
            state = next_state
            steps_done += 1

    mean_score = np.average(game.scores)
    mean_length = np.average(game.gamelengths)
    mean_score_change = np.average(game.scoreChanges)
    mean_heuristic_change = np.average(game.heuristicChanges)
    return mean_score, mean_length, mean_score_change, mean_heuristic_change
def testRandom(test, frames=2000):
    """Evaluate a uniformly random policy on one test configuration.

    A logging ``tetrisML.TetrisGame`` is created from ``test`` and fed
    actions drawn with ``np.random.randint(0, 41)`` (integers 0..40) until
    the step counter exceeds ``frames``.

    Args:
        test: Indexable configuration; ``test[0]`` is the display name and
            ``test[1]``..``test[3]`` are forwarded to ``TetrisGame``.
        frames: Step threshold. The loop stops on the first iteration whose
            counter is greater than ``frames``, so for a non-negative integer
            value ``nextFrame`` is called ``frames + 2`` times.

    Returns:
        Tuple of the averages of the environment's ``scores``,
        ``gamelengths``, ``scoreChanges`` and ``heuristicChanges``.
    """
    title = "Testing " + test[0]
    game = tetrisML.TetrisGame(title, test[1], test[2], test[3], log=True)

    steps_done = 0
    game.reset()  # start a fresh game; the random policy ignores observations
    finished = False
    while not finished:
        choice = np.random.randint(0, 41)
        _next_state, _reward = game.nextFrame(choice)

        # The frame is always played before the stop condition is evaluated.
        finished = steps_done > frames
        if not finished:
            steps_done += 1

    mean_score = np.average(game.scores)
    mean_length = np.average(game.gamelengths)
    mean_score_change = np.average(game.scoreChanges)
    mean_heuristic_change = np.average(game.heuristicChanges)
    return mean_score, mean_length, mean_score_change, mean_heuristic_change
total_steps += 1 return np.average(test_env.scores), np.average( test_env.gamelengths), np.average(test_env.scoreChanges), np.average( test_env.heuristicChanges) tests = [["Controle", True, True, 6], ["Onzichtbaar veld", False, True, 6], ["Onzichtbare hold", True, False, 6], ["Een next piece zichtbaar", True, True, 1], ["Geen next piece zichtbaar", True, True, 0]] folder = path.dirname(__file__) for test in tests: env = tetrisML.TetrisGame("Training " + test[0], test[1], test[2], test[3]) MEMORY_SIZE = 100000 ACTION_SPACE = env.num_actions FEATURES = env.num_features FEATURESHAPE = env.featureShape STATESHAPE = env.stateShape sess = tf.Session() with tf.variable_scope('Double_DQN'): DQN = DeepQNetwork(n_actions=ACTION_SPACE, n_features=FEATURES, memory_size=MEMORY_SIZE, e_greedy_increment=0.0000045, e_greedy=0.9,