def __init__(self):
    # Launch the game and cache the board dimensions.
    self.App = tetris.TetrisApp()
    self.App.run()
    self.r = {}
    self.rows = self.App.get_rows()
    self.cols = self.App.get_cols()
    # One weight per board feature (2 * cols + 3 of them), all initialised to 1.
    for i in range(2 * self.cols + 3):
        self.r["r" + str(i)] = 1
    # Hyper-parameters for the learning loop.
    self.alpha = 0.9
    self.N = 100
    self.M = 10
    self.samples = []
    self.number_stones = self.App.get_number_stones()
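# A sketch (not the original code) of how the fields above could drive a
# cross-entropy-method update: self.N candidate weight vectors are sampled,
# the self.M best are kept, and self.alpha smooths the refit. `evaluate` is a
# hypothetical helper that plays one game with a weight vector and returns its
# score; treating this class as a CEM trainer is itself an assumption
# (numpy imported as np is also assumed).
def cem_iteration(self, mu, sigma, evaluate):
    candidates = np.random.normal(mu, sigma, size=(self.N, mu.size))
    scores = np.array([evaluate(c) for c in candidates])
    elites = candidates[scores.argsort()[::-1][:self.M]]  # best M samples
    # Smooth the new mean/std toward the elite statistics.
    mu = self.alpha * elites.mean(axis=0) + (1 - self.alpha) * mu
    sigma = self.alpha * elites.std(axis=0) + (1 - self.alpha) * sigma
    return mu, sigma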
def fight(wt_a, wt_b):
    wins_a = 0
    wins_b = 0
    for _ in range(number_of_rounds):
        score_a = score_b = 0  # fallback so a crashed game counts as zero
        try:
            app = tetris.TetrisApp(wt_a, True)
            app.run()
            score_a = app.score
        except ValueError:
            print(app.board)
            print(app.block)
        try:
            app = tetris.TetrisApp(wt_b, True)
            app.run()
            score_b = app.score
        except ValueError:
            print(app.board)
            print(app.block)
        if score_a > score_b:
            wins_a += 1
            print("Player A wins with score of {}".format(score_a))
        else:
            wins_b += 1
            print("Player B wins with score of {}".format(score_b))
    winner = "A" if wins_a > wins_b else "B"
    print("Player {} advances".format(winner))
    print("************************")
    return wt_a if winner == "A" else wt_b
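# One way `fight` could be used: a single-elimination bracket over a list of
# candidate weight vectors. This driver is a sketch, not part of the original
# code; `number_of_rounds` is assumed to be a module-level constant.
def tournament(candidates):
    # Repeatedly pair off survivors until one weight vector remains.
    while len(candidates) > 1:
        survivors = [fight(candidates[i], candidates[i + 1])
                     for i in range(0, len(candidates) - 1, 2)]
        if len(candidates) % 2 == 1:
            survivors.append(candidates[-1])  # odd one out gets a bye
        candidates = survivors
    return candidates[0]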
def train_neural_network(x):
    # `y`, `states`, `actions`, and `batch_size` are module-level globals.
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 40
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Supervised training on the recorded (state, action) pairs.
        for epoch in range(hm_epochs):
            epoch_loss = 0
            batch_index = 0
            for _ in range(len(states) // batch_size):
                epoch_x = states[batch_index:batch_index + batch_size]
                epoch_y = actions[batch_index:batch_index + batch_size]
                batch_index += batch_size
                _, c = sess.run([optimizer, cost],
                                feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs,
                  'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: states, y: actions}))
        # Play the game, feeding each board state through the trained net
        # and acting on the highest-scoring output.
        game = tetris.TetrisApp()
        game.init_game()
        while True:
            state = game.readboard(game.prep_current_board())
            action = prediction.eval(session=sess, feed_dict={x: [state]})
            print(action)
            max_index = np.argmax(action)
            action = [0, 0, 0, 0, 0]
            action[max_index] = 1
            _, __, ___ = game.step_act(action)
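# `neural_network_model` is referenced above but not defined in this snippet.
# A plausible TF1-style definition is sketched below, assuming a single fully
# connected hidden layer; `input_size` and `n_nodes` are assumptions, and
# n_classes = 5 matches the five-entry one-hot action vector used above.
input_size = 200   # assumed length of the flattened board state
n_nodes = 128      # assumed hidden-layer width
n_classes = 5

def neural_network_model(data):
    hidden = {'weights': tf.Variable(tf.random_normal([input_size, n_nodes])),
              'biases': tf.Variable(tf.random_normal([n_nodes]))}
    output = {'weights': tf.Variable(tf.random_normal([n_nodes, n_classes])),
              'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.nn.relu(tf.add(tf.matmul(data, hidden['weights']),
                           hidden['biases']))
    return tf.add(tf.matmul(l1, output['weights']), output['biases'])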
def simulate_for_results(wt_arr):
    # run the game and get back the result
    try:
        app = tetris.TetrisApp(wt_arr, True)
        app.run()
        print(app.score)
    except ValueError:
        print("Error")
        print(app.board)
        print(app.block.value)
        print(app.block.x)
        print(app.block.y)
    # appends to file
    with open("output.txt", "a") as f:
        f.write("{},{}\n".format(app.score, ",".join(wt_arr.astype(str))))
    return app.score
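# Example use of `simulate_for_results` (a sketch, not original code): scoring
# a whole population of weight vectors. The population shape of 16 individuals
# with 8 weights each is an assumption.
import numpy as np

population = np.random.rand(16, 8)
scores = [simulate_for_results(wt) for wt in population]
best = population[int(np.argmax(scores))]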
import tetris
import neuralnetwork as NN
import losses
import numpy as np

em = tetris.TetrisApp(10, 20, 750, True, 40, 30 * 10)
net = NN.DQN(em.get_state_size(), 1, losses.MSE_loss)
em.pcrun()
em.reset()
done = False

# Load an evolved genome and unpack it layer by layer into the network.
gene = np.loadtxt("evolution\\generation16.csv", delimiter=',')
index = 0
net.L1.W = gene[index:index + net.L1.W.size].reshape(net.L1.W.shape)
index += net.L1.W.size
net.L1.B = gene[index:index + net.L1.B.size].reshape(net.L1.B.shape)
index += net.L1.B.size
net.L2.W = gene[index:index + net.L2.W.size].reshape(net.L2.W.shape)
index += net.L2.W.size
net.L2.B = gene[index:index + net.L2.B.size].reshape(net.L2.B.shape)
index += net.L2.B.size
net.L3.W = gene[index:index + net.L3.W.size].reshape(net.L3.W.shape)
index += net.L3.W.size
net.L3.B = gene[index:index + net.L3.B.size].reshape(net.L3.B.shape)

# Play greedily: score every candidate placement and take the best one.
while not done:
    next_state = em.get_next_states()
    predicted_qs = {}
    for i, (*data, ) in enumerate(next_state):
        predicted_qs[(data[0], data[1])] = net.f_pass(
            np.array([next_state[data[0], data[1]]]).T)[0, 0]
    best_move = max(predicted_qs, key=predicted_qs.get)
    reward, done = em.pcplace(best_move[0], best_move[1])
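# The layer-by-layer unpacking above recurs in several of these scripts; a
# small helper like this (not part of the original code) would remove the
# duplication. It assumes the genome stores L1.W, L1.B, L2.W, ... in order.
def load_genome(net, gene):
    index = 0
    for layer in (net.L1, net.L2, net.L3):
        for name in ("W", "B"):
            param = getattr(layer, name)
            setattr(layer, name,
                    gene[index:index + param.size].reshape(param.shape))
            index += param.size
    return net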
import tetris
import neuralnetwork as NN
import losses
import numpy as np
import matplotlib.pyplot as plt

N = 655   # number of saved generations to evaluate
N2 = 3    # games played per generation
score = np.zeros((N, N2))
for i in range(N):
    em = tetris.TetrisApp(8, 16, 0.01 * 750, False, 40, 30 * 100)
    net = NN.DQN(em.get_state_size(), 1, losses.MSE_loss)
    gene = np.loadtxt("data\\evolutionNNstate168\\generation" + str(i) + ".csv",
                      delimiter=',')
    index = 0
    net.L1.W = gene[index:index + net.L1.W.size].reshape(net.L1.W.shape)
    index += net.L1.W.size
    net.L1.B = gene[index:index + net.L1.B.size].reshape(net.L1.B.shape)
    index += net.L1.B.size
    net.L2.W = gene[index:index + net.L2.W.size].reshape(net.L2.W.shape)
    index += net.L2.W.size
    net.L2.B = gene[index:index + net.L2.B.size].reshape(net.L2.B.shape)
    index += net.L2.B.size
    net.L3.W = gene[index:index + net.L3.W.size].reshape(net.L3.W.shape)
    index += net.L3.W.size
    net.L3.B = gene[index:index + net.L3.B.size].reshape(net.L3.B.shape)
    for j in range(N2):
        em.pcrun()
        em.reset()
        # Play one game greedily (same loop as the evolution script)
        # and record the final score for this generation/run.
        done = False
        while not done:
            next_state = em.get_next_states()
            predicted_qs = {}
            for _, (*data, ) in enumerate(next_state):
                predicted_qs[(data[0], data[1])] = net.f_pass(
                    np.array([next_state[data[0], data[1]]]).T)[0, 0]
            best_move = max(predicted_qs, key=predicted_qs.get)
            reward, done = em.pcplace(best_move[0], best_move[1])
        score[i, j] = em.get_game_score()
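# A sketch of how the collected matrix could be summarised: mean score per
# generation with a min/max band across the N2 games. The column semantics
# follow the loops above; the styling choices are assumptions.
mean_score = score.mean(axis=1)
plt.plot(np.arange(N), mean_score, label="mean of {} games".format(N2))
plt.fill_between(np.arange(N), score.min(axis=1), score.max(axis=1), alpha=0.3)
plt.xlabel("generation")
plt.ylabel("game score")
plt.legend()
plt.show()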
import numpy as np
import tetris
import neuralnetwork as nn
import losses

# eps_start is defined earlier in the file (not shown in this excerpt).
eps_end = 0.0
eps_decay = 0.002
memory_size = 20000
lr = 0.001 * 0.001
num_episodes = 3000
filename = "TEST_" + str(lr)

def moving_average(a, n=30):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n

em = tetris.TetrisApp(10, 20, 750, False, 40, 30 * 100)
em.pcrun()
policy_net = nn.DQNsimple(em.get_state_size(), 1, losses.MSE_loss)
memory = nn.ReplayMemory(memory_size)
strategy = nn.EpsilonGreedyStrategy(eps_start, eps_end, eps_decay)

# fig = plt.figure()
# thismanager = plt.get_current_fig_manager()
# thismanager.window.wm_geometry("+500+0")
# plt.ion()

score = np.full(num_episodes, np.nan)  # one score slot per episode
lossess = np.zeros(num_episodes)       # per-episode training loss
current_step = -1
for episode in range(num_episodes):
    current_step += 1
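    # Sketch of a possible episode body (not the original code). It mirrors
    # the greedy move selection used in the evolution scripts; the method
    # names `get_exploration_rate` and `push` on the custom strategy/memory
    # classes are assumptions, as is `f_pass` on DQNsimple.
    em.reset()
    done = False
    while not done:
        rate = strategy.get_exploration_rate(current_step)  # assumed API
        next_states = em.get_next_states()
        moves = list(next_states)
        if np.random.rand() < rate:
            best_move = moves[np.random.randint(len(moves))]   # explore
        else:
            qs = {m: policy_net.f_pass(np.array([next_states[m]]).T)[0, 0]
                  for m in moves}
            best_move = max(qs, key=qs.get)                    # exploit
        reward, done = em.pcplace(best_move[0], best_move[1])
        memory.push((next_states[best_move], reward, done))    # assumed API
    score[episode] = em.get_game_score()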
def cal_pop_fitness(pop, pieceLimit, seed):
    # Play one seeded game per individual and collect the scores.
    fitness = []
    for indv in range(len(pop)):
        fitness.append(tetris.TetrisApp(False, seed).run(pop[indv], pieceLimit))
    return fitness
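# Example use of `cal_pop_fitness` inside a generation loop (a sketch; the
# population shape, piece limit, and seed are assumptions).
import numpy as np

pop = np.random.rand(20, 8)  # assumed: 20 individuals, 8 weights each
for generation in range(100):
    fitness = cal_pop_fitness(pop, pieceLimit=500, seed=42)
    print("gen", generation, "best", max(fitness))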
def run(N=6, num_generations=10000):
    em = tetris.TetrisApp(10, 10, 750, False, 40, 30 * 100)
    em.pcrun()
    net = NN.DQN(em.get_state_size(), 1, losses.MSE_loss)
    dimension = (net.L1.W.size + net.L1.B.size + net.L2.W.size +
                 net.L2.B.size + net.L3.W.size + net.L3.B.size)
    size_population = 4 * N
    pop_size = (size_population, dimension)
    new_population = np.random.rand(size_population, dimension)
    fitness = np.zeros(size_population)
    generations = np.linspace(1, num_generations, num_generations)
    maxscore = np.zeros(num_generations)
    for generation in range(num_generations):
        # Compute the fitness of each individual.
        for it, row in enumerate(new_population):
            index = 0
            net.L1.W = row[index:index + net.L1.W.size].reshape(net.L1.W.shape)
            index += net.L1.W.size
            net.L1.B = row[index:index + net.L1.B.size].reshape(net.L1.B.shape)
            index += net.L1.B.size
            net.L2.W = row[index:index + net.L2.W.size].reshape(net.L2.W.shape)
            index += net.L2.W.size
            net.L2.B = row[index:index + net.L2.B.size].reshape(net.L2.B.shape)
            index += net.L2.B.size
            net.L3.W = row[index:index + net.L3.W.size].reshape(net.L3.W.shape)
            index += net.L3.W.size
            net.L3.B = row[index:index + net.L3.B.size].reshape(net.L3.B.shape)
            em.reset()
            done = False
            while not done:
                next_state = em.get_next_states()
                predicted_qs = {}
                for i, (*data, ) in enumerate(next_state):
                    predicted_qs[(data[0], data[1])] = net.f_pass(
                        np.array([next_state[data[0], data[1]]]).T)[0, 0]
                best_move = max(predicted_qs, key=predicted_qs.get)
                reward, done = em.pcplace(best_move[0], best_move[1])
                if em.get_game_score() > 20000:
                    break
            fitness[it] = em.get_game_score()
        # Sort descending by fitness so the best individual is first;
        # keep fitness aligned with the reordered population.
        order = fitness.argsort()[::-1]
        new_population = new_population[order]
        fitness = fitness[order]
        maxscore[generation] = max(fitness)
        print(generation, max(fitness))
        if max(fitness) > 20000:
            break
        np.savetxt("evolution\\generation" + str(generation) + ".csv",
                   new_population[0], delimiter=',')
        offspring_crossover = cross_and_mutate(new_population, pop_size)
        new_population = offspring_crossover
    np.savetxt("evolution\\scores.csv",
               np.array([generations, maxscore]).T, delimiter=',')
    return (fitness[0], new_population[0])
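# `cross_and_mutate` is called above but not defined in this snippet. One
# plausible implementation, assuming the population arrives sorted best-first:
# keep the top quarter as parents, fill the rest by uniform crossover, and
# apply small Gaussian mutations. The rates and magnitudes are assumptions.
def cross_and_mutate(population, pop_size, mutation_rate=0.05, sigma=0.1):
    n, dim = pop_size
    parents = population[:n // 4]
    offspring = np.empty(pop_size)
    offspring[:len(parents)] = parents          # elitism: keep parents as-is
    for k in range(len(parents), n):
        a, b = parents[np.random.randint(len(parents), size=2)]
        mask = np.random.rand(dim) < 0.5        # uniform crossover
        child = np.where(mask, a, b)
        mutate = np.random.rand(dim) < mutation_rate
        child = child + mutate * np.random.normal(0, sigma, dim)
        offspring[k] = child
    return offspring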