def main():
    pygame.init()
    clock = pygame.time.Clock()
    white = (255, 255, 255)
    block_size = 40
    map_size = (8, 8)

    # [blocked_left, blocked_up, blocked_right, blocked_down,
    #  food_left, food_up, food_right, food_down]
    states = 2**4
    actions = 4
    Q = np.zeros((states, actions))
    lr = 0.4
    gamma = 0.4
    epsilon = 0.8
    epochs = 50000
    Qs = np.zeros((epochs, states, actions))
    play_flag = True

    for epoch in range(epochs):
        if epoch % 100 == 0:
            print(epoch)
        if epsilon > 0.05:
            epsilon = epsilon - 0.01

        game = snake.SnakeGame(map_size, block_size)
        state = game.get_state()
        state_num = get_state_num(state)

        while play_flag:
            # epsilon-greedy action selection
            if random.uniform(0, 1) < epsilon:
                action = random.randint(0, actions - 1)
            else:
                action = np.argmax(Q[state_num, :])

            end_game, reward = game.move(action_to_dir(action))
            new_state = game.get_state()
            new_state_num = get_state_num(new_state)

            # Standard Q-learning update (not used here; the simplified rule below ignores lr):
            # Q[state_num, action] += lr * \
            #     (reward + gamma * np.max(Q[new_state_num, :]) - Q[state_num, action])
            Q[state_num, action] = reward + gamma * np.max(Q[new_state_num, :])

            state = new_state
            state_num = new_state_num

            if epoch % 1000 == 0:
                print(state)
                game.show_text(str(epoch), (10, 10))
                clock.tick(0.5)
                game.draw()

            if end_game:
                play_flag = False

        play_flag = True
        Qs[epoch, :, :] = np.copy(Q)
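The tabular Q-learning snippet above calls two helpers that are not shown, get_state_num and action_to_dir. Below is a minimal sketch of what they could look like, assuming the state is a sequence of boolean flags that gets bit-packed into a Q-table row index and that actions 0-3 map to left, up, right, down. Both encodings are assumptions, not the original implementation; note also that the Q-table above has 2**4 rows while the comment lists eight flags, so the original presumably packs only four flags or compresses the state further.

# Hypothetical helpers -- not part of the original snippet.
def get_state_num(state):
    # Bit-pack a sequence of boolean flags into a single integer row index.
    num = 0
    for flag in state:
        num = (num << 1) | int(bool(flag))
    return num


def action_to_dir(action):
    # Assumed mapping from action index 0..3 to a (dx, dy) grid direction.
    return [(-1, 0), (0, -1), (1, 0), (0, 1)][action]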
def main():
    global oldscreen, game
    with cursebox.Cursebox() as cb:
        # TODO: ask level within curses
        game = snake.SnakeGame(cb.width, cb.height - 1)
        cb.screen.timeout(game.timeout)
        keypress = ""
        escapekeys = ["CTRL+C", "q", "ESC"]
        i = 0
        oldscreen = [[0 for x in range(cb.width)] for y in range(cb.height - 1)]
        while keypress not in escapekeys:  # and game.running:
            game.tick(keypress)
            drawscreen(game, cb)
            drawstatus(game.status_left, game.status_right, cb)
            cb.screen.timeout(game.timeout)
            keypress = str(cb.poll_event())
        # TODO: Maintain highscore list here
def _select_current_option(self):
    option = self.options[self.selected_option]
    print("Selected", option)
    if option == 'Snake':
        self.text_scroller.pause()
        game = snake.SnakeGame(self.matrix, self.joystick)
        utaskmanager.add_task(game)
        self.current_task = game
    elif option == 'net':
        # show ip address
        sta = network.WLAN(network.STA_IF)
        tc = text.TextScroller(self.matrix)
        tc.scroll_text(" IP: " + sta.ifconfig()[0] + " ")
    elif option == 'mem':
        mem_free = gc.mem_free()
        tc = text.TextScroller(self.matrix)
        tc.scroll_text("mem free " + str(mem_free) + " Bytes ")
    elif option == 'demo':
        main.run_rotating_plasma()
def trainGraph(inp, out):
    # to calculate the argmax, we multiply the predicted output with a vector
    # with one value 1 and the rest 0
    argmax = tf.placeholder("float", [None, ACTIONS])
    gt = tf.placeholder("float", [None])  # ground truth
    global_step = tf.Variable(0, name='global_step')

    # Q-value of the chosen action
    action = tf.reduce_sum(tf.multiply(out, argmax), reduction_indices=1)
    # cost function we will reduce through backpropagation
    cost = tf.reduce_mean(tf.square(action - gt))
    # optimization function to minimize our cost function
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # initialize our game
    game = snake.SnakeGame()

    # create a queue for experience replay to store policies
    D = deque()

    # initial frame
    frame = game.GetPresentFrame()
    # convert rgb to grayscale for processing
    frame = cv2.cvtColor(cv2.resize(frame, (IMG_SIZE, IMG_SIZE)), cv2.COLOR_BGR2GRAY)
    # binary colors, black or white
    ret, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
    # stack frames, that is our input tensor
    inp_t = np.stack((frame, frame, frame, frame), axis=2)

    saver = tf.train.Saver(tf.global_variables())
    sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))

    # restore the latest checkpoint or initialize a new graph
    checkpoint = tf.train.latest_checkpoint('./checkpoints')
    if checkpoint is not None:
        print('Restore Checkpoint %s' % (checkpoint))
        saver.restore(sess, checkpoint)
        print("Model restored.")
    else:
        init = tf.global_variables_initializer()
        sess.run(init)
        print("Initialized new Graph")

    t = global_step.eval()
    c = 0
    epsilon = INITIAL_EPSILON

    # training time
    while (1):
        # output tensor
        out_t = out.eval(feed_dict={inp: [inp_t]})[0]
        # argmax function
        argmax_t = np.zeros([ACTIONS])

        # exploration or exploitation
        if (random.random() <= epsilon and not USE_MODEL):
            # make 0 the most chosen action for realistic randomness
            maxIndex = choice((0, 1, 2, 3, 4), 1, p=(0.80, 0.05, 0.05, 0.05, 0.05))
        else:
            maxIndex = np.argmax(out_t)
        argmax_t[maxIndex] = 1

        if epsilon > FINAL_EPSILON:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        mode = 'observing'
        if t > OBSERVE:
            mode = 'training'
        if USE_MODEL:
            mode = 'model only'

        # move the game forward by one frame, get reward and frame
        reward_t, frame = game.GetNextFrame(argmax_t, [t, np.max(out_t), epsilon, mode])

        # get frame pixel data
        frame = cv2.cvtColor(cv2.resize(frame, (IMG_SIZE, IMG_SIZE)), cv2.COLOR_BGR2GRAY)
        ret, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
        frame = np.reshape(frame, (IMG_SIZE, IMG_SIZE, 1))
        # new input tensor
        inp_t1 = np.append(frame, inp_t[:, :, 0:3], axis=2)

        # add our input tensor, argmax tensor, reward and updated input tensor to replay memory
        D.append((inp_t, argmax_t, reward_t, inp_t1))

        # if we run out of replay memory, make room
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # training iteration
        if c > OBSERVE and not USE_MODEL:
            # get a random sample from our replay memory
            minibatch = random.sample(D, BATCH)

            inp_batch = [d[0] for d in minibatch]
            argmax_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            inp_t1_batch = [d[3] for d in minibatch]

            gt_batch = []
            out_batch = out.eval(feed_dict={inp: inp_t1_batch})

            # build the targets: reward plus discounted max Q of the next state
            for i in range(0, len(minibatch)):
                gt_batch.append(reward_batch[i] + GAMMA * np.max(out_batch[i]))

            # train on that batch
            train_step.run(feed_dict={
                gt: gt_batch,
                argmax: argmax_batch,
                inp: inp_batch
            })

        # update our input tensor to the next frame
        inp_t = inp_t1
        t = t + 1
        c = c + 1

        # save a checkpoint periodically
        if t % SAVE_STEP == 0 and not USE_MODEL:
            sess.run(global_step.assign(t))
            saver.save(sess, './checkpoints/model.ckpt', global_step=t)

        # print where we are
        print("TIMESTEP", t, "/ EPSILON", epsilon, "/ ACTION", maxIndex,
              "/ REWARD", reward_t, "/ Q_MAX %e" % np.max(out_t))
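trainGraph expects an input placeholder inp (the stack of four frames) and a Q-value output tensor out, both defined elsewhere. A rough sketch of a compatible graph in the same TF1 style is shown below; it reuses the IMG_SIZE and ACTIONS constants from above, but the single dense layer is only an illustrative stand-in, not the original (convolutional) network.

def createGraph():
    # Hypothetical stand-in for the original network definition.
    inp = tf.placeholder("float", [None, IMG_SIZE, IMG_SIZE, 4])  # 4 stacked frames
    flat = tf.reshape(inp, [-1, IMG_SIZE * IMG_SIZE * 4])
    W = tf.Variable(tf.truncated_normal([IMG_SIZE * IMG_SIZE * 4, ACTIONS], stddev=0.01))
    b = tf.Variable(tf.constant(0.01, shape=[ACTIONS]))
    out = tf.matmul(flat, W) + b  # one estimated Q-value per action
    return inp, out

# inp, out = createGraph()
# trainGraph(inp, out)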
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(30, input_shape=(4, )))
model.add(tf.keras.layers.Activation("relu"))
# model.add(tf.keras.layers.Dense(100))
# model.add(tf.keras.layers.Activation("relu"))
# model.add(tf.keras.layers.Dense(10, tf.keras.layers.Activation("linear")))
model.add(tf.keras.layers.Dense(3))
# model.add(tf.keras.layers.Dense(3, tf.keras.layers.Activation("softmax")))
model.compile(tf.keras.optimizers.Adam(lr=0.005), "mean_squared_error", ['accuracy'])
model.load_weights("snak_weights_{}.hdf5".format(games))

global snak
snak = snake.SnakeGame(1000, 600)
snak.updateCallback = Update
# snak.deathCallback = Death

isGood, inp, states = snak.Generate(1000)
# inp = [x for g, x in zip(isGood, inp) if g != 0]
# states = [x for g, x in zip(isGood, states) if g != 0]
# isGood = [x for x in isGood if x != 0]
for i, x in enumerate(isGood):
    # isGood[i] = (x + 1)/2
    if x == -1:
        isGood[i] = -1
    elif x == 1:
        isGood[i] = 1
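The Update callback assigned to snak.updateCallback above is not shown. A minimal sketch of what it might do follows, assuming the callback receives the 4-value state vector matching the model's input_shape=(4,) and returns one of the 3 action indices produced by the output layer; the signature and return convention are assumptions, not the original code.

import numpy as np  # assumed available alongside the snippet above

def Update(state):
    # Hypothetical callback: return the index of the highest-scoring action.
    q_values = model.predict(np.array([state]))[0]
    return int(np.argmax(q_values))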
import snake
from snakenn import SnakeNeuralNetwork
from Vector import Vector
import arcade

snak = snake.SnakeGame(Vector(30, 20))
nn = SnakeNeuralNetwork(snak, 0)
snak.updateCallback = nn.Update
arcade.run()