Example #1
import random

import numpy as np
import pygame

import snake  # the project's snake-game module


def main():
    pygame.init()
    clock = pygame.time.Clock()
    white = (255, 255, 255)

    block_size = 40
    map_size = (8, 8)
    # [blocked_left, blocked_up, blocked_right, blocked_down, food_left, food_up, food_right, food_down]
    states = 2**4
    actions = 4
    Q = np.zeros((states, actions))
    lr = 0.4
    gamma = 0.4
    epsilon = 0.8
    epochs = 50000
    Qs = np.zeros((epochs, states, actions))
    play_flag = True

    for epoch in range(epochs):
        if epoch % 100 == 0:
            print(epoch)
            if epsilon > 0.05:
                epsilon = epsilon - 0.01

        game = snake.SnakeGame(map_size, block_size)
        state = game.get_state()

        state_num = get_state_num(state)
        while play_flag:
            if random.uniform(0, 1) < epsilon:
                action = random.randint(0, actions - 1)
            else:
                p = Q[state_num, :]
                action = np.argmax(p)

            end_game, reward = game.move(action_to_dir(action))
            new_state = game.get_state()
            new_state_num = get_state_num(new_state)
            # standard Q-learning update with a learning rate (left disabled):
            # Q[state_num, action] += lr * \
            #     (reward + gamma * np.max(Q[new_state_num, :]) - Q[state_num, action])

            # direct update: overwrite the entry with the one-step target
            # (equivalent to the line above with lr = 1)
            Q[state_num, action] = reward + gamma * np.max(Q[new_state_num, :])
            state = new_state
            state_num = new_state_num
            if epoch % 1000 == 0:
                # periodically render the game (throttled to 0.5 FPS) for inspection
                print(state)
                game.show_text(str(epoch), (10, 10))
                clock.tick(0.5)
                game.draw()
            if end_game:
                play_flag = False
        play_flag = True
        Qs[epoch, :, :] = np.copy(Q)
    pass
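
Example #1 calls two helpers, get_state_num and action_to_dir, that are not shown. A minimal sketch of what they might look like, assuming the state is a list of boolean flags packed into a bit index (note that the snippet's comment lists eight flags while the Q-table only allocates 2**4 states, so the exact encoding is unclear) and that actions 0-3 map to the four grid directions; the real project may implement these differently:

def get_state_num(state):
    # hypothetical: pack the boolean state flags into one integer index
    # (first flag becomes the lowest-order bit)
    num = 0
    for i, flag in enumerate(state):
        if flag:
            num |= 1 << i
    return num


def action_to_dir(action):
    # hypothetical: map action indices 0-3 to (dx, dy) grid directions
    return [(-1, 0), (0, -1), (1, 0), (0, 1)][action]
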
Example #2
import cursebox

import snake


def main():
    global oldscreen, game
    with cursebox.Cursebox() as cb:
        # TODO: ask level within curses
        game = snake.SnakeGame(cb.width, cb.height - 1)
        cb.screen.timeout(game.timeout)
        keypress = ""
        escapekeys = ["CTRL+C", "q", "ESC"]
        i = 0
        oldscreen = [[0 for x in range(cb.width)]
                     for y in range(cb.height - 1)]
        while keypress not in escapekeys:  # and game.running:
            game.tick(keypress)
            drawscreen(game, cb)
            drawstatus(game.status_left, game.status_right, cb)
            cb.screen.timeout(game.timeout)
            keypress = str(cb.poll_event())
    # TODO: Maintain highscore list here
    pass
Example #3
    def _select_current_option(self):
        option = self.options[self.selected_option]
        print("Selected", option)
        if option == 'Snake':
            self.text_scroller.pause()
            game = snake.SnakeGame(self.matrix, self.joystick)
            utaskmanager.add_task(game)
            self.current_task = game

        elif option == 'net':
            # show ip address
            sta = network.WLAN(network.STA_IF)
            tc = text.TextScroller(self.matrix)
            tc.scroll_text(" IP: " + sta.ifconfig()[0] + "  ")

        elif option == 'mem':
            mem_free = gc.mem_free()
            tc = text.TextScroller(self.matrix)
            tc.scroll_text("mem free " + str(mem_free) + " Bytes ")

        elif option == 'demo':
            main.run_rotating_plasma()
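
The method above assumes the menu object already carries options, selected_option, matrix, joystick, text_scroller, and current_task attributes. A purely illustrative constructor sketch consistent with those uses (not taken from the project):

class Menu:
    def __init__(self, matrix, joystick, text_scroller):
        # hypothetical setup matching the attributes used in _select_current_option
        self.matrix = matrix
        self.joystick = joystick
        self.text_scroller = text_scroller
        self.options = ['Snake', 'net', 'mem', 'demo']
        self.selected_option = 0
        self.current_task = None
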
Example #4
import random
from collections import deque

import cv2
import numpy as np
import tensorflow as tf
from numpy.random import choice

import snake


def trainGraph(inp, out):

    #placeholder for a one-hot mask of the chosen action; multiplying it with the
    #predicted output and summing picks out that action's Q-value
    argmax = tf.placeholder("float", [None, ACTIONS])
    gt = tf.placeholder("float", [None])  #ground truth Q targets
    global_step = tf.Variable(0, name='global_step')

    #Q-value of the action that was actually taken
    action = tf.reduce_sum(tf.multiply(out, argmax), reduction_indices=1)
    #cost function we will reduce through backpropagation
    cost = tf.reduce_mean(tf.square(action - gt))
    #optimization function to minimize our cost function
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    #initialize our game
    game = snake.SnakeGame()

    #create a queue for experience replay to store policies
    D = deque()

    #initial frame
    frame = game.GetPresentFrame()
    #convert rgb to gray scale for processing
    frame = cv2.cvtColor(cv2.resize(frame, (IMG_SIZE, IMG_SIZE)),
                         cv2.COLOR_BGR2GRAY)
    #binary colors, black or white
    ret, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
    #stack four copies of the frame to form the initial input tensor
    inp_t = np.stack((frame, frame, frame, frame), axis=2)

    saver = tf.train.Saver(tf.global_variables())

    sess = tf.InteractiveSession(config=tf.ConfigProto(
        log_device_placement=True))

    #restore latest checkpoint or initialize a new graph
    checkpoint = tf.train.latest_checkpoint('./checkpoints')
    if checkpoint is not None:
        print('Restore Checkpoint %s' % (checkpoint))
        saver.restore(sess, checkpoint)
        print("Model restored.")
    else:
        init = tf.global_variables_initializer()
        sess.run(init)
        print("Initialized new Graph")

    t = global_step.eval()
    c = 0

    epsilon = INITIAL_EPSILON

    #Training time
    while True:
        #output tensor
        out_t = out.eval(feed_dict={inp: [inp_t]})[0]
        #one-hot action vector for this step
        argmax_t = np.zeros([ACTIONS])

        #exploration or exploitation
        if (random.random() <= epsilon and not USE_MODEL):
            #bias random exploration toward action 0, the most commonly chosen action
            maxIndex = choice((0, 1, 2, 3, 4),
                              1,
                              p=(0.80, 0.05, 0.05, 0.05, 0.05))
        else:
            maxIndex = np.argmax(out_t)
        argmax_t[maxIndex] = 1

        if epsilon > FINAL_EPSILON:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        mode = 'observing'
        if t > OBSERVE:
            mode = 'training'
        if USE_MODEL:
            mode = 'model only'

        #move game forward by one frame, get reward and frame
        reward_t, frame = game.GetNextFrame(
            argmax_t, [t, np.max(out_t), epsilon, mode])
        #get frame pixel data
        frame = cv2.cvtColor(cv2.resize(frame, (IMG_SIZE, IMG_SIZE)),
                             cv2.COLOR_BGR2GRAY)
        ret, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
        frame = np.reshape(frame, (IMG_SIZE, IMG_SIZE, 1))
        #new input tensor
        inp_t1 = np.append(frame, inp_t[:, :, 0:3], axis=2)

        #add our input tensor, argmax tensor, reward and updated input tensor to replay memory
        D.append((inp_t, argmax_t, reward_t, inp_t1))

        #if we run out of replay memory, make room
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #training iteration
        if c > OBSERVE and not USE_MODEL:

            #get random sample from our replay memory
            minibatch = random.sample(D, BATCH)

            inp_batch = [d[0] for d in minibatch]
            argmax_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            inp_t1_batch = [d[3] for d in minibatch]

            gt_batch = []
            out_batch = out.eval(feed_dict={inp: inp_t1_batch})

            #add values to our batch
            for i in range(0, len(minibatch)):
                gt_batch.append(reward_batch[i] + GAMMA * np.max(out_batch[i]))

            #train on that
            train_step.run(feed_dict={
                gt: gt_batch,
                argmax: argmax_batch,
                inp: inp_batch
            })

        #update our input tensor to the next frame
        inp_t = inp_t1
        t = t + 1
        c = c + 1

        #save a checkpoint every SAVE_STEP timesteps, then log progress
        if t % SAVE_STEP == 0 and not USE_MODEL:
            sess.run(global_step.assign(t))
            saver.save(sess, './checkpoints/model.ckpt', global_step=t)

        print("TIMESTEP", t, "/ EPSILON", epsilon, "/ ACTION", maxIndex,
              "/ REWARD", reward_t, "/ Q_MAX %e" % np.max(out_t))
Example #5

import tensorflow as tf

import snake

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(30, input_shape=(4, )))
model.add(tf.keras.layers.Activation("relu"))
#model.add(tf.keras.layers.Dense(100))
#model.add(tf.keras.layers.Activation("relu"))
#model.add(tf.keras.layers.Dense(10, tf.keras.layers.Activation("linear")))
model.add(tf.keras.layers.Dense(3))
# model.add(tf.keras.layers.Dense(3, tf.keras.layers.Activation("softmax")))
model.compile(tf.keras.optimizers.Adam(lr=0.005), "mean_squared_error",
              ['accuracy'])
model.load_weights("snak_weights_{}.hdf5".format(games))

global snak
snak = snake.SnakeGame(1000, 600)
snak.updateCallback = Update
#snak.deathCallback = Death

isGood, inp, states = snak.Generate(1000)

# inp = [x for g, x in zip(isGood, inp) if g != 0]
# states = [x for g, x in zip(isGood, states) if g != 0]
# isGood = [x for x in isGood if x != 0]

for i, x in enumerate(isGood):
    #isGood[i] = (x + 1)/2
    # with the rescaling above commented out, the labels are left in {-1, 1}
    if x == -1:
        isGood[i] = -1
    elif x == 1:
        isGood[i] = 1
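
Once the weights are loaded, the network in Example #5 maps a 4-feature state vector to three action scores. A minimal inference sketch; the feature values and the interpretation of the three outputs are assumptions, not taken from the project:

import numpy as np

# hypothetical state: four features, matching input_shape=(4, )
state = np.array([[0.0, 1.0, 0.5, 0.0]], dtype=np.float32)
scores = model.predict(state)        # shape (1, 3): one score per action
action = int(np.argmax(scores[0]))   # index of the highest-scoring action
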
Example #6
import snake
from snakenn import SnakeNeuralNetwork
from Vector import Vector
import arcade


snak = snake.SnakeGame(Vector(30, 20))
nn = SnakeNeuralNetwork(snak, 0)
snak.updateCallback = nn.Update

arcade.run()