Example #1
class test2048:
    def __init__(self, manual_input=True, random=True, steps=10, sleep=0):
        self.gamegrid = GameGrid(manual_input=manual_input)
        self.random = random
        self.steps = steps
        self.sleep = sleep

    def run(self, input_value=None):
        if self.random:
            self.run_random()

    def run_random(self):
        for k in range(self.steps):
            # pick a random direction (0-3) and deliver it as a fake key event
            num = randint(0, 3)
            event_rn.char = chr(num)
            self.gamegrid.key_down(event_rn)
            if game_state(self.gamegrid.matrix) in ('win', 'lose'):
                # time.sleep(1)
                return
            time.sleep(self.sleep)

    def get_status(self):
        return self.gamegrid.matrix

    def take_step(self, inp):
        event_rn.char = chr(inp)
        self.gamegrid.key_down(event_rn)
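Several of these snippets feed GameGrid.key_down through a module-level event_rn object that the excerpts never define. A minimal stand-in sketch, assuming key_down only reads the event's .char attribute (the KeyEvent name is hypothetical, not part of the original code):

# Hypothetical helper: mimics the tkinter key event these snippets mutate.
# Assumption: GameGrid.key_down only looks at event.char.
class KeyEvent:
    def __init__(self, char=''):
        self.char = char

event_rn = KeyEvent()
event_rn.char = chr(2)  # e.g. encode direction 2 the same way run_random does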
Example #2
    def __init__(self, numEpochs, numItterations, agentCode):
        self._numEpochs = numEpochs
        self._numItterations = numItterations
        self._agentCode = agentCode

        self._gamegrid = GameGrid()
        self._gamegrid.hide()

        self._trainingRecord = []
Example #3
    def refreshGameGrid(self):
        agent = self._gamegrid.getAgent()
        self._gamegrid = GameGrid()
        self._gamegrid.hide()

        agent.reset()

        self._gamegrid.setAgent(agent)
        self._gamegrid.getAgent().setGameGrid(self._gamegrid)
Example #4
def dialog1():
    game_grid = GameGrid(window)
    game_grid.tkraise()
    # NOTE: this early return skips the login check below,
    # so the username/password validation is currently unreachable.
    return
    username = entry1.get()
    password = entry2.get()
    # check if valid
    r = subprocess.run(['rp_user_validator', username, password])
    print(r.returncode)
    if r.returncode == 0:
        box.showinfo('info', 'Correct Login')
    else:
        box.showinfo('info', 'Username or Password incorrect')
Example #5
    def __init__(self,
                 manual_input=True,
                 random=True,
                 steps=10,
                 sleep=0,
                 log=False):
        self.gamegrid = GameGrid(manual_input)
        if log:
            print('Initial GameGrid')
            print(self.gamegrid.matrix)

        self.random = random
        self.steps = steps
        self.sleep = sleep
        self.log = log

        self.gamegrid.win_status = False

        self.step = 0
        self.old_matrix = []
        self.check_value = 0
Example #6
def main():
    gamegrid = GameGrid()
    gamegrid.hide()
    agent = DNNAgent(None,
                     waitTime=0,
                     trainDataPickle="ULRD_train_2000_20.pickle")
    gamegrid.setAgent(agent)

    with open('ULRD_trained_model_20_game_layers_64.pickle', 'wb') as f:
        pickle.dump(agent, f)
        print("Trained agent stored in {}".format(f.name))
Example #7
# Do not edit this #
game_logic = {
    'make_new_game': make_new_game,
    'game_status': game_status,
    'get_score': get_score,
    'get_matrix': get_matrix,
    'up': up,
    'down': down,
    'left': left,
    'right': right,
    'undo': lambda state: (state, False)
}

# UNCOMMENT THE FOLLOWING LINE TO START THE GAME (WITHOUT UNDO)
gamegrid = GameGrid(game_logic)




#################
# Optional Task #
#################

###########
# Task 5i #
###########

def make_new_record(mat, increment):
    "Your answer here"
Example #8
def trainNetwork(s, readout, h_fc1, sess):
    # define the cost function
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # open up a game state to communicate with the emulator
    # game_state = showscreen.game()  # superseded by the GameGrid instance below
    game_state = GameGrid()

    # store the previous observations in replay memory
    D = deque()

    # printing
    a_file = open("logs_" + GAME + "/readout.txt", 'w')
    h_file = open("logs_" + GAME + "/hidden.txt", 'w')

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, score, terminal = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    # ret, x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())  # tf.initialize_all_variables() is deprecated
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    # start training
    epsilon = INITIAL_EPSILON
    t = 0
    while "flappy bird" != "angry bird":
        # choose an action epsilon greedily
        readout_t = readout.eval(feed_dict={s: [s_t]})[0]
        a_t = np.zeros([ACTIONS])
        action_index = 0
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else:
                action_index = np.argmax(readout_t)
                a_t[action_index] = 1
        else:
            a_t[0] = 1 # do nothing

        # scale down epsilon
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1_colored, r_t, score, terminal = game_state.frame_step(a_t, score)
        x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
        # ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
        x_t1 = np.reshape(x_t1, (80, 80, 1))
        #s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
        s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

        # store the transition in D
        D.append((s_t, a_t, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing
        if t > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # get the batch variables
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            r_batch = [d[2] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            y_batch = []
            readout_j1_batch = readout.eval(feed_dict={s: s_j1_batch})
            for i in range(0, len(minibatch)):
                terminal = minibatch[i][4]
                # if terminal, only equals reward
                if terminal:
                    y_batch.append(r_batch[i])
                else:
                    y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))

            # perform gradient step
            train_step.run(feed_dict={
                y: y_batch,
                a: a_batch,
                s: s_j_batch})

        # update the old values
        s_t = s_t1
        t += 1

        # save progress every 10000 iterations
        if t % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = t)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, \
            "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
            "/ Q_MAX %e" % np.max(readout_t))
        # write info to files
Example #9
    epoches = []
    ddqn_scores = []
    ddqn_biggest_tiles = []
    epsilon_history = []  # to see how well the score improves as epsilon decreases
    avg_scores = []

    # save output
    # will overwrite anything in the folder -> use different directories for different models
    # env = wrappers.Monitor(env, 'tmp/lunar-lander', video_callable=lambda episode_id: True, force=True)

    #for i in range(n_games):

    # TODO: by this point one game has already run; fix this so the first game happens here and not earlier
    for i in range(n_games):
        score = 0
        gamegrid = GameGrid(agent=ddqn_agent)


        print('\nScore: ', gamegrid.score, ' Biggest tile: ', gamegrid.biggest_tile, ' Epsilon: ', gamegrid.ddqn_agent.epsilon)
        ddqn_scores.append(gamegrid.score)
        ddqn_biggest_tiles.append(gamegrid.biggest_tile)
        epsilon_history.append(gamegrid.ddqn_agent.epsilon)

        # To see whether the agent improves and learns, print the average score over the last 100 games
        avg_score = np.mean(ddqn_scores[max(0, i-100):(i+1)])
        avg_scores.append(avg_score)
        print('\nepisode: ', i, 'score %.2f' % gamegrid.score, 'average score %.2f' % avg_score)
        print('\n---------\n')
        if i % 5 == 0 and i > 0:
            print('------------------- here')
            #gamegrid.ddqn_agent.save_model()
Example #10
    def __init__(self, manual_input=True, random=True, steps=10, sleep=0):
        self.gamegrid = GameGrid(manual_input=manual_input)
        self.random = random
        self.steps = steps
        self.sleep = sleep
Example #11
class Runner:
    def __init__(self, numEpochs, numItterations, agentCode):
        self._numEpochs = numEpochs
        self._numItterations = numItterations
        self._agentCode = agentCode

        self._gamegrid = GameGrid()
        self._gamegrid.hide()

        self._trainingRecord = []

    def createAgent(self,
                    gameSessionFile=None,
                    trainName=None,
                    trainData=None,
                    trainDataPickle=None,
                    existingAgent=None):
        if (self._agentCode == 0):
            self._gamegrid.setAgent(RandomAgent(None, waitTime=0))
        elif (self._agentCode == 1):
            self._gamegrid.setAgent(PatternAgentULRD(None, waitTime=0))
        elif (self._agentCode == 2):
            self._gamegrid.setAgent(PatternAgentLURD(None, waitTime=0))
        elif (self._agentCode == 3):
            self._gamegrid.setAgent(ManualAgent(None, waitTime=0))
        elif (self._agentCode == 4):
            self._gamegrid.setAgent(
                DNNAgent(None,
                         waitTime=0,
                         gameSessionFile=gameSessionFile,
                         trainName=trainName,
                         trainData=trainData,
                         trainDataPickle=trainDataPickle))

        self._gamegrid.getAgent().setGameGrid(self._gamegrid)

    def refreshGameGrid(self):
        agent = self._gamegrid.getAgent()
        self._gamegrid = GameGrid()
        self._gamegrid.hide()

        agent.reset()

        self._gamegrid.setAgent(agent)
        self._gamegrid.getAgent().setGameGrid(self._gamegrid)

    def runTraining(self):
        for epochNum in range(0, self._numEpochs):
            for itterNum in range(0, self._numItterations):
                # refreshGameGrid() carries the agent over to a fresh grid
                self.refreshGameGrid()
                agent = self._gamegrid.getAgent()  # agent installed by createAgent()

                print("Epoch: ", epochNum, " Iteration: ", itterNum)
                self._gamegrid.mainloop()

                # print(gamegrid.matrix)
                print("Score: ", self._gamegrid.scoreMatrix())
                agent.setScore(self._gamegrid.scoreMatrix())

                # The code running AI games needs the current epochNum for encoding the filename
                (boards, moves, score) = agent.getGameRecord()
                # agent.pikPakGame()
                self._trainingRecord.append(
                    (epochNum, itterNum, boards, moves, score))
        return self._trainingRecord
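A hedged usage sketch for the Runner class above; the constructor arguments are illustrative, and agentCode=1 selects the PatternAgentULRD branch of createAgent:

# Illustrative driver, not from the original source.
runner = Runner(numEpochs=2, numItterations=5, agentCode=1)
runner.createAgent()
record = runner.runTraining()  # list of (epochNum, itterNum, boards, moves, score) tuples
print(len(record), "games recorded")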
Example #12
parser.add_argument('--replay_memory_length', default=40960, type=int)

args = parser.parse_args()

if __name__ == '__main__':

    policy, target = DQN(4).to(device), DQN(4).to(device)

    try:
        policy.load_state_dict(torch.load('my_policy.pt'))
        target.load_state_dict(torch.load('my_target.pt'))
    except FileNotFoundError:
        print('Exception Raised: Files not found...')

    rm = ReplayMemory(args.replay_memory_length)
    optimizer = optim.RMSprop(policy.parameters(), eps=1e-5)

    try:
        gamegrid = GameGrid(rm, policy, target, optimizer, args.epsilon,
                            args.min_epsilon, args.eps_decay_rate,
                            args.update_every, args.n_train, args.batch_size,
                            args.gamma)
    except KeyboardInterrupt:
        print('\nKeyboard Interrupt!!!')
        try:
            print('Saving...')
            torch.save(policy.state_dict(), 'my_policy.pt')
            torch.save(target.state_dict(), 'my_target.pt')
        except Exception as e:
            print('Error :{}'.format(e))
Example #13
from agent import Agent
from puzzle import GameGrid
import sys
import numpy as np

# normalize input values

episodes = 61234


if len(sys.argv) == 2:
    MODE = sys.argv[1]
else:
    MODE = 'train'

environment = GameGrid()
bot = Agent(MODE, episodes)


# each transition is a five-tuple: (state, action, state_after, reward, terminal)
for episode in range(episodes):
    if MODE != "play":
        if episode % 75 == 0 and episode != 0:
            bot.target_model.set_weights(bot.model.get_weights())

    still_playing = True
    state_before_action = environment.give_recent_state()
    step = 0
    while still_playing:

        action = bot.decide_move(state_before_action)
Example #14
class run2048:
    def __init__(self,
                 manual_input=True,
                 random=True,
                 steps=10,
                 sleep=0,
                 log=False):
        self.gamegrid = GameGrid(manual_input)
        if log:
            print('Initial GameGrid')
            print(self.gamegrid.matrix)

        self.random = random
        self.steps = steps
        self.sleep = sleep
        self.log = log

        self.gamegrid.win_status = False

        self.step = 0
        self.old_matrix = []
        self.check_value = 0

    def run(self, input_value=None):
        self.old_matrix = self.gamegrid.matrix
        # if game_state(self.gamegrid.matrix) == 'lose':
        #
        if self.random and input_value is None:
            input_value = randint(0, 3)

        assert input_value in range(4)

        event_rn.char = chr(input_value)
        self.take_step(event_rn)

        if self.log:
            print(self.gamegrid.matrix)

        time.sleep(self.sleep)
        self.step += 1

    #def run_random(self):
    #    for k in range(self.steps):
    #        num = randint(0,3)
    #        event_rn.char = chr(num)
    #        self.take_step(event_rn)
    #        if game_state(self.gamegrid.matrix) == 'lose':
    #            return
    #        self.step += 1
    #        time.sleep(self.sleep)

    def get_status(self):
        # Need to figure out 'lose' state from check_matrix
        return self.gamegrid.matrix, int(self.check_matrix())

    def take_step(self, inp):
        self.gamegrid.key_down(inp)

    def check_matrix(self):
        if self.old_matrix == self.gamegrid.matrix:
            self.check_value += 1
        else:
            self.check_value = 0

        return self.check_value
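A minimal driver sketch for run2048, assuming the same globals the class itself relies on (GameGrid, randint, time, event_rn); it reuses check_matrix's count of consecutive unchanged boards as a crude stop condition:

# Hypothetical usage, not part of the original example.
game = run2048(manual_input=False, random=True, sleep=0, log=False)
while True:
    game.run()                         # one random move
    matrix, stuck_count = game.get_status()
    if stuck_count >= 10:              # board unchanged for 10 moves in a row
        break
print("Final board:", matrix)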
Example #15
def main():
    existingAgent1 = None
    with open("TrainingPartialCountRunner_100_20_4_5.pickle", 'rb') as f:
        existingAgent0 = pickle.load(f)

    with open("ULRD_trained_model_20_game_layers_32_16.pickle", 'rb') as f:
        existingAgent1 = pickle.load(f)

    with open("ULRD_trained_model_20_game_layers_64_16.pickle", 'rb') as f:
        existingAgent2 = pickle.load(f)

    with open("ULRD_trained_model_20_game_layers_64_16_8.pickle", 'rb') as f:
        existingAgent3 = pickle.load(f)

    with open("ULRD_trained_model_20_game_layers_64_32_8.pickle", 'rb') as f:
        existingAgent4 = pickle.load(f)

    with open("ULRD_trained_model_20_game_layers_64.pickle", 'rb') as f:
        existingAgent5 = pickle.load(f)

    # keys 4-8 follow the pickled models loaded above, matching agentDescription
    agentDict = {
        1: RandomAgent(None, waitTime=0),
        2: PatternAgentULRD(None, waitTime=0),
        0: existingAgent0,
        3: DNNAgent(None, waitTime=0, trainName="ULRD_train.pickle"),
        4: existingAgent1,
        5: existingAgent2,
        6: existingAgent3,
        7: existingAgent4,
        8: existingAgent5
    }
    agentDescription = {
        1: "Random",
        2: "Up-Left-Right-Down",
        0: "Online learning NN",
        3: "DNN Agent",
        4: "DNN Agent with layers [32, 16]",
        5: "DNN Agent with layers [64, 16]",
        6: "DNN Agent with layers [64, 16, 8]",
        7: "DNN Agent with layers [64, 32, 8]",
        8: "DNN Agent with layers [64]"
    }
    agentScoreDict = {
        1: [],
        2: [],
        0: [],
        3: [],
        4: [],
        5: [],
        6: [],
        7: [],
        8: []
    }
    agentColors = {
        1: "b",
        2: "r",
        0: "#1f004d",
        3: "g",
        4: "c",
        5: "m",
        6: "y",
        7: "k",
        8: "#3CFE6E"
    }

    gameIDs = []
    for i in range(0, 15):
        gameIDs.append(i)
        random.seed(i)
        for (agentKey, agent) in agentDict.items():
            gamegrid = GameGrid()
            gamegrid.hide()
            gamegrid.setAgent(agent)
            agent.setGameGrid(gamegrid)
            gamegrid.mainloop()
            agentScoreDict[agentKey].append(sumScoreMatrix(gamegrid.matrix))
            print(agentScoreDict[agentKey])
            agent.reset()

    plotTrainingRecord(gameIDs, agentDict, agentDescription, agentScoreDict,
                       agentColors)