class Environment(object):
    """Episode-style wrapper around a ``TwentyFortyEight`` game instance.

    The game object is stored in ``self.S``. ``self.score`` mirrors the
    value of ``self.S.score1()`` observed right after the last move made
    through ``step`` (it is zeroed by ``__init__`` and ``reset``).
    """

    def __init__(self):
        """Create the game, build its tables, place a tile and announce it."""
        game = TwentyFortyEight()
        game.make_tables()
        self.S = game
        self.score = 0
        game.new_tile()
        print("New Episode")

    def reset(self):
        """Zero the game's ``score`` and ``cells``, place a tile, return the state.

        Returns:
            Whatever ``self.S.vectorize_state()`` yields after the new tile
            has been placed.
        """
        board = self.S
        board.score = 0
        board.cells = 0
        self.score = 0
        board.new_tile()
        return board.vectorize_state()

    def step(self, action):
        """Apply one action and report ``(state, reward, done)``.

        Args:
            action: Value forwarded to ``self.S.move`` as ``action + 1``
                (presumably a 0-based action mapped onto 1-based move
                codes -- confirm against ``TwentyFortyEight.move``).

        Returns:
            A 3-tuple: the vectorized state, the change in ``score1()``
            caused by the move, and a boolean that is ``True`` when
            ``canMove()`` reports that no move is possible.
        """
        board = self.S
        score_before = board.score1()
        cells_before = board.cells

        board.move(action + 1)

        reward = board.score1() - score_before
        # Recorded before any new tile is placed, same as the reward.
        self.score = board.score1()

        # A tile is only added when the move actually altered the cells.
        if cells_before != board.cells:
            board.new_tile()

        done = not board.canMove()
        return board.vectorize_state(), reward, done

    def seed(self, a):
        """Accept a seed argument and ignore it; always returns ``None``."""
        return None
# print(dir) # print("Score:"+str(x.score)+"\t Max Tile:"+str(x.maxValue())) # x.move(dir) # x.new_tile() # # print("GAME ENDs") # # print(x.maxValue()) # print("Score:"+str(x.score)+"\t Max Tile:"+str(x.maxValue())) # # x.__str__() # # print(x.get_available_moves()) occ = np.zeros(16) for i in range(500): x=TwentyFortyEight() x.make_tables() # x.print_tables() # print("Generated Tables") x.new_tile() # avail_moves = x.get_available_moves() while(True): # x.__str__() # print("-----------------------") temp = x.vectorize_state(); tempx = np.zeros((1, 256)) for j in range(16): tempx[0, j*16 + temp[j]] = 1 probs=y.eval(feed_dict={x1: tempx})[0] # print(probs) # dir = eminimax(x,2)