import json

import numpy as np
import tensorflow as tf

from game2048.game import Game


def generate_fingerprint(AgentClass, **kwargs):
    sess = tf.Session()
    with open("board_cases.json") as f:
        board_json = json.load(f)
    game = Game(size=4, enable_rewrite_board=True)
    agent = AgentClass(game=game, sess=sess)
    agent.build()
    trace = []
    num = len(board_json)
    for index, board in enumerate(board_json):
        print('{} left.'.format(num - index))
        game.board = np.array(board)
        direction = agent.step()
        trace.append(direction)
    fingerprint = "".join(str(i) for i in trace)
    return fingerprint
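# A minimal usage sketch for generate_fingerprint, assuming a hypothetical
# TF-based agent class `MyAgent` (module name is illustrative only) that
# follows the build()/step() interface used above. The fingerprint is just the
# concatenated move trace over the fixed board cases, so two identical
# policies produce identical strings.
if __name__ == "__main__":
    from my_agent import MyAgent  # hypothetical module and agent class

    fingerprint = generate_fingerprint(MyAgent)
    with open("fingerprint.json", 'w') as f:
        json.dump(fingerprint, f)
    print("fingerprint length:", len(fingerprint))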
def single_run(logger, models, size, score_to_win, AgentClass, **kwargs):
    game = Game(size, score_to_win)
    agent = AgentClass(logger=logger, models=models, game=game,
                       display=Display(), **kwargs)
    agent.play(verbose=True)
    return game.score
def single_run(size, model1, model2, model3, score_to_win, AgentClass, **kwargs):
    game = Game(size, score_to_win)
    agent = AgentClass(model1, model2, model3, game, display=Display(), **kwargs)
    agent.play(verbose=False)
    return game.score
def data_generator_for_RNN(score_to_begin, score_to_win, batch_size):
    """Yield (boards, directions) batches by following the ExpectiMax teacher."""
    datas = []
    labels = []
    cnt = 0
    while True:
        game = Game(score_to_win=score_to_win, random=False)
        agent = ExpectiMaxAgent(game)
        while game.end == 0:
            step = agent.step()
            # Only record positions past the warm-up score.
            if game.score >= score_to_begin:
                datas.append(board2array(game).reshape(256))
                labels.append(step2array(step))
                cnt += 1
            game.move(step)
            if cnt == batch_size:
                cnt = 0
                yield (np.array(datas), np.array(labels))
                datas = []
                labels = []
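# The generator above relies on two helpers that are not shown in this
# snippet. A plausible sketch (an assumption, not the original code):
# board2array one-hot encodes the 4x4 board over 16 possible log2 tile values,
# which is consistent with the .reshape(256) call, and step2array one-hot
# encodes the direction.
def board2array(game):
    # 16 cells x 16 possible exponents -> a (16, 16) one-hot array.
    tiles = np.where(game.board == 0, 1, game.board)  # avoid log2(0)
    exponents = np.log2(tiles).astype(int).flatten()
    arr = np.zeros((16, 16))
    arr[np.arange(16), exponents] = 1
    return arr


def step2array(step):
    # One-hot encode a direction in {0, 1, 2, 3}.
    label = np.zeros(4)
    label[step] = 1
    return label


# Typical training call with the old Keras generator API (`model` here is any
# compiled classifier mapping 256 inputs to 4 softmax outputs):
# model.fit_generator(data_generator_for_RNN(1024, 2048, 128),
#                     steps_per_epoch=100, epochs=10)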
def self_test(self):
    import time
    total_time = 0
    cnt = 0
    stat = {2048: 0, 1024: 0, 512: 0, 256: 0, 128: 0, 64: 0, 32: 0, 16: 0}
    total = 0
    for i in range(1000):
        if i % 10 == 0:
            print("Test: ", i)
        game = Game(4, 2048)
        while not game.end:
            # time.perf_counter() replaces time.clock(), removed in Python 3.8.
            start = time.perf_counter()
            oht = self.one_hot(game.board)
            prediction = self.model.predict(oht[np.newaxis, :, :, :])
            direction = prediction.argmax()
            end = time.perf_counter()
            total_time += end - start
            cnt += 1
            game.move(direction)
        total += game.score
        for s in [2048, 1024, 512, 256, 128, 64, 32, 16]:
            if game.score >= s:
                stat[s] += 1
        if i % 10 == 0:
            print("Score: ", game.score)
            # Average over the games finished so far, not a fixed 1000.
            print("Average score so far: ", float(total) / float(i + 1))
            print("stat: ", stat)
            print("Time per step (seconds/step): ", float(total_time) / float(cnt))
def data_generator(batch_size):
    datas = []
    labels = []
    cnt = 0
    while True:
        game = Game(score_to_win=2048, random=False)
        agent = ExpectiMaxAgent(game)
        while game.end == 0:
            step = agent.step()
            # The board here presumably stores log2 tile values, so dividing
            # by 11 (2048 == 2**11) normalizes it to [0, 1].
            board = game.board / 11
            board1 = board.T
            # Stack the board with its transpose into a (4, 8) input.
            datas.append(np.hstack((board, board1)))
            labels.append(step2array(step))
            cnt += 1
            game.move(step)
            if cnt == batch_size:
                cnt = 0
                yield (np.array(datas), np.array(labels))
                datas = []
                labels = []
def generator(self, max_score=999999, filepath='', is_delete=True):
    data = []

    # Append the collected rows to a CSV file.
    def csv_saver(path):
        with open(path, 'a') as f:
            np.savetxt(f, data, fmt='%d', delimiter=',')
        if is_delete:
            cycle_last_lines_with_delete(path, len(data))

    # Save the board and its step, with rotation/transposition augmentation.
    def saver(direction, board_s):
        def mini_saver(log_board, dire):
            log_board = log_board.flatten()
            log_board = np.concatenate((np.array([dire]), log_board), axis=0)
            data.append(log_board)

        for i in range(4):
            b, d = Data.board_rot(board_s, direction, i)
            b[b == 0] = 1
            log_board_s = np.log2(b).astype(int)
            mini_saver(log_board_s, d)
            # TODO: test without data augmentation (break here).
            mini_saver(*Data.board_transpostion(log_board_s, d))

    def run():
        if self.game.end:  # already won or lost
            return 0
        cnt_step = 0
        # game.end: 0 = continue, 1 = lose, 2 = win
        while not self.game.end and self.game.score <= max_score:
            step = self.step()
            # Save the best step together with the current board.
            saver(self.best_step(), self.game.board)
            # Use the AI to move.
            self.game.move(step)
            cnt_step += 1
        return np.sum(self.game.board)

    self.game = Game(score_to_win=2**15, random=False, enable_rewrite_board=True)
    score = run()
    path = "{}data_{}.csv".format(filepath, 'train')
    csv_saver(path)
    return score
def testAgent(n_tests, game_size, score_to_win, model, max_iter=1000):
    """Return the fraction of moves on which `model` agrees with ExpectiMax."""
    acc, total = 0, 0
    for i in trange(n_tests):
        game, n_iter = Game(game_size, score_to_win), 0
        target = ExpectiMaxAgent(game)
        while n_iter < max_iter and not target.game.end:
            dir_ = Greedy_Action(game, model)
            target_dir = target.step()
            n_iter += 1
            total += 1
            if dir_ == target_dir:
                acc += 1
            # Advance the game with the model's move, not the teacher's.
            target.game.move(dir_)
    return acc / total
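# A hedged usage sketch: measure how often a trained Keras model agrees with
# the ExpectiMax teacher over 50 games. The 'model.h5' path and the
# Greedy_Action helper are assumptions carried over from the snippet above.
if __name__ == "__main__":
    import keras

    model = keras.models.load_model('model.h5')
    agreement = testAgent(n_tests=50, game_size=4, score_to_win=2048, model=model)
    print('agreement with ExpectiMax: {:.1%}'.format(agreement))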
def eval(self):
    self.model.eval()
    scores = []
    n_iter = 0
    for i in range(N_EVAL):
        game = Game(4, 2048)
        path = MODEL_PATH if LOAD_MODEL else None
        n_iter += NaiveAgent(game, model_path=path).play()
        scores.append(game.score)
    average_iter = n_iter / N_EVAL
    average_score = sum(scores) / len(scores)
    print(f"Average_iter={average_iter}, "
          f"average_score={average_score}", Counter(scores))
    if average_score > self.high_score:
        self.high_score = average_score
        self.model.save(f"./model/model_{average_score}.pth")
def intuition(board):
    # Normal case: think fast on intuition (an approximate greedy method).
    score_dict = {0: 0, 1: 0, 2: 0, 3: 0}
    game = Game(4, 4096, enable_rewrite_board=True)
    for i in range(4):
        # Reset the board and the move score before each simulated move.
        game.board = board
        game.score_move = 0
        game.only_move(i)
        # No merge happened: penalize this direction heavily.
        if game.score_move == 0:
            score_dict[i] = -100 + board_score(game.board)
        # A merge happened: weight by the merge score plus the board heuristic.
        else:
            score_dict[i] += game.score_move + board_score(game.board)
    direction = max(score_dict, key=score_dict.get)
    return direction
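# A quick sanity check for intuition(), assuming boards hold raw tile values
# and directions are encoded 0-3 as in game2048's Game.
if __name__ == "__main__":
    sample = np.array([[0, 2, 2, 4],
                       [0, 0, 4, 8],
                       [0, 0, 2, 16],
                       [0, 0, 0, 2]])
    print("suggested direction:", intuition(sample))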
from keras.optimizers import Adam
import numpy as np
from sklearn.model_selection import train_test_split

BATCH_SIZE = 128
NUM_CLASSES = 4
NUM_EPOCHS = 20

display1 = Display()
display2 = IPythonDisplay()
model = keras.models.load_model('model.h5')

image = []
label = []
# Run 10 games and record each board together with the expert's direction.
for i in range(0, 10):
    game = Game(4, score_to_win=2048, random=False)
    agent = ExpectiMaxAgent(game, display=display1)
    while not game.end:
        direction = agent.step()
        image.append(game.board)
        label.append(direction)
        game.move(direction)
    display1.display(game)

x_train = np.array(image)
y_train = np.array(label)
x_train = np.log2(x_train + 1)
x = Dense(128, kernel_initializer='he_uniform', activation='relu')(x)
x = BatchNormalization()(x)
outputs = Dense(4, activation='softmax',
                kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l1(0.001))(x)
model = Model(inputs, outputs)
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

M1 = ModelWrapper(model, capacity)
game = Game(4, 2048)
while M1.training_step < epoch_size:
    # Keep training on the current game while it is still in progress.
    if game.board.max() <= 2048 and game.board.min() == 0:
        game = M1.train(batch_size, game)
    else:
        # The game finished: start a fresh one.
        game = Game(4, 2048)
        game = M1.train(batch_size, game)
        print("new game", str(game.board.max()))
    if M1.training_step == flag:
        model.save("./k_model/CNN_old_16_" +
                   str(round(M1.training_step / 10000) + 200) + ".h5")
        flag += 5000
        with open('oldtrainvalue.txt', 'a') as f:
            f.write("\nnewmodel")
            f.write("\n./k_model/CNN_old_16_" +
def train(self, batch):
    # Method of ModelWrapper (header reconstructed from the mw.train(BATCH)
    # call below).
    guides = self.mem.sample(batch)
    X = []
    Y = []
    for guide in guides:
        X.append(guide.state)
        ohe_action = [0] * 4
        ohe_action[guide.action] = 1
        Y.append(ohe_action)
    loss, acc = self.model.train_on_batch(np.array(X), np.array(Y))
    print('#%d \t loss:%.3f \t acc:%.3f'
          % (self.trainning_step, float(loss), float(acc)))
    self.trainning_step += 1


MEMORY = 65536
BATCH = 4096
model = keras.models.load_model('dev/model.h5')
mw = ModelWrapper(model, MEMORY)
while True:
    game = Game(4, random=False, score_to_win=1024)
    while not game.end:
        mw.move(game)
    print('score:', game.score, end='\t')
    mw.train(BATCH)
    if mw.trainning_step % 100 == 0:
        model.save('dev/model.h5')
    if mw.trainning_step % 1000 == 0:
        model.save('dev/model_%d.h5' % mw.trainning_step)
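# The replay memory behind self.mem.sample(batch) is not shown in this
# snippet. A minimal sketch under that assumption: a ring buffer of
# (state, action) pairs. The Guide namedtuple and Memory class names here are
# illustrative only.
import random
from collections import namedtuple

Guide = namedtuple('Guide', ('state', 'action'))


class Memory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.data = []
        self.pos = 0

    def push(self, state, action):
        # Overwrite the oldest entry once capacity is reached.
        if len(self.data) < self.capacity:
            self.data.append(None)
        self.data[self.pos] = Guide(state, action)
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch):
        return random.sample(self.data, min(batch, len(self.data)))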
input_shape = (4, 4, board_class)  # one-hot encoded board
boards = []
directions = []
while count < 98:
    score_train = []
    score_test = []
    boards = []
    directions = []
    count = 0
    print('array deleted')
    for i in range(30):
        game = Game(4, score_to_win=2048, random=False)
        agent1 = ExpectiMaxAgent(game, display=display1)
        while not game.end:
            # Only collect early-game positions.
            if np.sum(game.board) > 384:
                break
            a = np.array(game.board)
            a = np.log2(a + 1)
            a = np.trunc(a)
            a = keras.utils.to_categorical(a, board_class)
            a = a.reshape(1, 4, 4, board_class)
            prediction = model.predict(a, batch_size=128)
            b = prediction[0].tolist()
            direction2 = b.index(max(b))  # the model's move
            direction1 = agent1.step()    # the teacher's move
import pandas as pd

display1 = Display()
display2 = IPythonDisplay()


def log2(board):
    # Replace each nonzero tile with its base-2 logarithm, in place.
    for i in range(16):
        if board[i] != 0:
            board[i] = np.log2(board[i])
    return board


for i in range(0, 100):
    print(i, "is running")
    game = Game(4, 2048, random=False)
    agent = ExpectiMaxAgent(game, display=display2)
    n_iter = 0
    max_iter = np.inf
    data = np.zeros((0, 17), dtype=float)
    while (n_iter < max_iter) and (not game.end):
        # One row per position: 16 log2 board cells plus the expert direction.
        arr1 = log2(np.reshape(agent.game.board, newshape=(16,)))
        direction = agent.step()
        arr3 = np.hstack((arr1, direction))
        data = np.vstack([data, arr3])
        agent.game.move(direction)
        n_iter += 1
    df = pd.DataFrame(data, columns=None, index=None)
    df.to_csv('/home/huanning/big_project/2048-api-master/data_test.csv',
              index=0, mode='a',
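# To train on the collected data, the CSV can be read back into features and
# labels. A minimal sketch assuming the row layout written above (16 log2
# board cells followed by the direction), that no header rows were written,
# and with the path shortened here for illustration:
df = pd.read_csv('data_test.csv', header=None)
X = df.iloc[:, :16].values.reshape(-1, 4, 4)
y = df.iloc[:, 16].values.astype(int)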
import numpy as np
from game2048.game import Game
from game2048.agents import ExpectiMaxAgent
from game2048.agents import MyAgent
from game2048.displays import Display
import csv
import os

game_size = 4
score_to_win = 2048
iter_num = 3000

game = Game(game_size, score_to_win)
board = game.board
agenta = ExpectiMaxAgent(game, Display())
agentb = MyAgent(game, Display())
directiona = agenta.step()
directionb = agentb.step()
board = game.move(directionb)
i = 0
dic = {}
idx = 0

# Save file: create it on the first run.
filename = '/home/olivia/PycharmProjects/2048/game2048/data/traindata10.csv'
if os.path.exists(filename):
    start = True
else:
    start = False
    os.mknod(filename)
def single_run(size, score_to_win, AgentClass, model):
    game = Game(size, score_to_win)
    agent = AgentClass(game, display=Display())
    agent.import_model(model)
    agent.play(max_iter=5e3, verbose=False)
    return game.score
zit_score_to_win = 2048
n_epoch = 20  # adjust before running
n_steps_per_epoch = 7000
ite = 7000  # the number of iterations used to generate data
data_path = r"./cnn_train_data/"
count = 0
rnn_filter = 64
drop = 0.15
lr = 0.001
decay = lr / n_epoch
game = Game(game_size, zit_score_to_win)

from keras.callbacks import LearningRateScheduler as LRS
import keras.backend as K


def scheduler(epoch):
    # Decay the learning rate by 10x every 3 epochs.
    if epoch % 3 == 0 and epoch != 0:
        lr = K.get_value(zit_model.optimizer.lr)
        K.set_value(zit_model.optimizer.lr, lr * 0.1)
        print('lr change to {}'.format(lr * 0.1))
    return K.get_value(zit_model.optimizer.lr)


reduce_lr = LRS(scheduler)
def single_run(size, ds, AgentClass, **kwargs):
    game = Game(size, 2048)
    agent = AgentClass(game, display=Display(), **kwargs)
    agent.play(dataset=ds, verbose=False, train=1)
import numpy as np
import sys
sys.path.append("..")
from game2048.expectimax import board_to_move
from game2048.game import Game
from game2048.agents import ExpectiMaxAgent
from game2048.displays import Display
import csv, os

GAME_SIZE = 4
SCORE_TO_WIN = 2048
iter_num = 300

game = Game(GAME_SIZE, SCORE_TO_WIN)
board = game.board
agent = ExpectiMaxAgent(game, Display())
direction = agent.step()
board = game.move(direction)
i = 0
dic = {}
idx = 0

# ------------------------------------------------------
# Save each board and its direction to a dict.
# ------------------------------------------------------
filename = '/home/zhouykai/Workspace/MachinceLearning/Dataset_2048/Train.csv'
# filename = './Dataset/Train.csv'
if os.path.exists(filename):
    head = True
def single_run(board_data, move_data, size, score_to_win, AgentClass, **kwargs):
    game = Game(size, score_to_win)
    agent = AgentClass(board_data=board_data, move_data=move_data, game=game,
                       display=Display(), **kwargs)
    agent.play(verbose=True)
    return game.score
from game2048.game import Game
from game2048.displays import Display, IPythonDisplay
from game2048.agents import Agent, RandomAgent, ExpectiMaxAgent

for i in range(0, 1):
    game = Game(4, random=False, score_to_win=2048)
    agent = ExpectiMaxAgent(game)
    agent.play(verbose=False, save=True, savedir='data/2048.txt')
    print(str(i + 1) + "th game completed")
        if direction == -1:
            direction = agent.step()
            control = 'AGENT'
        game.move(direction)
        return jsonify({
            "board": game.board.tolist(),
            "score": game.score,
            "end": game.end,
            "direction": direction,
            "control": control,
        })

    return app


if __name__ == "__main__":
    GAME_SIZE = 4
    SCORE_TO_WIN = 2048
    APP_PORT = 5005
    APP_HOST = "0.0.0.0"

    from game2048.game import Game
    game = Game(size=GAME_SIZE, score_to_win=SCORE_TO_WIN)
    agent = MyAgent(game, display=None)
    print("Run the webapp at http://<any address for your local host>:%s/" % APP_PORT)
    app = get_flask_app(game, agent)
    # IMPORTANT: `threaded=False` to ensure correct behavior.
    app.run(port=APP_PORT, threaded=False, host=APP_HOST)
# # print(game.board)
# # print("direction: ", direction)
# for i in range(4):
#     for j in range(4):
#         # f.write(game.board[i, j])
#         print(game.board[i, j], file=f2)
# print(direction, file=f2)
# # f.write(direction)
# game.move(direction)
# # f.write('\n')

for i in range(300):
    print("i = ", i)
    game = Game(size=GAME_SIZE, score_to_win=SCORE_TO_WIN0)
    agent = ExpectiMaxAgent(game=game)
    while True:
        direction = agent.step()
        if game.end != 0:
            break
        # print(game.board)
        # print("direction: ", direction)
        # Only record early-game boards (max tile below 256).
        if game.board.max() < 256:
            for r in range(4):  # renamed from `i` to avoid shadowing the game counter
                for c in range(4):
                    # f.write(game.board[r, c])
                    print(game.board[r, c], file=f1)
            print(direction, file=f1)
    def __init__(self, game, display=None):
        super().__init__(game, display)
        # Keep a scratch game whose board can be rewritten for simulations.
        self.testgame = Game(4, random=False)
        self.testgame.enable_rewrite_board = True
import random
import time
from collections import namedtuple

import keras
import numpy as np
import tensorflow as tf
from keras.models import load_model

from game2048.displays import Display
from game2048.game import Game
from game2048.MYADDagent import MyOwnAgent as TestAgent
from MYADDini import grid_ohe
from MYADDmodel import model
from MYADDtrain import ModelWrapper, Guides

mygame = Game(4, 2048)
mymodel = model = load_model("logs/iris_model.h5")
myTrain = ModelWrapper(mymodel, 100000)
ohe_board = grid_ohe(mygame.board)

# Time a single forward pass through the model.
a = time.time()
direction = myTrain.predict(ohe_board).argmax()
b = time.time()
print("time is %f" % (b - a))
def single_run(size, score_to_win, AgentClass, sess):
    game = Game(size, score_to_win)
    agent = AgentClass(game, display=Display(), sess=sess)
    agent.build()
    agent.play(verbose=True)
    return game.score
    # Tail of the ModelWrapper.train(batch) method; `guides` and `X` are set
    # just above this excerpt.
    Y = []
    for guide in guides:
        X.append(guide.state)
        ohe_action = [0] * 4
        ohe_action[guide.action] = 1
        Y.append(ohe_action)
    loss, acc = self.model.train_on_batch(np.array(X), np.array(Y))
    print('#%d \t loss:%.3f \t acc:%.3f'
          % (self.trainning_step, float(loss), float(acc)))
    self.trainning_step += 1


MEMORY = 262144
BATCH = 16384
model = keras.models.load_model('best/model.h5')
mw = ModelWrapper(model, MEMORY)
while True:
    game = Game(4, random=False)
    while not game.end:
        mw.move(game)
    print('score:', game.score, end='\t')
    mw.train(BATCH)
    if mw.trainning_step % 10 == 0:
        model.save('best/model.h5')
    if mw.trainning_step % 1000 == 0:
        model.save('best/model_%d.h5' % mw.trainning_step)
def single_run(size, score_to_win, AgentClass, **kwargs):
    game = Game(size, score_to_win)
    agent = AgentClass(game, display=Display(), **kwargs)
    agent.play(verbose=True)
    return game.score
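# A sketch of the usual evaluation harness around single_run; N_TESTS and the
# choice of ExpectiMaxAgent (importable from game2048.agents, per the imports
# elsewhere in this corpus) are assumptions.
if __name__ == '__main__':
    GAME_SIZE = 4
    SCORE_TO_WIN = 2048
    N_TESTS = 50

    scores = []
    for _ in range(N_TESTS):
        score = single_run(GAME_SIZE, SCORE_TO_WIN, AgentClass=ExpectiMaxAgent)
        scores.append(score)
    print("Average score over %d runs:" % N_TESTS, sum(scores) / len(scores))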
from game2048.game import Game
from game2048.agents import GenerateAgent

size = 4
score_to_win = 2048
epoch = 5000

for i in range(epoch):
    game = Game(size, score_to_win)
    agent = GenerateAgent(game, display=None, direction='./dataSet/')
    agent.play(verbose=True)