def main():
    """Worker entry point: unpack the input bundle from the master process,
    play a batch of Gomoku games between two DNN-backed players, then save
    the accumulated learndata into output.tar.gz for collection.
    """
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--n_games', type=int, default=100,
                        help='Number of games to play.')
    args = parser.parse_args()
    game = Gomoku(board_size=15, first_center=False)
    # unpack the input bundle shipped by the master process
    import tarfile
    with tarfile.open('input.tar.gz') as tar:
        # NOTE(review): extractall on an archive — assumed to come from the
        # trusted master process, not untrusted input; confirm before reuse
        tar.extractall()
    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('tf_model/tf_model')
    import player_A, player_B
    # both players share the same evaluation network
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # seed each strategy with the learndata shipped from the master
    # (context managers so the file handles are closed promptly)
    with open('black.learndata', 'rb') as f:
        p1.strategy.learndata = pickle.load(f)
    with open('white.learndata', 'rb') as f:
        p2.strategy.learndata = pickle.load(f)
    game.players = [p1, p2]
    if args.n_games > 1:
        game.fastmode = 2

    def playone(i, game_output, winner_board):
        # play a single game, tally the winner and log the result
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
    winner_board["Draw"] = 0
    with open('game_results.txt', 'w') as game_output:
        # range (not py2-only xrange) for py2/py3 compatibility
        for i_game in range(args.n_games):
            playone(i_game, game_output, winner_board)
    print("Name | Games Won")
    for name, nwin in winner_board.items():
        print("%-7s | %7d" % (name, nwin))
    # save the learndata to disk
    with open('newblack.learndata', 'wb') as f:
        pickle.dump(p1.strategy.learndata, f)
    with open('newwhite.learndata', 'wb') as f:
        pickle.dump(p2.strategy.learndata, f)
    print('%d black learndata and %d white learndata saved!' %
          (len(p1.strategy.learndata), len(p2.strategy.learndata)))
    with tarfile.open('output.tar.gz', 'w:gz') as tar:
        tar.add('newblack.learndata')
        tar.add('newwhite.learndata')
def initialize():
    """Lazily set up per-process state attached to module-level functions:
    zobrist hash tables for position caching, the move-interest buffer,
    the persisted learndata dict, the board-state tensor, and the TF model.

    Safe to call repeatedly — each piece is created only if missing.
    """
    color = 'black' if strategy.playing == 0 else 'white'
    # initialize zobrist for u caching
    if not hasattr(strategy, 'zobrist_me'):
        # fixed seed so every process generates the identical zobrist tables
        np.random.seed(19890328)  # use the same random matrix for storing
        strategy.zobrist_me = np.random.randint(
            np.iinfo(np.int64).max,
            size=board_size**2).reshape(board_size, board_size)
        strategy.zobrist_opponent = np.random.randint(
            np.iinfo(np.int64).max,
            size=board_size**2).reshape(board_size, board_size)
        #strategy.zobrist_code = np.random.randint(np.iinfo(np.int64).max)
        # reset the random seed to random for other functions
        np.random.seed()
    if not hasattr(best_action_q, 'move_interest_values'):
        best_action_q.move_interest_values = np.zeros(
            board_size**2, dtype=np.float32).reshape(board_size, board_size)
    if not hasattr(strategy, 'learndata'):
        filename = color + '.learndata'
        if os.path.exists(filename):
            # context manager so the file handle is closed (was left open)
            with open(filename, 'rb') as f:
                strategy.learndata = pickle.load(f)
            print("Successfully loaded %d previously saved learndata"
                  % len(strategy.learndata))
        else:
            strategy.learndata = dict()
    if not hasattr(tf_predict_u, 'tf_state'):
        # board_size x board_size x 5 input tensor; plane 3 starts all ones
        tf_predict_u.tf_state = np.zeros(
            board_size**2 * 5, dtype=np.int32).reshape(board_size, board_size, 5)
        tf_predict_u.tf_state[:, :, 3] = 1
        #tf_predict_u.tf_state[4, :, :] = 1 if color == 'black' else 0
    if not hasattr(tf_predict_u, 'model'):
        tflearn.init_graph(num_cores=4, gpu_memory_fraction=0.5)
        import construct_dnn
        model = construct_dnn.construct_dnn()
        model.load('tf_model')
        tf_predict_u.model = model
def main():
    """Play a batch of self-play games between player_0 and player_1, then
    fine-tune the TF model on the collected learndata and save it.
    """
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    args = parser.parse_args()
    game = Gomoku(board_size=15, first_center=False)
    import player_0, player_1
    player_0.initialize()
    player_1.initialize()
    p1 = Player('Black')
    p1.strategy = player_0.strategy
    p2 = Player('White')
    p2.strategy = player_1.strategy
    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2

    def playone(i, game_output, winner_board):
        # play a single game, tally the winner and log the result
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    # play the games
    winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
    winner_board["Draw"] = 0
    with open('game_results.txt', 'w') as game_output:
        # range (not py2-only xrange) for py2/py3 compatibility
        for i_game in range(args.train_step):
            playone(i_game, game_output, winner_board)
    print("Name | Games Won")
    for name, nwin in winner_board.items():
        print("%-7s | %7d" % (name, nwin))
    # collect training data
    train_X, train_Y = prepare_train_data(p1.strategy.learndata,
                                          p2.strategy.learndata)
    # fit the tf model
    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('initial_model/tf_model')
    model.fit(train_X, train_Y, n_epoch=100, validation_set=0.1,
              show_metric=True)
    model.save('tf_model')
    print("New model saved!")
def main():
    """Benchmark the TF-based player against the classic ai_bench player
    over a number of games, printing a win tally at the end.
    """
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_game', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    args = parser.parse_args()
    game = Gomoku(board_size=15, first_center=False)
    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('bench_model/tf_model')
    #model.load('trained_model_001/trained_model_001')
    #model.load('trained_model_002/trained_model_002')
    #model.load('trained_model_003/trained_model_003')
    #model.load('trained_model_007/trained_model_007')
    import player_A
    player_A.tf_predict_u.model = model
    player_A.initialize()
    p1 = Player('TF')
    p1.strategy = player_A.strategy
    player_A.estimate_level = 1
    player_A.t_random = 0.01
    import ai_bench
    ai_bench.initialize()
    p2 = Player('BenchAI')
    p2.strategy = ai_bench.strategy
    ai_bench.estimate_level = 4
    ai_bench.t_random = 0.001
    # benchmark AI moves first (plays black)
    game.players = [p2, p1]
    if args.n_game > 1:
        game.fastmode = 2
    else:
        # single game: show the q values for inspection
        player_A.show_q = ai_bench.show_q = True

    def playone(i, game_output, winner_board):
        # play a single game, tally the winner and log the result
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
    winner_board["Draw"] = 0
    with open('game_results.txt', 'w') as game_output:
        # range (not py2-only xrange) for py2/py3 compatibility
        for i_game in range(args.n_game):
            playone(i_game, game_output, winner_board)
            # drop accumulated learndata after each game to bound memory
            p1.strategy.learndata = dict()
    print("Name | Games Won")
    for name, nwin in winner_board.items():
        print("%-7s | %7d" % (name, nwin))
def initialize():
    """Lazily set up per-process state attached to module-level functions:
    black/white zobrist tables, the move-interest buffer, persisted
    learndata, the interest-state tensor, the prediction cache, and the
    TF model (loaded relative to this file's directory).

    Safe to call repeatedly — each piece is created only if missing.
    """
    # initialize zobrist for u caching
    if not hasattr(strategy, 'zobrist_me'):
        # fixed seed so every process generates the identical zobrist tables
        np.random.seed(2018)  # use the same random matrix for storing
        strategy.zobrist_black = np.random.randint(
            np.iinfo(np.int64).max,
            size=board_size**2).reshape(board_size, board_size)
        strategy.zobrist_white = np.random.randint(
            np.iinfo(np.int64).max,
            size=board_size**2).reshape(board_size, board_size)
        #strategy.zobrist_code = np.random.randint(np.iinfo(np.int64).max)
        # reset the random seed to random for other functions
        np.random.seed()
    if not hasattr(best_action_q, 'move_interest_values'):
        best_action_q.move_interest_values = np.zeros(
            board_size**2, dtype=np.float32).reshape(board_size, board_size)
    if not hasattr(strategy, 'learndata'):
        if os.path.isfile('strategy.learndata'):
            # context manager so the file handle is closed (was left open)
            with open('strategy.learndata', 'rb') as f:
                strategy.learndata = pickle.load(f)
            print("strategy.learndata found, loaded %d data"
                  % len(strategy.learndata))
        else:
            strategy.learndata = dict()
            # NOTE(review): reconstructed from collapsed source — this flag
            # appears to be set only when starting with an empty learndata;
            # confirm against the original formatting
            strategy.started_from_beginning = False
    if not hasattr(tf_predict_u, 'all_interest_states'):
        # one candidate board state (board x board x 3) per board position
        tf_predict_u.all_interest_states = np.zeros(
            board_size**4 * 3, dtype=np.int8).reshape(
                board_size**2, board_size, board_size, 3)
    if not hasattr(tf_predict_u, 'cache'):
        if os.path.isfile('tf_predict_u.cache'):
            with open("tf_predict_u.cache", 'rb') as f:
                tf_predict_u.cache = pickle.load(f)
            print("tf_predict_u.cache found, loaded %d cache"
                  % len(tf_predict_u.cache))
        else:
            tf_predict_u.cache = dict()
    if not hasattr(tf_predict_u, 'model'):
        tflearn.init_graph(num_cores=4, gpu_memory_fraction=0.3)
        import construct_dnn
        model = construct_dnn.construct_dnn()
        # resolve the model path relative to this source file, so the
        # module works regardless of the current working directory
        path = os.path.realpath(__file__)
        folder = os.path.dirname(path)
        model.load(os.path.join(folder, 'tf_model'))
        tf_predict_u.model = model
# spot-check the model: compare predictions against reference labels for a
# small slice of the training set (h5f is assumed to be an open HDF5 file
# with 'train_X' / 'train_Y' datasets — defined elsewhere in this script)
testx = h5f['train_X'][500:520]
testy = h5f['train_Y'][500:520]


def draw_state(state):
    """Pretty-print one board state tensor as an ASCII 15x15 board.

    `state` is indexed [row, col, plane]; plane 0 = current player's
    stones, plane 1 = opponent's stones, plane 4 = color flag
    (1 means the current player is shown as 'x').
    """
    board_size = 15
    # column header a..o, then a separator line
    print(' ' * 4 + ' '.join([chr(97 + i) for i in range(board_size)]))
    print(' ' * 3 + '=' * (2 * board_size))
    me = state[0, 0, 4]
    for x in range(1, board_size + 1):
        row = ['%2s|' % x]
        for y in range(1, board_size + 1):
            if state[x - 1, y - 1, 0] == 1:
                c = 'x' if me == 1 else 'o'
            elif state[x - 1, y - 1, 1] == 1:
                c = 'o' if me == 1 else 'x'
            else:
                c = '-'
            row.append(c)
        print(' '.join(row))


import construct_dnn
model = construct_dnn.construct_dnn()
model.load('tf_model')
py = model.predict(testx)
for i in range(len(testx)):
    draw_state(testx[i])
    print("ref y : %.5f" % testy[i][0])
    print("predicted y : %.5f" % py[i][0])
def main():
    """Iteratively self-train the model: for each iteration, play a batch of
    games in a fresh trained_model_NNN folder, then fit the model on the
    collected learndata and save it there.
    """
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help="Number of epochs for each training model")
    args = parser.parse_args()
    game = Gomoku(board_size=15, first_center=False)
    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('initial_model/tf_model')
    import player_A, player_B
    # both players share the same evaluation network
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        # single game per step: show the q values for inspection
        player_A.show_q = player_B.show_q = True

    def playone(i, game_output, winner_board):
        # play a single game, tally the winner and log the result
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)
    # range (not py2-only xrange) for py2/py3 compatibility
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(model_name), \
            "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict(
            [(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata,
                                              p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X, train_Y, n_epoch=args.n_epoch,
                  validation_set=0.1, shuffle=True, show_metric=True)
        model.save('tf_model')
        print("Model %s saved!" % model_name)
        os.chdir('..')
def main():
    """Iteratively self-train the model, optionally seeding each game with a
    randomized opening board drawn from a begin-board library.
    """
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help="Number of epochs for each training model")
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0,
                        help='Possibility of begin lib to be used')
    parser.add_argument('--new', action='store_true',
                        help='Start from new model')
    args = parser.parse_args()
    game = Gomoku(board_size=15, first_center=False)
    import construct_dnn
    model = construct_dnn.construct_dnn()
    if not args.new:
        # continue from the previously trained weights
        model.load('initial_model/tf_model')
    import player_A, player_B
    # both players share the same evaluation network
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        # single game per step: show the q values for inspection
        player_A.show_q = player_B.show_q = True
    # all board coordinates, used when generating a starting board
    allstones = set([(r, c) for r in range(1, 16) for c in range(1, 16)])
    # identity check with `is not None` (was `!= None`)
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board):
        # play a single game from a (possibly library-seeded) opening board
        game.reset()
        player_A.reset()
        player_B.reset()
        if random.random() < args.begin_lib_p:
            game.board = gen_begin_board(allstones, begin_lib)
        else:
            game.board = gen_begin_board(allstones, None)
        # randomly assign a black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(model_name), \
            "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict(
            [(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata,
                                              p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        #model.trainer.training_state.step = 0 # reset the training step so lr_decay is reset
        model.fit(train_X, train_Y, n_epoch=args.n_epoch,
                  validation_set=0.1, shuffle=True, show_metric=True)
        model.save('tf_model')
        print("Model %s saved!" % model_name)
        os.chdir('..')
def main():
    """Distributed training driver: for each iteration, run several batches
    of worker jobs through a work queue (each worker plays games and returns
    learndata), merge the workers' learndata, then fit and save a new model.
    """
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-b', '--n_batches', type=int, default=10,
                        help='Number of batches of games to play in each iteration.')
    parser.add_argument('-w', '--n_workers', type=int, default=30,
                        help='Number of workers to use in each batch.')
    parser.add_argument('-g', '--worker_games', type=int, default=100,
                        help='Number of games each worker play each time before returning data.')
    parser.add_argument('-p', '--wq_port', type=int, default=50123,
                        help='Port to use in work queue.')
    args = parser.parse_args()
    createWorkQueue(args.wq_port, name='gtrain')
    wq = getWorkQueue()
    cmdstr = 'python gomoku_worker.py -g %d' % args.worker_games
    input_files = ['construct_dnn.py', 'player_A.py', 'player_B.py']
    print("Training the model for %d iterations, each will run %d batches of games."
          % (args.n_train, args.n_batches))
    model_name = 'initial_model'  # we start with tf_model saved in initial_model
    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load(os.path.join(model_name, 'tf_model', 'tf_model'))
    # range (not py2-only xrange) for py2/py3 compatibility
    for i_train in range(args.n_train):
        prev_model_name = model_name
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        if os.path.exists(model_name):
            backup_name = model_name + '_backup'
            if os.path.exists(backup_name):
                shutil.rmtree(backup_name)
            shutil.move(model_name, backup_name)
            print("Current model %s already exists, backed up to %s"
                  % (model_name, backup_name))
        # create black_learndata and white_learndata dict
        black_learndata, white_learndata = dict(), dict()
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        for i_batch in range(args.n_batches):
            print("Batch %d, launching %d workers, each play %d games, then update strategy.learndata"
                  % (i_batch, args.n_workers, args.worker_games))
            batch_name = "batch_%03d" % i_batch
            os.mkdir(batch_name)
            os.chdir(batch_name)
            # snapshot the current learndata for the workers to start from
            # (context managers so the file handles are closed promptly)
            with open('black.learndata', 'wb') as f:
                pickle.dump(black_learndata, f)
            with open('white.learndata', 'wb') as f:
                pickle.dump(white_learndata, f)
            # put all input files in a tar.gz file for transfer
            with tarfile.open("input.tar.gz", "w:gz") as tar:
                for fname in input_files:
                    tar.add("../../" + fname, arcname=fname)
                tar.add('black.learndata')
                tar.add('white.learndata')
                # add the previous tf model to the input files, rename to tf_model
                tar.add(os.path.join('../..', prev_model_name, 'tf_model'),
                        arcname='tf_model')
            for i_worker in range(args.n_workers):
                worker_name = "worker_%03d" % i_worker
                os.mkdir(worker_name)
                os.chdir(worker_name)
                LinkFile('../input.tar.gz', 'input.tar.gz')
                LinkFile('../../../gomoku_worker.py', 'gomoku_worker.py')
                queue_up(wq, command=cmdstr,
                         input_files=['input.tar.gz', 'gomoku_worker.py'],
                         output_files=['output.tar.gz'])
                os.chdir('..')
            # after all workers in this batch finish, collect and update strategy.learndata
            wq_wait(wq)
            print("All workers finished! Extracting and updating learndata files.")
            black_learndata = dict()
            white_learndata = dict()
            for i_worker in range(args.n_workers):
                worker_name = "worker_%03d" % i_worker
                os.chdir(worker_name)
                with tarfile.open("output.tar.gz") as tar:
                    tar.extractall()
                # open in binary mode: pickle data is binary (py3 requires 'rb')
                with open('newblack.learndata', 'rb') as f:
                    newblack_learndata = pickle.load(f)
                print("%d new black learndata loaded from %s"
                      % (len(newblack_learndata), worker_name))
                update_learn_data(black_learndata, newblack_learndata)
                print("black.learndata updated to %d data" % len(black_learndata))
                with open('newwhite.learndata', 'rb') as f:
                    newwhite_learndata = pickle.load(f)
                print("%d new white learndata loaded from %s"
                      % (len(newwhite_learndata), worker_name))
                update_learn_data(white_learndata, newwhite_learndata)
                print("white.learndata updated to %d data" % len(white_learndata))
                os.chdir('..')
            os.chdir('..')
        # when all batches of games finished, the final learndata should be used to train a model
        train_X, train_Y = prepare_train_data(black_learndata, white_learndata)
        # fit the tf model
        model.fit(train_X, train_Y, n_epoch=10, validation_set=0.1,
                  show_metric=True)
        os.mkdir('tf_model')
        model.save('tf_model/tf_model')
        print("\n ---=== Model %s saved! ===---" % model_name)
        # finished current model, goto next iteration
        os.chdir("..")
# clean up per-hidden-layer CSV outputs left over from a previous run
# (plain range instead of np.arange: we only need a loop counter)
for i in range(int(cant_ejecucion) + 1):
    name = 'data/perc_hidden' + str(i) + '.csv'
    if os.path.isfile(name):
        os.remove(name)
# remove the summary files from the previous configuration
delete_data_resume(cant_ejecucion)
# remove the data files from the previous run
delete_data(cant_ejecucion)
# run the network N times
for i in range(int(cant_ejecucion)):
    print('ejecucion nro ::::::::::: ' + str(i))
    construct_dnn(X, encoded_Y, int(cant_input), cant_capas, cant_neuronas,
                  cant_epochs, int(batch_size), activations, optimizer, loss,
                  dropout, intervalo, X_test, Y_test)
    # NOTE(review): reconstructed from collapsed source — the per-epoch
    # activation dump appears to run once per execution; confirm nesting
    # against the original formatting
    for epoch in range(10, (cant_epochs + intervalo), intervalo):
        fields = [('epochs'), str(epoch)]
        with open('data/resume/resume.csv', 'a') as f:
            writer = csv.writer(f)
            writer.writerow(fields)
        write_activation(cant_capas, cant_neuronas, cant_input, epoch,
                         X, Y, activations)
# aggregate the per-run CSV columns into totals
sum_columns('data/perc_hidden1')
sum_columns('data/perc_hidden0')
sum_columns('data/dnn_accuracy')
delete_data(cant_ejecucion)
resume_function(cant_ejecucion, config['ints']['cant_capas'])
def main():
    """Screen-automation bot that plays Gomoku on playok.com: watches the
    game board via screenshots, clicks the start button, plays moves with
    the AI strategy, and lowers the AI level as the clock runs down.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Player Gomoku on playok.com',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-t', '--time', default=5, type=int,
                        help='Time limit in minutes')
    parser.add_argument('-l', '--level', default=3, type=int,
                        help='Estimate Level')
    parser.add_argument('-d', '--detect', default=False, action='store_true',
                        help='Detect game board at beginning')
    args = parser.parse_args()
    if args.detect:
        # detect the game board
        print("Detecting the game board...")
        x1, y1, x2, y2 = detect_board_edge()
    else:
        # hard-coded board rectangle for the usual window placement
        x1, y1, x2, y2 = (2186, 237, 3063, 1114)
    b2 = (3245, 300, 3315, 400)
    print("Set board in the square (%d,%d) -> (%d,%d)" % (x1, y1, x2, y2))
    print("Please do not move game window from now on.")
    scnshot = ScreenShot(border=(x1, y1, x2, y2))
    # 2nd scnshot for checking me playing
    scnshot2 = ScreenShot(border=b2)
    # load the AI player
    import construct_dnn
    import player_AI
    model = construct_dnn.construct_dnn()
    model.load('tf_model')
    player_AI.tf_predict_u.model = model
    player_AI.initialize()
    time_spent = 0
    total_time = args.time * 60
    # loop to play multiple steps
    while True:
        try:
            time.sleep(0.5)
            status = game_paused(scnshot)
            if status == -1:
                continue
            elif status == 1:
                time.sleep(1)
                # try to click the start button
                if click_start(scnshot):
                    # if game started, we check if we are the black first
                    time_spent = 0
                    player_AI.estimate_level = args.level
                    print("Game started with AI level = %d" % args.level)
            else:
                # NOTE(review): this branch reconstructed from collapsed
                # source as the status-chain else (game in progress);
                # confirm against the original formatting
                # check if i'm playing, will wait here if not
                playing = check_me_playing(scnshot2)
                if playing is not None:
                    _, _, state_playing, _ = read_game_state(scnshot)
                    if playing != state_playing:
                        print("Warning: The current player is not consistent!")
                        print("Rechecking state")
                        continue
                time_spent += play_one_move(scnshot, player_AI.strategy)
                # check how much time left
                time_left = total_time - time_spent
                print("Time Left: %02d:%02d " % divmod(time_left, 60))
                tdown2 = min(total_time * 0.6, 60)
                if time_left < tdown2 and player_AI.estimate_level > 2:
                    print("Switching to fast mode, AI level = 2")
                    player_AI.estimate_level = 2
                tdown1 = min(total_time * 0.3, 30)
                if time_left < tdown1 and player_AI.estimate_level > 1:
                    print("Switching to ultrafast mode, AI level = 1")
                    player_AI.estimate_level = 1
        except (KeyboardInterrupt, pyautogui.FailSafeException):
            new_total_time = input(
                "Stopped by user, enter new time limit in minutes, or enter to continue...")
            try:
                total_time = float(new_total_time) * 60
                print("New total time has been set to %.1f s" % total_time)
            except ValueError:
                # non-numeric / empty input: keep the current time limit
                # (narrowed from a bare except, which also hid Ctrl-C)
                pass