def chess_worker(connection, args):
    """Play chess games in an endless loop and stream training data.

    For every move the worker either asks the peer on ``connection`` for a
    move (a ``'board'`` message carrying up to the last eight positions,
    answered with the move to play) or, when ``random.random()`` does not
    exceed ``args.gamma``, picks a uniformly random legal move.

    After each game the recorded positions and a reward vector are passed
    through ``build_input`` and sent as a ``'data'`` message, followed by
    an ``'end'`` message.

    Args:
        connection: Object exposing ``send`` and ``recv``.
        args: Object exposing ``gamma`` (threshold for random moves) and
            ``history`` (forwarded to ``build_input``).
    """
    while True:
        board = chess.Board()
        # Store a snapshot, not the live board: ``push`` mutates ``board``
        # in place, so keeping the object itself would make the first
        # history entry silently track the current position.
        boards = [board.copy()]

        while not board.is_game_over(claim_draw=True):
            if random.random() > args.gamma:
                connection.send({'type': 'board', 'boards': boards[-8:]})
                move = connection.recv()
            else:
                move = random.choice(list(board.legal_moves))
            board.push(move)
            boards.append(board.copy())

        # One reward per move played, for decisive and drawn games alike.
        num_moves = len(boards) - 1
        if board.result() in ('1-0', '0-1'):
            # Alternating 0/1 pattern, reversed below so that it is
            # anchored at the end of the game.
            rewards = [i % 2 for i in range(num_moves)]
        else:
            rewards = [0] * num_moves
        rewards = np.expand_dims(np.array(rewards[::-1]), 1)

        in_boards, in_rewards = build_input(boards, rewards, args.history)
        connection.send({
            'type': 'data',
            'inp': in_boards,
            'rewards': in_rewards,
        })
        # NOTE(review): the original formatting was collapsed; 'end' is
        # assumed to be sent once per game, right after the data message.
        # Confirm against the receiving side.
        connection.send({'type': 'end'})
def chess_worker(connection, args):
    """Play games from random positions and send bootstrapped targets.

    Each game starts from ``random_board(steps)`` where ``steps`` is a
    Poisson draw with mean ``args.lam``, capped at 100.  Before every move
    the worker sends an ``'eval'`` message with the retained positions and
    stores the reply as the value estimate for that step.  The move itself
    is requested through a ``'board'`` message, or chosen uniformly at
    random when ``random.random()`` does not exceed ``args.gamma``.

    Only the eight most recent positions are retained.  After the game the
    targets are built as ``rewards[i] + vals[i + 2]`` for all but the last
    two retained entries, followed by the last two rewards, and sent with
    the encoded positions as a ``'data'`` message, followed by ``'end'``.

    Args:
        connection: Object exposing ``send`` and ``recv``.
        args: Object exposing ``lam``, ``gamma`` and ``history``.

    Raises:
        IndexError: If a game lasts fewer than eight moves (see the
            review note in the body).
    """
    while True:
        steps = min(np.random.poisson(args.lam), 100)
        board = random_board(steps)
        vals = []
        rewards = []
        boards = [copy.deepcopy(board)]

        while not board.is_game_over(claim_draw=True):
            connection.send({'type': 'eval', 'boards': boards[-8:]})
            vals.append(connection.recv())

            if random.random() > args.gamma:
                connection.send({'type': 'board', 'boards': boards[-8:]})
                move = connection.recv()
            else:
                move = random.choice(list(board.legal_moves))

            rewards.append(0)
            board.push(move)
            boards.append(copy.deepcopy(board))
            # Keep a sliding window of at most eight positions.
            if len(boards) > 8:
                boards.pop(0)

        if board.result() in ('1-0', '0-1'):
            rewards[-1] = 1
            rewards[-2] = -1

        # NOTE(review): ``rewards`` and ``vals`` hold one entry per move of
        # the whole game, while ``boards`` only holds the last eight
        # positions.  Indexing them from 0 therefore pairs the retained
        # positions with the earliest moves, and any game shorter than
        # eight moves indexes past the end of ``vals`` or ``rewards``.
        # The intended alignment should be confirmed before changing this.
        targets = [rewards[i] + vals[i + 2] for i in range(len(boards) - 2)]
        targets.append(rewards[-2])
        targets.append(rewards[-1])
        targets = np.expand_dims(np.array(targets), 1)

        in_boards, in_targets = build_input(boards, targets, args.history)
        connection.send({
            'type': 'data',
            'inp': in_boards,
            'rewards': in_targets,
        })
        # NOTE(review): the original formatting was collapsed; 'end' is
        # assumed to be sent once per game, right after the data message.
        # Confirm against the receiving side.
        connection.send({'type': 'end'})
def chess_worker(connection, args):
    """Generate games with a UCI engine from random starting positions.

    The engine executable is taken from the ``SFSH`` environment variable.
    Each game starts from ``random_board(50)``.  At move number ``t``
    (counted from 0) a uniformly random legal move is played when
    ``random.random()`` does not exceed
    ``args.lam * exp(-args.lam * t)``; otherwise the engine chooses.

    At most the last eleven positions of a game are kept.  They are
    encoded with ``build_input`` together with one reward per transition
    and sent as a ``'data'`` message, followed by ``'end'``.

    Args:
        connection: Object exposing ``send``.
        args: Object exposing ``lam`` and ``history``.

    Raises:
        KeyError: If the ``SFSH`` environment variable is not set.
    """
    engine = chess.uci.popen_engine(os.environ['SFSH'])
    engine.uci()
    try:
        while True:
            board = random_board(50)
            boards = [copy.deepcopy(board)]
            ply = 0

            while not board.is_game_over(claim_draw=True):
                # Chance of a random move decays exponentially with ply.
                random_threshold = args.lam * np.exp(-args.lam * ply)
                if random.random() > random_threshold:
                    engine.position(board)
                    move, _ = engine.go()
                else:
                    move = random.choice(list(board.legal_moves))
                board.push(move)
                boards.append(copy.deepcopy(board))
                ply += 1

            size = min(len(boards), 11)
            if board.result() in ('1-0', '0-1'):
                # Alternating 0/1 pattern anchored at the end of the game;
                # both decisive results use the same pattern.
                rewards = [i % 2 for i in range(size - 1)][::-1]
            else:
                rewards = [0] * (size - 1)

            boards = boards[-size:]
            rewards = np.expand_dims(np.array(rewards), 1)

            in_boards, in_targets = build_input(boards, rewards, args.history)
            connection.send({
                'type': 'data',
                'inp': in_boards,
                'rewards': in_targets,
            })
            # NOTE(review): the original formatting was collapsed; 'end'
            # is assumed to be sent once per game, right after the data
            # message.  Confirm against the receiving side.
            connection.send({'type': 'end'})
    finally:
        # Do not leave the engine subprocess running if the loop is
        # aborted by an exception (e.g. a closed connection).
        engine.quit()
def chess_worker(connection, args):
    """Play the remote policy against a UCI engine and send the games.

    The engine executable is taken from the ``SFSH`` environment variable
    and configured with ``Skill Level`` 1.  Every game starts from the
    standard position.  The side moving first asks the peer on
    ``connection`` for its move (a ``'board'`` message carrying up to the
    last eight positions); the other side is played by the engine.

    After each game all positions and one reward per move are encoded
    with ``build_input`` and sent as a ``'data'`` message, followed by
    ``'end'``.

    Args:
        connection: Object exposing ``send`` and ``recv``.
        args: Object exposing ``history`` (forwarded to ``build_input``).

    Raises:
        KeyError: If the ``SFSH`` environment variable is not set.
    """
    engine = chess.uci.popen_engine(os.environ['SFSH'])
    # The UCI handshake has to come first; options are set afterwards.
    engine.uci()
    engine.setoption({'Skill Level': 1})
    try:
        while True:
            board = chess.Board()
            boards = [copy.deepcopy(board)]
            policy_to_move = True

            while not board.is_game_over(claim_draw=True):
                if policy_to_move:
                    connection.send({'type': 'board', 'boards': boards[-8:]})
                    move = connection.recv()
                else:
                    engine.position(board)
                    move, _ = engine.go()
                board.push(move)
                boards.append(copy.deepcopy(board))
                policy_to_move = not policy_to_move

            size = len(boards) - 1
            if board.result() in ('1-0', '0-1'):
                # Alternating 0/1 pattern anchored at the end of the game;
                # both decisive results use the same pattern.
                rewards = [i % 2 for i in range(size)][::-1]
            else:
                rewards = [0] * size
            rewards = np.expand_dims(np.array(rewards), 1)

            in_boards, in_targets = build_input(boards, rewards, args.history)
            connection.send({
                'type': 'data',
                'inp': in_boards,
                'rewards': in_targets,
            })
            # NOTE(review): the original formatting was collapsed; 'end'
            # is assumed to be sent once per game, right after the data
            # message.  Confirm against the receiving side.
            connection.send({'type': 'end'})
    finally:
        # Do not leave the engine subprocess running if the loop is
        # aborted by an exception (e.g. a closed connection).
        engine.quit()
def chess_worker(connection, args):
    """Play ``TicTacToe`` games in an endless loop and send training data.

    A game runs while ``check_win()`` returns ``None``.  For every move
    the worker either asks the peer on ``connection`` (a ``'board'``
    message carrying up to the last eight states) or, when
    ``random.random()`` does not exceed ``args.gamma``, picks a random
    entry of ``game.legal_moves``.

    When ``check_win()`` finally returns a non-zero value the rewards
    alternate 1/0 counted backwards from the last move; a result of ``0``
    yields all-zero rewards.  States and rewards are encoded with
    ``build_input`` and sent as a ``'data'`` message, followed by
    ``'end'``.

    Args:
        connection: Object exposing ``send`` and ``recv``.
        args: Object exposing ``gamma`` and ``history``.
    """
    while True:
        game = TicTacToe()
        boards = [copy.deepcopy(game)]

        while game.check_win() is None:
            if random.random() > args.gamma:
                connection.send({'type': 'board', 'boards': boards[-8:]})
                move = connection.recv()
            else:
                move = random.choice(game.legal_moves)
            # ``push`` hands back the successor state; keep using that.
            game = game.push(move)
            boards.append(copy.deepcopy(game))

        outcome = game.check_win()
        num_moves = len(boards) - 1
        if outcome != 0:
            # Reversed so the final move is the one that receives 1.
            rewards = [1 if i % 2 == 0 else 0 for i in range(num_moves)][::-1]
        else:
            rewards = [0] * num_moves
        rewards = np.expand_dims(np.array(rewards), 1)

        in_boards, in_targets = build_input(boards, rewards, args.history)
        connection.send({
            'type': 'data',
            'inp': in_boards,
            'rewards': in_targets,
        })
        # NOTE(review): the original formatting was collapsed; 'end' is
        # assumed to be sent once per game, right after the data message.
        # Confirm against the receiving side.
        connection.send({'type': 'end'})
def chess_worker(connection, args):
    """Play chess games in an endless loop and send signed rewards.

    For every move the worker either asks the peer on ``connection`` (a
    ``'board'`` message carrying up to the last eight recorded positions)
    or, when ``random.random()`` does not exceed ``args.gamma``, picks a
    uniformly random legal move.  Only positions reached *after* a move
    are recorded; the starting position is not.

    Rewards alternate in sign along the recorded positions: for ``'1-0'``
    even indices get -1 and odd indices +1, for ``'0-1'`` the signs are
    swapped, and any other result yields zeros.  Positions and rewards
    are encoded with ``build_input`` and sent as a ``'data'`` message,
    followed by ``'end'``.

    Args:
        connection: Object exposing ``send`` and ``recv``.
        args: Object exposing ``gamma`` and ``history``.
    """
    while True:
        board = chess.Board()
        # NOTE(review): the history starts empty, so the first 'board'
        # request of each game carries an empty list.  Confirm that the
        # receiver copes with that.
        boards = []

        while not board.is_game_over(claim_draw=True):
            if random.random() > args.gamma:
                connection.send({'type': 'board', 'boards': boards[-8:]})
                move = connection.recv()
            else:
                move = random.choice(list(board.legal_moves))
            board.push(move)
            boards.append(copy.copy(board))

        result = board.result()
        if result == '1-0':
            even_reward = -1
        elif result == '0-1':
            even_reward = 1
        else:
            even_reward = 0
        rewards = [
            even_reward if i % 2 == 0 else -even_reward
            for i in range(len(boards))
        ]
        rewards = np.expand_dims(np.array(rewards), 1)

        in_boards, in_rewards = build_input(boards, rewards, args.history)
        connection.send({
            'type': 'data',
            'inp': in_boards,
            'rewards': in_rewards,
        })
        # NOTE(review): the original formatting was collapsed; 'end' is
        # assumed to be sent once per game, right after the data message.
        # Confirm against the receiving side.
        connection.send({'type': 'end'})