Code Example #1
File: player.py Project: xmgfx/doudizhu
def main():
	connection = connect()
	config.set_device(args.gpu)

	# while True:
	name = None
	while name is None:
		name = connection.recv()
	model = Model('model_' + name)
	model.forced_restore()
	# print(model)
	agent = Agent(model)

	simulations = None
	while simulations is None:
		simulations = connection.recv()
	simulations = int(simulations)
	config.simulations = simulations

	env = GameEnv()
	init = deal_init(connection)

	env.load(init)

	game = Game(env, args.player)
	game_start(agent, simulations, game, connection)
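For context, a minimal sketch of the other end of this worker protocol. Everything below is an assumption: connect(), deal_init(), and game_start() are not shown in the example, and the connection is treated here as one end of a multiprocessing.Pipe.

# Hedged sketch of the parent process feeding this worker (hypothetical).
from multiprocessing import Pipe

parent_conn, child_conn = Pipe()
parent_conn.send('201912080009')  # name suffix -> Model('model_201912080009')
parent_conn.send(400)             # simulation count; the worker casts it to int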
Code Example #2
    def __call__(self):
        if self.empty():
            return None

        file, init, player, p, v = self.data[self.iter]
        env = GameEnv(False)
        env.load(init)
        game = Game(env, player)

        self.iter += 1
        return game, p, v
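A hedged usage sketch for the __call__ above: the object behaves like a one-shot iterator that signals exhaustion by returning None. The surrounding class is not shown, so `loader` below is hypothetical.

# `loader` stands in for an instance of the class this __call__ belongs to.
while True:
    sample = loader()
    if sample is None:
        break  # dataset exhausted
    game, p, v = sample  # game state plus policy/value training targets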
Code Example #3
File: unpack.py Project: xmgfx/doudizhu
def unpack(init, ps, player):
    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    histories = []
    for action in ps:
        state = game.to_model_input()
        histories.append((histories[-1] if len(histories) else []) + [state])
        game.move(action)
    for i in range(len(histories)):
        histories[i] = np.array(histories[i], dtype=np.int8)
    return histories
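A usage sketch for unpack(), mirroring how it is called in Code Example #13 below: init is a 3x15 hand-card matrix and actions the recorded action sequence (variable names borrowed from that example).

# Mirrors the call in Code Example #13 (test.py):
histories = unpack(init, actions, player)
print(len(histories))       # one cumulative state history per action
print(histories[-1].shape)  # (len(actions), *model_input_shape)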
Code Example #4
File: ai.py Project: TimothyHyndman/tictactoe
def evaluate_candidate(candidate_player, reference_player):
    wins = 0
    draws = 0
    losses = 0

    # Always choose best move
    candidate_player.tryhard_mode = True

    for _ in range(400):
        env = GameEnv()
        current_player = candidate_player if random.random() < 0.5 else reference_player
        while True:
            move = current_player.select_move(env)
            env.play_move(*move)  # Current player flips
            env.check_win()
            if env.winner and current_player is candidate_player:
                wins += 1
                break
            elif env.winner and current_player is reference_player:
                losses += 1
                break
            elif env.draw:
                draws += 1
                break
            else:
                current_player = reference_player if current_player is candidate_player else candidate_player

    candidate_player.tryhard_mode = False

    return wins, draws, losses
Code Example #5
File: random_init.py Project: xmgfx/doudizhu
import os
import re
import shutil
import sys

if re.search('gen$', os.getcwd()):
    sys.path.append('..')

from game import Game
from game import GameEnv
import numpy as np
from utils import dump

path = 'train'

for cnt in range(30):
    env = GameEnv()
    games = [Game(env, player) for player in range(3)]
    datas = [[env.hand_cards(), None, None] for _ in range(3)]
    while not env.game_over():
        period = env.period()
        if period != 3:
            curr_player = env.curr_player()
            game = games[curr_player]
            action = np.random.choice(game.action_list())
            for game in games:
                game.move(action)
            env.move(action)
        else:
            for player, game in enumerate(games):
                response = env.response(player)
                game.move(response)
Code Example #6
File: test.py Project: xmgfx/doudizhu
def test_once():
	env = GameEnv()

	# cards = [[3, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0],
	# 		 [0, 2, 2, 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0],
	# 		 [0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 4, 0, 0]]
	# env.load(np.array(cards))
	# history = [355, 352, 352]
	# history = [355, 352, 352, 337, 350, 351, 109, 124, 0, 14]
	history = []
	
	games = [Game(env, 0), Game(env, 1), Game(env, 2)]
	print(games[0].period())
	
	for action in history:
		games[0].move(action)
		games[1].move(action)
		games[2].move(action)
		env.move(action)
	
	# print(games[0])
	# print(games[0].curr_player())
	# print(games[1])
	# print(games[1].curr_player())
	# print(games[2])
	# print(games[2].curr_player())
	# print(env)
	print('=================')
	
	# env.load(np.array(cards, dtype=np.int32))
	# print(games[0], games[1], games[2])
	print(env.bottom())
	
	while not env.game_over():
		period = env.period()
		curr_player = env.curr_player()
		print('-----------------------------------')
		print('period:', period)
		print('curr_player:', curr_player)
		
		if period == 3:  # bottom
			print('response:', env.response(0), env.response(1), env.response(2))
			print(games[0].action_list())
			print('remaining cards:')
			print(games[0].remaining_cards())
			print(games[1].remaining_cards())
			print(games[2].remaining_cards())
			games[0].move(env.response(0))
			games[1].move(env.response(1))
			games[2].move(env.response(2))
			print('bottom:')
			print(games[0].bottom())
			print(games[1].bottom())
			print(games[2].bottom())
			env.move(-1)
			print(env.hand_cards_num())
			model_input = games[0].to_model_input()
		# for channel in range(18):
		# 	print(channel)
		# 	print(model_input[channel])
		else:
			action_list = games[curr_player].action_list()
			action = np.random.choice(action_list)
			print('action_list:')
			print(games[0].action_list())
			print(games[1].action_list())
			print(games[2].action_list())
			action_c = Action(action)
			print('action:', action)
			print('', action_c.to_array())
			games[0].move(action)
			games[1].move(action)
			games[2].move(action)
			env.move(action)
			print(env.hand_cards_num())
			# if action_c.need_attach() or action_c.is_attach() or action_c.is_bottom() or action_c.is_bid():
			# 	model_input = games[0].to_model_input()
			# 	for channel in range(18):
			# 		print(channel)
			# 		print(model_input[channel])
		print(env)
		print(games[0].eval())
		print(games[1].eval())
		print(games[2].eval())
Code Example #7
from time import sleep
import numpy as np
import os
import time
# os.environ["SDL_VIDEODRIVER"] = "dummy"
from collections import deque
from keras.models import Model, load_model, Sequential
from keras.layers import Input, Dense, Conv2D, Flatten
from keras.optimizers import Adam, RMSprop
import keras
import matplotlib.pyplot as plt

from skimage.transform import resize
from skimage.color import rgb2gray
from game import GameEnv  # assumed import; the snippet omits where GameEnv is defined

env = GameEnv()
stateSize = len(env.reset())
fakeState = env.reset()
print(stateSize)
env.getGameState()
# PREPROCESSING HYPERPARAMETERS
stack_size = 8  # Number of frames stacked
# MODEL HYPERPARAMETERS
action_size = 4  # 4 possible actions

# Initialize deque with zero-images one array for each image
stacked_frames = deque(
    [np.zeros(stateSize, dtype=int) for i in range(stack_size)],
    maxlen=stack_size)
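The deque above is a standard frame-stacking buffer. A minimal sketch of the update step it is usually paired with; stack_frames is not part of the original snippet and reuses stack_size and np from above:

def stack_frames(stacked_frames, state, is_new_episode):
    # Hypothetical helper: at episode start, fill the buffer with copies of
    # the first state; otherwise push the newest frame and let maxlen evict.
    if is_new_episode:
        for _ in range(stack_size):
            stacked_frames.append(state)
    else:
        stacked_frames.append(state)
    # Stack along a new leading axis to form the network input.
    return np.stack(stacked_frames, axis=0), stacked_frames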

Code Example #8
File: match.py Project: xmgfx/doudizhu
def match(AIs):
    def read(popen):
        while True:
            output = popen.stdout.readline()
            if output:  # readline() returns b'' at EOF, never None
                return output.decode('utf-8')

    def write(popen, obj):
        if popen == AIs[args.p]:
            return
        popen.stdin.write((str(obj) + '\n').encode('utf-8'))
        popen.stdin.flush()

    (name0, simulation0), (name1, simulation1), (name2, simulation2) = players_attr
    write(AIs[0], name0)
    write(AIs[0], simulation0)

    write(AIs[1], name1)
    write(AIs[1], simulation1)

    write(AIs[2], name2)
    write(AIs[2], simulation2)

    env = GameEnv()
    hand_cards = env.hand_cards()

    data.append(env.hand_cards())
    game = Game(env, args.p)

    for player in range(3):
        write(AIs[player], list(hand_cards.flatten()))
    slot = []
    ig = False

    print('Filename:', filename)
    print('Your position:', args.p)
    print('Your hand cards:', to_chars(hand_cards[args.p]))

    while not env.game_over():
        period = env.period()
        player = env.curr_player()
        if period == 1 and player == args.p:
            print('Your position:', args.p)
            print('Your hand cards:', to_chars(env.hand_cards()[args.p]))
            print('Hand cards num of 3 players:', env.hand_cards_num())
            print('-----')
        if period == 1 or period == 2:
            if player == args.p:
                if period == 1:
                    _game = game.copy()
                    print('Your turn:', end=' ')
                    chars = input()
                    try:
                        if chars == 'pass':
                            actions = [0]
                        else:
                            chars = chars.upper().rstrip().replace(
                                ',', '').split(' ')
                            while '' in chars:
                                chars.remove('')
                            # for char in chars:
                            # 	if char not in card_dict:
                            # 		raise KeyError('input error')
                            actions = to_action(chars)
                        for action in actions:
                            if action in _game.action_list():
                                _game.move(action)
                            else:
                                raise RuntimeError('couldn\'t move')
                    except (RuntimeError, KeyError):
                        print('Invalid action! Please retry.')
                        print('=====')
                        continue
                    game = _game

                    for action in actions:
                        env.move(action)
                        for target in range(3):
                            write(AIs[target], action)
                    print('=====')
                if period == 2:
                    print('Your bet:', end=' ')
                    action = int(input()) + 352
                    if action not in game.action_list():
                        print('Invalid action! Please retry.')
                        continue
                    env.move(action)
                    game.move(action)
                    for target in range(3):
                        write(AIs[target], action)
            else:
                action = int(read(AIs[player]))
                # print(action)
                env.move(action)
                game.move(action)
                for target in range(3):
                    if target != player:
                        write(AIs[target], action)
                if period == 1:
                    slot += to_chars(Action(action).to_array()).replace(
                        '[', '').replace(']', '').replace(',', '').split(' ')
                    if env.curr_player() != player:
                        if action == 0:
                            print('Player%d pass' % player)
                        else:
                            print('Player%d\'s turn:' % player,
                                  str(slot).replace('\'', ''))
                        print('=====')
                        slot = []
                if period == 2:
                    print('Player%d\'s bet: %d' % (player, action - 352))
            if period == 2 and env.period() == 3:
                print('Landlord is Player%d' % env.lord_player())
                print('Bottom cards:', to_chars(env.bottom()))
                print('===== Game Start =====')
        if period == 3:
            for player in range(3):
                write(AIs[player], env.response(player))
            game.move(env.response(args.p))
            env.move(-1)
    evals = [None, None, None]
    for player in range(3):
        if player != args.p:
            evals[player] = (float(read(AIs[player])) + 1) / 2
            AIs[player].terminate()
    # print(evals)
    data.append(game.policy())
    with open('human/%s.pkl' % filename, 'wb') as f:
        pickle.dump(data, f)

    if args.p == env.lord_player():
        is_winner = env.hand_cards_num()[args.p] == 0
    else:
        is_winner = env.hand_cards_num()[env.lord_player()] != 0
    if game.lord_player() == -1:
        print('<<<<<<<<<<<<<<< DRAW >>>>>>>>>>>>>>>')
    elif is_winner:
        print('<<<<<<<<<<<<<<< YOU WIN >>>>>>>>>>>>>>>')
    else:
        print('<<<<<<<<<<<<<<< YOU LOSE >>>>>>>>>>>>>>>')
Code Example #9
File: ai.py Project: TimothyHyndman/tictactoe
def main():
    model_name = "models/model_conv.h5"
    reference_player = TinyBrain()
    # candidate_player = BigBrain(load_model=model_name, tryhard_mode=False)  # For continuing training
    candidate_player = BigBrain(tryhard_mode=False)  # For starting training

    episode = 1
    while True:
        env = GameEnv()
        first_move = True
        # immediate store is all the information we have immediately after a move:
        # (current state, chosen move, whether the move was exploratory)
        immediate_store = []
        # delayed store is all the information we get after the next move:
        # (possible next actions, reward, next state, terminated)
        delayed_store = []

        # Randomly choose who goes first
        current_player = candidate_player if random.random() < 0.5 else reference_player

        while True:
            state = env.state()
            if current_player is candidate_player:
                if first_move:
                    first_move = False
                else:
                    # Finish providing information for candidate player's last move
                    possible_actions = env.possible_actions()
                    delayed_store.append((possible_actions, 0, state, False))
                # Provide starting information for candidate player's current move
                do_explore = random.random() < 0.3
                move = current_player.select_move(env, explore=do_explore)
                immediate_store.append((state, move, do_explore))
            else:
                move = current_player.select_move(env)

            env.play_move(*move)  # Current player flips
            env.check_win()

            if env.winner or env.draw:
                # If game has ended we need to give rewards to both players
                if env.draw:
                    delayed_store.append((None, DRAW_REWARD, None, True))
                elif current_player is candidate_player:
                    # Winner is always whoever played last
                    delayed_store.append((None, WIN_REWARD, None, True))
                else:
                    delayed_store.append((None, LOSS_REWARD, None, True))

                for immediate, delayed in zip(immediate_store, delayed_store):
                    state, move, do_explore = immediate
                    if not do_explore:
                        candidate_player.store(state, move, *delayed)
                break

            current_player = reference_player if current_player is candidate_player else candidate_player

        if episode % GAMES_PER_SET == 0:
            print(f"Training after {episode} episodes")
            candidate_player.retrain(batch_size=TRAINING_SIZE)
            candidate_player.align_target_model()
            candidate_player.save(model_name)
            wins, draws, losses = evaluate_candidate(candidate_player, TinyBrain())
            print(f"{wins}, {draws}, {losses}")
            if wins + losses > 0:
                percentage_wins = wins / (wins + losses)
            else:
                percentage_wins = 0
            print(f"percentage wins: {percentage_wins}")

            reference_player = candidate_player
            reference_player.tryhard_mode = False
            candidate_player = BigBrain(tryhard_mode=False)
            candidate_player.q_network = reference_player.q_network
            candidate_player.align_target_model()

        episode += 1
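The reward constants and set sizes referenced above (WIN_REWARD, DRAW_REWARD, LOSS_REWARD, GAMES_PER_SET, TRAINING_SIZE) are defined elsewhere in ai.py. The values below are illustrative placeholders, not taken from the source:

# Hypothetical module-level constants; the real values live elsewhere in ai.py.
WIN_REWARD = 1.0
DRAW_REWARD = 0.0
LOSS_REWARD = -1.0
GAMES_PER_SET = 100   # episodes between retrain/evaluate cycles
TRAINING_SIZE = 64    # replay batch size passed to retrain()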
Code Example #10
File: ai.py Project: TimothyHyndman/tictactoe
def print_diagnostics(candidate_player, episode):
    test_env_start = GameEnv()
    test_env_about_to_win = GameEnv()
    test_env_about_to_win.play_move(0, 0)
    test_env_about_to_win.play_move(2, 2)
    test_env_about_to_win.play_move(0, 1)
    test_env_about_to_win.play_move(1, 1)

    test_env_about_to_win_p2 = GameEnv()
    test_env_about_to_win_p2.play_move(0, 0)
    test_env_about_to_win_p2.play_move(2, 2)
    test_env_about_to_win_p2.play_move(1, 1)
    test_env_about_to_win_p2.play_move(2, 1)
    test_env_about_to_win_p2.play_move(1, 0)

    initial_preferences = candidate_player.move_probabilities(test_env_start.state(), test_env_start.possible_actions())
    about_to_win_preferences = candidate_player.move_probabilities(test_env_about_to_win.state(),
                                                                   test_env_about_to_win.possible_actions())
    about_to_win_preferences_p2 = candidate_player.move_probabilities(
        test_env_about_to_win_p2.state(),
        test_env_about_to_win_p2.possible_actions()
    )
    print(f"Initial move preference after {episode} games")
    print(initial_preferences)
    print(f"Preferences when winning in top right")
    print(about_to_win_preferences)
    print(f"Preferences when winning in bottom left")
    print(about_to_win_preferences_p2)
Code Example #11
File: agent.py Project: xmgfx/doudizhu
def main():
    model: Model

    config.set_device(1)
    env = GameEnv()
    # model = Model('model')
    model = Model('model_201912080009')
    # model = Model('model_tencent0824')
    model.forced_restore()
    # agent = Agent(model)
    agent = Agent(model)

    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28, 0,
        29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92, 317,
        320, 0, 0, 31
    ]  #, 42, 0, 0, 15, 18]
    init = np.array(init, dtype=np.int32)

    env.load(init)
    print(env)

    root, player = new_Root(), 2

    game = Game(env, player)
    for action in actions:
        mct = MCT(game, root)
        agent.simulates([mct], store_rnn_states='Always')
        root.root(action)
        game.move(action)
        print(game)
        print('GAUSS', game.gauss())
        print(game.curr_player(), game.my_player())

    print('====')
    print(game.curr_player())
    print(game.my_player())
    print(game.lord_player())
    print(game.hand_cards_num())
    print(game.bottom())
    print('====')
    mct = MCT(game, root)
    for cnt in range(2000):
        agent.simulates([mct], store_rnn_states='Always')

        # if cnt == 0:
        # 	history, rnn_state = root.get_model_input()
        # 	print(history)
        # if (cnt + 1) % 10 == 0:
        if (cnt + 1) % 10 == 0:
            print(cnt + 1)
            for action, P, son in mct.root.edges():
                print('%d: %.8f %d %.3f' % (action, P, son.N(), son.Q()))
    print('-------------------------')
    t = 1.0
    s = np.array([son.N() for action, P, son in mct.root.edges()])
    p = np.array([P for action, P, son in mct.root.edges()])
    print(s)
    print(np.mean(s))
    w = s + 0.001 - np.mean(s)
    w[w < 0] = 0
    w = (w**t) / (w**t).sum()
    print(w)
    print(s / s.sum())
    print(p)
    mct.json()
Code Example #12
File: manual.py Project: updatesvc/Bobs-Invaders
from game import GameEnv
import random

env = GameEnv(1 / 60)
env.stepReward = 0
env.playerShotReward = 0
env.enemyShotReward = 0
env.missedShotReward = 0
env.allDeadReward = 0
env.invasionReward = 0
env.bottomReward = 0
env.underReward = 0
env.anchorReward = 0
env.cornerReward = -0.001
env.reset()
actions = [i for i in range(4)]
done = False

# print(env.reset())
env.loop()


def randMove():
    done = False
    while not done:
        state, reward, done, win = env.step(random.sample(actions, 1)[0])


# for i in range(4):
#     env.reset()
#     randMove()
Code Example #13
File: test.py Project: xmgfx/doudizhu
def main():
    np.set_printoptions(precision=2, linewidth=128, suppress=True)
    path = 'test'
    config.device = torch.device('cuda:1')
    config.set_device(-1)
    model = Model('model_201912080009')
    model.restore()

    files = os.listdir('gen/%s/' % path)
    files.sort(key=lambda k: k.split('_')[0], reverse=True)
    # print(files[-1])
    file = np.random.choice(files[:100])
    # file = '201912101326_85d94af6fe1a588b.pkl'

    print(file)
    data = pickle_load('gen/%s/%s' % (path, file))
    # data = pickle_load('gen/test/' + files[-2])
    # np.random.shuffle(data)

    player = 2

    init, actions, _v = [None, [], -1.0]

    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28, 0,
        29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92, 317,
        320, 0, 0, 31, 42, 0, 0, 15, 18
    ]
    init = np.array(init, dtype=np.int32)

    # actions = [353, 352, 352, 339, 349, 349, 15]

    # init = [[2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    # 		[2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    # 		[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, 4, 4, 1, 1]]
    # init = np.array(init, dtype=np.int32)
    # actions = [353, 352, 352, 344, 345, 346, 151]

    # init, actions, _v = data[player]
    print(_v, player)
    print(init)
    print(actions)
    print('=============================================')
    print('player:', player)

    histories = unpack(init, actions, player)
    histories, lengths = history_regularize(histories)
    histories, lengths = to_cuda(histories, lengths)

    vs, ps, _ = model(histories, lengths, None)

    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    for v, p, action in zip(vs, ps, actions):
        print('----------------------------------')
        print('my_player: %d, curr_player: %d' % (player, game.curr_player()))
        # for action, _dist in enumerate(dist):
        # 	print(action, _dist)
        idx = np.argsort(p)[::-1]
        for i in range(8):
            print(game.could_move(idx[i]), end=' ')
            print('(%d, %.2f%%)' % (idx[i], p[idx[i]] * 100), end='\n')
        print('action: %d, %.2f%%' % (action, p[action] * 100))

        if idx[0] == 60 and p[idx[0]] > 0.3:
            print(game)
            print(game.policy())
            print(game.hand_cards_num())
            print(game.bottom())
            print(game.curr_player(), game.lord_player())
            return 0

        # model_input = game.to_model_input()
        # for channel in range(26, 28):
        # 	print(channel)
        # 	print(model_input[channel])
        print('%.1f, %.3f' % (_v, v[0]))

        game.move(action)
        print(game)
        print('Gauss:', game.gauss())
Code Example #14
File: train.py Project: xzhangeb/SnakeAI_RL
import random
from game import GameEnv
from DeepQNet import createDQN
from Memory import Memory

gamma = 0.9
num_episode = 100
num_frames = 4
epsilon = 0.2
memoryPool = Memory()
batch_size = 32
checkpoint_freq = 10

for episode in range(num_episode):

    gameEnv = GameEnv((20, 20))
    model = createDQN(num_frames, gameEnv)
    over = False
    loss = 0.0
    last_frames = gameEnv.getLastFrames(4)
    total_reward = 0

    while not over:

        decision = random.random()
        direction = 0

        if decision < epsilon:
            direction = random.randint(-1, 1)
        else:
            shape = (1, ) + last_frames.shape
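The example is truncated mid-loop. A hedged sketch of how the greedy branch of such a DQN step typically continues; model.predict and the {-1, 0, 1} direction encoding are assumptions, not code from the project:

import numpy as np

def greedy_direction(model, last_frames):
    # Hypothetical continuation of the truncated else-branch above: run the
    # stacked frames through the network as a batch of one and take the
    # highest-scoring of the three relative directions {-1, 0, 1}.
    q_values = model.predict(last_frames.reshape((1,) + last_frames.shape))
    return int(np.argmax(q_values[0])) - 1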
Code Example #15
import os
import time
import numpy as np
from utils import pickle_load
from game import Game, GameEnv

path = 'data/'

files = os.listdir(path)
file = np.random.choice(files)

print(file)
data = pickle_load(path + file)

init, policy, v = data[0]
env = GameEnv()
print(init)
env.load(init)
print(policy)
for action in policy:
    print(env.curr_player(), env.lord_player())
    print(action)
    if env.period() != 3:
        env.move(action)
    else:
        env.move(-1)
    print(env)
    print('---------------------')
    time.sleep(0.5)
print(v)
Code Example #16
File: AIplay.py Project: xzhangeb/SnakeAI_RL
        pygame.display.update()
        gameEnv.stepForward(turn)
        over = gameEnv.isOver()

        display.fill(window_color)
        display.blit(apple, (gameEnv.fruitLocation[0] * 10, gameEnv.fruitLocation[1] * 10))

        clock.tick(10)
    return gameEnv.score


model_name = 'dqn-00000090.model'
model = load_model(model_name)
width = 20
height = 20
gameEnv = GameEnv((height, width))

display_width = width * 10
display_height = height * 10
green = (0, 255, 0)
red = (255, 0, 0)
black = (0, 0, 0)
window_color = (200, 200, 200)
apple = pygame.image.load('apple.jpg')
clock = pygame.time.Clock()


pygame.init()

display = pygame.display.set_mode((display_width, display_height))
display.fill(window_color)