Example #1
class GameEngine:
    def __init__(self, game_settings, display_settings):
        self.display = Display(display_settings)
        self.game = Game(game_settings)
        self.running = False

    def start(self):
        self.display.render_board(self.game.board)

        self.running = True
        while self.running:
            # Process events
            for event in pygame.event.get():
                self.handle_event(event)

            self.update_game()
            # Eventually, this should take a list of only the tiles that need rendering, but
            # until we hit performance issues, we'll just render the whole board every update.
            self.display.render_updates(self.game.board)

    def stop(self):
        self.running = False

    def update_game(self):
        self.game.advance_game()

    def handle_event(self, event):
        if event.type == pygame.QUIT:
            self.stop()

        if event.type == pygame.KEYUP:
            print('up')

        if event.type == pygame.KEYDOWN:
            print('down')
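A minimal driver for the engine above might look like the following sketch; Display and Game are assumed to come from the surrounding project, the settings arguments are placeholders, and pygame must be initialised before the event loop runs.

import pygame

if __name__ == '__main__':
    pygame.init()
    engine = GameEngine(game_settings={}, display_settings={})
    engine.start()  # blocks until a pygame.QUIT event calls stop()
    pygame.quit()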
Example #2
class Server:
    secret_key = 'shouldintermittentvengeancearmagainhisredrighthandtoplagueus'

    def __init__(self, ip='0.0.0.0', port=8956):

        form = '[%(asctime)s]  %(levelname)s: %(message)s'
        self.logger = logging.getLogger("Server")
        logging.basicConfig(level=logging.INFO, format=form)
        log_handler = logging.FileHandler('logs/log.txt')
        log_handler.setFormatter(logging.Formatter(form))
        self.logger.addHandler(log_handler)

        self.main_game = Game(db.main_channel)

        self.ip = ip
        self.port = port

        self.connections = {}

        self.reactor = reactor
        self.udp = UDProtocol(ip, port, reactor, self)
        reactor.listenUDP(port, self.udp)

    def run(self):
        self.logger.info(f'Started at {self.ip}:{self.port}')
        self.main_game.start()
        Console(self)
        self.reactor.run()
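A plausible entry point for this server, assuming the Twisted reactor and the project's UDProtocol, Console, Game and db objects are importable; the defaults mirror the constructor signature above.

if __name__ == '__main__':
    server = Server()  # binds 0.0.0.0:8956 by default
    server.run()       # blocks inside reactor.run()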
Example #3
File: game.py  Project: tvetan/Reversi
class TestGame(unittest.TestCase):

    def setUp(self):
        self.board = Board()
        self.first_player = Player(State.white, self.board)
        self.second_player = Player(State.black, self.board)
        self.game = Game(self.board, self.first_player, self.second_player)

    def test_initialize(self):
        self.assertEqual(self.game.first_player.colour, State.white)
        self.assertEqual(self.game.second_player.colour, State.black)
        self.assertEqual(self.game.current_player_colour, State.black)

    def test_get_current_player(self):
        self.assertEqual(self.game.get_current_player(), self.second_player)

    def test_get_other_player(self):
        self.assertEqual(self.game.get_other_player(), self.first_player)

    def test_is_game_won_should_return_false(self):
        self.assertFalse(self.game.is_game_won())

    def test_is_game_won_should_return_true(self):
        for x in range(8):
            for y in range(8):
                self.board.make_white(x, y)
        self.assertTrue(self.game.is_game_won())

    def test_get_winner_should_return_none(self):
        self.assertEqual(self.game.get_winner(), None)

    def test_change_current_player(self):
        self.game.change_current_player()
        self.assertEqual(self.game.get_current_player(), self.first_player)
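The test case above runs with the standard unittest runner; this sketch assumes Board, Player, State and Game are importable from the Reversi project.

if __name__ == '__main__':
    unittest.main()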
Example #4
    def __init__(self, ip='0.0.0.0', port=8956):

        form = '[%(asctime)s]  %(levelname)s: %(message)s'
        self.logger = logging.getLogger("Server")
        logging.basicConfig(level=logging.INFO, format=form)
        log_handler = logging.FileHandler('logs/log.txt')
        log_handler.setFormatter(logging.Formatter(form))
        self.logger.addHandler(log_handler)

        self.main_game = Game(db.main_channel)

        self.ip = ip
        self.port = port

        self.connections = {}

        self.reactor = reactor
        self.udp = UDProtocol(ip, port, reactor, self)
        reactor.listenUDP(port, self.udp)
Example #5
explore_start = 1.0            # exploration probability at start
explore_stop = 0.01            # minimum exploration probability
decay_rate = 0.000001            # exponential decay rate for exploration prob

# Network parameters
hidden_size = 200               # number of units in each Q-network hidden layer
learning_rate = 0.0001         # Q-network learning rate

# Memory parameters
memory_size = 10000            # memory capacity
batch_size = 50                # experience mini-batch size
pretrain_length = batch_size   # number of experiences to pretrain the memory
tf.reset_default_graph()
mainQN = QNetwork(name='main', hidden_size=hidden_size, learning_rate=learning_rate)
#p2QN = QNetwork(name='p2', hidden_size=hidden_size, learning_rate=learning_rate)
game = Game(verbose=False)
memory = Memory(max_size=memory_size)
saver = tf.train.Saver()
action = game.random_space()
game.move(action, 1)
state, reward = game.step()

space = game.random_space()
game.move(space, 2)

for ii in range(pretrain_length):

    action = game.random_space()
    game.move(action, 1)
    next_state, reward = game.step()
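The decay_rate constant above implies an exponential epsilon-greedy schedule; the same formula appears, commented out, in the DQN training loop of Example #13. A standalone, purely illustrative sketch:

import numpy as np

def exploration_probability(step):
    # epsilon anneals from explore_start toward explore_stop as training steps accumulate
    return explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * step)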
Example #6
def train(sess, scaler, a1, c1, a2, c2):
    game = Game(verbose=False)
    player1 = PlayerTrainer(actor=a1,
                            critic=c1,
                            buffersize=BUFFER_SIZE,
                            game=game,
                            player=1,
                            batch_size=MINIBATCH_SIZE,
                            gamma=GAMMA)
    player2 = PlayerTrainer(actor=a2,
                            critic=c2,
                            buffersize=BUFFER_SIZE,
                            game=game,
                            player=2,
                            batch_size=MINIBATCH_SIZE,
                            gamma=GAMMA)
    # Set up summary Ops
    summary_ops, summary_vars = build_summaries()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)

    # Initialize target network weights
    a1.update_target_network()
    c1.update_target_network()
    a2.update_target_network()
    c2.update_target_network()
    # Initialize replay memory

    episode = 0
    all_wins = []
    all_logs = []
    win_p1, comp1, bloc1 = 0, 0, 0
    win_p2, comp2, bloc2 = 0, 0, 0
    stat = []
    for i in range(MAX_EPISODES):

        episode += 1
        game.setup()
        state = game.space

        ep_reward = 0
        #ep_ave_max_q = 0
        ep_reward2 = 0
        #ep_ave_max_q2 = 0
        #explore_p=1
        terminal = False
        for j in range(MAX_EP_STEPS):

            if not terminal:

                # warm-up: player 1 plays random moves for the first 5000 episodes,
                # then switches to the actor's noisy greedy policy
                if episode < 5000:
                    move = game.random_space()
                    game.move(move, 1)
                    state, reward = game.step(player=1)
                else:
                    state, reward = player1.noisyMaxQMove()
                _, reward2 = game.step(player=2)
                ep_reward += reward
                ep_reward2 += reward2
                terminal = game.game_over

            if terminal:

                all_wins.append(game.game_over)
                log = game.setup()
                s = game.space
                all_logs.append(log)
                print(scaler, win_p1, comp1, bloc1, win_p2, comp2, bloc2,
                      " | Episode", i, ep_reward, ep_reward2)

                if episode % 1000 == 0:
                    win_p1, comp1, bloc1, win_p2, comp2, bloc2 = test(
                        sess, a1, a2)
                    stat.append(
                        [episode, win_p1, comp1, bloc1, win_p2, comp2, bloc2])
                    df = pd.DataFrame(stat)
                    print(df)
                    plt.close('all')
                    xwinp = plt.plot(df[0], df[1], label="P1wins")
                    xcomp = plt.plot(df[0], df[2], label="P1Imm Compl")
                    xbloc = plt.plot(df[0], df[3], label="p1immbloc")
                    xwinp2 = plt.plot(df[0], df[4], label="P2wins")
                    xcomp2 = plt.plot(df[0], df[5], label="P2Imm Compl")
                    xbloc2 = plt.plot(df[0], df[6], label="p2immbloc")
                    plt.legend()
                    plt.ylim(0, 1)
                    plt.ylabel('percent')
                    plt.show(block=False)

                    #for# r in range(1000):
                    #print(win_p,comp)

                    #if (comp1> .75 and win_p1 >.9)or episode>=200000 or(episode==30000 and (win_p1<.50 or comp1<.1) ):
                    #win_p1, comp1,bloc1, win_p2, comp2,bloc2 = test(sess, a1, a2)
                    #print("epi ",i,ep_ave_max_q )
                    #return win_p1,comp1,episode,stat,win_p2,comp2

                break
            else:
                # player 2 plays random moves for episodes 5000-9999, then uses its own noisy greedy policy
                if 5000 <= episode < 10000:
                    move = game.random_space()
                    game.move(move, 2)
                    state, reward2 = game.step(player=2)
                else:
                    state, reward2 = player2.noisyMaxQMove()
                _, reward = game.step(player=1)
                terminal = game.game_over
                ep_reward2 += reward2
                ep_reward += reward

    return stat
Example #7
def test(sess, actor1, actor2):
    game = Game(verbose=False)
    logs = []
    wins = []
    for i in range(TEST_EPISODES):
        game.setup()
        s = game.space
        terminal = False

        for j in range(MAX_EP_STEPS):
            if not terminal:
                a = actor1.predict(np.reshape(game.space, (1, *s.shape)))
                avail = game.avail()
                availQ = {}

                for i in avail:
                    availQ[i] = a[0][i]
                action = max(availQ, key=availQ.get)  #game.random_space()
                #
                # print(a)
                game.move(action, 1)
                s2, r = game.step(1)
                terminal = game.game_over
                info = None
            if terminal:
                wins.append(game.game_over)
                log = game.setup()
                logs.append(log)
                s = game.space
                break
            else:
                a = actor2.predict(np.reshape(game.space, (1, *s.shape)))
                avail = game.avail()
                availQ = {}

                for i in avail:
                    availQ[i] = a[0][i]
                action = max(availQ, key=availQ.get)
                # print(a)
                game.move(action, 2)
                s2, r = game.step(1)
                terminal = game.game_over
                info = None

    c = Counter(wins)
    r = GameRate(verbose=False, list=logs, player=1, opponent=2)
    r2 = GameRate(verbose=False, list=logs, player=2, opponent=1)
    bloc1, bloc2 = 0, 0
    r.check_games()
    r2.check_games()
    win_p1 = c[1] / (TEST_EPISODES - 1)
    print("1win percentage", win_p1)
    if r.completions + r.missed_completions > 0:
        comp1 = r.completions / (r.completions + r.missed_completions)
    else:
        comp1 = 0
    print("1immediate completions", comp1)
    if r.blocks + r.missed_blocks > 0:
        bloc1 = r.blocks / (r.blocks + r.missed_blocks)
    win_p2 = c[2] / (TEST_EPISODES - 1)
    print("2win percentage", win_p2)
    if r2.completions + r2.missed_completions > 0:

        comp2 = r2.completions / (r2.completions + r2.missed_completions)
    else:
        comp2 = 0
    print("2immediate completions", comp2)
    if r2.blocks + r2.missed_blocks > 0:
        bloc2 = r2.blocks / (r2.blocks + r2.missed_blocks)
    return win_p1, comp1, bloc1, win_p2, comp2, bloc2
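The test function above and the DQN test in Example #14 both pick a move by masking the network's Q-values with game.avail() and taking the argmax over legal positions. That pattern could be factored into a small helper like this hypothetical one (not part of the original project):

def best_available_action(q_values, available_spaces):
    # q_values: 1-D array of Q-values indexed by board position, e.g. actor.predict(...)[0]
    # available_spaces: legal positions as returned by game.avail()
    return max(available_spaces, key=lambda i: q_values[i])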
Example #8
File: game.py  Project: tvetan/Reversi
 def setUp(self):
     self.board = Board()
     self.first_player = Player(State.white, self.board)
     self.second_player = Player(State.black, self.board)
     self.game = Game(self.board, self.first_player, self.second_player)
Example #9
def run():
    game = Game()
    game.main()
Example #10
def train(sess, a1, c1, scaler, a2, c2):
    game = Game(verbose=False)
    player1 = PlayerTrainer(actor=a1,
                            critic=c1,
                            buffersize=BUFFER_SIZE,
                            game=game,
                            player=1,
                            batch_size=MINIBATCH_SIZE,
                            gamma=GAMMA)
    player2 = PlayerTrainer(actor=a2,
                            critic=c2,
                            buffersize=BUFFER_SIZE,
                            game=game,
                            player=2,
                            batch_size=MINIBATCH_SIZE,
                            gamma=GAMMA)
    # Set up summary Ops
    summary_ops, summary_vars = build_summaries()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)

    # Initialize target network weights
    a1.update_target_network()
    c1.update_target_network()
    a2.update_target_network()
    c2.update_target_network()
    # Initialize replay memory

    episode = 0
    all_wins = []
    all_logs = []
    win_p1, comp1, bloc1 = 0, 0, 0
    win_p2, comp2, bloc2 = 0, 0, 0
    stat = []
    for i in range(MAX_EPISODES):

        episode += 1
        game.setup()
        state = game.space

        ep_reward = 0
        #ep_ave_max_q = 0
        ep_reward2 = 0
        #ep_ave_max_q2 = 0
        #explore_p=1
        terminal = False
        for j in range(MAX_EP_STEPS):

            if not terminal:
                state, reward = player1.noisyMaxQMove()
                _, reward2 = game.step(player=2)
                ep_reward += reward
                ep_reward2 += reward2
                terminal = game.game_over

            if terminal:

                all_wins.append(game.game_over)
                log = game.setup()
                s = game.space
                all_logs.append(log)
                print(scaler, win_p1, comp1, bloc1, win_p2, comp2, bloc2,
                      " | Episode", i, ep_reward, ep_reward2)

                if episode % 1000 == 0:
                    win_p1, comp1, bloc1, win_p2, comp2, bloc2 = test(
                        sess, a1, a2)
                    stat.append(
                        [episode, win_p1, comp1, bloc1, win_p2, comp2, bloc2])

                    #for# r in range(1000):
                    #print(win_p,comp)

                    #if (comp1> .75 and win_p1 >.9)or episode>=200000 or(episode==30000 and (win_p1<.50 or comp1<.1) ):
                    #win_p1, comp1,bloc1, win_p2, comp2,bloc2 = test(sess, a1, a2)
                    #print("epi ",i,ep_ave_max_q )
                    #return win_p1,comp1,episode,stat,win_p2,comp2

                break
            else:

                state, reward2 = player2.noisyMaxQMove()
                _, reward = game.step(player=1)
                terminal = game.game_over
                ep_reward2 += reward2
                ep_reward += reward

    with open('data/ddpgvddpg', 'wb') as f:
        p.dump(stat, f)
    return None
Example #11
def train(sess, a1, c1, scaler):
    game = Game(verbose=False)
    player1 = PlayerTrainer(actor=a1, critic=c1, buffersize=BUFFER_SIZE, game=game, player=1, batch_size=MINIBATCH_SIZE,
                            gamma=GAMMA)

    sess.run(tf.global_variables_initializer())

    # Initialize target network weights
    a1.update_target_network()
    c1.update_target_network()

    episode = 0
    all_wins = []
    all_logs = []
    win_p1, comp1, bloc1 = 0, 0, 0
    win_p2, comp2, bloc2 = 0, 0, 0
    stat = []
    for i in range(MAX_EPISODES):

        episode += 1
        game.setup()

        ep_reward = 0
        ep_reward2 = 0
        reward2 = 0
        terminal = False
        for j in range(MAX_EP_STEPS):

            if not terminal:
                # warm-up: player 1 plays random moves for the first 7500 episodes,
                # then switches to the actor's noisy greedy policy
                if episode < 7500:
                    move = game.random_space()
                    game.move(move, 1)
                    state, reward = game.step(player=1)
                else:
                    state, reward = player1.noisyMaxQMove()
                _, reward2 = game.step(player=2)
                ep_reward += reward
                ep_reward2 += reward2
                terminal = game.game_over

            if terminal:

                all_wins.append(game.game_over)
                log = game.setup()
                s = game.space
                all_logs.append(log)
                print(scaler, win_p1, comp1, bloc1, win_p2, comp2, bloc2, " | Episode", i, ep_reward, ep_reward2)

                if episode % 1000 == 0:
                    win_p1, comp1, bloc1, win_p2, comp2, bloc2 = test(sess, a1)
                    stat.append([episode, win_p1, comp1, bloc1, win_p2, comp2, bloc2])
                    df = pd.DataFrame(stat)
                    print(df)
                    plt.close('all')
                    xwinp = plt.plot(df[0], df[1], label="P1wins")
                    xcomp = plt.plot(df[0], df[2], label="P1Imm Compl")
                    xbloc = plt.plot(df[0], df[3], label="p1immbloc")
                    xwinp2 = plt.plot(df[0], df[4], label="P2wins")
                    xcomp2 = plt.plot(df[0], df[5], label="P2Imm Compl")
                    xbloc2 = plt.plot(df[0], df[6], label="p2immbloc")
                    plt.legend()
                    plt.ylim(0, 1)
                    plt.ylabel('percent')
                    plt.show(block=False)
                break
            else:
                move = game.random_space()
                game.move(move, 2)
                _, reward = game.step(player=1)
                terminal = game.game_over
                ep_reward2 += reward2
                ep_reward += reward

    return stat
Example #12
def test():

    #saver = tf.train.Saver()
    #saver.restore(sess, tf.train.latest_checkpoint('chk/dqvrand'))
    e = 0
    epilog = []
    logs = []
    wins = []
    game = Game(verbose=False)
    while e <= 1000:
        e += 1
        if not game.game_over:

            action = game.random_space()
            game.move(action, 1)
            game.step(1)

        if game.game_over:
            wins.append(game.game_over)
            log = game.setup()
            logs.append(log)
            if e % 100 == 0:
                win_p1, comp1, bloc1, win_p2, comp2, bloc2 = 0, 0, 0, 0, 0, 0

                c = Counter(wins)
                r = GameRate(verbose=False, list=logs, player=1, opponent=2)
                r2 = GameRate(verbose=False, list=logs, player=2, opponent=1)

                r.check_games()
                r2.check_games()
                win_p1 = c[1] / len(wins)
                print("1win percentage", win_p1)
                if r.completions + r.missed_completions > 0:
                    comp1 = r.completions / (r.completions +
                                             r.missed_completions)
                else:
                    comp1 = 0
                print("1immediate completions", comp1)
                if r.blocks + r.missed_blocks > 0:
                    bloc1 = r.blocks / (r.blocks + r.missed_blocks)
                win_p2 = c[2] / len(wins)
                print("2win percentage", win_p2)
                if r2.completions + r2.missed_completions > 0:

                    comp2 = r2.completions / (r2.completions +
                                              r2.missed_completions)
                else:
                    comp2 = 0
                print("2immediate completions", comp2)
                if r2.blocks + r2.missed_blocks > 0:
                    bloc2 = r2.blocks / (r2.blocks + r2.missed_blocks)
                epilog.append([e, win_p1, comp1, bloc1, win_p2, comp2, bloc2])
            continue
        move = game.random_space()
        game.move(move, 2)
        game.step(2)

    return epilog
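One hypothetical way to inspect the epilog returned above, reusing the same pandas/matplotlib pattern as the training loops in Examples #6 and #11:

import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame(test(), columns=['episode', 'win_p1', 'comp1', 'bloc1',
                                   'win_p2', 'comp2', 'bloc2'])
df.plot(x='episode', ylim=(0, 1))
plt.ylabel('percent')
plt.show()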
Example #13
def train(mainQN, sess):
    winp, comp, blocp = 0, 0, 0
    saver = tf.train.Saver()
    game = Game(verbose=False)
    wins = []
    logs = []
    epi_log = []
    #memory = Memory(max_size=memory_size)

    trainer = QPlayerTrainer(qnet=mainQN, buffersize=memory_size, game=game,
                             player=1, batch_size=batch_size, gamma=gamma, sess=sess)

    # Now train with experiences

    rewards_list = []
    loss = False
    with tf.Session() as sess:
        # Initialize variables
        sess.run(tf.global_variables_initializer())

        step = 0
        for ep in range(1, train_episodes+1):
            total_reward = 0
            t = 0
            explore_p = 0
            state = game.space  # initialise state so the terminal branch below always has a valid shape
            while t < max_steps:
                if not game.game_over:
                    step += 1
                    #explore_p = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * step)
                    #if explore_p > np.random.rand():
                        # Make a random action
                    #    next_state, reward, loss = trainer.randomMove()
                    #else:
                    next_state, reward, loss = trainer.noisyMaxQMove()
                    total_reward += reward
                if game.game_over:
                    # the episode ends so no next state
                    next_state = np.zeros(state.shape)
                    t = max_steps

                    if loss:
                        print(winp, comp, blocp, 'Episode: {}'.format(ep),
                              'Total reward: {}'.format(total_reward),
                              'Training loss: {:.4f}'.format(loss), explore_p)

                    rewards_list.append((ep, total_reward))

                    # Add experience to memory

                    wins.append(game.game_over)

                    log = game.setup()
                    logs.append(log)

                    if ep % 10000 == 0:
                        #print(wins[-100:],logs[-100:])
                        #exit(0)

                        time = str(localtime())
                        saver.save(sess, "chk/dqvrand/" + time + ".ckpt")
                        winp, comp, blocp = test(mainQN, sess)
                        epi_log.append([ep, winp, comp, blocp])



                    state = game.space
                else:
                    state = next_state
                    t += 1
                space = game.random_space()
                game.move(space, 2)
                _, reward = game.step(player=2)
                total_reward += reward
        time = str(localtime())
        saver.save(sess, "chk/dqvrand/"+time+".ckpt")
        with open('data/epi2', 'wb') as f:
            p.dump(epi_log, f)
Example #14
def test(mainQN, sess):

    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('chk/dqvrand'))
    e = 0
    logs = []
    wins = []
    game = Game(verbose=False)
    while e <= 1000:
        e += 1
        if not game.game_over:
            state = game.space
            feed = {mainQN.inputs_: state.reshape((1, *state.shape))}
            As = sess.run(mainQN.output, feed_dict=feed)
            avail = game.avail()

            availQ = {}
            for k in avail:
                availQ[k] = As[0][k]
            action = max(availQ, key=availQ.get)
            game.move(action, 1)
            game.step(1)

        if game.game_over:
            wins.append(game.game_over)
            log = game.setup()
            logs.append(log)
            continue
        move = game.random_space()
        game.move(move, 2)
        game.step(2)

    win, comp, bloc = 0, 0, 0
    c = Counter(wins)
    r = GameRate(verbose=False, list=logs, player=1, opponent=2)

    r.check_games()
    #print(r,c)

    win = c[1] / len(wins)
    print("win percentage", win)
    if (r.completions + r.missed_completions) > 0:
        comp = r.completions / (r.completions + r.missed_completions)
    print("immediate completions", comp)
    if (r.blocks + r.missed_blocks) > 0:
        bloc = r.blocks / (r.blocks + r.missed_blocks)
    print("blocks", bloc)
    #exit(1)
    if win == 0.0:
        print(wins)
        exit(1)
    return win, comp, bloc
Example #15
from common.benchmark import GameRate

from common.game import Game
from collections import Counter

game = Game(verbose=False)
game.setup()
logs = []
wins = []
test_episodes = 1000
for i in range(test_episodes):
    print(i)

    while not game.game_over:

        move = game.random_space()
        game.move(move, 1)

        #print(game.space)
        game.step()
        if not game.game_over:
            move = game.random_space()
            game.move(move, 2)
            game.step()
    wins.append(game.game_over)
    log = game.setup()
    logs.append(log)

r = GameRate(verbose=False, list=logs)

r.check_games(1)
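A possible summary of the random-vs-random run above, reusing the Counter import and the win-rate arithmetic from the earlier test() functions:

c = Counter(wins)
print("player 1 win rate:", c[1] / test_episodes)
print("player 2 win rate:", c[2] / test_episodes)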