Ejemplo n.º 1
0
def humanvmodel(sess, X, Y, humanfirst=False):
    """Play one interactive game between a human on stdin and the model.

    Args:
        sess: Session object whose ``run`` evaluates ``Y`` for a fed ``X``.
        X: Input placeholder fed with the concatenated board planes.
        Y: Output whose entries are scored per candidate move.
        humanfirst: When true the human makes the opening move; otherwise
            the model opens.

    The loop ends as soon as ``game.move`` returns a non-zero value. After a
    human move, 1000.0 is reported as a draw and any other non-zero value as
    a human win.
    """
    game = Game()

    if humanfirst:
        game.printState()
        # The result of the opening move is not inspected.
        game.move(int(input("You go first...")))

    while True:
        # The plane order depends on who moved first; presumably the model's
        # own pieces come first -- TODO confirm against the Game class.
        if humanfirst:
            planes = (game.noughts, game.crosses)
        else:
            planes = (game.crosses, game.noughts)
        out = sess.run(Y, feed_dict={X: np.concatenate(planes)})

        # Pick the highest-scoring move whose spots[0, column] entry is 0.0
        # (presumably an empty top cell, i.e. the column is not full).
        bestprob, bestmove = -10000.0, 0
        for column, score in enumerate(out):
            if score > bestprob and game.spots[0, column] == 0.0:
                bestprob, bestmove = score, column
        print(bestprob)

        check = game.move(bestmove)
        game.printState()
        if check != 0.0:
            print("Game over!")
            break

        check = game.move(int(input("Your turn...")))
        if check == 0.0:
            # Game continues: back to the model's turn.
            continue

        game.printState()
        if check == 1000.0:
            print("It's a draw!")
        else:
            print("You win!")
        break
Ejemplo n.º 2
0
def train(sess, X, Y, optimizer, cost, boards, outcomes, ends, wons, moves,
          explore_rate, memsize, batchsize, saver, directory, chckptrate,
          Ytest):
    """Train the model by self-play using a replay memory. Never returns.

    Crosses and noughts alternate moves chosen by ``findbestmove`` from the
    model output. Every move is stored through ``addtomemory`` as a
    (board, next board, ended flag, won value, move) record. Once the last
    memory slot has been written, each loop iteration also runs one
    optimizer step on a random batch drawn from the memory.

    Args:
        sess: Session used to evaluate the model and run the optimizer.
        X, Y: Model input placeholder and output used to choose moves.
        optimizer, cost: Training op and cost fetched on every batch.
        boards, outcomes, ends, wons, moves: Placeholders fed with the
            sampled batch (each sample array is fed transposed).
        explore_rate: Passed through to ``findbestmove``.
        memsize: Number of records held in the replay memory.
        batchsize: Records sampled (without replacement) per training step;
            must not exceed ``memsize``.
        saver, directory: Checkpoint saver and the path it saves to.
        chckptrate: Loop iterations between checkpoints / progress reports.
        Ytest: Output evaluated on the held-out boards; it is fed through
            the ``outcomes`` placeholder.

    ``game.move`` result codes as used here: 0.0 game continues, 1.0 crosses
    won, -1.0 noughts won, 1000.0 draw.
    """
    state_size = 84  # presumably 2 planes x 42 cells -- TODO confirm

    # Replay memory, one row per stored move.
    boardm = np.zeros((memsize, state_size))
    outcomem = np.zeros((memsize, state_size))
    endm = np.zeros((memsize, 1))
    wonm = np.zeros((memsize, 1))
    # -1 marks a slot that has never been written; the last slot doubles as
    # the "memory is full" indicator below.
    movem = np.full((memsize, 1), -1, dtype=int)

    game = Game()
    test = None  # held-out boards, sampled once when training first starts
    firsttrain = True
    counter = 0
    avcost = 0  # running average of the batch cost (tracked, not reported)

    # Noughts' most recent move stays pending until the follow-up position
    # (or the end of the game) is known.
    noughtmoveip = False
    boardtemp2 = None
    movetemp2 = 0

    def crosses_view():
        """Return the current board as a row with crosses' plane first."""
        return np.concatenate((game.crosses, game.noughts)).T

    def noughts_view():
        """Return the current board as a row with noughts' plane first."""
        return np.concatenate((game.noughts, game.crosses)).T

    def terminal():
        """Return the all-zero next-board row stored for finished games."""
        return np.zeros((1, state_size))

    def choose(board):
        """Evaluate the model on a board row and pick a move."""
        out = sess.run(Y, feed_dict={X: board.T})
        return findbestmove(out, game, explore_rate)

    def remember(board, nextboard, ended, won, move, count):
        """Store one record in the replay memory; return the new counter."""
        return addtomemory(boardm, outcomem, endm, wonm, movem, board,
                           nextboard, ended, won, move, count)

    print("Beginning training...")

    while True:
        for _ in range(chckptrate):

            # ---- crosses' move ----
            boardtemp = crosses_view()
            movetemp = choose(boardtemp)
            check = game.move(movetemp)

            if check == 1.0 or check == 1000.0:
                # Crosses ended the game (win or draw): close crosses'
                # record and noughts' pending one.
                if check == 1.0:
                    crosswon, noughtwon = 1.0, -1.0
                else:
                    crosswon, noughtwon = 0.0, 0.0
                counter = remember(boardtemp, terminal(), 1.0, crosswon,
                                   movetemp, counter)
                if noughtmoveip:
                    counter = remember(boardtemp2, terminal(), 1.0,
                                       noughtwon, movetemp2, counter)
                game.reset()
                noughtmoveip = False

            elif check == 0.0:
                # Game continues: noughts' pending move now has a successor
                # position, so it can be stored as a non-terminal record.
                if noughtmoveip:
                    counter = remember(boardtemp2, noughts_view(), 0.0, 0.0,
                                       movetemp2, counter)

                # ---- noughts' move ----
                noughtmoveip = True
                boardtemp2 = noughts_view()
                movetemp2 = choose(boardtemp2)
                check2 = game.move(movetemp2)

                if check2 == -1.0 or check2 == 1000.0:
                    # Noughts ended the game (win or draw): close both
                    # records, crosses' first.
                    if check2 == -1.0:
                        crosswon, noughtwon = -1.0, 1.0
                    else:
                        crosswon, noughtwon = 0.0, 0.0
                    counter = remember(boardtemp, terminal(), 1.0, crosswon,
                                       movetemp, counter)
                    counter = remember(boardtemp2, terminal(), 1.0,
                                       noughtwon, movetemp2, counter)
                    game.reset()
                    noughtmoveip = False
                elif check2 == 0.0:
                    counter = remember(boardtemp, crosses_view(), 0.0, 0.0,
                                       movetemp, counter)

            # ---- one training step once the memory has been filled ----
            if movem[memsize - 1] != -1:

                if firsttrain:
                    # Fixed held-out sample for progress reporting. Capped at
                    # memsize: sampling without replacement raises
                    # ValueError when more rows are requested than exist.
                    picks = np.random.choice(np.arange(memsize),
                                             size=min(1000, memsize),
                                             replace=False)
                    test = boardm[picks]
                    firsttrain = False

                sample = np.random.choice(np.arange(memsize),
                                          size=batchsize,
                                          replace=False)

                _, batchcost = sess.run(
                    [optimizer, cost],
                    feed_dict={
                        boards: boardm[sample].T,
                        outcomes: outcomem[sample].T,
                        ends: endm[sample].T,
                        wons: wonm[sample].T,
                        moves: movem[sample].T
                    })
                avcost = 0.9 * avcost + 0.1 * batchcost

        # ---- checkpoint and progress report ----
        if not firsttrain:
            testvalue = np.average(
                sess.run(Ytest, feed_dict={outcomes: test.T}))
            print("Counter is " + str(counter) +
                  " and average Qmax on test set is " + str(testvalue))
            saver.save(sess, directory)
Ejemplo n.º 3
0
# -*- coding: utf-8 -*-
"""
Created on Wed May 24 21:22:30 2017

@author: Toby
"""

from connectfour import Game

# Two-player console game: both players type their moves on stdin in turn.
turncounter = 1

game = Game()

while True:
    game.printState()
    check = game.move(int(input("No winner yet...")))
    if check != 0.0:
        # Any non-zero result from game.move ends the game.
        game.printState()
        if check == 1000.0:
            # 1000.0 is treated as the draw code elsewhere in this code
            # base; without this branch a draw was announced as
            # "Player 1000.0 has won!".
            print("End of Game! It's a draw!")
        else:
            print("End of Game! Player " + str(check) + " has won!")
        break
    turncounter = turncounter + 1