Example #1
from collections import Counter

from common.benchmark import GameRate
from common.game import Game


def test():
    """Play random-vs-random games for 1000 steps, periodically recording win/completion/block rates."""

    #saver = tf.train.Saver()
    #saver.restore(sess, tf.train.latest_checkpoint('chk/dqvrand'))
    e = 0
    epilog = []
    logs = []
    wins = []
    game = Game(verbose=False)
    while e <= 1000:
        e += 1
        if not game.game_over:

            action = game.random_space()
            game.move(action, 1)
            game.step(1)

        if game.game_over:
            wins.append(game.game_over)
            log = game.setup()
            logs.append(log)
            if e % 100 == 0:
                win_p1, comp1, bloc1, win_p2, comp2, bloc2 = 0, 0, 0, 0, 0, 0

                c = Counter(wins)
                r = GameRate(verbose=False, list=logs, player=1, opponent=2)
                r2 = GameRate(verbose=False, list=logs, player=2, opponent=1)

                r.check_games()
                r2.check_games()
                win_p1 = c[1] / len(wins)
                print("1win percentage", win_p1)
                if r.completions + r.missed_completions > 0:
                    comp1 = r.completions / (r.completions +
                                             r.missed_completions)
                else:
                    comp1 = 0
                print("1immediate completions", comp1)
                if r.blocks + r.missed_blocks > 0:
                    bloc1 = r.blocks / (r.blocks + r.missed_blocks)
                win_p2 = c[2] / len(wins)
                print("2win percentage", win_p2)
                if r2.completions + r2.missed_completions > 0:
                    comp2 = r2.completions / (r2.completions +
                                              r2.missed_completions)
                else:
                    comp2 = 0
                print("2immediate completions", comp2)
                if r2.blocks + r2.missed_blocks > 0:
                    bloc2 = r2.blocks / (r2.blocks + r2.missed_blocks)
                epilog.append([e, win_p1, comp1, bloc1, win_p2, comp2, bloc2])
            continue
        move = game.random_space()
        game.move(move, 2)
        game.step(2)

    return epilog
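Example #1 repeats the same zero-division guard for each rate it prints. The guards could be factored into a small helper; the sketch below uses a hypothetical safe_ratio function that is not part of the project.

def safe_ratio(hits, misses):
    # Return hits / (hits + misses), or 0.0 when there were no opportunities.
    total = hits + misses
    return hits / total if total > 0 else 0.0

# e.g. comp1 = safe_ratio(r.completions, r.missed_completions)
#      bloc1 = safe_ratio(r.blocks, r.missed_blocks)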
Example #2
def test(mainQN, sess):

    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('chk/dqvrand'))
    e = 0
    logs = []
    wins = []
    game = Game(verbose=False)
    while e <= 1000:
        e += 1
        if not game.game_over:
            state = game.space
            feed = {mainQN.inputs_: state.reshape((1, *state.shape))}
            As = sess.run(mainQN.output, feed_dict=feed)
            avail = game.avail()

            availQ = {}
            for k in avail:
                availQ[k] = As[0][k]
            # Greedy action: highest Q-value among the cells that are still free.
            action = max(availQ, key=availQ.get)
            game.move(action, 1)
            game.step(1)

        if game.game_over:
            wins.append(game.game_over)
            log = game.setup()
            logs.append(log)
            continue
        move = game.random_space()
        game.move(move, 2)
        game.step(2)

    win, comp, bloc = 0, 0, 0
    c = Counter(wins)
    r = GameRate(verbose=False, list=logs, player=1, opponent=2)

    r.check_games()
    #print(r,c)

    win = c[1] / len(wins)
    print("win percentage", win)
    if (r.completions + r.missed_completions) > 0:
        comp = r.completions / (r.completions + r.missed_completions)
    print("immediate completions", comp)
    if (r.blocks + r.missed_blocks) > 0:
        bloc = r.blocks / (r.blocks + r.missed_blocks)
    print("blocks", bloc)
    #exit(1)
    if win == 0.0:
        print(wins)
        exit(1)
    return win,comp,bloc
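The action selection in Example #2 restricts the argmax over the network's Q-values to the cells that are still free. The same masking pattern can be written as a standalone helper; masked_argmax below is a hypothetical name used only for illustration.

def masked_argmax(q_values, available):
    # Pick the available action index with the highest Q-value,
    # mirroring the availQ dict built in Example #2.
    return max(available, key=lambda a: q_values[a])

# Example: masked_argmax([0.1, -0.3, 0.7, 0.2], available=[0, 1, 3]) -> 3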
Example #3
import numpy as np
import tensorflow as tf

# Game, Memory and QNetwork are assumed to come from the project's own modules.
# Network parameters
hidden_size = 200               # number of units in each Q-network hidden layer
learning_rate = 0.0001         # Q-network learning rate

# Memory parameters
memory_size = 10000            # memory capacity
batch_size = 50                # experience mini-batch size
pretrain_length = batch_size   # number experiences to pretrain the memory
tf.reset_default_graph()
mainQN = QNetwork(name='main', hidden_size=hidden_size, learning_rate=learning_rate)
#p2QN = QNetwork(name='p2', hidden_size=hidden_size, learning_rate=learning_rate)
game = Game(verbose=False)
memory = Memory(max_size=memory_size)
saver = tf.train.Saver()
action = game.random_space()
game.move(action, 1)
state, reward = game.step()

space = game.random_space()
game.move(space, 2)

for ii in range(pretrain_length):

    action = game.random_space()
    game.move(action, 1)
    next_state, reward = game.step()

    if game.game_over:
        # Episode ended: store a terminal transition and start a new game.
        next_state = np.zeros(state.shape)
        memory.add((state, action, reward, next_state))
        game.setup()
        state = game.space
    else:
        memory.add((state, action, reward, next_state))
        state = next_state
        # The opponent (player 2) replies with a random move.
        space = game.random_space()
        game.move(space, 2)
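Example #3 pre-fills a Memory object with (state, action, reward, next_state) tuples before training. The Memory class itself is not shown here; a minimal replay buffer compatible with the calls above could look like the following sketch (an assumption, not the project's actual implementation).

import random
from collections import deque

class Memory:
    def __init__(self, max_size=10000):
        # Fixed-size buffer: the oldest experiences are dropped first.
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        # Uniformly sample a mini-batch of stored experiences.
        return random.sample(self.buffer, batch_size)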
Example #4
def train(sess, scaler, a1, c1, a2, c2):
    game = Game(verbose=False)
    player1 = PlayerTrainer(actor=a1,
                            critic=c1,
                            buffersize=BUFFER_SIZE,
                            game=game,
                            player=1,
                            batch_size=MINIBATCH_SIZE,
                            gamma=GAMMA)
    player2 = PlayerTrainer(actor=a2,
                            critic=c2,
                            buffersize=BUFFER_SIZE,
                            game=game,
                            player=2,
                            batch_size=MINIBATCH_SIZE,
                            gamma=GAMMA)
    # Set up summary Ops
    summary_ops, summary_vars = build_summaries()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)

    # Initialize target network weights
    a1.update_target_network()
    c1.update_target_network()
    a2.update_target_network()
    c2.update_target_network()
    # Initialize replay memory

    episode = 0
    all_wins = []
    all_logs = []
    win_p1, comp1, bloc1 = 0, 0, 0
    win_p2, comp2, bloc2 = 0, 0, 0
    stat = []
    for i in range(MAX_EPISODES):

        episode += 1
        game.setup()
        state = game.space

        ep_reward = 0
        #ep_ave_max_q = 0
        ep_reward2 = 0
        #ep_ave_max_q2 = 0
        #explore_p=1
        terminal = False
        for j in range(MAX_EP_STEPS):

            if not terminal:

                if episode < 5000:

                    move = game.random_space()
                    game.move(move, 1)
                    state, reward = game.step(player=1)
                else:
                    state, reward = player1.noisyMaxQMove()
                _, reward2 = game.step(player=2)
                ep_reward += reward
                ep_reward2 += reward2
                terminal = game.game_over

            if terminal:

                all_wins.append(game.game_over)
                log = game.setup()
                s = game.space
                all_logs.append(log)
                print(scaler, win_p1, comp1, bloc1, win_p2, comp2, bloc2,
                      " | Episode", i, ep_reward, ep_reward2)

                if episode % 1000 == 0:
                    win_p1, comp1, bloc1, win_p2, comp2, bloc2 = test(
                        sess, a1, a2)
                    stat.append(
                        [episode, win_p1, comp1, bloc1, win_p2, comp2, bloc2])
                    df = pd.DataFrame(stat)
                    print(df)
                    plt.close('all')
                    xwinp = plt.plot(df[0], df[1], label="P1wins")
                    xcomp = plt.plot(df[0], df[2], label="P1Imm Compl")
                    xbloc = plt.plot(df[0], df[3], label="p1immbloc")
                    xwinp2 = plt.plot(df[0], df[4], label="P2wins")
                    xcomp2 = plt.plot(df[0], df[5], label="P2Imm Compl")
                    xbloc2 = plt.plot(df[0], df[6], label="p2immbloc")
                    plt.legend()
                    plt.ylim(0, 1)
                    plt.ylabel('percent')
                    plt.show(block=False)

                    #for# r in range(1000):
                    #print(win_p,comp)

                    #if (comp1> .75 and win_p1 >.9)or episode>=200000 or(episode==30000 and (win_p1<.50 or comp1<.1) ):
                    #win_p1, comp1,bloc1, win_p2, comp2,bloc2 = test(sess, a1, a2)
                    #print("epi ",i,ep_ave_max_q )
                    #return win_p1,comp1,episode,stat,win_p2,comp2

                break
            else:
                if episode >= 5000 and episode < 10000:
                    move = game.random_space()
                    game.move(move, 2)
                    state, reward2 = game.step(player=2)
                else:
                    state, reward2 = player2.noisyMaxQMove()
                _, reward = game.step(player=1)
                terminal = game.game_over
                ep_reward2 += reward2
                ep_reward += reward

    return stat
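train() above calls build_summaries(), which is not defined in this excerpt. In TF1-style training scripts such a helper usually exposes per-episode statistics as scalar summaries; the sketch below assumes only an episode-reward scalar is tracked and is not necessarily what the project uses.

def build_summaries():
    # Variable holding the most recent episode reward; it can be assigned
    # before the merged summary op is evaluated and written to TensorBoard.
    episode_reward = tf.Variable(0., trainable=False)
    tf.summary.scalar("Reward", episode_reward)
    summary_vars = [episode_reward]
    summary_ops = tf.summary.merge_all()
    return summary_ops, summary_vars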
Example #5
def test(sess, actor2):
    game = Game(verbose=False)
    logs = []
    wins = []
    for i in range(TEST_EPISODES):
        game.setup()
        s = game.space
        terminal = False

        for j in range(MAX_EP_STEPS):
            if not terminal:
                # a = actor1.predict(np.reshape(game.space, (1, *s.shape)))
                # avail = game.avail()
                # availQ = {}
                #
                # for i in avail:
                #     availQ[i] = a[0][i]
                action = game.random_space()  # max(availQ, key=availQ.get)
                # print(a)
                game.move(action, 1)
                s2, r = game.step(1)
                terminal = game.game_over
                info = None
            if terminal:
                wins.append(game.game_over)
                log = game.setup()
                logs.append(log)
                s = game.space
                break
            else:
                a = actor2.predict(np.reshape(game.space, (1, *s.shape)))
                avail = game.avail()
                availQ = {}

                for i in avail:
                    availQ[i] = a[0][i]
                action = max(availQ, key=availQ.get)
                # print(a)
                game.move(action, 2)
                s2, r = game.step(1)
                terminal = game.game_over
                info = None

    c = Counter(wins)
    r = GameRate(verbose=False, list=logs, player=1, opponent=2)
    r2 = GameRate(verbose=False, list=logs, player=2, opponent=1)
    bloc1, bloc2 = 0, 0
    r.check_games()
    r2.check_games()
    win_p1 = c[1] / (TEST_EPISODES - 1)
    print("1win percentage", win_p1)
    if r.completions + r.missed_completions > 0:
        comp1 = r.completions / (r.completions + r.missed_completions)
    else:
        comp1 = 0
    print("1immediate completions", comp1)
    if r.blocks + r.missed_blocks > 0:
        bloc1 = r.blocks / (r.blocks + r.missed_blocks)
    win_p2 = c[2] / (TEST_EPISODES - 1)
    print("2win percentage", win_p2)
    if r2.completions + r2.missed_completions > 0:
        comp2 = r2.completions / (r2.completions + r2.missed_completions)
    else:
        comp2 = 0
    print("2immediate completions", comp2)
    if r2.blocks + r2.missed_blocks > 0:
        bloc2 = r2.blocks / (r2.blocks + r2.missed_blocks)
    return win_p1, comp1, bloc1, win_p2, comp2, bloc2
Example #6
def train(sess, a1, c1, scaler):
    game = Game(verbose=False)
    player1 = PlayerTrainer(actor=a1, critic=c1, buffersize=BUFFER_SIZE, game=game,
                            player=1, batch_size=MINIBATCH_SIZE, gamma=GAMMA)

    sess.run(tf.global_variables_initializer())

    # Initialize target network weights
    a1.update_target_network()
    c1.update_target_network()

    episode = 0
    all_wins = []
    all_logs = []
    win_p1, comp1, bloc1 = 0, 0, 0
    win_p2, comp2, bloc2 = 0, 0, 0
    stat = []
    for i in range(MAX_EPISODES):

        episode += 1
        game.setup()

        ep_reward = 0
        ep_reward2 = 0
        reward2 = 0
        terminal = False
        for j in range(MAX_EP_STEPS):

            if not terminal:
                if episode < 7500:

                    move = game.random_space()
                    game.move(move, 1)
                    state, reward = game.step(player=1)
                else:
                    state, reward = player1.noisyMaxQMove()
                _, reward2 = game.step(player=2)
                ep_reward += reward
                ep_reward2 += reward2
                terminal = game.game_over

            if terminal:

                all_wins.append(game.game_over)
                log = game.setup()
                s = game.space
                all_logs.append(log)
                print(scaler, win_p1, comp1, bloc1, win_p2, comp2, bloc2, " | Episode", i, ep_reward, ep_reward2)

                if episode % 1000 == 0:
                    win_p1, comp1, bloc1, win_p2, comp2, bloc2 = test(sess, a1)
                    stat.append([episode, win_p1, comp1, bloc1, win_p2, comp2, bloc2])
                    df = pd.DataFrame(stat)
                    print(df)
                    plt.close('all')
                    xwinp = plt.plot(df[0], df[1], label="P1wins")
                    xcomp = plt.plot(df[0], df[2], label="P1Imm Compl")
                    xbloc = plt.plot(df[0], df[3], label="p1immbloc")
                    xwinp2 = plt.plot(df[0], df[4], label="P2wins")
                    xcomp2 = plt.plot(df[0], df[5], label="P2Imm Compl")
                    xbloc2 = plt.plot(df[0], df[6], label="p2immbloc")
                    plt.legend()
                    plt.ylim(0, 1)
                    plt.ylabel('percent')
                    plt.show(block=False)
                break
            else:
                move = game.random_space()
                game.move(move, 2)
                _, reward = game.step(player=1)
                terminal = game.game_over
                ep_reward2 += reward2
                ep_reward += reward

    return stat
Example #7
def train(mainQN, sess):
    winp, comp, blocp = 0, 0, 0
    saver = tf.train.Saver()
    game = Game(verbose=False)
    wins = []
    logs = []
    epi_log = []
    #memory = Memory(max_size=memory_size)

    trainer = QPlayerTrainer(qnet=mainQN, buffersize=memory_size, game=game, player=1,
                             batch_size=batch_size, gamma=gamma, sess=sess)

    # Now train with experiences

    rewards_list = []
    loss = False
    with tf.Session() as sess:  # note: opens a new session, shadowing the sess argument
        # Initialize variables
        sess.run(tf.global_variables_initializer())

        step = 0
        for ep in range(1, train_episodes+1):
            total_reward = 0
            t = 0
            explore_p = 0
            while t < max_steps:
                if not game.game_over:
                    step += 1
                    #explore_p = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * step)
                    #if explore_p > np.random.rand():
                        # Make a random action
                    #    next_state, reward, loss = trainer.randomMove()
                    #else:
                    next_state, reward, loss = trainer.noisyMaxQMove()
                    total_reward += reward
                if game.game_over:
                    # the episode ends so no next state
                    next_state = np.zeros(state.shape)
                    t = max_steps

                    if loss:
                        print(winp, comp, blocp, 'Episode: {}'.format(ep),
                              'Total reward: {}'.format(total_reward),
                              'Training loss: {:.4f}'.format(loss), explore_p)

                    rewards_list.append((ep, total_reward))

                    # Add experience to memory

                    wins.append(game.game_over)

                    log = game.setup()
                    logs.append(log)

                    if ep % 10000 == 0:
                        #print(wins[-100:], logs[-100:])
                        #exit(0)

                        time = str(localtime())
                        saver.save(sess, "chk/dqvrand/" + time + ".ckpt")
                        winp, comp, blocp = test(mainQN, sess)
                        epi_log.append([ep, winp, comp, blocp])



                    state = game.space
                else:
                    state = next_state
                    t += 1
                space = game.random_space()
                game.move(space, 2)
                _, reward = game.step(player=2)
                total_reward += reward
        time = str(localtime())
        saver.save(sess, "chk/dqvrand/" + time + ".ckpt")
        with open('data/epi2', 'wb') as f:
            p.dump(epi_log, f)  # p is presumably the pickle module
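Example #7 pickles epi_log to data/epi2. The saved statistics can be read back and tabulated, for instance as below; the column names are assumptions based on the append order [ep, winp, comp, blocp].

import pickle

import pandas as pd

with open('data/epi2', 'rb') as f:
    epi_log = pickle.load(f)

df = pd.DataFrame(epi_log, columns=['episode', 'win_pct', 'completions', 'blocks'])
print(df.tail())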
Example #8
from collections import Counter

from common.benchmark import GameRate
from common.game import Game

game = Game(verbose=False)
game.setup()
logs = []
wins = []
test_episodes = 1000
for i in range(test_episodes):
    print(i)

    while not game.game_over:

        move = game.random_space()
        game.move(move, 1)

        #print(game.space)
        game.step()
        if not game.game_over:
            move = game.random_space()
            game.move(move, 2)
            game.step()
    wins.append(game.game_over)
    log = game.setup()
    logs.append(log)

r = GameRate(verbose=False, list=logs)

r.check_games(1)
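The Counter imported above is unused in this snippet; following the pattern of the other examples, it could summarize the collected outcomes into win percentages, e.g.:

c = Counter(wins)
print("player 1 win percentage", c[1] / len(wins))
print("player 2 win percentage", c[2] / len(wins))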