Example #1

import tensorflow as tf
from collections import Counter
# Game and GameRate are defined elsewhere in the project.


def test(mainQN, sess):
    # Restore the trained Q-network and evaluate it against a random opponent.
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('chk/dqvrand'))
    e = 0
    logs = []
    wins = []
    game = Game(verbose=False)
    while e <= 1000:
        e += 1
        if not game.game_over:
            # Player 1: take the available move with the highest predicted Q-value.
            state = game.space
            feed = {mainQN.inputs_: state.reshape((1, *state.shape))}
            As = sess.run(mainQN.output, feed_dict=feed)
            avail = game.avail()

            availQ = {}
            for k in avail:
                availQ[k] = As[0][k]
            action = max(availQ, key=availQ.get)
            game.move(action, 1)
            game.step(1)

        if game.game_over:
            # Record the result, reset the board, and continue with the next game.
            wins.append(game.game_over)
            log = game.setup()
            logs.append(log)
            continue
        # Player 2: random opponent.
        move = game.random_space()
        game.move(move, 2)
        game.step(2)

    # Summarise the run: win rate, immediate-completion rate, and block rate.
    win, comp, bloc = 0, 0, 0
    c = Counter(wins)
    r = GameRate(verbose=False, list=logs, player=1, opponent=2)
    r.check_games()

    win = c[1] / len(wins)
    print("win percentage", win)
    if (r.completions + r.missed_completions) > 0:
        comp = r.completions / (r.completions + r.missed_completions)
    print("immediate completions", comp)
    if (r.blocks + r.missed_blocks) > 0:
        bloc = r.blocks / (r.blocks + r.missed_blocks)
    print("blocks", bloc)
    if win == 0.0:
        # A zero win rate against a random opponent indicates a broken model; abort.
        print(wins)
        exit(1)
    return win, comp, bloc
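
All three examples repeat the same masking step: score every board cell with the network, keep only the cells that game.avail() reports as legal, and play the argmax. Below is a minimal standalone sketch of that pattern using only NumPy; the helper name best_available_action and the sample scores are illustrative, not part of the project.

import numpy as np

def best_available_action(q_values, avail):
    # q_values: 1-D array of per-cell scores (e.g. Qs[0] above);
    # avail: list of legal board positions, as returned by game.avail().
    availQ = {a: q_values[a] for a in avail}
    return max(availQ, key=availQ.get)

# With nine cells and only positions 0, 4 and 8 still open,
# the helper picks the open cell with the largest score.
scores = np.array([0.1, 0.9, 0.3, 0.2, 0.7, 0.0, 0.4, 0.6, 0.5])
print(best_available_action(scores, [0, 4, 8]))  # -> 4
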
Example #2

    # Training loop fragment: state, step, loss and the hyperparameters below
    # (train_episodes, max_steps, explore_start, explore_stop, decay_rate) are
    # defined earlier in the enclosing script.
    for ep in range(1, train_episodes):
        total_reward = 0
        t = 0
        while t < max_steps:
            if not game.game_over:
                step += 1
                # Anneal the exploration probability from explore_start toward explore_stop.
                explore_p = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * step)
                if explore_p > np.random.rand():
                    # Explore: make a random legal move.
                    action = game.random_space()
                else:
                    # Exploit: take the legal move with the highest predicted Q-value.
                    feed = {mainQN.inputs_: state.reshape((1, *state.shape))}
                    Qs = sess.run(mainQN.output, feed_dict=feed)
                    avail = game.avail()
                    availQ = {}
                    for i in avail:
                        availQ[i] = Qs[0][i]
                    action = max(availQ, key=availQ.get)
                game.move(action, 1)
                next_state, reward = game.step()
                total_reward += reward
            if game.game_over:
                # The episode ends, so there is no next state.
                next_state = np.zeros(state.shape)
                t = max_steps
                if loss:
                    print('Episode: {}'.format(ep),
                          'Total reward: {}'.format(total_reward),
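
The explore_p schedule above decays exponentially with the global step counter, so the agent starts out almost fully random and gradually shifts to greedy play. A quick sketch of how that probability falls, with illustrative values for the hyperparameters (the fragment does not show the ones actually used):

import numpy as np

# Illustrative hyperparameters only; the real values are set elsewhere in the script.
explore_start, explore_stop, decay_rate = 1.0, 0.01, 0.0001

for step in (0, 1000, 10000, 50000):
    explore_p = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * step)
    print(step, round(explore_p, 3))
# 0 -> 1.0, 1000 -> 0.906, 10000 -> 0.374, 50000 -> 0.017
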
Example #3

import numpy as np
from collections import Counter
# TEST_EPISODES, MAX_EP_STEPS, Game and GameRate are defined elsewhere in the project.


def test(sess, actor1, actor2):
    # Pit two actor networks against each other for TEST_EPISODES games.
    game = Game(verbose=False)
    logs = []
    wins = []
    for i in range(TEST_EPISODES):
        game.setup()
        s = game.space
        terminal = False

        for j in range(MAX_EP_STEPS):
            if not terminal:
                # Player 1: greedy choice among the legal moves.
                a = actor1.predict(np.reshape(game.space, (1, *s.shape)))
                avail = game.avail()
                availQ = {}
                for k in avail:
                    availQ[k] = a[0][k]
                action = max(availQ, key=availQ.get)
                game.move(action, 1)
                s2, r = game.step(1)
                terminal = game.game_over
            if terminal:
                # Record the winner, reset the board, and move on to the next episode.
                wins.append(game.game_over)
                log = game.setup()
                logs.append(log)
                s = game.space
                break
            else:
                # Player 2: greedy choice from the second actor.
                a = actor2.predict(np.reshape(game.space, (1, *s.shape)))
                avail = game.avail()
                availQ = {}
                for k in avail:
                    availQ[k] = a[0][k]
                action = max(availQ, key=availQ.get)
                game.move(action, 2)
                s2, r = game.step(2)
                terminal = game.game_over

    # Score both players: win rate, immediate-completion rate, and block rate.
    c = Counter(wins)
    r = GameRate(verbose=False, list=logs, player=1, opponent=2)
    r2 = GameRate(verbose=False, list=logs, player=2, opponent=1)
    bloc1, bloc2 = 0, 0
    r.check_games()
    r2.check_games()

    win_p1 = c[1] / (TEST_EPISODES - 1)
    print("player 1 win percentage", win_p1)
    if r.completions + r.missed_completions > 0:
        comp1 = r.completions / (r.completions + r.missed_completions)
    else:
        comp1 = 0
    print("player 1 immediate completions", comp1)
    if r.blocks + r.missed_blocks > 0:
        bloc1 = r.blocks / (r.blocks + r.missed_blocks)

    win_p2 = c[2] / (TEST_EPISODES - 1)
    print("player 2 win percentage", win_p2)
    if r2.completions + r2.missed_completions > 0:
        comp2 = r2.completions / (r2.completions + r2.missed_completions)
    else:
        comp2 = 0
    print("player 2 immediate completions", comp2)
    if r2.blocks + r2.missed_blocks > 0:
        bloc2 = r2.blocks / (r2.blocks + r2.missed_blocks)
    return win_p1, comp1, bloc1, win_p2, comp2, bloc2
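
A small usage sketch of how the six returned metrics could be collected and averaged over several evaluation rounds. The five-round loop and the label strings are illustrative; test, sess, actor1 and actor2 come from the surrounding project.

import numpy as np

# Illustrative only: run the evaluation a few times and average the metrics.
results = [test(sess, actor1, actor2) for _ in range(5)]
means = np.mean(results, axis=0)

labels = ["p1 win", "p1 completion", "p1 block", "p2 win", "p2 completion", "p2 block"]
for name, value in zip(labels, means):
    print("{}: {:.3f}".format(name, value))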