Example #1
def __init__(self, *args, **kwargs):
    # Pop the optional keyword arguments before delegating to DualAgent,
    # falling back to the defaults when they are absent or falsy.
    self.epsilon = kwargs.pop("epsilon", None) or 0.2
    self.multiplier = kwargs.pop("multiplier", None) or 3
    DualAgent.__init__(self, *args, **kwargs)
    self.tree = MonteCarloTree(self.sess, self.epsilon, self.multiplier)
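This constructor strips its own keyword arguments before delegating to DualAgent and then builds the search tree. Example #4 below instantiates an MCTSAgent that accepts the same epsilon keyword, so this method presumably belongs to that subclass; a minimal usage sketch under that assumption (the module path, the "treesup" model name, and the argument values are placeholders borrowed from the other examples):

import tensorflow as tf

from mcts_agent import MCTSAgent  # assumed module path, not shown in the source

with tf.Session() as sess:
    # epsilon/multiplier are optional; omitting them falls back to 0.2 and 3.
    agent = MCTSAgent(sess, "treesup", epsilon=0.25, multiplier=3)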
Example #2
def get_random_action(state):
    # Sample a move index from the agent's policy distribution over the
    # 15x15 board, then convert it to (row, col) coordinates.
    y_p = DualAgent.get_dist(agent, state)
    c = np.random.choice(225, p=y_p)
    x, y = np.unravel_index(c, (15, 15))
    assert state.board[x, y] == 0, \
        "sampled an occupied cell (total prob %f, cell prob %f)" % (y_p.sum(), y_p[c])
    return x, y
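Because get_random_action samples directly from the policy head, it can drive a quick stochastic rollout. A short sketch, assuming agent and State are set up as in the surrounding examples (State exposes move, end, and history, as used in Examples #5 to #7):

# Sketch: play out one game by repeatedly sampling moves from the policy.
s = State()
while not s.end and len(s.history) < 225:
    s.move(*get_random_action(s))
print("rollout finished after %d moves" % len(s.history))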
Example #3
from agent import Agent
from scipy.io import loadmat
from dual_agent import DualAgent
from dual_network import export_meta

import numpy as np
import tensorflow as tf
import data_util as util


""" begin training """
with tf.Session() as sess:
    export_meta("dualsup")
    agent = DualAgent(sess, "dualsup")
    matlab = loadmat("dual_minimax")
    print("processing data")
    data = util.TenaryData(matlab["X"], matlab["Y"], matlab["V"][0])
    del matlab
    print("processing complete")

    for i in range(4001):
        x_b, y_b, v_b = data.next_batch(256)
        if i % 10 == 0:
            x_b, y_b, v_b = data.test_batch(1024)
            pl, vl, rl, l = agent.loss(x_b, y_b, v_b)
            print("\nstep %d analysis" % i)
            print(">>> policy loss: %f" % pl)
            print(">>> value loss: %f" % vl)
            print(">>> regularization loss: %f" % rl)
            print(">>> loss: %f" % l)
            print(">>> accuracy: %f" % agent.accuracy(x_b, y_b))
Example #4
parser.add_argument("model_name_1", type=str)
parser.add_argument("model_name_2", type=str)
parser.add_argument("--epsilon", "-e", type=float)
parser.add_argument("--chkpnt1", "-c1", type=int)
parser.add_argument("--chkpnt2", "-c2", type=int)
parser.add_argument("--num_games", "-n", default=100, type=int)
parser.add_argument('--save', '-s', action='store_true')
args = parser.parse_args()

a_graph = tf.Graph()
b_graph = tf.Graph()
a_sess = tf.Session(graph=a_graph)
b_sess = tf.Session(graph=b_graph)
with a_sess.as_default():
    with a_graph.as_default():
        a_agent = DualAgent(a_sess, args.model_name_1, chkpnt=args.chkpnt1)
with b_sess.as_default():
    with b_graph.as_default():
        b_agent = MCTSAgent(b_sess,
                            args.model_name_2,
                            chkpnt=args.chkpnt2,
                            epsilon=args.epsilon)
print("ARENA: DUAL %s-%d VERSES MCTS %s-%d" %
      (a_agent.model_name, a_agent.chkpnt, b_agent.model_name, b_agent.chkpnt))

stat = np.zeros(shape=(2, 2), dtype=np.int)
for i in range(args.num_games):
    t = time()
    s = State()
    a_is_black = (i % 2 == 0)
    while not s.end and len(s.history) < 225:
Example #5
from dual_agent import DualAgent

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("id", type=int)
args = parser.parse_args()
ID = args.id

NUM_GAMES_GEN = 200
NUM_SIM = 100

X = []
V = []

with tf.Session() as sess:
    agent = DualAgent(sess, "treesup")
    for i in range(NUM_GAMES_GEN):
        print("sample %d" % i)
        s = State()
        while not s.end and len(s.history) < 225:
            s.move(*agent.get_safe_action(s))
        assert not s.violate, "self-play game reached an illegal (violated) state"
        # Pick a random position from the finished game, replay the moves up to
        # it, and estimate its value with NUM_SIM simulated continuations.
        which = np.random.randint(len(s.history))
        h = s.history
        s = State()
        for k in range(which):
            s.move(*h[k])
        score = 0
        for j in range(NUM_SIM):
            t = s.copy()
            while not t.end and len(t.history) < 225:
Example #6
parser.add_argument("model_name_2", type=str)
parser.add_argument("--max_width", "-w", type=int)
parser.add_argument("--max_depth", "-d", type=int)
parser.add_argument("--chkpnt1", "-c1", type=int)
parser.add_argument("--chkpnt2", "-c2", type=int)
parser.add_argument("--num_games", "-n", default=100, type=int)
parser.add_argument('--save', '-s', action='store_true')
args = parser.parse_args()

a_graph = tf.Graph()
b_graph = tf.Graph()
a_sess = tf.Session(graph=a_graph)
b_sess = tf.Session(graph=b_graph)
with a_sess.as_default():
    with a_graph.as_default():
        a_agent = DualAgent(a_sess, args.model_name_1, chkpnt=args.chkpnt1)
with b_sess.as_default():
    with b_graph.as_default():
        b_agent = MinimaxNetworkAgent(b_sess,
                                      args.model_name_2,
                                      chkpnt=args.chkpnt2,
                                      max_width=args.max_width,
                                      max_depth=args.max_depth)
print("ARENA: DUAL %s-%d VERSUS MNA %s-%d" %
      (a_agent.model_name, a_agent.chkpnt, b_agent.model_name, b_agent.chkpnt))

stat = np.zeros(shape=(2, 2), dtype=int)
for i in range(args.num_games):
    t = time()
    s = State()
    a_is_black = (i % 2 == 0)
    while not s.end and len(s.history) < 225:
        if a_is_black == (s.player > 0):
            with a_sess.as_default():
                s.move(*a_agent.get_safe_action(s))
        else:
Example #7
parser = argparse.ArgumentParser()
parser.add_argument("model_name_1", type=str)
parser.add_argument("model_name_2", type=str)
parser.add_argument("--chkpnt1", "-c1", type=int)
parser.add_argument("--chkpnt2", "-c2", type=int)
parser.add_argument("--num_games", "-n", default=100, type=int)
parser.add_argument('--save', '-s', action='store_true')
args = parser.parse_args()

a_graph = tf.Graph()
b_graph = tf.Graph()
a_sess = tf.Session(graph=a_graph)
b_sess = tf.Session(graph=b_graph)
with a_sess.as_default():
    with a_graph.as_default():
        a_agent = DualAgent(a_sess, args.model_name_1, chkpnt=args.chkpnt1)
with b_sess.as_default():
    with b_graph.as_default():
        b_agent = DualAgent(b_sess, args.model_name_2, chkpnt=args.chkpnt2)
print("ARENA: %s-%d VERSES %s-%d" % (a_agent.model_name, a_agent.chkpnt, b_agent.model_name, b_agent.chkpnt))

stat = np.zeros(shape=(2, 2), dtype=np.int)
for i in range(args.num_games):
    t = time()
    s = State()
    a_is_black = (i % 2 == 0)
    while not s.end and len(s.history) < 225:
        if a_is_black == (s.player > 0):
            with a_sess.as_default():
                s.move(*a_agent.get_action(s))
        else: