def __init__(self, *args, **kwargs):
    """Initialise the agent and its Monte-Carlo search tree.

    Pulls the tree-search options ``epsilon`` and ``multiplier`` out of
    ``kwargs`` (when present) before forwarding the remaining arguments to
    ``DualAgent.__init__``. Falsy option values fall back to the defaults
    (0.2 and 3), matching the behaviour of an absent keyword.
    """
    # pop() removes the key only when present, so DualAgent.__init__ never
    # sees options it does not understand.
    self.epsilon = kwargs.pop("epsilon", None) or 0.2
    self.multiplier = kwargs.pop("multiplier", None) or 3
    DualAgent.__init__(self, *args, **kwargs)
    # self.sess is set up by DualAgent.__init__ above.
    self.tree = MonteCarloTree(self.sess, self.epsilon, self.multiplier)
def get_random_action(state):
    """Sample a move from the policy network's distribution over the board.

    Draws a flat cell index from the 225-way policy produced by the
    module-level ``agent`` (a DualAgent) for ``state``, then converts it to
    (row, col) coordinates on the 15x15 board.

    NOTE(review): relies on a module-level ``agent`` global rather than a
    parameter — confirm ``agent`` is initialised before this is called.
    """
    y_p = DualAgent.get_dist(agent, state)
    # Flat index into the 15*15 = 225 cells, weighted by the policy.
    c = np.random.choice(225, p=y_p)
    # Fix: the `dims=` keyword was deprecated in NumPy 1.16 and removed in
    # 1.21; passing the shape positionally works on all NumPy versions.
    x, y = np.unravel_index(c, (15, 15))
    # Sanity check: the policy should put zero mass on occupied cells.
    assert state.board[x, y] == 0, "total prob %f that prob %f" % (y_p.sum(), y_p[c])
    return x, y
# Supervised training script for the "dualsup" dual (policy + value) network,
# fitting minimax-generated data loaded from a MATLAB file.
# NOTE(review): this chunk appears truncated — the actual training call for
# the non-evaluation steps is not visible here; confirm against the full file.
from agent import Agent
from scipy.io import loadmat
from dual_agent import DualAgent
from dual_network import export_meta
import numpy as np
import tensorflow as tf
import data_util as util

""" begin training """
with tf.Session() as sess:
    export_meta("dualsup")
    agent = DualAgent(sess, "dualsup")
    # Load training data produced by the minimax player (MATLAB .mat file).
    matlab = loadmat("dual_minimax")
    print("processing data")
    # X: board positions, Y: move targets, V: value targets (first row).
    data = util.TenaryData(matlab["X"], matlab["Y"], matlab["V"][0])
    del matlab  # free the raw MATLAB arrays once wrapped
    print("processing complete")
    for i in range(4001):
        x_b, y_b, v_b = data.next_batch(256)
        if i % 10 == 0:
            # Every 10th step: evaluate on a larger held-out batch instead.
            # NOTE(review): this overwrites the freshly drawn training batch;
            # presumably the (unseen) training call below reuses these names.
            x_b, y_b, v_b = data.test_batch(1024)
            pl, vl, rl, l = agent.loss(x_b, y_b, v_b)
            print("\nstep %d analysis" % i)
            print(">>> policy loss: %f" % pl)
            print(">>> value loss: %f" % vl)
            print(">>> regularization loss: %f" % rl)
            print(">>> loss: %f" % l)
            print(">>> accuracy: %f" % agent.accuracy(x_b, y_b))
# Arena script: pit a plain DualAgent (model 1) against an MCTSAgent
# (model 2) for --num_games games, alternating who plays black.
# NOTE(review): this chunk is truncated mid game-loop; the move-selection
# body of the inner while is in the unseen continuation.
# NOTE(review): `np.int` below was removed in NumPy 1.24 — use `int` or
# `np.int64` when this file is next touched.
parser.add_argument("model_name_1", type=str)
parser.add_argument("model_name_2", type=str)
parser.add_argument("--epsilon", "-e", type=float)  # MCTS exploration parameter
parser.add_argument("--chkpnt1", "-c1", type=int)
parser.add_argument("--chkpnt2", "-c2", type=int)
parser.add_argument("--num_games", "-n", default=100, type=int)
parser.add_argument('--save', '-s', action='store_true')
args = parser.parse_args()
# Separate graphs/sessions so the two models' variables cannot collide.
a_graph = tf.Graph()
b_graph = tf.Graph()
a_sess = tf.Session(graph=a_graph)
b_sess = tf.Session(graph=b_graph)
with a_sess.as_default():
    with a_graph.as_default():
        a_agent = DualAgent(a_sess, args.model_name_1, chkpnt=args.chkpnt1)
with b_sess.as_default():
    with b_graph.as_default():
        b_agent = MCTSAgent(b_sess, args.model_name_2, chkpnt=args.chkpnt2, epsilon=args.epsilon)
print("ARENA: DUAL %s-%d VERSES MCTS %s-%d" % (a_agent.model_name, a_agent.chkpnt, b_agent.model_name, b_agent.chkpnt))
# 2x2 tally: presumably indexed by (agent, colour) — confirm against the
# unseen bookkeeping code below.
stat = np.zeros(shape=(2, 2), dtype=np.int)
for i in range(args.num_games):
    t = time()  # per-game wall-clock start
    s = State()
    # Alternate colours: agent A plays black on even-numbered games.
    a_is_black = (i % 2 == 0)
    # Play until the game ends or the 15x15 board (225 cells) is full.
    while not s.end and len(s.history) < 225:
# Value-target generation script: self-play a game with the "treesup"
# DualAgent, rewind to a random position, then estimate that position's
# value by Monte-Carlo rollouts (NUM_SIM playouts).
# NOTE(review): this chunk is truncated mid rollout loop; the playout body
# and the use of X/V/ID are in the unseen continuation.
from dual_agent import DualAgent
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("id", type=int)  # presumably a shard/worker id — confirm
args = parser.parse_args()
ID = args.id
NUM_GAMES_GEN = 200  # number of sampled positions to generate
NUM_SIM = 100        # rollouts per sampled position
X = []  # collected positions
V = []  # collected value estimates
with tf.Session() as sess:
    agent = DualAgent(sess, "treesup")
    for i in range(NUM_GAMES_GEN):
        print("sample %d" % i)
        # Self-play a full game (or until the 15x15 board fills up).
        s = State()
        while not s.end and len(s.history) < 225:
            s.move(*agent.get_safe_action(s))
        assert not s.violate, "we have a violated game"
        # Pick a random ply and replay the game up to that point.
        which = np.random.randint(len(s.history))
        h = s.history
        s = State()
        for k in range(which):
            s.move(*h[k])
        # Estimate the position's value with NUM_SIM rollouts from a copy.
        score = 0
        for j in range(NUM_SIM):
            t = s.copy()
            while not t.end and len(t.history) < 225:
# Arena script: pit a plain DualAgent (model 1) against a
# MinimaxNetworkAgent (model 2), alternating colours each game.
# NOTE(review): this chunk is truncated at the dangling `else:` — agent B's
# move is made in the unseen continuation. The model_name_1 argument is also
# registered outside this view.
# NOTE(review): `np.int` below was removed in NumPy 1.24 — use `int` or
# `np.int64` when this file is next touched.
parser.add_argument("model_name_2", type=str)
parser.add_argument("--max_width", "-w", type=int)  # minimax branching limit
parser.add_argument("--max_depth", "-d", type=int)  # minimax depth limit
parser.add_argument("--chkpnt1", "-c1", type=int)
parser.add_argument("--chkpnt2", "-c2", type=int)
parser.add_argument("--num_games", "-n", default=100, type=int)
parser.add_argument('--save', '-s', action='store_true')
args = parser.parse_args()
# Separate graphs/sessions so the two models' variables cannot collide.
a_graph = tf.Graph()
b_graph = tf.Graph()
a_sess = tf.Session(graph=a_graph)
b_sess = tf.Session(graph=b_graph)
with a_sess.as_default():
    with a_graph.as_default():
        a_agent = DualAgent(a_sess, args.model_name_1, chkpnt=args.chkpnt1)
with b_sess.as_default():
    with b_graph.as_default():
        b_agent = MinimaxNetworkAgent(b_sess, args.model_name_2, chkpnt=args.chkpnt2, max_width=args.max_width, max_depth=args.max_depth)
print("ARENA: DUAL %s-%d VERSES MNA %s-%d" % (a_agent.model_name, a_agent.chkpnt, b_agent.model_name, b_agent.chkpnt))
# 2x2 tally: presumably indexed by (agent, colour) — confirm against the
# unseen bookkeeping code below.
stat = np.zeros(shape=(2, 2), dtype=np.int)
for i in range(args.num_games):
    t = time()  # per-game wall-clock start
    s = State()
    # Alternate colours: agent A plays black on even-numbered games.
    a_is_black = (i % 2 == 0)
    while not s.end and len(s.history) < 225:
        # A moves when its colour matches the side to play (player sign).
        if a_is_black == (s.player > 0):
            with a_sess.as_default():
                s.move(*a_agent.get_safe_action(s))
        else:
# Arena script: pit two DualAgent checkpoints against each other,
# alternating colours each game.
# NOTE(review): this chunk is truncated at the dangling `else:` — agent B's
# move and the result bookkeeping are in the unseen continuation.
# NOTE(review): `np.int` below was removed in NumPy 1.24 — use `int` or
# `np.int64` when this file is next touched.
parser = argparse.ArgumentParser()
parser.add_argument("model_name_1", type=str)
parser.add_argument("model_name_2", type=str)
parser.add_argument("--chkpnt1", "-c1", type=int)
parser.add_argument("--chkpnt2", "-c2", type=int)
parser.add_argument("--num_games", "-n", default=100, type=int)
parser.add_argument('--save', '-s', action='store_true')
args = parser.parse_args()
# Separate graphs/sessions so the two models' variables cannot collide.
a_graph = tf.Graph()
b_graph = tf.Graph()
a_sess = tf.Session(graph=a_graph)
b_sess = tf.Session(graph=b_graph)
with a_sess.as_default():
    with a_graph.as_default():
        a_agent = DualAgent(a_sess, args.model_name_1, chkpnt=args.chkpnt1)
with b_sess.as_default():
    with b_graph.as_default():
        b_agent = DualAgent(b_sess, args.model_name_2, chkpnt=args.chkpnt2)
print("ARENA: %s-%d VERSES %s-%d" % (a_agent.model_name, a_agent.chkpnt, b_agent.model_name, b_agent.chkpnt))
# 2x2 tally: presumably indexed by (agent, colour) — confirm against the
# unseen bookkeeping code below.
stat = np.zeros(shape=(2, 2), dtype=np.int)
for i in range(args.num_games):
    t = time()  # per-game wall-clock start
    s = State()
    # Alternate colours: agent A plays black on even-numbered games.
    a_is_black = (i % 2 == 0)
    while not s.end and len(s.history) < 225:
        # A moves when its colour matches the side to play (player sign).
        if a_is_black == (s.player > 0):
            with a_sess.as_default():
                s.move(*a_agent.get_action(s))
        else: