import tensorflow as tf

from env import Environment
from policy_agent import PolicyAgent
from model import Model
from args import parse_args

# Sanity-check script: restore a saved model from `args.source` and print the
# action probabilities it produces for one fixed, hand-written input vector.

_, CONFIG, args = parse_args('check-js')

env = Environment()
env.add_opponent(PolicyAgent(env, policy='downsize'))

# Single hard-coded observation fed to the model (a batch of one).
_PROBE_INPUT = [
    # available actions
    1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 4, 0, 2, 1, 1
]

with tf.Session() as sess:
    model = Model(CONFIG, env, sess, None, name='haggle')

    # Load the checkpointed variables into the freshly built graph.
    saver = tf.train.Saver(max_to_keep=10000, name='test')
    saver.restore(sess, args.source)

    # One-element batch: the probe input, the model's zero RNN state, and the
    # "first round" flag set.
    feed = {
        model.input: [_PROBE_INPUT],
        model.rnn_state: [model.zero_state],
        model.is_first_round: [True],
    }
    action_probs = sess.run(model.action_probs, feed_dict=feed)
    print(action_probs)
import numpy as np

from env import Environment
from policy_agent import PolicyAgent

# Benchmark script: pit a 'downsize' agent against an 'estimator' opponent
# and print the benchmark result, then print the environment's state right
# after a reset.

env = Environment()

# The opponent living inside the environment.
estimator = PolicyAgent(env, policy='estimator')
env.add_opponent(estimator)

# The agent being benchmarked, evaluated with times=3000.
challenger = PolicyAgent(env, policy='downsize')
bench_result = env.bench(challenger, times=3000)
print(bench_result)

# Dump the state produced immediately after a reset, for inspection.
env.reset()
fresh_state = env._make_state()
print(fresh_state)
# Scripted opponent policies used both for benchmarking and for training.
_SCRIPTED_POLICIES = ('half_or_all', 'downsize', 'estimator')

# One benchmark environment per scripted policy, keyed by policy name; each
# environment gets exactly one PolicyAgent opponent playing that policy.
bench_env = {_name: Environment() for _name in _SCRIPTED_POLICIES}
for _name in _SCRIPTED_POLICIES:
    bench_env[_name].add_opponent(
        PolicyAgent(bench_env[_name], policy=_name))

# Training environments: CONCURRENCY of them, each holding all three scripted
# opponents (added in the same order as listed above).
for i in range(CONCURRENCY):
    env = Environment()
    for _name in _SCRIPTED_POLICIES:
        env.add_opponent(PolicyAgent(env, policy=_name))
    env_list.append(env)

writer = tf.summary.FileWriter(LOG_DIR)

# NOTE(review): the original indentation was lost; everything below is assumed
# to be nested inside the session block -- confirm against the real file.
with tf.Session() as sess:
    print('Initializing model')
    model = Model(CONFIG, env_list[0], sess, writer, name='haggle')

    # Record the graph for the summary writer and set up checkpointing.
    writer.add_graph(tf.get_default_graph())
    saver = tf.train.Saver(max_to_keep=100, name=RUN_NAME)

    for i in range(NUM_ANTAGONISTS):
        print('Initializing antagonist {}'.format(i))