Example #1
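The first snippet exercises the environment end to end: it registers an estimator-policy agent as an opponent, benchmarks a downsize-policy agent against it over 3000 runs, then resets the environment and prints its raw state.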
import numpy as np

from env import Environment
from policy_agent import PolicyAgent

# Build the game environment.
env = Environment()

# Register an agent driven by the 'estimator' policy as an opponent.
estimator = PolicyAgent(env, policy='estimator')
env.add_opponent(estimator)

# Benchmark a 'downsize'-policy agent against the registered opponent
# over 3000 runs and print the result.
print(env.bench(PolicyAgent(env, policy='downsize'), times=3000))

# Reset the environment and print its raw internal state.
env.reset()
print(env._make_state())
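Judging by the benchmark loop in Example #2 below, env.bench() appears to return a mapping of metric names to numeric values, so the first print statement emits those metrics directly.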
Example #2
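The second snippet is a fragment from inside a training loop; the excerpt starts mid-statement, inside a summary.value.add(...) call that logs a no-consensus score. Each iteration propagates that score to the training environments, runs a batch of exploration games, and on a fixed schedule checkpoints the session, benchmarks the model, and snapshots the current weights into an antagonist pool.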
            simple_value=no_consensus_score)
        writer.add_summary(summary, model.writer_step)

        # Propagate the updated no-consensus score to every training environment.
        for env in env_list:
            env.no_consensus_score = no_consensus_score

        # Play another batch of 1024 exploration games and advance the game offset.
        model.explore(env_list, game_count=1024, game_off=game_off)
        game_off += 1024

        # Periodically checkpoint the session.
        if EPOCH % SAVE_EVERY == 0:
            saver.save(sess, os.path.join(SAVE_DIR, '{:08d}'.format(EPOCH)))

        # Periodically benchmark the model and log every metric to TensorBoard.
        if EPOCH % BENCH_EVERY == 0:
            print('Running benchmark...')
            for tag, env in bench_env.items():
                bench = env.bench(model)

                # TODO(indutny): move logging to model?
                summary = tf.Summary()
                for key, value in bench.items():
                    summary.value.add(tag='bench_{}/{}'.format(tag, key),
                                      simple_value=value)
                writer.add_summary(summary, model.writer_step)

        # Antagonist maintenance only starts once ANTAGONIST_EPOCH is reached.
        if EPOCH < ANTAGONIST_EPOCH:
            continue

        # Periodically snapshot the current weights into the antagonist pool.
        if EPOCH % ANTAGONIST_UPDATE_EVERY == 0:
            print('Adding new antagonist to the pool')
            weights = model.save_weights(sess)
            ANTAGONIST_WEIGHTS.append({'epoch': EPOCH, 'weights': weights})
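
For reference, the manual TensorBoard logging pattern used twice in this fragment (once for the no-consensus score, once for the benchmark metrics) can be reproduced in isolation. The sketch below is a minimal, runnable illustration assuming TensorFlow 1.x; the log directory, metric names, and step value are stand-ins rather than values taken from the project.

import tensorflow as tf

# Stand-in metrics and step counter; the fragment gets these from
# env.bench(model) and model.writer_step instead.
bench = {'win_rate': 0.42, 'mean_reward': 1.7}
step = 0

# TF1-style event writer; the log directory here is arbitrary.
writer = tf.summary.FileWriter('/tmp/bench_logs')

# Build a tf.Summary proto by hand and pass it to the writer,
# the same way the benchmark-logging block above does.
summary = tf.Summary()
for key, value in bench.items():
    summary.value.add(tag='bench_test/{}'.format(key), simple_value=value)
writer.add_summary(summary, step)
writer.flush()

Constructing the tf.Summary proto manually lets the loop log plain Python numbers without adding summary ops to the graph, which is presumably why the snippet reports env.bench() results this way.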