Exemple #1
0
  def __init__(self, config):
    """Set up the session, the three networks, the queues and a first game.

    Args:
      config: Settings object. This constructor reads `prior_network`,
        `rollout_network`, `value_network`, `prior_temperature`,
        `rollout_temperature` and `use_symmetry` from it, and hands it to
        `util.run_directory`.
    """
    self.config = config

    # Session whose GPU options have `allow_growth` switched on
    gpu_options = tf.GPUOptions(allow_growth=True)
    self.session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # Policy network configured with the prior scope and temperature
    self.prior_network = PolicyNetwork(
        scope=config.prior_network,
        temperature=config.prior_temperature,
        use_symmetry=config.use_symmetry)

    # The rollout policy is built with `reuse` set when it is configured
    # with the same scope as the prior network
    same_scope_as_prior = config.prior_network == config.rollout_network
    self.rollout_network = PolicyNetwork(
        scope=config.rollout_network,
        temperature=config.rollout_temperature,
        reuse=same_scope_as_prior,
        use_symmetry=config.use_symmetry)

    self.value_network = ValueNetwork(
        scope=config.value_network,
        use_symmetry=config.use_symmetry)

    # Restore every network from the run directory, in creation order
    run_dir = util.run_directory(config)
    networks = (self.prior_network, self.rollout_network, self.value_network)
    for network in networks:
      util.restore_network_or_fail(self.session, run_dir, network)

    # One queue per network; only the rollout and value queues get a maxsize
    self.prior_queue = AllQueue()
    self.rollout_queue = AllQueue(maxsize=16)
    self.value_queue = AllQueue(maxsize=16)

    self.new_game()
Exemple #2
0
def create_config():
    """Normalize the parsed command-line flags in place.

    Operates directly on `flags.FLAGS`: rewrites the game name, fills in
    derived settings (`num_actions`, `run_dir`) and converts the flags that
    are re-parsed from their string form.

    NOTE(review): the visible definition ends without a `return`; the
    normalized values live on the global `flags.FLAGS` object. Confirm
    against the full file whether more statements follow.
    """
    config = flags.FLAGS

    # Split the CamelCase game name into lower-case words joined by '_',
    # e.g. 'SpaceInvaders' -> 'space_invaders'
    words = re.findall(r'[A-Z]?[a-z]+', config.game)
    config.game = '_'.join(word.lower() for word in words)
    config.num_actions = Atari.num_actions(config)

    # SECURITY(review): these flags are evaluated as Python expressions.
    # That is only acceptable while flag values come from a trusted command
    # line; consider `ast.literal_eval` if plain literals are all that is
    # ever passed.
    config.frameskip = eval(str(config.frameskip))
    config.input_shape = eval(str(config.input_shape))
    config.exploration_frame_shape = eval(str(config.exploration_frame_shape))

    # Reward clipping is switched off whenever reward scaling is enabled
    config.reward_clipping = (
        config.reward_clipping and not config.reward_scaling)
    config.run_dir = util.run_directory(config)

    if not config.bootstrapped:
        config.num_bootstrap_heads = 1

    # `async` is a reserved keyword since Python 3.7, so the flag cannot be
    # read with attribute syntax (`config.async` is a SyntaxError); look it
    # up by name instead.
    if getattr(config, 'async') is None:
        config.num_threads = 1
    def __init__(self, config):
        """Prepare the session, the value network and its training ops.

        Args:
            config: Settings object; passed to `util.run_directory` and
                `PositionTargets`.
        """
        self.config = config

        run_dir = util.run_directory(config)
        self.run_dir = run_dir
        self.position_targets = PositionTargets(config, run_dir)

        # Session whose GPU options have `allow_growth` switched on
        gpu_options = tf.GPUOptions(allow_growth=True)
        session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.session = session

        # Value network under the 'value' scope, restored or initialized
        # from the run directory
        value_network = ValueNetwork('value')
        self.value_network = value_network
        util.restore_or_initialize_network(session, run_dir, value_network)

        # Train ops
        # presumably `create_train_op` defines `self.training_scope`, which
        # is read just below -- confirm in the enclosing class
        self.create_train_op(value_network)
        self.writer = tf.summary.FileWriter(run_dir)
        training_scope_name = self.training_scope.name
        util.restore_or_initialize_scope(session, run_dir,
                                         training_scope_name)
Exemple #4
0
    def __init__(self, config):
        """Create the random, exploratory and playout players.

        Args:
            config: Settings object; `exploratory_network` and
                `playout_network` are read from it and it is passed to
                `util.run_directory`.
        """
        self.config = config

        # One session, handed to both policy players; it is not stored on
        # `self`
        gpu_options = tf.GPUOptions(allow_growth=True)
        session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        self.random_player = RandomPlayer()

        exploratory_network = PolicyNetwork(config.exploratory_network)
        self.exploratory_network = exploratory_network
        self.exploratory_player = PolicyPlayer(exploratory_network, session)

        # The playout network is built with `reuse` set when both roles are
        # configured with the same network name
        same_network = config.exploratory_network == config.playout_network
        playout_network = PolicyNetwork(
            config.playout_network, reuse=same_network)
        self.playout_network = playout_network
        self.playout_player = PolicyPlayer(playout_network, session)

        # Restore both networks from the run directory, in creation order
        run_dir = util.run_directory(config)
        self.run_dir = run_dir
        for network in (exploratory_network, playout_network):
            util.restore_network_or_fail(session, run_dir, network)
  def __init__(self, config):
    """Prepare the session, policy network, training ops and opponents.

    Args:
      config: Settings object; passed to `util.run_directory`.
    """
    self.config = config

    run_dir = util.run_directory(config)
    self.run_dir = run_dir

    # Session whose GPU options have `allow_growth` switched on
    gpu_options = tf.GPUOptions(allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    self.session = session

    # Policy network under the 'policy' scope plus the player wrapping it;
    # the player is created before the network is restored or initialized
    policy_network = PolicyNetwork('policy')
    self.policy_network = policy_network
    self.policy_player = PolicyPlayer(policy_network, session)
    util.restore_or_initialize_network(session, run_dir, policy_network)

    # Train ops
    # presumably `create_train_op` defines `self.training_scope`, which is
    # read just below -- confirm in the enclosing class
    self.create_train_op(policy_network)
    self.writer = tf.summary.FileWriter(run_dir)
    training_scope_name = self.training_scope.name
    util.restore_or_initialize_scope(session, run_dir, training_scope_name)

    # Fixed opponents to start with; `restore_networks` is then given the
    # session and run directory
    initial_opponents = [
        RandomPlayer(),
        RandomThreatPlayer(),
        MaxThreatPlayer(),
    ]
    self.opponents = Opponents(initial_opponents)
    self.opponents.restore_networks(session, run_dir)