Example no. 1
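The constructor below builds the agent's TensorFlow 1.x graph: placeholders for the run-time inputs, a temperature-scaled softmax policy, a policy-gradient loss minimized with Adam, and the helper components attached to the agent during training.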
    def __init__(self, network, config, reward_engine):
        super().__init__(network, config)
        # Epoch and iteration counters looked up from the base class's counter table
        self.epoch_counter = self.counters[config['AgentEpochCounter']]
        self.iter_counter = self.counters[config['AgentIterationCounter']]
        # Placeholders: values fed into the graph at run time
        self.temp = tf.placeholder(shape=[1], dtype=tf.float32)           # softmax temperature
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)  # observed reward
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)    # index of the chosen action
        # Algorithm: policy distribution and policy-gradient loss
        self.output = tf.reshape(self.output_layer, [-1])              # flatten scores to a 1-D vector
        self.prob_dist = tf.nn.softmax(self.output / self.temp)        # temperature-scaled policy
        self.weight = tf.slice(self.output, self.action_holder, [1])   # score of the chosen action
        # log() assumes the sliced score is positive (e.g. a sigmoid-activated output layer)
        self.loss = -(tf.math.log(self.weight) * self.reward_holder)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config['AgentLearningRate'])
        self.update = self.optimizer.minimize(self.loss)

        # Processors: helper components attached to this agent
        self.exploration = ML.Exploration(self)
        self.exp_buffer = ML.ExperienceBuffer(self)
        self.state_space = ML.StateSpace(self)
        self.action_space = ML.ActionSpace(self)
        self.reward_engine = ML.RewardEngine(self, reward_engine)
        self.recorder = ML.Recorder(self)
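
To show how the placeholders and ops above fit together, here is a minimal sketch of one training step, assuming TensorFlow 1.x. Only the attributes defined in the listing are taken from the source; the session handling, the state_in input placeholder, and the reward_engine.evaluate method are hypothetical names used for illustration.

import numpy as np
import tensorflow as tf  # TensorFlow 1.x, as in the listing above

def train_step(sess, agent, feed_state, temperature=1.0):
    # Sample an action from the temperature-scaled softmax policy.
    probs = sess.run(agent.prob_dist,
                     feed_dict={agent.state_in: feed_state,  # hypothetical input placeholder
                                agent.temp: [temperature]})
    action = int(np.random.choice(len(probs), p=probs))
    # Score the action; evaluate() stands in for whatever the RewardEngine exposes.
    reward = agent.reward_engine.evaluate(action)
    # One gradient step on the policy-gradient loss for this (action, reward) pair.
    _, loss = sess.run([agent.update, agent.loss],
                       feed_dict={agent.state_in: feed_state,
                                  agent.temp: [temperature],
                                  agent.action_holder: [action],
                                  agent.reward_holder: [reward]})
    return action, reward, float(loss[0])

Since loss has shape [1], minimize() sums it when computing gradients, so each call performs a single policy-gradient step for one (action, reward) pair.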