예제 #1
0
 def _build_dependencies(self):
     """Instantiate the latent-projection layers.

     Creates the `z_mean` and `z_log_sigma` fully-connected heads, each
     projecting onto a space of `self.latent_dim` units and named after
     the attribute it is stored in.
     """
     for head_name in ('z_mean', 'z_log_sigma'):
         setattr(self, head_name,
                 FullyConnected(self.mode,
                                num_units=self.latent_dim,
                                name=head_name))
예제 #2
0
파일: base.py 프로젝트: chandu088/p
        def graph_fn(mode, features, labels=None):
            """Build the Q-network heads: advantage `a`, value `v` and `q`.

            Forwards `labels` to the wrapped graph_fn only when that
            function declares a `labels` argument.
            """
            accepts_labels = 'labels' in get_arguments(self._graph_fn)
            kwargs = {'labels': labels} if accepts_labels else {}

            graph_outputs = self._graph_fn(mode=mode, features=features, **kwargs)
            a = FullyConnected(mode, num_units=self.num_actions)(graph_outputs)
            v = None

            if self.dueling is None:
                # No dueling architecture: Q is just the action head.
                q = tf.identity(a)
            else:
                # Dueling heads: Q = V(s) + A(s, a)
                v = FullyConnected(mode, num_units=1)(graph_outputs)
                if self.dueling == 'mean':
                    q = v + (a - tf.reduce_mean(a, axis=1, keep_dims=True))
                elif self.dueling == 'max':
                    q = v + (a - tf.reduce_max(a, axis=1, keep_dims=True))
                elif self.dueling == 'naive':
                    q = v + a
                elif self.dueling is True:
                    q = tf.identity(a)
                else:
                    raise ValueError("The value `{}` provided for "
                                     "dueling is unsupported.".format(self.dueling))

            return QModelSpec(graph_outputs=graph_outputs, a=a, v=v, q=q)
예제 #3
0
        def graph_fn(mode, inputs):
            """Build the Q-network heads: advantage `a`, value `v` and `q`.

            Returns:
                dict with keys `graph_results` (wrapped graph output),
                `a` (advantage head), `v` (state-value head, `None`
                unless dueling is enabled) and `q` (combined Q value).

            Raises:
                ValueError: if `self.dueling` is set to an unsupported value.
            """
            graph_results = self._graph_fn(mode=mode, inputs=inputs)
            a = FullyConnected(mode, num_units=self.num_actions)(graph_results)
            v = None
            q = a
            if self.dueling is not None:
                # Dueling architecture: Q = V(s) + A(s, a)
                v = FullyConnected(mode, num_units=1)(graph_results)
                if self.dueling == 'mean':
                    q = v + (a - tf.reduce_mean(a, axis=1, keep_dims=True))
                elif self.dueling == 'max':
                    q = v + (a - tf.reduce_max(a, axis=1, keep_dims=True))
                elif self.dueling == 'naive':
                    q = v + a
                elif self.dueling is True:
                    q = a
                else:
                    # Previously an unsupported value fell through silently
                    # (leaving q == a while still building the value head);
                    # fail loudly instead, matching the sibling QModel graph_fn.
                    raise ValueError("The value `{}` provided for "
                                     "dueling is unsupported.".format(self.dueling))

            return {'graph_results': graph_results, 'a': a, 'v': v, 'q': q}
예제 #4
0
파일: base.py 프로젝트: ysheng312/polyaxon
 def graph_fn(mode, inputs):
     """Build policy-gradient outputs: action head, distribution and values.

     For continuous action spaces the distribution is built from the
     action head concatenated with `exp(a) + 1`; otherwise it is built
     directly from the action head.
     """
     graph_outputs = self._graph_fn(mode=mode, inputs=inputs)
     a = FullyConnected(mode, num_units=self.num_actions)(graph_outputs)
     if not self.is_continuous:
         values = tf.identity(a)
         distribution = self._build_distribution(values=a)
     else:
         values = tf.concat(values=[a, tf.exp(a) + 1], axis=0)
         distribution = self._build_distribution(values=values)
     return PGModelSpec(graph_outputs=graph_outputs,
                        a=a,
                        distribution=distribution,
                        dist_values=values)
예제 #5
0
파일: base.py 프로젝트: chandu088/p
        def graph_fn(mode, features, labels=None):
            """Build policy-gradient outputs (action head + distribution).

            `labels` is forwarded to the wrapped graph_fn only when that
            function declares a `labels` argument.
            """
            accepts_labels = 'labels' in get_arguments(self._graph_fn)
            kwargs = {'labels': labels} if accepts_labels else {}

            graph_outputs = self._graph_fn(mode=mode, features=features, **kwargs)
            a = FullyConnected(mode, num_units=self.num_actions)(graph_outputs)
            if not self.is_continuous:
                values = tf.identity(a)
                distribution = self._build_distribution(values=a)
            else:
                # Continuous case: distribution values are the action head
                # concatenated with exp(a) + 1.
                values = tf.concat(values=[a, tf.exp(a) + 1], axis=0)
                distribution = self._build_distribution(values=values)
            return PGModelSpec(
                graph_outputs=graph_outputs, a=a, distribution=distribution, dist_values=values)
예제 #6
0
    def _build_loss(self, results, features, labels):
        """Creates the loss operation

        Builds the NAF (Normalized Advantage Functions) Q-learning loss:
        a state-dependent positive-definite matrix P(s) = L L^T is
        predicted from the graph results, the quadratic advantage
        A(s, a) = -(a - mu)^T P (a - mu) / 2 is formed around the
        predicted action, and Q = V(s) + A is regressed towards the
        one-step bootstrapped target from the target network.

        Returns:
             tuple `(losses, loss)`:
                `losses` are the per-batch losses.
                `loss` is a single scalar tensor to minimize.
        """
        reward, action, done = labels['reward'], labels['action'], labels[
            'done']

        # Lower triangle matrix
        # lt_size counts the entries of a num_actions x num_actions lower
        # triangle, diagonal included.
        lt_size = self.num_actions * (self.num_actions + 1) // 2
        lt_entries = FullyConnected(self.mode, num_units=lt_size)(
            self._train_results['graph_results'])
        # The first num_actions entries form the diagonal of L.
        # NOTE(review): tf.exp is applied to the whole diagonal matrix, so
        # off-diagonal zeros become exp(0) = 1 before the strictly-lower
        # columns are added below — verify this is intended (exponentiating
        # only the diagonal entries may have been the goal).
        lt_matrix = tf.exp(tf.map_fn(tf.diag,
                                     lt_entries[:, :self.num_actions]))

        if self.num_actions > 1:
            # Fill the strictly-lower part column by column from the
            # remaining predicted entries; each column is left-padded with
            # an increasing number of zeros to land below the diagonal.
            # NOTE(review): `xrange` is Python 2 — presumably provided by a
            # six.moves-style import elsewhere in this file; confirm.
            offset = self.num_actions
            l_columns = list()
            for zeros, size in enumerate(xrange(self.num_actions - 1, 0, -1),
                                         1):
                column = tf.pad(lt_entries[:, offset:offset + size],
                                ((0, 0), (zeros, 0)))
                l_columns.append(column)
                offset += size
            lt_matrix += tf.stack(l_columns, 1)

        # P = LL^T
        p_matrix = tf.matmul(lt_matrix, tf.transpose(lt_matrix, (0, 2, 1)))

        # Deviation of the taken action from the predicted action mean.
        action_diff = action - self._train_results['a']

        # A = (a - mean)P(a - mean) / 2
        # Negated so the advantage peaks (at 0) when the taken action equals
        # the predicted mean.
        advantage = -tf.matmul(
            tf.expand_dims(action_diff, 1),
            tf.matmul(p_matrix, tf.expand_dims(action_diff, 2))) / 2
        advantage = tf.squeeze(advantage, 2)

        # Q = V(s) + A(s, a)
        # Targets bootstrap from the target network's value of the *next*
        # transition: drop the final transition (no successor) and zero the
        # bootstrap term on terminal states via the `done` mask.
        train_q_value = (self._train_results['v'] + advantage)[:-1]
        target_q_value = (reward[:-1] +
                          (1.0 - tf.cast(done[:-1], tf.float32)) *
                          self.discount * self._target_results['v'][1:])

        return super(NAFModel, self)._build_loss(train_q_value, features,
                                                 target_q_value)