Ejemplos de reopen_variable_scope en Python, ejemplos de tfsnippet.utils.reopen_variable_scope en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: vae.py Proyecto: Huilin-Zhu/Robust-Anomaly-Detection-for-Multivariate-Time-Series-through-Stochastic-Recurrent-Neural-Network

    def __init__(self, p_z, p_x_given_z, q_z_given_x, h_for_p_x, h_for_q_z,
                 z_group_ndims=1, x_group_ndims=1, is_reparameterized=None,
                 name=None, scope=None):
        """
        Initialize the :class:`VAE`.

        Args:
            p_z (Distribution): The prior :math:`p(z)`, given as a
                distribution instance.
            p_x_given_z: A distribution class or a
                :class:`DistributionFactory` object for :math:`p(x|h(z))`.
            q_z_given_x: A distribution class or a
                :class:`DistributionFactory` object for :math:`q(z|h(x))`.
            h_for_p_x (Module): The hidden network :math:`h(z)` feeding
                :math:`p(x|h(z))`.  Its output must be a ``dict[str, any]``
                holding the parameters for `p_x_given_z`.
            h_for_q_z (Module): The hidden network :math:`h(x)` feeding
                :math:`q(z|h(x))`.  Its output must be a ``dict[str, any]``
                holding the parameters for `q_z_given_x`.
            z_group_ndims (int or tf.Tensor): `group_ndims` for `z`.
                (default 1)
            x_group_ndims (int or tf.Tensor): `group_ndims` for `x`.
                (default 1)
            is_reparameterized (bool or None): Whether `z` should be
                re-parameterized.  (default :obj:`None`, which follows the
                settings of the z distributions.)
            name (str): Optional name of this module
                (argument of :class:`~tfsnippet.utils.VarScopeObject`).
            scope (str): Optional scope of this module
                (argument of :class:`~tfsnippet.utils.VarScopeObject`).

        Raises:
            TypeError: If `p_z` is not a `Distribution`, or if either of
                `h_for_p_x` / `h_for_q_z` is not callable.

        See Also:
            :meth:`tfsnippet.distributions.Distribution.log_prob` for
                contents about `group_ndims`.
        """
        # Validate everything before the variable scope is created.
        if not isinstance(p_z, Distribution):
            raise TypeError('`p_z` must be an instance of `Distribution`')
        if not callable(h_for_p_x):
            raise TypeError('`h_for_p_x` must be an instance of `Module` or '
                            'a callable object')
        if not callable(h_for_q_z):
            raise TypeError('`h_for_q_z` must be an instance of `Module` or '
                            'a callable object')
        super(VAE, self).__init__(name=name, scope=scope)

        # A plain callable (anything that is not a `VarScopeObject`) is
        # wrapped in a `Lambda` created inside this object's variable scope.
        if isinstance(h_for_p_x, VarScopeObject):
            p_x_net = h_for_p_x
        else:
            with reopen_variable_scope(self.variable_scope):
                p_x_net = Lambda(h_for_p_x, name='h_for_p_x')

        if isinstance(h_for_q_z, VarScopeObject):
            q_z_net = h_for_q_z
        else:
            with reopen_variable_scope(self.variable_scope):
                q_z_net = Lambda(h_for_q_z, name='h_for_q_z')

        # Distributions and their factories.
        self._p_z = p_z
        self._p_x_given_z = p_x_given_z
        self._q_z_given_x = q_z_given_x

        # Hidden networks (possibly wrapped above).
        self._h_for_p_x = p_x_net
        self._h_for_q_z = q_z_net

        # Remaining settings are stored as given.
        self._z_group_ndims, self._x_group_ndims = z_group_ndims, x_group_ndims
        self._is_reparameterized = is_reparameterized

Ejemplo n.º 2

0

Mostrar archivo

Archivo: categorical.py Proyecto: paojianghu/tfsnippet

    def __init__(self,
                 logits=None,
                 probs=None,
                 dtype=None,
                 group_event_ndims=None,
                 check_numerics=False,
                 name=None,
                 default_name=None):
        """
        Construct the :class:`OneHotCategorical`.

        Args:
            logits: Forwarded unchanged to the base class constructor.
            probs: Forwarded unchanged to the base class constructor.
            dtype: Data type stored on this object.  Anything accepted by
                :func:`tf.as_dtype`; defaults to ``tf.int32`` when
                :obj:`None`.
            group_event_ndims: Forwarded to the base class constructor.
            check_numerics: Forwarded to the base class constructor.
            name: Forwarded to the base class constructor.
            default_name: Forwarded to the base class constructor.
        """
        if dtype is None:
            dtype = tf.int32
        else:
            dtype = tf.as_dtype(dtype)

        super(OneHotCategorical,
              self).__init__(logits=logits,
                             probs=probs,
                             group_event_ndims=group_event_ndims,
                             check_numerics=check_numerics,
                             name=name,
                             default_name=default_name)
        self._dtype = dtype

        with reopen_variable_scope(self.variable_scope):
            with tf.name_scope('init'):
                # Derive the value shape from the last axis of the logits
                # exposed by the base class.
                logits_tensor = self.logits
                self._static_value_shape = logits_tensor.get_shape()[-1:]
                if is_deterministic_shape(self._static_value_shape):
                    self._dynamic_value_shape = tf.constant(
                        self._static_value_shape.as_list(), dtype=tf.int32)
                else:
                    # Use the same tensor as the static branch.  The raw
                    # `logits` argument defaults to None (presumably when
                    # only `probs` is given), and `tf.shape(None)` fails.
                    self._dynamic_value_shape = tf.shape(logits_tensor)[-1:]

Ejemplo n.º 3

0

Mostrar archivo

    def __init__(self,
                 model,
                 n_z=1024,
                 mcmc_iteration=10,
                 batch_size=32,
                 feed_dict=None,
                 last_point_only=True,
                 name=None,
                 scope=None):
        """
        Construct the :class:`DonutPredictor`.

        Args:
            model: Model object; its `x_dims` attribute fixes the second
                dimension of both input placeholders.
            n_z: Stored as given.  (default 1024)
            mcmc_iteration: Stored as given.  (default 10)
            batch_size: Stored as given.  (default 32)
            feed_dict: Optional mapping; its items are copied into a new
                dict.  An empty dict is used when it is :obj:`None`.
            last_point_only: Stored as given.  (default :obj:`True`)
            name: Forwarded to the base class constructor.
            scope: Forwarded to the base class constructor.
        """
        super(DonutPredictor, self).__init__(name=name, scope=scope)
        self._model = model
        self._n_z = n_z
        self._mcmc_iteration = mcmc_iteration
        self._batch_size = batch_size
        # Keep a private copy of the user supplied feed dict, if any.
        self._feed_dict = (
            {} if feed_dict is None else dict(six.iteritems(feed_dict)))
        self._last_point_only = last_point_only

        # Re-enter this object's variable scope (and its original name scope)
        # so that the placeholders are created under it.
        with reopen_variable_scope(self.variable_scope):
            # Input placeholders.
            self._input_x = tf.placeholder(
                dtype=tf.float32,
                shape=[None, model.x_dims],
                name='input_x')
            self._input_y = tf.placeholder(
                dtype=tf.int32,
                shape=[None, model.x_dims],
                name='input_y')

            # Outputs of interest; both start out unset.
            self._refactor_probability = None
            self._refactor_probability_without_y = None

Ejemplo n.º 4

0

Mostrar archivo

Archivo: base.py Proyecto: 897615138/tfsnippet-jill

    def __init__(self, n_layers, dtype=tf.float32, name=None, scope=None):
        """
        Construct a new :class:`MultiLayerFlow`.

        Args:
            n_layers (int): Number of flow layers; must be at least 1.
            dtype: The data type of the transformed `y`.
            name (str): Optional name of this :class:`MultiLayerFlow`
                (argument of :class:`~tfsnippet.utils.VarScopeObject`).
            scope (str): Optional scope of this :class:`MultiLayerFlow`
                (argument of :class:`~tfsnippet.utils.VarScopeObject`).

        Raises:
            ValueError: If `n_layers` is smaller than 1.
        """
        super(MultiLayerFlow, self).__init__(
            dtype=dtype, name=name, scope=scope)

        layer_count = int(n_layers)
        if layer_count < 1:
            raise ValueError('`n_layers` must be larger than 0.')
        self._n_layers = layer_count
        self._layer_params = []

        # Each layer's parameters are created in its own variable scope,
        # named after the layer index, under this object's variable scope.
        with reopen_variable_scope(self.variable_scope):
            for layer_index in range(layer_count):
                with tf.variable_scope('_{}'.format(layer_index)):
                    params = self._create_layer_params(layer_index)
                    self._layer_params.append(params)

Ejemplo n.º 5

0

Mostrar archivo

    def __init__(self,
                 model,
                 n_z=1024,
                 batch_size=32,
                 feed_dict=None,
                 last_point_only=True,
                 name=None,
                 scope=None):
        """
        Construct the :class:`Predictor`.

        Args:
            model: Model object; its `window_length` and `x_dims`
                attributes fix the shapes of the input placeholders.
            n_z: Stored as given.  (default 1024)
            batch_size: Stored as given.  (default 32)
            feed_dict: Optional mapping; its items are copied into a new
                dict.  An empty dict is used when it is :obj:`None`.
            last_point_only: Stored as given.  (default :obj:`True`)
            name: Forwarded to the base class constructor.
            scope: Forwarded to the base class constructor.
        """
        super(Predictor, self).__init__(name=name, scope=scope)
        self._model = model
        self._n_z = n_z
        self._batch_size = batch_size
        # Keep a private copy of the user supplied feed dict, if any.
        self._feed_dict = (
            {} if feed_dict is None else dict(six.iteritems(feed_dict)))
        self._last_point_only = last_point_only

        with reopen_variable_scope(self.variable_scope):
            # Input placeholders, created under this object's scope.
            self._input_x = tf.placeholder(
                dtype=tf.float32,
                shape=[None, model.window_length, model.x_dims],
                name='input_x')
            self._input_y = tf.placeholder(
                dtype=tf.int32,
                shape=[None, model.window_length],
                name='input_y')

            # Outputs of interest; both start out unset.
            self._score = None
            self._score_without_y = None

Ejemplo n.º 6

0

Mostrar archivo

Archivo: gamma.py Proyecto: paojianghu/tfsnippet

    def __init__(self,
                 alpha,
                 beta,
                 group_event_ndims=None,
                 check_numerics=False,
                 name=None,
                 default_name=None):
        """
        Construct the :class:`Gamma` distribution.

        Args:
            alpha: First parameter; converted to a tensor.  Its preferred
                dtype decides the dtype used for both parameters.
            beta: Second parameter; converted to a tensor of the same dtype.
            group_event_ndims: Forwarded to the base class constructor.
            check_numerics: Forwarded to the base class constructor.
            name: Forwarded to the base class constructor.
            default_name: Forwarded to the base class constructor.

        Raises:
            TypeError: If the preferred dtype of `alpha` is not floating.
            ValueError: If the static shapes of `alpha` and `beta` cannot
                be broadcast against each other.
        """
        # The dtype is taken from `alpha` only; `beta` is cast to match.
        param_dtype = get_preferred_tensor_dtype(alpha)
        if not param_dtype.is_floating:
            raise TypeError('Gamma distribution parameters must be float '
                            'numbers.')

        super(Gamma, self).__init__(group_event_ndims=group_event_ndims,
                                    check_numerics=check_numerics,
                                    name=name,
                                    default_name=default_name)

        with reopen_variable_scope(self.variable_scope), \
                tf.name_scope('init'):
            # Parameter tensors.
            alpha = tf.convert_to_tensor(alpha, dtype=param_dtype)
            self._alpha = alpha
            beta = tf.convert_to_tensor(beta, dtype=param_dtype)
            self._beta = beta

            # Static batch shape: fails early if the shapes are incompatible.
            alpha_shape = alpha.get_shape()
            beta_shape = beta.get_shape()
            try:
                self._static_batch_shape = tf.broadcast_static_shape(
                    alpha_shape, beta_shape)
            except ValueError:
                raise ValueError(
                    '`alpha` and `beta` should be '
                    'broadcastable to match each other (%r vs %r).' %
                    (alpha_shape, beta_shape))

            # Dynamic batch shape, computed in the graph.
            self._dynamic_batch_shape = tf.broadcast_dynamic_shape(
                tf.shape(alpha), tf.shape(beta))

Ejemplo n.º 7

0

Mostrar archivo

Archivo: prediction.py Proyecto: WenweiGu/HUST-GWW-FYP

 def _get_score_without_y(self):
     """
     Return the cached result of ``model.get_score`` called without `y`.

     The value is built on first use, inside this object's variable scope
     and a ``score_without_y`` name scope, and then cached on the instance.
     """
     if self._score_without_y is not None:
         return self._score_without_y

     with reopen_variable_scope(self.variable_scope):
         with tf.name_scope('score_without_y'):
             self._score_without_y = self.model.get_score(
                 x=self._input_x,
                 n_z=self._n_z,
                 last_point_only=self._last_point_only)
     return self._score_without_y

Ejemplo n.º 8

0

Mostrar archivo

Archivo: vae.py Proyecto: ducnx/OmniAnomaly

 def __call__(self, inputs, **kwargs):
     """
     Apply ``_forward`` to `inputs` inside this object's variable scope.

     Args:
         inputs: Passed through to ``_forward``.
         **kwargs: Extra keyword arguments passed through to ``_forward``.

     Returns:
         Whatever ``_forward`` returns.
     """
     # Per the original author's note: reopening the variable scope also
     # reopens the original name scope, so repeated calls on one instance
     # put their operations in the same place.  The nested `forward` name
     # scope gives each call a unique name scope, which is needed for
     # ``tf.variable_scope(default_name=...)`` to work with variable reuse.
     with reopen_variable_scope(self.variable_scope), \
             tf.name_scope('forward'):
         return self._forward(inputs, **kwargs)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: prediction.py Proyecto: WenweiGu/HUST-GWW-FYP

 def _get_score(self):
     """
     Return the cached result of ``model.get_score`` called with `y`.

     The value is built on first use, inside this object's variable scope
     and a ``score`` name scope, and then cached on the instance.
     """
     if self._score is not None:
         return self._score

     with reopen_variable_scope(self.variable_scope):
         with tf.name_scope('score'):
             self._score = self.model.get_score(
                 x=self._input_x,
                 y=self._input_y,
                 n_z=self._n_z,
                 mcmc_iteration=self._mcmc_iteration,
                 last_point_only=self._last_point_only)
     return self._score

Ejemplo n.º 10

0

Mostrar archivo

Archivo: prediction.py Proyecto: WenweiGu/HUST-GWW-FYP

 def _get_score_without_y(self):
     """
     Return the cached pair produced by ``model.get_score`` without `y`.

     On first use the two values returned by ``model.get_score`` are stored
     as ``_score_without_y`` and ``_q_net_z``.

     Returns:
         tuple: ``(self._score_without_y, self._q_net_z)``.
     """
     if self._score_without_y is not None:
         return self._score_without_y, self._q_net_z

     with reopen_variable_scope(self.variable_scope):
         with tf.name_scope("score_without_y"):
             score, q_net_z = self.model.get_score(
                 x=self._input_x,
                 n_z=self._n_z,
                 last_point_only=self._last_point_only,
             )
             self._score_without_y, self._q_net_z = score, q_net_z
     return self._score_without_y, self._q_net_z

Ejemplo n.º 11

0

Mostrar archivo

    def test_basic(self):
        """
        Exercise `reopen_variable_scope` from several nesting positions.

        Each `_check_vs` call receives a variable name followed by four
        expected strings.  Judging from their values these are presumably the
        variable scope name, the name scope, the full variable name and the
        op name -- TODO confirm against `_check_vs`.
        """
        # Capture the root scope so that it can be reopened later.
        root = tf.get_variable_scope()

        with tf.variable_scope('a') as a:
            self._check_vs('v1', 'a', 'a/', 'a/v1:0', 'a/op:0')

            # Reopening the root scope from inside `a` gives un-prefixed names.
            with reopen_variable_scope(root):
                self._check_vs('v2', '', '', 'v2:0', 'op:0')

                # Reopening `a` is expected to land back in `a/`; the op
                # name is expected to be `op_1` there.
                with reopen_variable_scope(a):
                    self._check_vs('v3', 'a', 'a/', 'a/v3:0', 'a/op_1:0')

        with tf.variable_scope('a/b') as b:
            self._check_vs('v4', 'a/b', 'a/b/', 'a/b/v4:0', 'a/b/op:0')

            with reopen_variable_scope(root):
                self._check_vs('v5', '', '', 'v5:0', 'op_1:0')

            with reopen_variable_scope(a):
                self._check_vs('v6', 'a', 'a/', 'a/v6:0', 'a/op_2:0')

                # Reopening the scope that is already open is also covered.
                with reopen_variable_scope(a):
                    self._check_vs('v7', 'a', 'a/', 'a/v7:0', 'a/op_3:0')

        # A scope can be reopened after its original `with` block has exited.
        with reopen_variable_scope(b):
            self._check_vs('v8', 'a/b', 'a/b/', 'a/b/v8:0', 'a/b/op_1:0')

Ejemplo n.º 12

0

Mostrar archivo

Archivo: prediction.py Proyecto: zhangdabao96/OmniAnomaly

 def _get_score_without_y(self):
     """
     Return the cached pair produced by ``model.get_score`` without `y`.

     On first use the output of ``model.run_gcn`` (called with the `x` and
     adjacency inputs) is passed to ``model.get_score`` as ``x_feature``.
     The two returned values are stored as ``_score_without_y`` and
     ``_q_net_z``.

     Returns:
         tuple: ``(self._score_without_y, self._q_net_z)``.
     """
     if self._score_without_y is not None:
         return self._score_without_y, self._q_net_z

     with reopen_variable_scope(self.variable_scope):
         with tf.name_scope('score_without_y'):
             x_feature = self.model.run_gcn(self._input_x, self._input_adj)
             score, q_net_z = self.model.get_score(
                 x=self._input_x,
                 x_feature=x_feature,
                 n_z=self._n_z,
                 last_point_only=self._last_point_only
             )
             self._score_without_y, self._q_net_z = score, q_net_z
     return self._score_without_y, self._q_net_z

Ejemplo n.º 13

0

Mostrar archivo

 def _get_refactor_probability_without_y(self):
     """
     Get the reconstruction probability when `y` is not available.

     The value is built on first use, inside this object's variable scope
     and a ``score_without_y`` name scope, and then cached on the instance.

     Returns:
         The reconstruction probability obtained without `y`.
     """
     if self._refactor_probability_without_y is not None:
         return self._refactor_probability_without_y

     with reopen_variable_scope(self.variable_scope):
         with tf.name_scope('score_without_y'):
             probability = self.model.get_refactor_probability(
                 window=self._input_x,
                 n_z=self._n_z,
                 last_point_only=self._last_point_only)
             self._refactor_probability_without_y = probability
     return self._refactor_probability_without_y

Ejemplo n.º 14

0

Mostrar archivo

Archivo: variable_saver.py Proyecto: mengyuan404/tfsnippet

    def __init__(self,
                 variables,
                 save_dir,
                 max_versions=2,
                 filename='variables.dat',
                 latest_file='latest',
                 save_meta=True,
                 name=None,
                 scope=None):
        """
        Construct the :class:`VariableSaver`.

        Args:
            variables (collections.Iterable[tf.Variable] or dict[str, any]):
                List of variables, or dict of variables with explicit keys,
                which should be saved and restored.  A non-dict iterable is
                materialized into a list.
            save_dir (str): Directory where to place the saved variables.
                It is stored as an absolute path.
            max_versions (int): Maximum versions to keep in the directory
                (Default is 2). At least 2 versions should be kept, in order to
                prevent corrupted checkpoint files caused by IO failure.
            filename (str): Name of the files of variable values (default is
                ``variables.dat``).
            latest_file (str): Name of the file which organizes the checkpoint
                versions (default is ``latest``).
            save_meta (bool): Whether or not to save meta graph (default
                is :obj:`True`).
            name (str): Name of this :class:`VariableSaver`
                (argument of :class:`~tfsnippet.utils.VarScopeObject`).
            scope (str): Scope of this :class:`VariableSaver`
                (argument of :class:`~tfsnippet.utils.VarScopeObject`).

        Raises:
            ValueError: If `max_versions` is smaller than 2.
        """
        if not isinstance(variables, dict):
            variables = list(variables)
        if max_versions < 2:
            raise ValueError('At least 2 versions should be kept')

        self.variables = variables
        self.save_dir = os.path.abspath(save_dir)
        self.filename = filename
        self.max_versions = max_versions
        self.latest_file = latest_file
        self.save_meta = save_meta

        # Pass `name` and `scope` by keyword.  The previous positional call
        # `__init__(scope, name)` depended on the base class parameter order
        # and would swap the two if the base signature is `(name, scope)`.
        super(VariableSaver, self).__init__(name=name, scope=scope)

        # The underlying saver is created under this object's variable scope.
        with reopen_variable_scope(self.variable_scope):
            self._saver = tf.train.Saver(var_list=self.variables,
                                         max_to_keep=self.max_versions,
                                         name='saver')

Ejemplo n.º 15

0

Mostrar archivo

 def _get_refactor_probability(self):
     """
     Get the reconstruction probability.

     The value is built on first use, inside this object's variable scope
     and a ``score`` name scope, and then cached on the instance.

     Returns:
         The reconstruction probability.
     """
     if self._refactor_probability is not None:
         return self._refactor_probability

     with reopen_variable_scope(self.variable_scope):
         with tf.name_scope('score'):
             probability = self.model.get_refactor_probability(
                 window=self._input_x,
                 missing=self._input_y,
                 n_z=self._n_z,
                 mcmc_iteration=self._mcmc_iteration,
                 last_point_only=self._last_point_only)
             self._refactor_probability = probability
     return self._refactor_probability

Ejemplo n.º 16

0

Mostrar archivo

    def __init__(self,
                 hidden_net_p_x_z,
                 hidden_net_q_z_x,
                 x_dims,
                 z_dims,
                 std_epsilon=1e-4,
                 name=None,
                 scope=None):
        """
        Construct the :class:`Donut` model on top of a :class:`VAE`.

        Args:
            hidden_net_p_x_z: Hidden network for :math:`p(x|z)`, passed to
                ``wrap_params_net`` as ``h_for_dist``.
            hidden_net_q_z_x: Hidden network for :math:`q(z|x)`, passed to
                ``wrap_params_net`` as ``h_for_dist``.
            x_dims: Number of `x` dimensions; must be a positive integer.
            z_dims: Number of `z` dimensions; must be a positive integer.
            std_epsilon: Passed as ``epsilon`` to ``softplus_std`` for both
                std layers.  (default 1e-4)
            name: Forwarded to the base class constructor.
            scope: Forwarded to the base class constructor.

        Raises:
            ValueError: If `x_dims` or `z_dims` is not a positive integer.
        """
        if not is_integer(x_dims) or x_dims <= 0:
            raise ValueError('`x_dims`必须为正整数')
        if not is_integer(z_dims) or z_dims <= 0:
            raise ValueError('`z_dims`必须为正整数')

        super(Donut, self).__init__(name=name, scope=scope)
        with reopen_variable_scope(self.variable_scope):
            # p(z): normal distribution whose mean is all zeros and whose
            # std is all ones, both of length `z_dims`.
            prior = Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims]))

            # Hidden network for p(x|h(z)): mean and std layers stacked on
            # top of the p(x|z) hidden network.
            x_params_net = partial(
                wrap_params_net,
                h_for_dist=hidden_net_p_x_z,
                mean_layer=partial(tf.layers.dense,
                                   units=x_dims,
                                   name='x_mean'),
                std_layer=partial(softplus_std,
                                  units=x_dims,
                                  epsilon=std_epsilon,
                                  name='x_std'))
            h_for_p_x = Lambda(x_params_net, name='p_x_given_z')

            # Hidden network for q(z|h(x)): mean and std layers stacked on
            # top of the q(z|x) hidden network.
            z_params_net = partial(
                wrap_params_net,
                h_for_dist=hidden_net_q_z_x,
                mean_layer=partial(tf.layers.dense,
                                   units=z_dims,
                                   name='z_mean'),
                std_layer=partial(softplus_std,
                                  units=z_dims,
                                  epsilon=std_epsilon,
                                  name='z_std'))
            h_for_q_z = Lambda(z_params_net, name='q_z_given_x')

            # Both p(x|h(z)) and q(z|h(x)) are normal distributions.
            self._vae = VAE(
                p_z=prior,
                p_x_given_z=Normal,
                q_z_given_x=Normal,
                h_for_p_x=h_for_p_x,
                h_for_q_z=h_for_q_z)
        self._x_dims = x_dims
        self._z_dims = z_dims

Ejemplo n.º 17

0

Mostrar archivo

Archivo: model.py Proyecto: HuichaoHong/monitor

    def __init__(self,
                 h_for_p_x,
                 h_for_q_z,
                 x_dims,
                 z_dims,
                 std_epsilon=1e-4,
                 name=None,
                 scope=None):
        """
        Construct the :class:`Donut` model on top of a :class:`VAE`.

        Args:
            h_for_p_x: Hidden network for :math:`p(x|z)`; first stage of the
                ``Sequential`` feeding the `x` parameter mapper.
            h_for_q_z: Hidden network for :math:`q(z|x)`; first stage of the
                ``Sequential`` feeding the `z` parameter mapper.
            x_dims: Number of `x` dimensions; must be a positive integer.
            z_dims: Number of `z` dimensions; must be a positive integer.
            std_epsilon: Constant added to the softplus output of each std
                layer.  (default 1e-4)
            name: Forwarded to the base class constructor.
            scope: Forwarded to the base class constructor.

        Raises:
            ValueError: If `x_dims` or `z_dims` is not a positive integer.
        """
        if not is_integer(x_dims) or x_dims <= 0:
            raise ValueError('`x_dims` must be a positive integer')
        if not is_integer(z_dims) or z_dims <= 0:
            raise ValueError('`z_dims` must be a positive integer')

        super(Donut, self).__init__(name=name, scope=scope)
        with reopen_variable_scope(self.variable_scope):
            # p(z): normal prior with zero mean and unit std.
            prior = Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims]))

            # Maps the hidden features of p(x|z) to `mean` and `std`.
            # NOTE(review): the `std` callable constructs a new Dense layer
            # object each time it is invoked -- confirm this is intended.
            x_param_mapper = DictMapper(
                {
                    'mean':
                    K.layers.Dense(x_dims),
                    'std':
                    lambda x: (std_epsilon + K.layers.Dense(
                        x_dims, activation=tf.nn.softplus)(x))
                },
                name='p_x_given_z')
            p_x_net = Sequential([h_for_p_x, x_param_mapper])

            # Maps the hidden features of q(z|x) to `mean` and `std`
            # (same construction as above, sized by `z_dims`).
            z_param_mapper = DictMapper(
                {
                    'mean':
                    K.layers.Dense(z_dims),
                    'std':
                    lambda z: (std_epsilon + K.layers.Dense(
                        z_dims, activation=tf.nn.softplus)(z))
                },
                name='q_z_given_x')
            q_z_net = Sequential([h_for_q_z, z_param_mapper])

            self._vae = VAE(
                p_z=prior,
                p_x_given_z=Normal,
                q_z_given_x=Normal,
                h_for_p_x=p_x_net,
                h_for_q_z=q_z_net,
            )
        self._x_dims = x_dims
        self._z_dims = z_dims

Ejemplo n.º 18

0

Mostrar archivo

Archivo: _helper.py Proyecto: haowen-xu/tfsnippet-pre-alpha

    def __init__(self, p, group_event_ndims=None, check_numerics=False):
        """
        Construct the test distribution from its single parameter `p`.

        Args:
            p: Parameter converted to a tensor of its preferred dtype.  Its
                last axis gives the value shape and the leading axes give
                the batch shape.
            group_event_ndims: Forwarded to the base class constructor.
            check_numerics: Forwarded to the base class constructor.
        """
        super(_MyDistribution, self).__init__(
            group_event_ndims=group_event_ndims,
            check_numerics=check_numerics)

        with reopen_variable_scope(self.variable_scope):
            p = tf.convert_to_tensor(p, dtype=get_preferred_tensor_dtype(p))
            self.p = p
            static_shape = p.get_shape()

            # Value shape: the last axis of `p`.
            self._static_value_shape = static_shape[-1:]
            self._dynamic_value_shape = tf.convert_to_tensor(
                get_dynamic_tensor_shape(p, lambda s: s[-1:]))

            # Batch shape: every axis of `p` except the last one.
            self._static_batch_shape = static_shape[:-1]
            self._dynamic_batch_shape = tf.convert_to_tensor(
                get_dynamic_tensor_shape(p, lambda s: s[:-1]))

Ejemplo n.º 19

0

Mostrar archivo

Archivo: base.py Proyecto: shliujing/tfsnippet

    def build(self, input=None):
        """
        Build the layer, creating all required variables.

        Args:
            input (Tensor or list[Tensor] or None): If :meth:`build` is called
                within :meth:`apply`, it will be the input tensor(s).
                Otherwise if it is called separately, it will be :obj:`None`.

        Raises:
            RuntimeError: If the layer has already been built.
            ValueError: If the layer requires `input` to build but none
                was given.
        """
        if self._has_built:
            already_built = 'Layer has already been built: {!r}'.format(self)
            raise RuntimeError(already_built)

        input_missing = input is None
        if self._build_require_input and input_missing:
            layer_type = self.__class__.__name__
            raise ValueError(
                '`{}` requires `input` to build.'.format(layer_type))

        # The flag is set only after `_build` returns without raising.
        with reopen_variable_scope(self.variable_scope):
            self._build(input)
            self._has_built = True

Ejemplo n.º 20

0

Mostrar archivo

Archivo: model.py Proyecto: WenweiGu/DONUT-SMD

    def __init__(self, h_for_p_x, h_for_q_z, x_dims, z_dims, std_epsilon=1e-4,
                 name=None, scope=None) -> object:
        """
        Construct the :class:`Donut` model on top of a :class:`VAE`.

        Args:
            h_for_p_x: Hidden network for :math:`p(x|z)`, passed to
                ``wrap_params_net`` as ``h_for_dist``.
            h_for_q_z: Hidden network for :math:`q(z|x)`, passed to
                ``wrap_params_net`` as ``h_for_dist``.
            x_dims: Number of `x` dimensions; must be a positive integer.
            z_dims: Number of `z` dimensions; must be a positive integer.
            std_epsilon: Passed as ``epsilon`` to ``softplus_std`` for both
                std layers.  (default 1e-4)
            name: Forwarded to the base class constructor.
            scope: Forwarded to the base class constructor.

        Raises:
            ValueError: If `x_dims` or `z_dims` is not a positive integer.
        """
        if not is_integer(x_dims) or x_dims <= 0:
            raise ValueError('`x_dims` must be a positive integer')
        if not is_integer(z_dims) or z_dims <= 0:
            raise ValueError('`z_dims` must be a positive integer')

        super(Donut, self).__init__(name=name, scope=scope)
        with reopen_variable_scope(self.variable_scope):
            # p(z): normal prior with zero mean and unit std.
            prior = Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims]))

            # Parameter network for p(x|h(z)): mean and std layers on top of
            # the given hidden network.
            x_params_net = partial(
                wrap_params_net,
                h_for_dist=h_for_p_x,
                mean_layer=partial(
                    tf.layers.dense, units=x_dims, name='x_mean'),
                std_layer=partial(
                    softplus_std, units=x_dims, epsilon=std_epsilon,
                    name='x_std'))
            p_x_net = Lambda(x_params_net, name='p_x_given_z')

            # Parameter network for q(z|h(x)), built the same way.
            z_params_net = partial(
                wrap_params_net,
                h_for_dist=h_for_q_z,
                mean_layer=partial(
                    tf.layers.dense, units=z_dims, name='z_mean'),
                std_layer=partial(
                    softplus_std, units=z_dims, epsilon=std_epsilon,
                    name='z_std'))
            q_z_net = Lambda(z_params_net, name='q_z_given_x')

            self._vae = VAE(
                p_z=prior,
                p_x_given_z=Normal,
                q_z_given_x=Normal,
                h_for_p_x=p_x_net,
                h_for_q_z=q_z_net)
        self._x_dims = x_dims
        self._z_dims = z_dims

Ejemplo n.º 21

0

Mostrar archivo

    def build(self):
        """Build the model and the trainer.

        This method is called automatically when the model needs to be
        built, but calling it explicitly soon after the model object is
        constructed is recommended.  Calling it again after the first call
        is a no-op.
        """
        if not self._has_built:
            # Mark as built before doing the work, so that a second call
            # returns immediately.
            self._has_built = True

            with reopen_variable_scope(self.variable_scope):
                # Create the global step variable if there's none.
                if self._global_step is None:
                    self._global_step = tf.get_variable(
                        'global_step',
                        dtype=tf.int64,
                        trainable=False,
                        initializer=np.asarray(0, dtype=np.int64)
                    )

                # Build the model itself.
                self._build()

Ejemplo n.º 22

0

Mostrar archivo

Archivo: training.py Proyecto: JinYang88/OmniAnomaly

    def __init__(
        self,
        model,
        model_vs=None,
        n_z=None,
        feed_dict=None,
        valid_feed_dict=None,
        use_regularization_loss=True,
        max_epoch=256,
        max_step=None,
        batch_size=256,
        valid_batch_size=1024,
        valid_step_freq=100,
        initial_lr=0.001,
        lr_anneal_epochs=10,
        lr_anneal_factor=0.75,
        optimizer=tf.train.AdamOptimizer,
        optimizer_params=None,
        grad_clip_norm=50.0,
        check_numerics=True,
        name=None,
        scope=None,
    ):
        """
        Construct the :class:`Trainer` and build its training graph.

        Args:
            model: Model object.  Its `window_length` and `x_dims` attributes
                fix the shape of the input placeholder, and its
                ``get_training_loss(x=..., n_z=...)`` provides the loss.
            model_vs: Scope passed to ``get_variables_as_dict`` to collect
                the trainable variables to optimize.
            n_z: Forwarded to ``model.get_training_loss``.
            feed_dict: Optional mapping; its items are copied into a new
                dict.  An empty dict is used when it is :obj:`None`.
            valid_feed_dict: Optional mapping, copied likewise.  When it is
                :obj:`None`, the training feed dict object itself is reused
                (not a copy of it).
            use_regularization_loss (bool): Whether to add
                ``tf.losses.get_regularization_loss()`` to the loss.
            max_epoch: Stored as given; may be :obj:`None` only if
                `max_step` is specified.
            max_step: Stored as given; may be :obj:`None` only if
                `max_epoch` is specified.
            batch_size: Stored as given.
            valid_batch_size: Stored as given.
            valid_step_freq: Stored as given.
            initial_lr: Stored as given.
            lr_anneal_epochs: Stored as given.
            lr_anneal_factor: Stored as given.
            optimizer: Callable invoked with `optimizer_params` as keyword
                arguments to create the optimizer.
            optimizer_params: Optional mapping of keyword arguments for
                `optimizer`.  It is copied, and its ``learning_rate`` entry
                is always replaced by the learning rate placeholder.
            grad_clip_norm: If truthy, every gradient is clipped with
                ``tf.clip_by_norm`` to this norm.
            check_numerics (bool): Whether to wrap every gradient with
                ``tf.check_numerics``.
            name: Forwarded to the base class constructor.
            scope: Forwarded to the base class constructor.

        Raises:
            ValueError: If both `max_epoch` and `max_step` are :obj:`None`.
        """
        super(Trainer, self).__init__(name=name, scope=scope)

        # memorize the arguments
        self._model = model
        self._n_z = n_z
        if feed_dict is not None:
            self._feed_dict = dict(six.iteritems(feed_dict))
        else:
            self._feed_dict = {}
        if valid_feed_dict is not None:
            self._valid_feed_dict = dict(six.iteritems(valid_feed_dict))
        else:
            # no separate validation feed dict: share the training one
            self._valid_feed_dict = self._feed_dict
        # at least one stopping criterion is required
        if max_epoch is None and max_step is None:
            raise ValueError(
                "At least one of `max_epoch` and `max_step` " "should be specified"
            )
        self._max_epoch = max_epoch
        self._max_step = max_step
        self._batch_size = batch_size
        self._valid_batch_size = valid_batch_size
        self._valid_step_freq = valid_step_freq
        self._initial_lr = initial_lr
        self._lr_anneal_epochs = lr_anneal_epochs
        self._lr_anneal_factor = lr_anneal_factor

        # build the trainer
        with reopen_variable_scope(self.variable_scope):
            # the global step for this model
            self._global_step = tf.get_variable(
                dtype=tf.int64,
                name="global_step",
                trainable=False,
                initializer=tf.constant(0, dtype=tf.int64),
                # reuse=True,
            )

            # input placeholders
            self._input_x = tf.placeholder(
                dtype=tf.float32,
                shape=[None, model.window_length, model.x_dims],
                name="input_x",
            )
            # the learning rate is fed as a scalar placeholder
            self._learning_rate = tf.placeholder(
                dtype=tf.float32, shape=(), name="learning_rate"
            )

            # compose the training loss
            with tf.name_scope("loss"):
                loss = model.get_training_loss(x=self._input_x, n_z=n_z)
                if use_regularization_loss:
                    loss += tf.losses.get_regularization_loss()
                self._loss = loss

            # get the training variables
            train_params = get_variables_as_dict(
                scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES
            )
            self._train_params = train_params

            # create the trainer
            if optimizer_params is None:
                optimizer_params = {}
            else:
                # copy, so that the caller's mapping is not modified below
                optimizer_params = dict(six.iteritems(optimizer_params))
            # the learning rate placeholder overrides any caller supplied value
            optimizer_params["learning_rate"] = self._learning_rate
            self._optimizer = optimizer(**optimizer_params)

            # derive the training gradient
            origin_grad_vars = self._optimizer.compute_gradients(
                self._loss, list(six.itervalues(self._train_params))
            )
            grad_vars = []
            for grad, var in origin_grad_vars:
                # pairs with a missing gradient or variable are dropped
                if grad is not None and var is not None:
                    if grad_clip_norm:
                        grad = tf.clip_by_norm(grad, grad_clip_norm)
                    if check_numerics:
                        grad = tf.check_numerics(
                            grad, "gradient for {} has numeric issue".format(var.name)
                        )
                    grad_vars.append((grad, var))

            # build the training op, which depends on the ops collected in
            # `tf.GraphKeys.UPDATE_OPS`
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                self._train_op = self._optimizer.apply_gradients(
                    grad_vars, global_step=self._global_step
                )

            # the training summary in case `summary_dir` is specified:
            # one histogram per training variable, named after the variable
            # with its trailing ":<index>" removed
            with tf.name_scope("summary"):
                self._summary_op = tf.summary.merge(
                    [
                        tf.summary.histogram(v.name.rsplit(":", 1)[0], v)
                        for v in six.itervalues(self._train_params)
                    ]
                )

            # initializer for the global variables under this trainer's own
            # variable scope
            self._trainer_initializer = tf.variables_initializer(
                list(
                    six.itervalues(
                        get_variables_as_dict(
                            scope=self.variable_scope,
                            collection=tf.GraphKeys.GLOBAL_VARIABLES,
                        )
                    )
                )
            )

Ejemplo n.º 23

0

Mostrar archivo

Archivo: training.py Proyecto: 897615138/donut

    def __init__(self,
                 model,
                 model_vs=None,
                 n_z=None,
                 feed_dict=None,
                 valid_feed_dict=None,
                 missing_data_injection_rate=0.01,
                 use_regularization_loss=True,
                 max_epoch=256,
                 max_step=None,
                 batch_size=256,
                 valid_batch_size=1024,
                 valid_step_freq=100,
                 initial_lr=0.001,
                 lr_anneal_epochs=10,
                 lr_anneal_factor=0.75,
                 optimizer=tf.train.AdamOptimizer,
                 optimizer_params=None,
                 grad_clip_norm=10.0,
                 check_numerics=True,
                 name=None,
                 scope=None):
        """
        Construct the trainer and build its training graph.

        The constructor memorizes the training settings, then (inside this
        object's variable scope) creates the input placeholders, the training
        loss, the optimizer, the gradient-processing ops, the training op,
        the summary op and the variables initializer.

        Args:
            model: The model to train.  It must expose `x_dims` and
                `get_training_loss(x, y, n_z)`.
            model_vs: Scope passed to `get_variables_as_dict` in order to
                collect the trainable variables. (default :obj:`None`)
            n_z: Forwarded to `model.get_training_loss`.
            feed_dict: Optional mapping, copied into a new dict.
                (default :obj:`None`, an empty dict is used)
            valid_feed_dict: Optional mapping, copied into a new dict.
                (default :obj:`None`, the training feed dict is re-used)
            missing_data_injection_rate: Stored on the instance; not
                otherwise used in this constructor.
            use_regularization_loss (bool): Whether or not to add
                `tf.losses.get_regularization_loss()` to the training loss.
            max_epoch: Stored on the instance.  At least one of `max_epoch`
                and `max_step` must not be :obj:`None`.
            max_step: Stored on the instance.  See `max_epoch`.
            batch_size: Stored on the instance.
            valid_batch_size: Stored on the instance.
            valid_step_freq: Stored on the instance.
            initial_lr: Stored on the instance.
            lr_anneal_epochs: Stored on the instance.
            lr_anneal_factor: Stored on the instance.
            optimizer: Callable invoked as `optimizer(**optimizer_params)`,
                where `learning_rate` is always set to the learning rate
                placeholder. (default :class:`tf.train.AdamOptimizer`)
            optimizer_params: Optional mapping of extra keyword arguments
                for `optimizer`; it is copied, not modified.
            grad_clip_norm: If it evaluates to true, each gradient is clipped
                by `tf.clip_by_norm` with this norm.
            check_numerics (bool): Whether or not to wrap each gradient with
                `tf.check_numerics`.
            name: Passed to the base class constructor.
            scope: Passed to the base class constructor.

        Raises:
            ValueError: If both `max_epoch` and `max_step` are :obj:`None`.
        """
        super(DonutTrainer, self).__init__(name=name, scope=scope)
        # memorize the arguments
        self._model = model
        self._n_z = n_z
        if feed_dict is not None:
            # copy the mapping into a plain dict
            self._feed_dict = dict(six.iteritems(feed_dict))
        else:
            self._feed_dict = {}
        if valid_feed_dict is not None:
            self._valid_feed_dict = dict(six.iteritems(valid_feed_dict))
        else:
            # not specified: share the training feed dict (same object)
            self._valid_feed_dict = self._feed_dict
        self._missing_data_injection_rate = missing_data_injection_rate
        # at least one stopping limit must be given
        if max_epoch is None and max_step is None:
            raise ValueError('`max_epoch`和`max_step`至少有一个被指定')
        self._max_epoch = max_epoch
        self._max_step = max_step
        self._batch_size = batch_size
        self._valid_batch_size = valid_batch_size
        self._valid_step_freq = valid_step_freq
        self._initial_lr = initial_lr
        self._lr_anneal_epochs = lr_anneal_epochs
        self._lr_anneal_factor = lr_anneal_factor

        # build the trainer
        with reopen_variable_scope(self.variable_scope):
            # input placeholders: `x` and `y` both have `model.x_dims`
            # columns, the learning rate is a scalar
            self._input_x = tf.placeholder(dtype=tf.float32,
                                           shape=[None, model.x_dims],
                                           name='input_x')
            self._input_y = tf.placeholder(dtype=tf.int32,
                                           shape=[None, model.x_dims],
                                           name='input_y')
            self._learning_rate = tf.placeholder(dtype=tf.float32,
                                                 shape=(),
                                                 name='learning_rate')
            # compose the training loss
            with tf.name_scope('loss'):
                loss = model.get_training_loss(x=self._input_x,
                                               y=self._input_y,
                                               n_z=n_z)
                if use_regularization_loss:
                    loss += tf.losses.get_regularization_loss()
                self._loss = loss

            # get the training variables
            train_params = get_variables_as_dict(
                scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES)
            self._train_params = train_params

            # create the optimizer
            if optimizer_params is None:
                optimizer_params = {}
            else:
                optimizer_params = dict(six.iteritems(optimizer_params))
            optimizer_params['learning_rate'] = self._learning_rate
            # by default this is the Adam optimizer
            self._optimizer = optimizer(**optimizer_params)

            # derive the training gradients of the loss with respect to the
            # training variables; `compute_gradients` is the first half of
            # `minimize()` and returns a list of (gradient, variable) pairs
            origin_grad_vars = self._optimizer.compute_gradients(
                self._loss, list(six.itervalues(self._train_params)))
            grad_vars = []
            for grad, var in origin_grad_vars:
                if grad is not None and var is not None:
                    if grad_clip_norm:
                        # clip the gradient to a maximum L2 norm
                        grad = tf.clip_by_norm(grad, grad_clip_norm)
                    if check_numerics:
                        # check the gradient for NaN and Inf values
                        grad = tf.check_numerics(
                            grad, 'gradient for {} has numeric issue'.format(
                                var.name))
                    grad_vars.append((grad, var))

            # build the training op
            # the global step of this trainer, initialized to constant zero
            self._global_step = tf.get_variable(dtype=tf.int64,
                                                name='global_step',
                                                trainable=False,
                                                initializer=tf.constant(
                                                    0, dtype=tf.int64))
            # ops created inside this block only run after the ops in the
            # `UPDATE_OPS` collection have finished
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                # `apply_gradients` is the second half of `minimize()`: it
                # returns the op which applies the gradients to the
                # variables and increments `global_step`
                self._train_op = self._optimizer.apply_gradients(
                    grad_vars, global_step=self._global_step)

            # the training summary in case `summary_dir` is specified
            with tf.name_scope('summary'):
                self._summary_op = tf.summary.merge([
                    tf.summary.histogram(v.name.rsplit(':', 1)[0], v)
                    for v in six.itervalues(self._train_params)
                ])
            # initializer for the variables of this trainer
            self._trainer_initializer = tf.variables_initializer(
                list(six.itervalues(self.get_variables_as_dict())))

Ejemplo n.º 24

0

Mostrar archivo

 def __init__(self, config, name=None, scope=None):
     """
     Build the model components described by `config`.

     The following attributes of `config` are read: `posterior_flow_type`,
     `nf_layers`, `window_length`, `x_dim`, `z_dim`, `use_connected_z_p`,
     `use_connected_z_q`, `std_epsilon`, `rnn_num_hidden` and `dense_dim`.

     Args:
         config: The configuration object (kept as `self.config`).
         name: Passed to the base class constructor.
         scope: Passed to the base class constructor.
     """
     self.config = config
     super(OmniAnomaly, self).__init__(name=name, scope=scope)

     def make_rnn(rnn_name):
         # Factory of the hidden network `x -> rnn(x, ...)`; the config
         # is read lazily, i.e. when the returned function is called.
         def apply_rnn(x):
             return rnn(x=x,
                        window_length=config.window_length,
                        rnn_num_hidden=config.rnn_num_hidden,
                        hidden_dense=2,
                        dense_dim=config.dense_dim,
                        name=rnn_name)
         return apply_rnn

     with reopen_variable_scope(self.variable_scope):
         # the optional normalizing flow applied on the posterior
         if config.posterior_flow_type != 'nf':
             self._posterior_flow = None
         else:
             self._posterior_flow = spt.layers.planar_normalizing_flows(
                 config.nf_layers, name='posterior_flow')

         self._window_length = config.window_length
         self._x_dims = config.x_dim
         self._z_dims = config.z_dim

         # p(z): linear Gaussian state space model, or a plain Normal
         if config.use_connected_z_p:
             prior = TfpDistribution(
                 LinearGaussianStateSpaceModel(
                     num_timesteps=config.window_length,
                     transition_matrix=LinearOperatorIdentity(config.z_dim),
                     transition_noise=MultivariateNormalDiag(
                         scale_diag=tf.ones([config.z_dim])),
                     observation_matrix=LinearOperatorIdentity(
                         config.z_dim),
                     observation_noise=MultivariateNormalDiag(
                         scale_diag=tf.ones([config.z_dim])),
                     initial_state_prior=MultivariateNormalDiag(
                         scale_diag=tf.ones([config.z_dim]))))
         else:
             prior = Normal(mean=tf.zeros([config.z_dim]),
                            std=tf.ones([config.z_dim]))

         # q(z|x): recurrent distribution, or a plain Normal
         if config.use_connected_z_q:
             posterior_factory = partial(
                 RecurrentDistribution,
                 mean_q_mlp=partial(tf.layers.dense,
                                    units=config.z_dim,
                                    name='z_mean',
                                    reuse=tf.AUTO_REUSE),
                 std_q_mlp=partial(softplus_std,
                                   units=config.z_dim,
                                   epsilon=config.std_epsilon,
                                   name='z_std'),
                 z_dim=config.z_dim,
                 window_length=config.window_length)
         else:
             posterior_factory = Normal

         # hidden network producing the parameters of p(x|z)
         decoder_net = Lambda(
             partial(wrap_params_net,
                     h_for_dist=make_rnn('rnn_p_x'),
                     mean_layer=partial(tf.layers.dense,
                                        units=config.x_dim,
                                        name='x_mean',
                                        reuse=tf.AUTO_REUSE),
                     std_layer=partial(softplus_std,
                                       units=config.x_dim,
                                       epsilon=config.std_epsilon,
                                       name='x_std')),
             name='p_x_given_z')

         # hidden network producing the parameters of q(z|x)
         rnn_q_z = make_rnn('rnn_q_z')
         if config.use_connected_z_q:
             encoder_net = Lambda(lambda x: {'input_q': rnn_q_z(x)},
                                  name='q_z_given_x')
         else:
             encoder_net = Lambda(
                 partial(wrap_params_net,
                         h_for_dist=rnn_q_z,
                         mean_layer=partial(tf.layers.dense,
                                            units=config.z_dim,
                                            name='z_mean',
                                            reuse=tf.AUTO_REUSE),
                         std_layer=partial(softplus_std,
                                           units=config.z_dim,
                                           epsilon=config.std_epsilon,
                                           name='z_std')),
                 name='q_z_given_x')

         self._vae = VAE(p_z=prior,
                         p_x_given_z=Normal,
                         q_z_given_x=posterior_factory,
                         h_for_p_x=decoder_net,
                         h_for_q_z=encoder_net)

Ejemplo n.º 25

0

Mostrar archivo

    def __init__(self,
                 model,
                 model_vs=None,
                 n_z=None,
                 feed_dict=None,
                 valid_feed_dict=None,
                 use_regularization_loss=True,
                 max_epoch=256,
                 max_step=None,
                 batch_size=256,
                 valid_batch_size=1024,
                 valid_step_freq=100,
                 initial_lr=0.001,
                 lr_anneal_epochs=10,
                 lr_anneal_factor=0.75,
                 optimizer=tf.train.AdamOptimizer,
                 optimizer_params=None,
                 grad_clip_norm=50.0,
                 check_numerics=True,
                 name=None,
                 scope=None,
                 untrainable_variables_keyvalues=None):
        """
        Construct the trainer and build its training graph.

        Args:
            model: The model to train.  It must expose `window_length`,
                `x_dims` and `get_training_loss(x, n_z)`.
            model_vs: Scope passed to `get_variables_as_dict` in order to
                collect the trainable variables. (default :obj:`None`)
            n_z: Forwarded to `model.get_training_loss`.
            feed_dict: Optional mapping, copied into a new dict.
                (default :obj:`None`, an empty dict is used)
            valid_feed_dict: Optional mapping, copied into a new dict.
                (default :obj:`None`, the training feed dict is re-used)
            use_regularization_loss (bool): Whether or not to add
                `tf.losses.get_regularization_loss()` to the training loss.
            max_epoch: Stored on the instance.  At least one of `max_epoch`
                and `max_step` must not be :obj:`None`.
            max_step: Stored on the instance.  See `max_epoch`.
            batch_size: Stored on the instance.
            valid_batch_size: Stored on the instance.
            valid_step_freq: Stored on the instance.
            initial_lr: Stored on the instance.
            lr_anneal_epochs: Stored on the instance.
            lr_anneal_factor: Stored on the instance.
            optimizer: Callable invoked as `optimizer(**optimizer_params)`,
                where `learning_rate` is always set to the learning rate
                placeholder. (default :class:`tf.train.AdamOptimizer`)
            optimizer_params: Optional mapping of extra keyword arguments
                for `optimizer`; it is copied, not modified.
            grad_clip_norm: If it evaluates to true, each applied gradient
                is clipped by `tf.clip_by_norm` with this norm.
            check_numerics (bool): Whether or not to wrap each applied
                gradient with `tf.check_numerics`.
            name: Passed to the base class constructor.
            scope: Passed to the base class constructor.
            untrainable_variables_keyvalues: Optional iterable of strings.
                Every training variable whose name contains at least one of
                these strings is excluded from the parameter update.
                (default :obj:`None`, all training variables are updated)

        Raises:
            ValueError: If both `max_epoch` and `max_step` are :obj:`None`.
        """
        super(Trainer, self).__init__(name=name, scope=scope)

        # memorize the arguments
        self._model = model
        self._n_z = n_z
        if feed_dict is not None:
            self._feed_dict = dict(six.iteritems(feed_dict))
        else:
            self._feed_dict = {}
        if valid_feed_dict is not None:
            self._valid_feed_dict = dict(six.iteritems(valid_feed_dict))
        else:
            self._valid_feed_dict = self._feed_dict
        if max_epoch is None and max_step is None:
            raise ValueError('At least one of `max_epoch` and `max_step` '
                             'should be specified')
        self._max_epoch = max_epoch
        self._max_step = max_step
        self._batch_size = batch_size
        self._valid_batch_size = valid_batch_size
        self._valid_step_freq = valid_step_freq
        self._initial_lr = initial_lr
        self._lr_anneal_epochs = lr_anneal_epochs
        self._lr_anneal_factor = lr_anneal_factor

        # build the trainer
        with reopen_variable_scope(self.variable_scope):
            # the global step for this model
            self._global_step = tf.get_variable(dtype=tf.int64,
                                                name='global_step',
                                                trainable=False,
                                                initializer=tf.constant(
                                                    0, dtype=tf.int64))

            # input placeholders
            self._input_x = tf.placeholder(
                dtype=tf.float32,
                shape=[None, model.window_length, model.x_dims],
                name='input_x')
            self._learning_rate = tf.placeholder(dtype=tf.float32,
                                                 shape=(),
                                                 name='learning_rate')

            # compose the training loss
            with tf.name_scope('loss'):
                loss = model.get_training_loss(x=self._input_x, n_z=n_z)
                if use_regularization_loss:
                    loss += tf.losses.get_regularization_loss()
                self._loss = loss

            # get the training variables
            self._train_params = get_variables_as_dict(
                scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES)

            # create the optimizer
            if optimizer_params is None:
                optimizer_params = {}
            else:
                optimizer_params = dict(six.iteritems(optimizer_params))
            optimizer_params['learning_rate'] = self._learning_rate
            self._optimizer = optimizer(**optimizer_params)

            # derive the training gradient
            origin_grad_vars = self._optimizer.compute_gradients(
                self._loss, list(six.itervalues(self._train_params)))

            # Collect the names of the variables which should be frozen.
            # Variables are selected directly by name, instead of feeding
            # the names back into `tf.get_collection(scope=...)` (which
            # treats them as regular expressions) and relying on the
            # equality semantics of `tf.Variable` for the membership test.
            frozen_var_names = set()
            if untrainable_variables_keyvalues is not None:
                # materialize once, so one-shot iterables are supported
                keyvalues = tuple(untrainable_variables_keyvalues)
                for _, var in origin_grad_vars:
                    if var is not None and \
                            any(kv in var.name for kv in keyvalues):
                        frozen_var_names.add(var.name)

            grad_vars = []
            for grad, var in origin_grad_vars:
                if grad is None or var is None:
                    continue
                # skip frozen variables before any op is created, so no
                # dangling clip / check ops are left in the graph
                if var.name in frozen_var_names:
                    continue
                if grad_clip_norm:
                    grad = tf.clip_by_norm(grad, grad_clip_norm)
                if check_numerics:
                    grad = tf.check_numerics(
                        grad, 'gradient for {} has numeric issue'.format(
                            var.name))
                grad_vars.append((grad, var))

            # build the training op
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                self._train_op = self._optimizer.apply_gradients(
                    grad_vars, global_step=self._global_step)

            # the training summary in case `summary_dir` is specified
            with tf.name_scope('summary'):
                self._summary_op = tf.summary.merge([
                    tf.summary.histogram(v.name.rsplit(':', 1)[0], v)
                    for v in six.itervalues(self._train_params)
                ])

            # initializer for the variables
            self._trainer_initializer = tf.variables_initializer(
                list(
                    six.itervalues(
                        get_variables_as_dict(
                            scope=self.variable_scope,
                            collection=tf.GraphKeys.GLOBAL_VARIABLES))))

Ejemplo n.º 26

0

Mostrar archivo

    def __init__(self,
                 mean,
                 stddev=None,
                 logstd=None,
                 group_event_ndims=None,
                 check_numerics=False,
                 name=None,
                 scope=None):
        """
        Construct the Normal distribution.

        Args:
            mean: The mean of the distribution; its preferred dtype must be
                a floating point type.
            stddev: The standard deviation.  Exactly one of `stddev` and
                `logstd` must be specified.
            logstd: The log of the standard deviation.
            group_event_ndims: Passed to the base class constructor.
            check_numerics: Passed to the base class constructor.
            name: Passed to the base class constructor.
            scope: Passed to the base class constructor.

        Raises:
            ValueError: If none or both of `stddev`, `logstd` are given, or
                the static shapes of the parameters are not broadcastable.
            TypeError: If the parameters are not float numbers.
        """
        # exactly one of the two parameterizations must be chosen
        if (stddev is None) == (logstd is None):
            raise ValueError('One and only one of `stddev`, `logstd` should '
                             'be specified.')
        dtype = get_preferred_tensor_dtype(mean)
        if not dtype.is_floating:
            raise TypeError('Normal distribution parameters must be float '
                            'numbers.')

        super(Normal, self).__init__(
            group_event_ndims=group_event_ndims,
            check_numerics=check_numerics,
            name=name,
            scope=scope,
        )

        with reopen_variable_scope(self.variable_scope), \
                tf.name_scope('init'):
            # turn the parameters into tensors
            mean = tf.convert_to_tensor(mean, dtype=dtype)
            given_as_log = stddev is None
            if given_as_log:
                scale_param = tf.convert_to_tensor(logstd, dtype=dtype)
            else:
                scale_param = tf.convert_to_tensor(stddev, dtype=dtype)
            self._stdx = scale_param
            self._stdx_is_log = given_as_log
            self._mean = mean

            # the batch shape is the broadcast of the parameter shapes
            mean_shape = self._mean.get_shape()
            scale_shape = self._stdx.get_shape()
            try:
                self._static_batch_shape = tf.broadcast_static_shape(
                    mean_shape, scale_shape)
            except ValueError:
                raise ValueError(
                    '`mean` and `stddev`/`logstd` should be '
                    'broadcastable to match each other (%r vs %r).' %
                    (mean_shape, scale_shape))
            self._dynamic_batch_shape = tf.broadcast_dynamic_shape(
                tf.shape(self._mean), tf.shape(self._stdx))

            # derive stddev, logstd, variance and precision from whichever
            # parameterization was given
            if not self._stdx_is_log:
                self._stddev = self._stdx
                log_of_std = tf.log(self._stdx, name='logstd')
                self._logstd = self._check_numerics(log_of_std, 'logstd')
                self._var = tf.square(self._stddev, name='variance')
                one = tf.constant(1., dtype=dtype)
                inverse_var = tf.divide(one, self._var, name='precision')
                self._precision = self._check_numerics(
                    inverse_var, 'precision')
            else:
                std_tensor = tf.exp(self._stdx, name='stddev')
                self._stddev = self._check_numerics(std_tensor, 'stddev')
                self._logstd = self._stdx
                var_tensor = tf.exp(
                    tf.constant(2., dtype=dtype) * self._logstd,
                    name='variance')
                self._var = self._check_numerics(var_tensor, 'variance')
                prec_tensor = tf.exp(
                    tf.constant(-2., dtype=dtype) * self._logstd,
                    name='precision')
                self._precision = self._check_numerics(
                    prec_tensor, 'precision')

            # log-variance and log-precision
            self._logvar = tf.multiply(
                tf.constant(2., dtype=dtype), self._logstd, name='logvar')
            self._log_prec = tf.negative(
                self._logvar, name='log_precision')

Ejemplo n.º 27

0

Mostrar archivo

Archivo: checkpoint.py Proyecto: shliujing/tfsnippet

    def __init__(self, variables, save_dir, objects=None,
                 filename='checkpoint.dat', max_to_keep=None, save_meta=True,
                 name=None, scope=None):
        """
        Construct a new :class:`CheckpointSaver`.

        Args:
            variables: A list of variables, or a dict `(name -> variable)`.
                A variable might be a :class:`tf.Variable` or a
                :class:`ScheduledVariable`.
            save_dir (str): The directory, where to place the checkpoint files.
                This directory must be solely owned by this saver.
            objects (dict[str, CheckpointSavableObject]): A dict
                `(name -> savable object)`.
            filename (str): Name of the checkpoint files.
            max_to_keep (int or None): Maximum number of versions to keep.
                If :obj:`None` or `0`, keep all versions.
            save_meta (bool): Whether or not to save the graph meta in
                 checkpoint files?
            name (str): Passed to the base class constructor.
            scope (str): Passed to the base class constructor.

        Raises:
            TypeError: If an item of `variables` is not a variable, or an
                item of `objects` is not a savable object.
            KeyError: If the reserved checkpoint variable name is used as
                a key of `variables` or `objects`.
        """
        def unwrap_variable(candidate):
            # Validate `candidate`, and take the underlying variable out
            # of a `ScheduledVariable`.
            if isinstance(candidate, ScheduledVariable):
                return candidate.variable
            if isinstance(candidate, tf.Variable):
                return candidate
            raise TypeError('Not a variable: {!r}'.format(candidate))

        def strip_output_suffix(var):
            # The variable name, without the trailing ":0" if present.
            var_name = var.name
            return var_name[:-2] if var_name.endswith(':0') else var_name

        # validate the argument `variables`, normalizing it to a dict
        checked_variables = {}
        if isinstance(variables, (dict, OrderedDict)):
            for key, value in six.iteritems(variables):
                checked_variables[key] = unwrap_variable(value)
        else:
            for item in variables:
                unwrapped = unwrap_variable(item)
                checked_variables[strip_output_suffix(unwrapped)] = unwrapped
        variables = checked_variables
        if CHECKPOINT_VAR_NAME in variables:
            raise KeyError('Name is reserved for `variables`: {}'.
                           format(CHECKPOINT_VAR_NAME))

        # validate the argument `objects`
        checked_objects = {}
        for key, obj in six.iteritems(objects or {}):
            if not isinstance(obj, CheckpointSavableObject):
                raise TypeError('Not a savable object: {!r}'.format(obj))
            checked_objects[key] = obj
        objects = checked_objects
        if CHECKPOINT_VAR_NAME in objects:
            raise KeyError('Name is reserved for `objects`: {}'.
                           format(CHECKPOINT_VAR_NAME))

        self._variables = variables
        self._objects = objects
        self._save_dir = os.path.abspath(save_dir)
        self._filename = str(filename)
        self._save_meta = bool(save_meta)

        super(CheckpointSaver, self).__init__(name=name, scope=scope)

        with reopen_variable_scope(self.variable_scope):
            # the serialization variable is only needed when there are
            # savable objects; it is saved along with the other variables
            var_dict = copy.copy(variables)
            if self._objects:
                self._serial_var = CheckpointSerialVar()
                var_dict[CHECKPOINT_VAR_NAME] = self._serial_var.variable
            else:
                self._serial_var = None
            self._var_dict = var_dict

            # now build the saver
            self._saver = tf.train.Saver(var_list=var_dict,
                                         max_to_keep=max_to_keep)

        # recover the internal states
        self.recover_internal_states()

Ejemplo n.º 28

0

Mostrar archivo

    def __init__(self,
                 logits=None,
                 probs=None,
                 dtype=None,
                 group_event_ndims=None,
                 check_numerics=False,
                 name=None,
                 default_name=None):
        """
        Construct the Bernoulli distribution.

        Args:
            logits: The logits of the distribution.  Exactly one of
                `logits` and `probs` must be specified, and its preferred
                dtype must be a floating point type.
            probs: The probabilities of the distribution.
            dtype: Stored as the dtype of this distribution, after being
                converted by `tf.as_dtype`. (default `tf.int32`)
            group_event_ndims: Passed to the base class constructor.
            check_numerics: Passed to the base class constructor.
            name: Passed to the base class constructor.
            default_name: Passed to the base class constructor.

        Raises:
            ValueError: If none or both of `logits`, `probs` are given.
            TypeError: If the parameters are not float numbers.
        """
        # exactly one of the two parameterizations must be chosen
        if (logits is None) == (probs is None):
            raise ValueError('One and only one of `logits`, `probs` should '
                             'be specified.')

        given_param = probs if logits is None else logits
        param_dtype = get_preferred_tensor_dtype(given_param)
        if not param_dtype.is_floating:
            raise TypeError('Bernoulli distribution parameters must be float '
                            'numbers.')
        dtype = tf.int32 if dtype is None else tf.as_dtype(dtype)

        super(Bernoulli, self).__init__(
            group_event_ndims=group_event_ndims,
            check_numerics=check_numerics,
            name=name,
            default_name=default_name,
        )

        with reopen_variable_scope(self.variable_scope), \
                tf.name_scope('init'):
            # obtain both `logits` and `probs`, deriving the missing one
            probs_is_derived = logits is not None
            if probs_is_derived:
                logits = tf.convert_to_tensor(
                    logits, dtype=param_dtype, name='logits')
                probs = tf.nn.sigmoid(logits, 'probs')
                probs_clipped = probs
            else:
                probs = tf.convert_to_tensor(
                    probs, dtype=param_dtype, name='probs')
                # keep the probabilities away from 0 and 1 before
                # taking the logarithms
                if probs.dtype == tf.float64:
                    probs_eps = 1e-11
                else:
                    probs_eps = 1e-7
                probs_clipped = tf.clip_by_value(
                    probs, probs_eps, 1 - probs_eps)
                log_p = tf.log(probs_clipped)
                log_one_minus_p = tf.log1p(-probs_clipped)
                logits = self._check_numerics(
                    tf.subtract(log_p, log_one_minus_p, name='logits'),
                    'logits')

            # the batch shape is the shape of the logits
            batch_shape = logits.get_shape()
            self._logits = logits
            self._static_batch_shape = batch_shape
            if not is_deterministic_shape(batch_shape):
                self._dynamic_batch_shape = tf.shape(logits)
            else:
                self._dynamic_batch_shape = tf.constant(
                    batch_shape.as_list(), dtype=tf.int32)

            # the remaining attributes
            self._probs = probs
            self._probs_clipped = probs_clipped
            self._probs_is_derived = probs_is_derived
            self._dtype = dtype

Ejemplo n.º 29

0

Mostrar archivo

 def test_errors(self):
     """A non-`tf.VariableScope` argument should be rejected."""
     expected_message = ('`var_scope` must be an instance '
                         'of `tf.VariableScope`')
     not_a_scope = object()
     with pytest.raises(TypeError, match=expected_message):
         with reopen_variable_scope(not_a_scope):
             pass

Ejemplo n.º 30

0

Mostrar archivo

Archivo: categorical.py Proyecto: paojianghu/tfsnippet

    def __init__(self,
                 logits=None,
                 probs=None,
                 group_event_ndims=None,
                 check_numerics=False,
                 name=None,
                 default_name=None):
        """
        Construct the categorical distribution.

        Args:
            logits: The logits of the distribution.  Exactly one of
                `logits` and `probs` must be specified, and its preferred
                dtype must be a floating point type.
            probs: The probabilities of the distribution.
            group_event_ndims: Passed to the base class constructor.
            check_numerics: Passed to the base class constructor.
            name: Passed to the base class constructor.
            default_name: Passed to the base class constructor.

        Raises:
            ValueError: If none or both of `logits`, `probs` are given.
            TypeError: If the parameters are not float numbers.
        """
        # check the arguments
        if (logits is None and probs is None) or \
                (logits is not None and probs is not None):
            raise ValueError('One and only one of `logits`, `probs` should '
                             'be specified.')

        # the dtype is taken from whichever parameter was specified
        if logits is not None:
            param_dtype = get_preferred_tensor_dtype(logits)
        else:
            param_dtype = get_preferred_tensor_dtype(probs)
        if not param_dtype.is_floating:
            raise TypeError(
                'Categorical distribution parameters must be float '
                'numbers.')

        super(_BaseCategorical,
              self).__init__(group_event_ndims=group_event_ndims,
                             check_numerics=check_numerics,
                             name=name,
                             default_name=default_name)

        with reopen_variable_scope(self.variable_scope):
            with tf.name_scope('init'):
                # obtain parameter tensors
                if logits is not None:
                    # `probs` is derived from `logits` by softmax
                    logits = tf.convert_to_tensor(logits, dtype=param_dtype)
                    probs = tf.nn.softmax(logits, name='probs_given_logits')
                    probs_clipped = probs
                    probs_is_derived = True
                else:
                    probs = tf.convert_to_tensor(probs, dtype=param_dtype)
                    # clipping margin, smaller for float64 parameters
                    probs_eps = (1e-11 if probs.dtype == tf.float64 else 1e-7)
                    probs_clipped = tf.clip_by_value(probs, probs_eps,
                                                     1 - probs_eps)
                    # NOTE(review): the logits are derived from the
                    # unclipped `probs`, so a zero probability gives -inf
                    # here -- confirm whether `probs_clipped` was intended.
                    logits = self._check_numerics(
                        tf.log(probs, name='logits_given_probs'), 'logits')
                    probs_is_derived = False
                self._logits = logits
                self._probs = probs
                self._probs_is_derived = probs_is_derived
                self._probs_clipped = probs_clipped

                # derive the shape and data types of parameters
                # (the batch shape is the logits shape without its last axis)
                logits_shape = logits.get_shape()
                self._static_batch_shape = logits_shape[:-1]
                if is_deterministic_shape(self._static_batch_shape):
                    self._dynamic_batch_shape = tf.constant(
                        self._static_batch_shape.as_list(), dtype=tf.int32)
                else:
                    self._dynamic_batch_shape = tf.shape(logits)[:-1]

                # infer the number of categories from the last axis of the
                # logits, falling back to the dynamic shape when the static
                # size is unknown
                self._n_categories = logits_shape[-1].value
                if self._n_categories is None:
                    self._n_categories = tf.shape(logits)[-1]