def __init__(self, p_z, p_x_given_z, q_z_given_x, h_for_p_x, h_for_q_z,
             z_group_ndims=1, x_group_ndims=1, is_reparameterized=None,
             name=None, scope=None):
    """
    Construct the :class:`VAE`.

    Args:
        p_z (Distribution): :math:`p(z)`, the prior distribution instance.
        p_x_given_z: :math:`p(x|h(z))`, a distribution class or a
            :class:`DistributionFactory` object.
        q_z_given_x: :math:`q(z|h(x))`, a distribution class or a
            :class:`DistributionFactory` object.
        h_for_p_x (Module): :math:`h(z)`, the hidden network for
            :math:`p(x|h(z))`.  Its output must be a ``dict[str, any]``
            holding the parameters for `p_x_given_z`.
        h_for_q_z (Module): :math:`h(x)`, the hidden network for
            :math:`q(z|h(x))`.  Its output must be a ``dict[str, any]``
            holding the parameters for `q_z_given_x`.
        z_group_ndims (int or tf.Tensor): `group_ndims` for `z`.
            (default 1)
        x_group_ndims (int or tf.Tensor): `group_ndims` for `x`.
            (default 1)
        is_reparameterized (bool or None): Whether or not `z` should be
            re-parameterized.  (default :obj:`None`, following the
            settings of the z distributions.)
        name (str): Optional name of this module (argument of
            :class:`~tfsnippet.utils.VarScopeObject`).
        scope (str): Optional scope of this module (argument of
            :class:`~tfsnippet.utils.VarScopeObject`).

    Raises:
        TypeError: If `p_z` is not a `Distribution`, or if either hidden
            network is not callable.

    See Also:
        :meth:`tfsnippet.distributions.Distribution.log_prob` for
        contents about `group_ndims`.
    """
    # All argument checks run before the base constructor is invoked.
    if not isinstance(p_z, Distribution):
        raise TypeError('`p_z` must be an instance of `Distribution`')
    if not callable(h_for_p_x):
        raise TypeError('`h_for_p_x` must be an instance of `Module` or '
                        'a callable object')
    if not callable(h_for_q_z):
        raise TypeError('`h_for_q_z` must be an instance of `Module` or '
                        'a callable object')

    super(VAE, self).__init__(name=name, scope=scope)

    def ensure_scoped(net, net_name):
        # Defensive coding: anything that is not already a
        # `VarScopeObject` is wrapped in a `Lambda` created inside this
        # object's reopened variable scope.
        if isinstance(net, VarScopeObject):
            return net
        with reopen_variable_scope(self.variable_scope):
            return Lambda(net, name=net_name)

    # Wrap the decoder network first, then the encoder network.
    h_for_p_x = ensure_scoped(h_for_p_x, 'h_for_p_x')
    h_for_q_z = ensure_scoped(h_for_q_z, 'h_for_q_z')

    self._p_z = p_z
    self._p_x_given_z = p_x_given_z
    self._q_z_given_x = q_z_given_x
    self._h_for_p_x = h_for_p_x
    self._h_for_q_z = h_for_q_z
    self._z_group_ndims = z_group_ndims
    self._x_group_ndims = x_group_ndims
    self._is_reparameterized = is_reparameterized
def __init__(self, logits=None, probs=None, dtype=None,
             group_event_ndims=None, check_numerics=False,
             name=None, default_name=None):
    """
    Construct the :class:`OneHotCategorical` distribution.

    Args:
        logits: Forwarded unchanged to the base constructor.
        probs: Forwarded unchanged to the base constructor.
        dtype: Data type stored as ``self._dtype``.  Defaults to
            ``tf.int32``; any other value is normalized through
            ``tf.as_dtype``.
        group_event_ndims: Forwarded to the base constructor.
        check_numerics (bool): Forwarded to the base constructor.
        name (str): Forwarded to the base constructor.
        default_name (str): Forwarded to the base constructor.
    """
    dtype = tf.int32 if dtype is None else tf.as_dtype(dtype)

    super(OneHotCategorical, self).__init__(
        logits=logits,
        probs=probs,
        group_event_ndims=group_event_ndims,
        check_numerics=check_numerics,
        name=name,
        default_name=default_name,
    )
    self._dtype = dtype

    with reopen_variable_scope(self.variable_scope):
        with tf.name_scope('init'):
            # derive the value shape of parameters
            logits_shape = self.logits.get_shape()
            self._static_value_shape = logits_shape[-1:]
            if is_deterministic_shape(self._static_value_shape):
                self._dynamic_value_shape = tf.constant(
                    self._static_value_shape.as_list(), dtype=tf.int32)
            else:
                # Use `self.logits`, not the raw `logits` argument: the
                # argument is None when the caller only supplied `probs`,
                # and `tf.shape(None)` would fail.  `self.logits` is the
                # same attribute the static shape above is read from.
                self._dynamic_value_shape = tf.shape(self.logits)[-1:]
def __init__(self, model, n_z=1024, mcmc_iteration=10, batch_size=32,
             feed_dict=None, last_point_only=True, name=None, scope=None):
    """
    Construct the :class:`DonutPredictor`.

    Args:
        model: The model object; its ``x_dims`` attribute fixes the second
            dimension of both input placeholders.
        n_z: Stored as ``self._n_z``. (default 1024)
        mcmc_iteration: Stored as ``self._mcmc_iteration``. (default 10)
        batch_size: Stored as ``self._batch_size``. (default 32)
        feed_dict: Optional mapping copied into a fresh ``dict``; an empty
            dict is used when it is :obj:`None`.
        last_point_only: Stored as ``self._last_point_only``.
            (default :obj:`True`)
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.
    """
    super(DonutPredictor, self).__init__(name=name, scope=scope)

    self._model = model
    self._n_z = n_z
    self._mcmc_iteration = mcmc_iteration
    self._batch_size = batch_size
    # Take a private copy of the caller's feed dict (if one was given).
    self._feed_dict = (
        dict(six.iteritems(feed_dict)) if feed_dict is not None else {}
    )
    self._last_point_only = last_point_only

    # Reopen this object's variable scope before creating graph nodes.
    with reopen_variable_scope(self.variable_scope):
        # input placeholders
        self._input_x = tf.placeholder(
            dtype=tf.float32, shape=[None, model.x_dims], name='input_x')
        self._input_y = tf.placeholder(
            dtype=tf.int32, shape=[None, model.x_dims], name='input_y')

        # outputs of interest, filled in lazily elsewhere
        self._refactor_probability = None
        self._refactor_probability_without_y = None
def __init__(self, n_layers, dtype=tf.float32, name=None, scope=None):
    """
    Construct a new :class:`MultiLayerFlow`.

    Args:
        n_layers (int): Number of flow layers; must be at least 1.
        dtype: The data type of the transformed `y`.
        name (str): Optional name of this :class:`MultiLayerFlow`
            (argument of :class:`~tfsnippet.utils.VarScopeObject`).
        scope (str): Optional scope of this :class:`MultiLayerFlow`
            (argument of :class:`~tfsnippet.utils.VarScopeObject`).

    Raises:
        ValueError: If `n_layers` is smaller than 1.
    """
    super(MultiLayerFlow, self).__init__(
        dtype=dtype, name=name, scope=scope)

    layer_count = int(n_layers)
    if layer_count <= 0:
        raise ValueError('`n_layers` must be larger than 0.')
    self._n_layers = layer_count

    # Parameters of layer ``i`` are created under the sub-scope ``_i``.
    self._layer_params = []
    with reopen_variable_scope(self.variable_scope):
        for index in range(layer_count):
            with tf.variable_scope('_{}'.format(index)):
                params = self._create_layer_params(index)
                self._layer_params.append(params)
def __init__(self, model, n_z=1024, batch_size=32, feed_dict=None,
             last_point_only=True, name=None, scope=None):
    """
    Construct the :class:`Predictor`.

    Args:
        model: The model object; its ``window_length`` and ``x_dims``
            attributes fix the placeholder shapes.
        n_z: Stored as ``self._n_z``. (default 1024)
        batch_size: Stored as ``self._batch_size``. (default 32)
        feed_dict: Optional mapping copied into a fresh ``dict``; an empty
            dict is used when it is :obj:`None`.
        last_point_only: Stored as ``self._last_point_only``.
            (default :obj:`True`)
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.
    """
    super(Predictor, self).__init__(name=name, scope=scope)

    self._model = model
    self._n_z = n_z
    self._batch_size = batch_size
    # Take a private copy of the caller's feed dict (if one was given).
    self._feed_dict = (
        dict(six.iteritems(feed_dict)) if feed_dict is not None else {}
    )
    self._last_point_only = last_point_only

    with reopen_variable_scope(self.variable_scope):
        # input placeholders
        self._input_x = tf.placeholder(
            dtype=tf.float32,
            shape=[None, model.window_length, model.x_dims],
            name='input_x',
        )
        self._input_y = tf.placeholder(
            dtype=tf.int32,
            shape=[None, model.window_length],
            name='input_y',
        )

        # outputs of interest, filled in lazily elsewhere
        self._score = None
        self._score_without_y = None
def __init__(self, alpha, beta, group_event_ndims=None,
             check_numerics=False, name=None, default_name=None):
    """
    Construct the :class:`Gamma` distribution.

    Args:
        alpha: First parameter; its preferred dtype decides the dtype both
            parameters are converted to.
        beta: Second parameter; must be broadcastable against `alpha`.
        group_event_ndims: Forwarded to the base constructor.
        check_numerics (bool): Forwarded to the base constructor.
        name (str): Forwarded to the base constructor.
        default_name (str): Forwarded to the base constructor.

    Raises:
        TypeError: If the preferred dtype of `alpha` is not floating.
        ValueError: If the static shapes of `alpha` and `beta` cannot be
            broadcast against each other.
    """
    # check the arguments
    dtype = get_preferred_tensor_dtype(alpha)
    if not dtype.is_floating:
        raise TypeError('Gamma distribution parameters must be float '
                        'numbers.')

    super(Gamma, self).__init__(
        group_event_ndims=group_event_ndims,
        check_numerics=check_numerics,
        name=name,
        default_name=default_name,
    )

    with reopen_variable_scope(self.variable_scope), \
            tf.name_scope('init'):
        # obtain parameter tensors
        alpha = tf.convert_to_tensor(alpha, dtype=dtype)
        self._alpha = alpha
        beta = tf.convert_to_tensor(beta, dtype=dtype)
        self._beta = beta

        # static batch shape: fails early on incompatible shapes
        alpha_shape = alpha.get_shape()
        beta_shape = beta.get_shape()
        try:
            self._static_batch_shape = tf.broadcast_static_shape(
                alpha_shape, beta_shape)
        except ValueError:
            raise ValueError(
                '`alpha` and `beta` should be '
                'broadcastable to match each other (%r vs %r).' %
                (alpha_shape, beta_shape))

        # dynamic batch shape, computed from the runtime shapes
        self._dynamic_batch_shape = tf.broadcast_dynamic_shape(
            tf.shape(alpha), tf.shape(beta))
def _get_score_without_y(self):
    """
    Return the score tensor computed without `y`, building it on first use.

    The tensor is created once, inside this object's reopened variable
    scope and the ``score_without_y`` name scope, and cached on
    ``self._score_without_y``.
    """
    if self._score_without_y is not None:
        return self._score_without_y

    with reopen_variable_scope(self.variable_scope):
        with tf.name_scope('score_without_y'):
            self._score_without_y = self.model.get_score(
                x=self._input_x,
                n_z=self._n_z,
                last_point_only=self._last_point_only,
            )
    return self._score_without_y
def __call__(self, inputs, **kwargs):
    """
    Apply the module to `inputs` by delegating to :meth:`_forward`.

    Args:
        inputs: The input passed straight to ``self._forward``.
        \\**kwargs: Extra keyword arguments passed to ``self._forward``.

    Returns:
        Whatever ``self._forward`` returns.
    """
    # The variable scope of this module is reopened on every call, so
    # repeated calls work against the same scope.  The nested ``forward``
    # name scope exists so that ``tf.variable_scope(default_name=...)``
    # keeps working properly with variable reusing (it needs a unique
    # nested name scope).
    with reopen_variable_scope(self.variable_scope), \
            tf.name_scope('forward'):
        outputs = self._forward(inputs, **kwargs)
    return outputs
def _get_score(self):
    """
    Return the score tensor computed with both `x` and `y`, building it
    on first use.

    The tensor is created once, inside this object's reopened variable
    scope and the ``score`` name scope, and cached on ``self._score``.
    """
    if self._score is not None:
        return self._score

    with reopen_variable_scope(self.variable_scope):
        with tf.name_scope('score'):
            self._score = self.model.get_score(
                x=self._input_x,
                y=self._input_y,
                n_z=self._n_z,
                mcmc_iteration=self._mcmc_iteration,
                last_point_only=self._last_point_only,
            )
    return self._score
def _get_score_without_y(self):
    """
    Return ``(score_without_y, q_net_z)``, building both on first use.

    ``self.model.get_score`` is expected to return a 2-tuple here; its
    elements are cached on ``self._score_without_y`` and
    ``self._q_net_z``.  The graph nodes are created inside this object's
    reopened variable scope and the ``score_without_y`` name scope.
    """
    if self._score_without_y is None:
        with reopen_variable_scope(self.variable_scope):
            with tf.name_scope("score_without_y"):
                outputs = self.model.get_score(
                    x=self._input_x,
                    n_z=self._n_z,
                    last_point_only=self._last_point_only,
                )
                self._score_without_y, self._q_net_z = outputs
    return self._score_without_y, self._q_net_z
def test_basic(self):
    """
    Exercise ``reopen_variable_scope`` on the root scope and on nested
    scopes, from inside other scopes.

    Every ``_check_vs`` call passes a variable name followed by four
    expected strings.  NOTE(review): these look like the expected variable
    scope name, original name scope, variable name and op name -- confirm
    against ``_check_vs``.  The ``op_N`` suffixes depend on how many ops
    were created earlier in each name scope, so statement order matters.
    """
    # The scope that is current when the test starts.
    root = tf.get_variable_scope()

    with tf.variable_scope('a') as a:
        self._check_vs('v1', 'a', 'a/', 'a/v1:0', 'a/op:0')

        # Reopening the root scope from inside 'a'.
        with reopen_variable_scope(root):
            self._check_vs('v2', '', '', 'v2:0', 'op:0')

            # Reopening 'a' again: the expected op name is 'a/op_1'.
            with reopen_variable_scope(a):
                self._check_vs('v3', 'a', 'a/', 'a/v3:0', 'a/op_1:0')

    with tf.variable_scope('a/b') as b:
        self._check_vs('v4', 'a/b', 'a/b/', 'a/b/v4:0', 'a/b/op:0')

        with reopen_variable_scope(root):
            self._check_vs('v5', '', '', 'v5:0', 'op_1:0')

            # Two consecutive re-openings of 'a'.
            with reopen_variable_scope(a):
                self._check_vs('v6', 'a', 'a/', 'a/v6:0', 'a/op_2:0')

            with reopen_variable_scope(a):
                self._check_vs('v7', 'a', 'a/', 'a/v7:0', 'a/op_3:0')

                # Reopening 'a/b' from within a reopened 'a'.
                with reopen_variable_scope(b):
                    self._check_vs('v8', 'a/b', 'a/b/', 'a/b/v8:0', 'a/b/op_1:0')
def _get_score_without_y(self):
    """
    Return ``(score_without_y, q_net_z)``, building both on first use.

    On the first call the output of ``self.model.run_gcn`` (fed with
    ``self._input_x`` and ``self._input_adj``) is passed to
    ``self.model.get_score`` as ``x_feature``.  The two returned values
    are cached on ``self._score_without_y`` and ``self._q_net_z``.
    """
    if self._score_without_y is None:
        with reopen_variable_scope(self.variable_scope):
            with tf.name_scope('score_without_y'):
                gcn_feat = self.model.run_gcn(
                    self._input_x, self._input_adj)
                outputs = self.model.get_score(
                    x=self._input_x,
                    x_feature=gcn_feat,
                    n_z=self._n_z,
                    last_point_only=self._last_point_only,
                )
                self._score_without_y, self._q_net_z = outputs
    return self._score_without_y, self._q_net_z
def _get_refactor_probability_without_y(self):
    """
    Get the reconstruction probability computed without `y`.

    The tensor is built on first use, inside this object's reopened
    variable scope and the ``score_without_y`` name scope, and cached.

    Returns:
        The reconstruction probability obtained without `y`.
    """
    if self._refactor_probability_without_y is not None:
        return self._refactor_probability_without_y

    with reopen_variable_scope(self.variable_scope):
        with tf.name_scope('score_without_y'):
            probability = self.model.get_refactor_probability(
                window=self._input_x,
                n_z=self._n_z,
                last_point_only=self._last_point_only,
            )
            self._refactor_probability_without_y = probability
    return self._refactor_probability_without_y
def __init__(self, variables, save_dir, max_versions=2,
             filename='variables.dat', latest_file='latest',
             save_meta=True, name=None, scope=None):
    """
    Construct the :class:`VariableSaver`.

    Args:
        variables (collections.Iterable[tf.Variable] or dict[str, any]):
            List of variables, or dict of variables with explicit keys,
            which should be saved and restored.
        save_dir (str): Directory where to place the saved variables.
        max_versions (int): Maximum versions to keep in the directory
            (Default is 2). At least 2 versions should be kept, in order
            to prevent corrupted checkpoint files caused by IO failure.
        filename (str): Name of the files of variable values (default is
            ``variables.dat``).
        latest_file (str): Name of the file which organizes the checkpoint
            versions (default is ``latest``).
        save_meta (bool): Whether or not to save meta graph (default is
            :obj:`True`).
        name (str): Name of this :class:`VariableSaver` (argument of
            :class:`~tfsnippet.utils.VarScopeObject`).
        scope (str): Scope of this :class:`VariableSaver` (argument of
            :class:`~tfsnippet.utils.VarScopeObject`).

    Raises:
        ValueError: If `max_versions` is smaller than 2.
    """
    # A non-dict iterable is materialized so it can be consumed safely.
    if not isinstance(variables, dict):
        variables = list(variables)
    if max_versions < 2:
        raise ValueError('At least 2 versions should be kept')

    self.variables = variables
    self.save_dir = os.path.abspath(save_dir)
    self.filename = filename
    self.max_versions = max_versions
    self.latest_file = latest_file
    self.save_meta = save_meta

    # Pass `name` and `scope` by keyword.  The previous positional call
    # `__init__(scope, name)` handed them over in swapped order relative
    # to the `name=..., scope=...` convention used by the other
    # VarScopeObject-style constructors.
    super(VariableSaver, self).__init__(name=name, scope=scope)

    with reopen_variable_scope(self.variable_scope):
        self._saver = tf.train.Saver(
            var_list=self.variables,
            max_to_keep=self.max_versions,
            name='saver',
        )
def _get_refactor_probability(self):
    """
    Get the reconstruction probability.

    The tensor is built on first use, inside this object's reopened
    variable scope and the ``score`` name scope, and cached.

    Returns:
        The reconstruction probability.
    """
    if self._refactor_probability is not None:
        return self._refactor_probability

    with reopen_variable_scope(self.variable_scope):
        with tf.name_scope('score'):
            probability = self.model.get_refactor_probability(
                window=self._input_x,
                missing=self._input_y,
                n_z=self._n_z,
                mcmc_iteration=self._mcmc_iteration,
                last_point_only=self._last_point_only,
            )
            self._refactor_probability = probability
    return self._refactor_probability
def __init__(self, hidden_net_p_x_z, hidden_net_q_z_x, x_dims, z_dims,
             std_epsilon=1e-4, name=None, scope=None):
    """
    Construct the :class:`Donut` model on top of a :class:`VAE`.

    Args:
        hidden_net_p_x_z: Hidden network used for :math:`p(x|z)`.
        hidden_net_q_z_x: Hidden network used for :math:`q(z|x)`.
        x_dims (int): Number of `x` dimensions; must be a positive integer.
        z_dims (int): Number of `z` dimensions; must be a positive integer.
        std_epsilon (float): `epsilon` passed to the ``softplus_std``
            layers. (default 1e-4)
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.

    Raises:
        ValueError: If `x_dims` or `z_dims` is not a positive integer.
    """
    if not (is_integer(x_dims) and x_dims > 0):
        raise ValueError('`x_dims`必须为正整数')
    if not (is_integer(z_dims) and z_dims > 0):
        raise ValueError('`z_dims`必须为正整数')

    super(Donut, self).__init__(name=name, scope=scope)

    def params_net(hidden_net, units, mean_name, std_name, net_name):
        # Wrap a hidden network so that it emits `mean` and `std`, each
        # produced by its own layer with `units` outputs.
        return Lambda(
            partial(
                wrap_params_net,
                h_for_dist=hidden_net,
                mean_layer=partial(
                    tf.layers.dense, units=units, name=mean_name),
                std_layer=partial(
                    softplus_std, units=units, epsilon=std_epsilon,
                    name=std_name),
            ),
            name=net_name,
        )

    with reopen_variable_scope(self.variable_scope):
        # p(z): Normal with an all-zero mean and an all-one std, both of
        # length `z_dims`.
        prior = Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims]))
        # hidden network for p(x|h(z)): mean and std of `x`
        net_p_x = params_net(
            hidden_net_p_x_z, x_dims, 'x_mean', 'x_std', 'p_x_given_z')
        # hidden network for q(z|h(x)): mean and std of `z`
        net_q_z = params_net(
            hidden_net_q_z_x, z_dims, 'z_mean', 'z_std', 'q_z_given_x')

        # Both conditionals are Normal distributions.
        self._vae = VAE(
            p_z=prior,
            p_x_given_z=Normal,
            q_z_given_x=Normal,
            h_for_p_x=net_p_x,
            h_for_q_z=net_q_z,
        )

    self._x_dims = x_dims
    self._z_dims = z_dims
def __init__(self, h_for_p_x, h_for_q_z, x_dims, z_dims, std_epsilon=1e-4,
             name=None, scope=None):
    """
    Construct the :class:`Donut` model on top of a :class:`VAE`.

    Args:
        h_for_p_x: Hidden network placed in front of the `x` mean/std
            mapper.
        h_for_q_z: Hidden network placed in front of the `z` mean/std
            mapper.
        x_dims (int): Number of `x` dimensions; must be a positive integer.
        z_dims (int): Number of `z` dimensions; must be a positive integer.
        std_epsilon (float): Constant added to the softplus output that
            produces `std`. (default 1e-4)
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.

    Raises:
        ValueError: If `x_dims` or `z_dims` is not a positive integer.
    """
    if not (is_integer(x_dims) and x_dims > 0):
        raise ValueError('`x_dims` must be a positive integer')
    if not (is_integer(z_dims) and z_dims > 0):
        raise ValueError('`z_dims` must be a positive integer')

    super(Donut, self).__init__(name=name, scope=scope)

    def with_normal_params(hidden_net, units, mapper_name):
        # Chain the hidden network with a mapper that yields `mean` (a
        # dense layer) and `std` (softplus dense output plus
        # `std_epsilon`).  The `std` dense layer is instantiated inside
        # the lambda, i.e. whenever the lambda is called.
        return Sequential([
            hidden_net,
            DictMapper(
                {
                    'mean': K.layers.Dense(units),
                    'std': lambda h: (
                        std_epsilon +
                        K.layers.Dense(
                            units, activation=tf.nn.softplus)(h)
                    ),
                },
                name=mapper_name,
            ),
        ])

    with reopen_variable_scope(self.variable_scope):
        prior = Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims]))
        net_p_x = with_normal_params(h_for_p_x, x_dims, 'p_x_given_z')
        net_q_z = with_normal_params(h_for_q_z, z_dims, 'q_z_given_x')
        self._vae = VAE(
            p_z=prior,
            p_x_given_z=Normal,
            q_z_given_x=Normal,
            h_for_p_x=net_p_x,
            h_for_q_z=net_q_z,
        )

    self._x_dims = x_dims
    self._z_dims = z_dims
def __init__(self, p, group_event_ndims=None, check_numerics=False):
    """
    Construct the :class:`_MyDistribution`.

    Args:
        p: The parameter; converted to a tensor of its preferred dtype.
            Its last axis becomes the value shape and all leading axes
            become the batch shape.
        group_event_ndims: Forwarded to the base constructor.
        check_numerics (bool): Forwarded to the base constructor.
    """
    super(_MyDistribution, self).__init__(
        group_event_ndims=group_event_ndims,
        check_numerics=check_numerics,
    )

    with reopen_variable_scope(self.variable_scope):
        p = tf.convert_to_tensor(p, dtype=get_preferred_tensor_dtype(p))
        self.p = p

        # value shape: the last axis of `p`
        self._static_value_shape = p.get_shape()[-1:]
        dynamic_value_shape = get_dynamic_tensor_shape(
            p, lambda s: s[-1:])
        self._dynamic_value_shape = tf.convert_to_tensor(
            dynamic_value_shape)

        # batch shape: everything but the last axis of `p`
        self._static_batch_shape = p.get_shape()[:-1]
        dynamic_batch_shape = get_dynamic_tensor_shape(
            p, lambda s: s[:-1])
        self._dynamic_batch_shape = tf.convert_to_tensor(
            dynamic_batch_shape)
def build(self, input=None):
    """
    Build the layer, creating all required variables.

    Args:
        input (Tensor or list[Tensor] or None): If :meth:`build` is called
            within :meth:`apply`, it will be the input tensor(s).
            Otherwise if it is called separately, it will be :obj:`None`.

    Raises:
        RuntimeError: If the layer has already been built.
        ValueError: If the layer requires `input` to build but none was
            given.
    """
    if self._has_built:
        message = 'Layer has already been built: {!r}'.format(self)
        raise RuntimeError(message)

    requires_input = self._build_require_input
    if requires_input and input is None:
        class_name = self.__class__.__name__
        raise ValueError(
            '`{}` requires `input` to build.'.format(class_name))

    with reopen_variable_scope(self.variable_scope):
        self._build(input)
    # The flag is only set once `_build` has returned normally.
    self._has_built = True
def __init__(self, h_for_p_x, h_for_q_z, x_dims, z_dims, std_epsilon=1e-4,
             name=None, scope=None) -> None:
    """
    Construct the :class:`Donut` model on top of a :class:`VAE`.

    Args:
        h_for_p_x: Hidden network used for :math:`p(x|z)`.
        h_for_q_z: Hidden network used for :math:`q(z|x)`.
        x_dims (int): Number of `x` dimensions; must be a positive integer.
        z_dims (int): Number of `z` dimensions; must be a positive integer.
        std_epsilon (float): `epsilon` passed to the ``softplus_std``
            layers. (default 1e-4)
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.

    Raises:
        ValueError: If `x_dims` or `z_dims` is not a positive integer.
    """
    if not is_integer(x_dims) or x_dims <= 0:
        raise ValueError('`x_dims` must be a positive integer')
    if not is_integer(z_dims) or z_dims <= 0:
        raise ValueError('`z_dims` must be a positive integer')

    super(Donut, self).__init__(name=name, scope=scope)

    def build_params_net(hidden_net, units, mean_name, std_name, net_name):
        # Wrap `hidden_net` so that it emits `mean` and `std`, each
        # produced by its own layer with `units` outputs.
        return Lambda(
            partial(
                wrap_params_net,
                h_for_dist=hidden_net,
                mean_layer=partial(
                    tf.layers.dense, units=units, name=mean_name),
                std_layer=partial(
                    softplus_std, units=units, epsilon=std_epsilon,
                    name=std_name),
            ),
            name=net_name,
        )

    with reopen_variable_scope(self.variable_scope):
        # Construction order (prior, p-net, q-net) matches the argument
        # evaluation order of a single inline `VAE(...)` call.
        prior = Normal(mean=tf.zeros([z_dims]), std=tf.ones([z_dims]))
        net_p_x = build_params_net(
            h_for_p_x, x_dims, 'x_mean', 'x_std', 'p_x_given_z')
        net_q_z = build_params_net(
            h_for_q_z, z_dims, 'z_mean', 'z_std', 'q_z_given_x')
        self._vae = VAE(
            p_z=prior,
            p_x_given_z=Normal,
            q_z_given_x=Normal,
            h_for_p_x=net_p_x,
            h_for_q_z=net_q_z,
        )

    self._x_dims = x_dims
    self._z_dims = z_dims
def build(self):
    """Build the model and the trainer.

    This method is called automatically when the model is required to be
    built; it is nevertheless recommended to call it soon after the model
    object is constructed.  Calling it again after the first call is a
    no-op.
    """
    if self._has_built:
        return
    # The flag is raised *before* building, so any nested call to `build`
    # made while `_build` runs returns immediately.
    self._has_built = True

    with reopen_variable_scope(self.variable_scope):
        # create the global step variable if there's none
        if self._global_step is None:
            initial_step = np.asarray(0, dtype=np.int64)
            self._global_step = tf.get_variable(
                'global_step',
                dtype=tf.int64,
                trainable=False,
                initializer=initial_step,
            )

        # build the model
        self._build()
def __init__( self, model, model_vs=None, n_z=None, feed_dict=None, valid_feed_dict=None, use_regularization_loss=True, max_epoch=256, max_step=None, batch_size=256, valid_batch_size=1024, valid_step_freq=100, initial_lr=0.001, lr_anneal_epochs=10, lr_anneal_factor=0.75, optimizer=tf.train.AdamOptimizer, optimizer_params=None, grad_clip_norm=50.0, check_numerics=True, name=None, scope=None, ): super(Trainer, self).__init__(name=name, scope=scope) # memorize the arguments self._model = model self._n_z = n_z if feed_dict is not None: self._feed_dict = dict(six.iteritems(feed_dict)) else: self._feed_dict = {} if valid_feed_dict is not None: self._valid_feed_dict = dict(six.iteritems(valid_feed_dict)) else: self._valid_feed_dict = self._feed_dict if max_epoch is None and max_step is None: raise ValueError( "At least one of `max_epoch` and `max_step` " "should be specified" ) self._max_epoch = max_epoch self._max_step = max_step self._batch_size = batch_size self._valid_batch_size = valid_batch_size self._valid_step_freq = valid_step_freq self._initial_lr = initial_lr self._lr_anneal_epochs = lr_anneal_epochs self._lr_anneal_factor = lr_anneal_factor # build the trainer with reopen_variable_scope(self.variable_scope): # the global step for this model self._global_step = tf.get_variable( dtype=tf.int64, name="global_step", trainable=False, initializer=tf.constant(0, dtype=tf.int64), # reuse=True, ) # input placeholders self._input_x = tf.placeholder( dtype=tf.float32, shape=[None, model.window_length, model.x_dims], name="input_x", ) self._learning_rate = tf.placeholder( dtype=tf.float32, shape=(), name="learning_rate" ) # compose the training loss with tf.name_scope("loss"): loss = model.get_training_loss(x=self._input_x, n_z=n_z) if use_regularization_loss: loss += tf.losses.get_regularization_loss() self._loss = loss # get the training variables train_params = get_variables_as_dict( scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES ) self._train_params = 
train_params # create the trainer if optimizer_params is None: optimizer_params = {} else: optimizer_params = dict(six.iteritems(optimizer_params)) optimizer_params["learning_rate"] = self._learning_rate self._optimizer = optimizer(**optimizer_params) # derive the training gradient origin_grad_vars = self._optimizer.compute_gradients( self._loss, list(six.itervalues(self._train_params)) ) grad_vars = [] for grad, var in origin_grad_vars: if grad is not None and var is not None: if grad_clip_norm: grad = tf.clip_by_norm(grad, grad_clip_norm) if check_numerics: grad = tf.check_numerics( grad, "gradient for {} has numeric issue".format(var.name) ) grad_vars.append((grad, var)) # build the training op with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): self._train_op = self._optimizer.apply_gradients( grad_vars, global_step=self._global_step ) # the training summary in case `summary_dir` is specified with tf.name_scope("summary"): self._summary_op = tf.summary.merge( [ tf.summary.histogram(v.name.rsplit(":", 1)[0], v) for v in six.itervalues(self._train_params) ] ) # initializer for the variables self._trainer_initializer = tf.variables_initializer( list( six.itervalues( get_variables_as_dict( scope=self.variable_scope, collection=tf.GraphKeys.GLOBAL_VARIABLES, ) ) ) )
def __init__(self, model, model_vs=None, n_z=None, feed_dict=None,
             valid_feed_dict=None, missing_data_injection_rate=0.01,
             use_regularization_loss=True,
             max_epoch=256, max_step=None, batch_size=256,
             valid_batch_size=1024, valid_step_freq=100,
             initial_lr=0.001, lr_anneal_epochs=10, lr_anneal_factor=0.75,
             optimizer=tf.train.AdamOptimizer, optimizer_params=None,
             grad_clip_norm=10.0, check_numerics=True,
             name=None, scope=None):
    """
    Construct the :class:`DonutTrainer` and build its part of the graph.

    Args:
        model: The model to train.  It must provide ``x_dims`` and
            ``get_training_loss(x=..., y=..., n_z=...)``.
        model_vs: Scope passed to ``get_variables_as_dict`` to collect
            the trainable variables.
        n_z: Passed to ``model.get_training_loss``.
        feed_dict: Optional mapping copied into a fresh dict.
        valid_feed_dict: Optional mapping copied into a fresh dict; when
            :obj:`None`, the (copied) `feed_dict` object itself is reused.
        missing_data_injection_rate: Stored. (default 0.01)
        use_regularization_loss (bool): Whether to add
            ``tf.losses.get_regularization_loss()`` to the training loss.
        max_epoch: Stored; at least one of `max_epoch` and `max_step`
            must be given.
        max_step: Stored; see `max_epoch`.
        batch_size: Stored.
        valid_batch_size: Stored.
        valid_step_freq: Stored.
        initial_lr: Stored.
        lr_anneal_epochs: Stored.
        lr_anneal_factor: Stored.
        optimizer: Optimizer factory, called with `optimizer_params`
            (defaults to the Adam optimizer).
        optimizer_params: Optional mapping of extra optimizer arguments;
            its ``learning_rate`` entry is always overwritten with the
            learning-rate placeholder.
        grad_clip_norm: If truthy, each gradient is clipped by this norm.
        check_numerics (bool): Whether to wrap each gradient in
            ``tf.check_numerics``.
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.

    Raises:
        ValueError: If both `max_epoch` and `max_step` are :obj:`None`.
    """
    super(DonutTrainer, self).__init__(name=name, scope=scope)

    # memorize the arguments
    self._model = model
    self._n_z = n_z
    if feed_dict is not None:
        # item iterator -> dict (a private copy)
        self._feed_dict = dict(six.iteritems(feed_dict))
    else:
        self._feed_dict = {}
    if valid_feed_dict is not None:
        self._valid_feed_dict = dict(six.iteritems(valid_feed_dict))
    else:
        # fall back to `feed_dict` when no validation feed is given
        self._valid_feed_dict = self._feed_dict
    self._missing_data_injection_rate = missing_data_injection_rate
    # some upper limit on training length is mandatory
    if max_epoch is None and max_step is None:
        raise ValueError('`max_epoch`和`max_step`至少有一个被指定')
    self._max_epoch = max_epoch
    self._max_step = max_step
    self._batch_size = batch_size
    self._valid_batch_size = valid_batch_size
    self._valid_step_freq = valid_step_freq
    self._initial_lr = initial_lr
    self._lr_anneal_epochs = lr_anneal_epochs
    self._lr_anneal_factor = lr_anneal_factor

    # build the trainer
    with reopen_variable_scope(self.variable_scope):
        # input placeholders: `x` and `y` both have `model.x_dims`
        # columns; the learning rate is a scalar
        self._input_x = tf.placeholder(dtype=tf.float32,
                                       shape=[None, model.x_dims],
                                       name='input_x')
        self._input_y = tf.placeholder(dtype=tf.int32,
                                       shape=[None, model.x_dims],
                                       name='input_y')
        self._learning_rate = tf.placeholder(dtype=tf.float32,
                                             shape=(),
                                             name='learning_rate')

        # compose the training loss
        with tf.name_scope('loss'):
            loss = model.get_training_loss(x=self._input_x,
                                           y=self._input_y,
                                           n_z=n_z)
            if use_regularization_loss:
                loss += tf.losses.get_regularization_loss()
            self._loss = loss

        # get the training variables
        train_params = get_variables_as_dict(
            scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES)
        self._train_params = train_params

        # create the trainer; the caller's params are copied before the
        # learning rate entry is injected
        if optimizer_params is None:
            optimizer_params = {}
        else:
            optimizer_params = dict(six.iteritems(optimizer_params))
        optimizer_params['learning_rate'] = self._learning_rate
        # by default this is the Adam optimizer
        self._optimizer = optimizer(**optimizer_params)

        # derive the training gradient: compute the gradients of the loss
        # with respect to the training variables.  The result is iterated
        # below as (gradient, variable) pairs.
        origin_grad_vars = self._optimizer.compute_gradients(
            self._loss, list(six.itervalues(self._train_params)))
        grad_vars = []
        for grad, var in origin_grad_vars:
            if grad is not None and var is not None:
                if grad_clip_norm:
                    # clip the gradient to a maximum L2 norm
                    grad = tf.clip_by_norm(grad, grad_clip_norm)
                if check_numerics:
                    # check the gradient for NaN and Inf values
                    grad = tf.check_numerics(
                        grad, 'gradient for {} has numeric issue'.format(
                            var.name))
                grad_vars.append((grad, var))

        # build the training op
        # the global step of this model, initialized with a constant 0
        self._global_step = tf.get_variable(dtype=tf.int64,
                                            name='global_step',
                                            trainable=False,
                                            initializer=tf.constant(
                                                0, dtype=tf.int64))
        # the ops in the UPDATE_OPS collection must have finished before
        # anything created in this block runs
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            # apply the computed gradients to the variables; the global
            # step is handed to the optimizer as well
            self._train_op = self._optimizer.apply_gradients(
                grad_vars, global_step=self._global_step)

        # the training summary in case `summary_dir` is specified
        with tf.name_scope('summary'):
            self._summary_op = tf.summary.merge([
                tf.summary.histogram(v.name.rsplit(':', 1)[0], v)
                for v in six.itervalues(self._train_params)
            ])

        # initializer for the variables returned by this object's own
        # `get_variables_as_dict()`
        self._trainer_initializer = tf.variables_initializer(
            list(six.itervalues(self.get_variables_as_dict())))
def __init__(self, config, name=None, scope=None):
    """
    Construct the :class:`OmniAnomaly` model from `config`.

    Args:
        config: Configuration object.  The attributes read here are
            ``posterior_flow_type``, ``nf_layers``, ``window_length``,
            ``x_dim``, ``z_dim``, ``use_connected_z_p``,
            ``use_connected_z_q``, ``std_epsilon``, ``rnn_num_hidden``
            and ``dense_dim``.
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.
    """
    self.config = config
    super(OmniAnomaly, self).__init__(name=name, scope=scope)
    with reopen_variable_scope(self.variable_scope):
        # Optional posterior flow: planar normalizing flows when the
        # configured type is 'nf', otherwise no flow at all.
        if config.posterior_flow_type == 'nf':
            self._posterior_flow = spt.layers.planar_normalizing_flows(
                config.nf_layers, name='posterior_flow')
        else:
            self._posterior_flow = None
        self._window_length = config.window_length
        self._x_dims = config.x_dim
        self._z_dims = config.z_dim
        self._vae = VAE(
            # p(z): a linear Gaussian state space model with identity
            # transition/observation matrices and unit-scale noises when
            # `use_connected_z_p` is set; otherwise a Normal with zero
            # mean and unit std.
            p_z=TfpDistribution(
                LinearGaussianStateSpaceModel(
                    num_timesteps=config.window_length,
                    transition_matrix=LinearOperatorIdentity(config.z_dim),
                    transition_noise=MultivariateNormalDiag(
                        scale_diag=tf.ones([config.z_dim])),
                    observation_matrix=LinearOperatorIdentity(
                        config.z_dim),
                    observation_noise=MultivariateNormalDiag(
                        scale_diag=tf.ones([config.z_dim])),
                    initial_state_prior=MultivariateNormalDiag(
                        scale_diag=tf.ones([config.z_dim]))))
            if config.use_connected_z_p else
            Normal(mean=tf.zeros([config.z_dim]),
                   std=tf.ones([config.z_dim])),
            p_x_given_z=Normal,
            # q(z|x): a `RecurrentDistribution` factory when
            # `use_connected_z_q` is set; otherwise a plain Normal.
            q_z_given_x=partial(RecurrentDistribution,
                                mean_q_mlp=partial(tf.layers.dense,
                                                   units=config.z_dim,
                                                   name='z_mean',
                                                   reuse=tf.AUTO_REUSE),
                                std_q_mlp=partial(
                                    softplus_std,
                                    units=config.z_dim,
                                    epsilon=config.std_epsilon,
                                    name='z_std'),
                                z_dim=config.z_dim,
                                window_length=config.window_length)
            if config.use_connected_z_q else Normal,
            # hidden network for p(x|z): an RNN followed by the mean and
            # std layers for `x`
            h_for_p_x=Lambda(
                partial(wrap_params_net,
                        h_for_dist=lambda x: rnn(x=x,
                                                 window_length=config.window_length,
                                                 rnn_num_hidden=config.rnn_num_hidden,
                                                 hidden_dense=2,
                                                 dense_dim=config.dense_dim,
                                                 name='rnn_p_x'),
                        mean_layer=partial(tf.layers.dense,
                                           units=config.x_dim,
                                           name='x_mean',
                                           reuse=tf.AUTO_REUSE),
                        std_layer=partial(softplus_std,
                                          units=config.x_dim,
                                          epsilon=config.std_epsilon,
                                          name='x_std')),
                name='p_x_given_z'),
            # hidden network for q(z|x): with `use_connected_z_q` the RNN
            # output is only exposed under the key 'input_q'; otherwise
            # the RNN is followed by the mean and std layers for `z`
            h_for_q_z=Lambda(lambda x: {
                'input_q':
                rnn(x=x,
                    window_length=config.window_length,
                    rnn_num_hidden=config.rnn_num_hidden,
                    hidden_dense=2,
                    dense_dim=config.dense_dim,
                    name="rnn_q_z")
            },
                             name='q_z_given_x')
            if config.use_connected_z_q else Lambda(
                partial(wrap_params_net,
                        h_for_dist=lambda x: rnn(x=x,
                                                 window_length=config.window_length,
                                                 rnn_num_hidden=config.rnn_num_hidden,
                                                 hidden_dense=2,
                                                 dense_dim=config.dense_dim,
                                                 name="rnn_q_z"),
                        mean_layer=partial(tf.layers.dense,
                                           units=config.z_dim,
                                           name='z_mean',
                                           reuse=tf.AUTO_REUSE),
                        std_layer=partial(softplus_std,
                                          units=config.z_dim,
                                          epsilon=config.std_epsilon,
                                          name='z_std')),
                name='q_z_given_x'))
def __init__(self, model, model_vs=None, n_z=None, feed_dict=None,
             valid_feed_dict=None, use_regularization_loss=True,
             max_epoch=256, max_step=None, batch_size=256,
             valid_batch_size=1024, valid_step_freq=100,
             initial_lr=0.001, lr_anneal_epochs=10, lr_anneal_factor=0.75,
             optimizer=tf.train.AdamOptimizer, optimizer_params=None,
             grad_clip_norm=50.0, check_numerics=True,
             name=None, scope=None, untrainable_variables_keyvalues=None):
    """
    Construct the :class:`Trainer` and build its part of the graph.

    Args:
        model: The model to train.  It must provide ``window_length``,
            ``x_dims`` and ``get_training_loss(x=..., n_z=...)``.
        model_vs: Scope passed to ``get_variables_as_dict`` to collect
            the trainable variables.
        n_z: Passed to ``model.get_training_loss``.
        feed_dict: Optional mapping copied into a fresh dict.
        valid_feed_dict: Optional mapping copied into a fresh dict; when
            :obj:`None`, the (copied) `feed_dict` object itself is reused.
        use_regularization_loss (bool): Whether to add
            ``tf.losses.get_regularization_loss()`` to the training loss.
        max_epoch: Stored; at least one of `max_epoch` and `max_step`
            must be given.
        max_step: Stored; see `max_epoch`.
        batch_size: Stored.
        valid_batch_size: Stored.
        valid_step_freq: Stored.
        initial_lr: Stored.
        lr_anneal_epochs: Stored.
        lr_anneal_factor: Stored.
        optimizer: Optimizer factory, called with `optimizer_params`.
        optimizer_params: Optional mapping of extra optimizer arguments;
            its ``learning_rate`` entry is always overwritten with the
            learning-rate placeholder.
        grad_clip_norm: If truthy, each gradient is clipped by this norm.
        check_numerics (bool): Whether to wrap each gradient in
            ``tf.check_numerics``.
        name (str): Optional name, passed to the base constructor.
        scope (str): Optional scope, passed to the base constructor.
        untrainable_variables_keyvalues: Optional iterable of strings.
            Every training variable whose name contains one of them is
            excluded from the gradient update (i.e. frozen).

    Raises:
        ValueError: If both `max_epoch` and `max_step` are :obj:`None`.
    """
    super(Trainer, self).__init__(name=name, scope=scope)

    # memorize the arguments
    self._model = model
    self._n_z = n_z
    if feed_dict is not None:
        self._feed_dict = dict(six.iteritems(feed_dict))
    else:
        self._feed_dict = {}
    if valid_feed_dict is not None:
        self._valid_feed_dict = dict(six.iteritems(valid_feed_dict))
    else:
        # no separate validation feed: share the training feed dict
        self._valid_feed_dict = self._feed_dict
    if max_epoch is None and max_step is None:
        raise ValueError('At least one of `max_epoch` and `max_step` '
                         'should be specified')
    self._max_epoch = max_epoch
    self._max_step = max_step
    self._batch_size = batch_size
    self._valid_batch_size = valid_batch_size
    self._valid_step_freq = valid_step_freq
    self._initial_lr = initial_lr
    self._lr_anneal_epochs = lr_anneal_epochs
    self._lr_anneal_factor = lr_anneal_factor

    # Name fragments that mark a variable as frozen (none by default).
    if untrainable_variables_keyvalues is not None:
        frozen_keys = tuple(untrainable_variables_keyvalues)
    else:
        frozen_keys = ()

    def is_frozen(variable):
        # Substring match on the variable name.  This replaces the former
        # round-trip through `tf.get_collection(scope=<variable name>)`,
        # which regex-matched the name and then relied on `in` over a
        # list of variables.
        return any(key in variable.name for key in frozen_keys)

    # build the trainer
    with reopen_variable_scope(self.variable_scope):
        # the global step for this model
        self._global_step = tf.get_variable(
            dtype=tf.int64,
            name='global_step',
            trainable=False,
            initializer=tf.constant(0, dtype=tf.int64))

        # input placeholders
        self._input_x = tf.placeholder(
            dtype=tf.float32,
            shape=[None, model.window_length, model.x_dims],
            name='input_x')
        self._learning_rate = tf.placeholder(
            dtype=tf.float32, shape=(), name='learning_rate')

        # compose the training loss
        with tf.name_scope('loss'):
            loss = model.get_training_loss(x=self._input_x, n_z=n_z)
            if use_regularization_loss:
                loss += tf.losses.get_regularization_loss()
            self._loss = loss

        # get the training variables
        train_params = get_variables_as_dict(
            scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES)
        self._train_params = train_params

        # create the trainer; the caller's params are copied before the
        # learning rate entry is injected
        if optimizer_params is None:
            optimizer_params = {}
        else:
            optimizer_params = dict(six.iteritems(optimizer_params))
        optimizer_params['learning_rate'] = self._learning_rate
        self._optimizer = optimizer(**optimizer_params)

        # derive the training gradient
        origin_grad_vars = self._optimizer.compute_gradients(
            self._loss, list(six.itervalues(self._train_params)))

        grad_vars = []
        for grad, var in origin_grad_vars:
            if grad is None or var is None:
                continue
            # Skip frozen variables before building any clip / check ops
            # for them, so no unused ops end up in the graph.
            if is_frozen(var):
                continue
            if grad_clip_norm:
                grad = tf.clip_by_norm(grad, grad_clip_norm)
            if check_numerics:
                grad = tf.check_numerics(
                    grad,
                    'gradient for {} has numeric issue'.format(var.name))
            grad_vars.append((grad, var))

        # build the training op; it runs after the ops registered in the
        # UPDATE_OPS collection
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self._train_op = self._optimizer.apply_gradients(
                grad_vars, global_step=self._global_step)

        # the training summary in case `summary_dir` is specified
        with tf.name_scope('summary'):
            self._summary_op = tf.summary.merge([
                tf.summary.histogram(v.name.rsplit(':', 1)[0], v)
                for v in six.itervalues(self._train_params)
            ])

        # initializer for the variables collected from this trainer's
        # own variable scope
        trainer_variables = get_variables_as_dict(
            scope=self.variable_scope,
            collection=tf.GraphKeys.GLOBAL_VARIABLES)
        self._trainer_initializer = tf.variables_initializer(
            list(six.itervalues(trainer_variables)))
def __init__(self, mean, stddev=None, logstd=None, group_event_ndims=None,
             check_numerics=False, name=None, scope=None):
    """
    Construct the :class:`Normal` distribution.

    Args:
        mean: The mean; its preferred dtype decides the dtype of all
            parameters.
        stddev: The standard deviation.  Exactly one of `stddev` and
            `logstd` must be given.
        logstd: The log standard deviation.  Exactly one of `stddev` and
            `logstd` must be given.
        group_event_ndims: Forwarded to the base constructor.
        check_numerics (bool): Forwarded to the base constructor.
        name (str): Forwarded to the base constructor.
        scope (str): Forwarded to the base constructor.

    Raises:
        ValueError: If both or neither of `stddev` / `logstd` are given,
            or if the static shapes of `mean` and the scale parameter
            cannot be broadcast against each other.
        TypeError: If the preferred dtype of `mean` is not floating.
    """
    # check the arguments: both-None and both-given are rejected alike
    if (stddev is None) == (logstd is None):
        raise ValueError('One and only one of `stddev`, `logstd` should '
                         'be specified.')
    dtype = get_preferred_tensor_dtype(mean)
    if not dtype.is_floating:
        raise TypeError('Normal distribution parameters must be float '
                        'numbers.')

    super(Normal, self).__init__(
        group_event_ndims=group_event_ndims,
        check_numerics=check_numerics,
        name=name,
        scope=scope,
    )

    with reopen_variable_scope(self.variable_scope), \
            tf.name_scope('init'):
        # obtain parameter tensors (mean first, then the scale)
        mean = tf.convert_to_tensor(mean, dtype=dtype)
        scale_is_log = stddev is None
        raw_scale = logstd if scale_is_log else stddev
        self._stdx = tf.convert_to_tensor(raw_scale, dtype=dtype)
        self._stdx_is_log = scale_is_log

        # check the shape and data types of parameters
        self._mean = mean
        mean_shape = self._mean.get_shape()
        scale_shape = self._stdx.get_shape()
        try:
            self._static_batch_shape = tf.broadcast_static_shape(
                mean_shape, scale_shape)
        except ValueError:
            raise ValueError(
                '`mean` and `stddev`/`logstd` should be '
                'broadcastable to match each other (%r vs %r).' %
                (mean_shape, scale_shape))
        self._dynamic_batch_shape = tf.broadcast_dynamic_shape(
            tf.shape(self._mean), tf.shape(self._stdx))

        # derive the attributes of this Normal distribution
        check = self._check_numerics
        if self._stdx_is_log:
            # everything is derived from logstd via exp
            self._stddev = check(
                tf.exp(self._stdx, name='stddev'), 'stddev')
            self._logstd = self._stdx
            log_var = tf.constant(2., dtype=dtype) * self._logstd
            self._var = check(
                tf.exp(log_var, name='variance'), 'variance')
            log_precision = tf.constant(-2., dtype=dtype) * self._logstd
            self._precision = check(
                tf.exp(log_precision, name='precision'), 'precision')
        else:
            # everything is derived from stddev directly
            self._stddev = self._stdx
            self._logstd = check(
                tf.log(self._stdx, name='logstd'), 'logstd')
            self._var = tf.square(self._stddev, name='variance')
            reciprocal = tf.divide(
                tf.constant(1., dtype=dtype), self._var, name='precision')
            self._precision = check(reciprocal, 'precision')

        self._logvar = tf.multiply(
            tf.constant(2., dtype=dtype), self._logstd, name='logvar')
        self._log_prec = tf.negative(self._logvar, name='log_precision')
def __init__(self, variables, save_dir, objects=None,
             filename='checkpoint.dat', max_to_keep=None, save_meta=True,
             name=None, scope=None):
    """
    Construct a new :class:`CheckpointSaver`.

    Args:
        variables: Either a list of variables, or a dict mapping
            names to variables.  Each variable may be a
            :class:`tf.Variable` or a :class:`ScheduledVariable`
            (in which case its underlying `variable` is used).
            For a list, each name is taken from the variable's `name`
            with a trailing ``':0'`` removed.
        save_dir (str): The directory, where to place the checkpoint
            files.  This directory must be solely owned by this saver.
        objects (dict[str, CheckpointSavableObject]): A dict
            `(name -> savable object)`.
        filename (str): Name of the checkpoint files.
        max_to_keep (int or None): Maximum number of versions to keep.
            If :obj:`None` or `0`, keep all versions.
        save_meta (bool): Whether or not to save the graph meta in
            checkpoint files?
        name: Forwarded to the base class constructor.
        scope: Forwarded to the base class constructor.

    Raises:
        TypeError: If an item of `variables` is not a variable, or a
            value of `objects` is not a `CheckpointSavableObject`.
        KeyError: If `CHECKPOINT_VAR_NAME` is used as a name in
            `variables` or in `objects`.
    """
    def unwrap_variable(candidate):
        # accept only variables; a ScheduledVariable is replaced by
        # the variable it wraps
        if not isinstance(candidate, (tf.Variable, ScheduledVariable)):
            raise TypeError('Not a variable: {!r}'.format(candidate))
        if isinstance(candidate, ScheduledVariable):
            return candidate.variable
        return candidate

    def strip_output_suffix(var):
        # drop the trailing ':0' from the variable name, if present
        var_name = var.name
        return var_name[:-2] if var_name.endswith(':0') else var_name

    def ensure_savable(candidate):
        if not isinstance(candidate, CheckpointSavableObject):
            raise TypeError('Not a savable object: {!r}'.format(candidate))
        return candidate

    # normalize `variables` into a plain dict `(name -> tf.Variable)`
    named_vars = {}
    if isinstance(variables, (dict, OrderedDict)):
        for key, value in six.iteritems(variables):
            named_vars[key] = unwrap_variable(value)
    else:
        for item in variables:
            unwrapped = unwrap_variable(item)
            named_vars[strip_output_suffix(unwrapped)] = unwrapped
    variables = named_vars
    if CHECKPOINT_VAR_NAME in variables:
        raise KeyError('Name is reserved for `variables`: {}'.
                       format(CHECKPOINT_VAR_NAME))

    # validate `objects`; a missing argument is treated as an empty dict
    checked_objects = {}
    for key, value in six.iteritems(objects or {}):
        checked_objects[key] = ensure_savable(value)
    objects = checked_objects
    if CHECKPOINT_VAR_NAME in objects:
        raise KeyError('Name is reserved for `objects`: {}'.
                       format(CHECKPOINT_VAR_NAME))

    self._variables = variables
    self._objects = objects
    self._save_dir = os.path.abspath(save_dir)
    self._filename = str(filename)
    self._save_meta = bool(save_meta)

    super(CheckpointSaver, self).__init__(name=name, scope=scope)

    with reopen_variable_scope(self.variable_scope):
        # the serial variable is only created when there are savable
        # objects; it is then saved under the reserved name alongside
        # the user variables
        saver_vars = dict(variables)
        if self._objects:
            self._serial_var = CheckpointSerialVar()
            saver_vars[CHECKPOINT_VAR_NAME] = self._serial_var.variable
        else:
            self._serial_var = None
        self._var_dict = saver_vars

        # the underlying TensorFlow saver
        self._saver = tf.train.Saver(
            var_list=saver_vars,
            max_to_keep=max_to_keep
        )

    # recover the internal states
    self.recover_internal_states()
def __init__(self, logits=None, probs=None, dtype=None,
             group_event_ndims=None, check_numerics=False,
             name=None, default_name=None):
    """
    Construct the :class:`Bernoulli` distribution.

    Args:
        logits: The logits parameter.  Exactly one of `logits` and
            `probs` must be specified; its preferred dtype must be
            a floating type.
        probs: The probability parameter.  Exactly one of `logits`
            and `probs` must be specified; its preferred dtype must be
            a floating type.
        dtype: Stored as the `_dtype` attribute of this distribution
            after being passed through `tf.as_dtype`.  Defaults to
            `tf.int32` when :obj:`None`.
        group_event_ndims: Forwarded to the base class constructor.
        check_numerics: Forwarded to the base class constructor.
        name: Forwarded to the base class constructor.
        default_name: Forwarded to the base class constructor.

    Raises:
        ValueError: If neither or both of `logits`, `probs` are given.
        TypeError: If the preferred dtype of the given parameter is
            not floating.
    """
    # exactly one of the two parameterizations must be chosen
    if (logits is None) == (probs is None):
        raise ValueError('One and only one of `logits`, `probs` should '
                         'be specified.')

    given_param = probs if logits is None else logits
    param_dtype = get_preferred_tensor_dtype(given_param)
    if not param_dtype.is_floating:
        raise TypeError('Bernoulli distribution parameters must be float '
                        'numbers.')

    dtype = tf.int32 if dtype is None else tf.as_dtype(dtype)

    super(Bernoulli, self).__init__(
        group_event_ndims=group_event_ndims,
        check_numerics=check_numerics,
        name=name,
        default_name=default_name,
    )

    with reopen_variable_scope(self.variable_scope), tf.name_scope('init'):
        if logits is None:
            # `probs` given: clip it away from 0 and 1, then compute
            # logits = log(p) - log(1 - p) from the clipped value
            probs_is_derived = False
            probs = tf.convert_to_tensor(
                probs, dtype=param_dtype, name='probs')
            probs_eps = 1e-11 if probs.dtype == tf.float64 else 1e-7
            probs_clipped = tf.clip_by_value(
                probs, probs_eps, 1 - probs_eps)
            log_p = tf.log(probs_clipped)
            log_one_minus_p = tf.log1p(-probs_clipped)
            logits = self._check_numerics(
                tf.subtract(log_p, log_one_minus_p, name='logits'),
                'logits'
            )
        else:
            # `logits` given: probs is the sigmoid of logits, and no
            # clipping is applied to it
            probs_is_derived = True
            logits = tf.convert_to_tensor(
                logits, dtype=param_dtype, name='logits')
            probs = tf.nn.sigmoid(logits, 'probs')
            probs_clipped = probs

        # the batch shape equals the shape of the logits; the dynamic
        # shape is a constant whenever the static shape is deterministic
        static_shape = logits.get_shape()
        self._logits = logits
        self._static_batch_shape = static_shape
        if is_deterministic_shape(static_shape):
            dynamic_shape = tf.constant(
                static_shape.as_list(), dtype=tf.int32)
        else:
            dynamic_shape = tf.shape(logits)
        self._dynamic_batch_shape = dynamic_shape

        # remaining parameter attributes
        self._probs = probs
        self._probs_clipped = probs_clipped
        self._probs_is_derived = probs_is_derived

    # data type attribute of this distribution
    self._dtype = dtype
def test_errors(self):
    """`reopen_variable_scope` should reject a non-scope argument."""
    expected_message = ('`var_scope` must be an instance '
                        'of `tf.VariableScope`')
    not_a_scope = object()
    with pytest.raises(TypeError, match=expected_message):
        with reopen_variable_scope(not_a_scope):
            pass
def __init__(self, logits=None, probs=None, group_event_ndims=None,
             check_numerics=False, name=None, default_name=None):
    """
    Construct the categorical distribution base object.

    Args:
        logits: The logits parameter.  Exactly one of `logits` and
            `probs` must be specified; its preferred dtype must be
            a floating type.  The last axis indexes the categories.
        probs: The probability parameter.  Exactly one of `logits`
            and `probs` must be specified; its preferred dtype must be
            a floating type.  The last axis indexes the categories.
        group_event_ndims: Forwarded to the base class constructor.
        check_numerics: Forwarded to the base class constructor.
        name: Forwarded to the base class constructor.
        default_name: Forwarded to the base class constructor.

    Raises:
        ValueError: If neither or both of `logits`, `probs` are given.
        TypeError: If the preferred dtype of the given parameter is
            not floating.
    """
    # exactly one of the two parameterizations must be chosen
    if (logits is None) == (probs is None):
        raise ValueError('One and only one of `logits`, `probs` should '
                         'be specified.')

    given_param = probs if logits is None else logits
    param_dtype = get_preferred_tensor_dtype(given_param)
    if not param_dtype.is_floating:
        raise TypeError(
            'Categorical distribution parameters must be float '
            'numbers.'
        )

    super(_BaseCategorical, self).__init__(
        group_event_ndims=group_event_ndims,
        check_numerics=check_numerics,
        name=name,
        default_name=default_name
    )

    with reopen_variable_scope(self.variable_scope), tf.name_scope('init'):
        if logits is None:
            # `probs` given: keep a clipped copy, and take the log of
            # `probs` as the logits.
            # NOTE(review): the logits come from the *unclipped* `probs`,
            # so a zero probability yields -inf here even though
            # `probs_clipped` is computed just above -- confirm that this
            # is intended.
            probs_is_derived = False
            probs = tf.convert_to_tensor(probs, dtype=param_dtype)
            probs_eps = 1e-11 if probs.dtype == tf.float64 else 1e-7
            probs_clipped = tf.clip_by_value(
                probs, probs_eps, 1 - probs_eps)
            log_probs = tf.log(probs, name='logits_given_probs')
            logits = self._check_numerics(log_probs, 'logits')
        else:
            # `logits` given: probs is the softmax of logits, and no
            # clipping is applied to it
            probs_is_derived = True
            logits = tf.convert_to_tensor(logits, dtype=param_dtype)
            probs = tf.nn.softmax(logits, name='probs_given_logits')
            probs_clipped = probs

        self._logits = logits
        self._probs = probs
        self._probs_is_derived = probs_is_derived
        self._probs_clipped = probs_clipped

        # the batch shape is the logits shape without its last axis; the
        # dynamic shape is a constant when the static one is deterministic
        logits_shape = logits.get_shape()
        batch_shape = logits_shape[:-1]
        self._static_batch_shape = batch_shape
        if is_deterministic_shape(batch_shape):
            dynamic_shape = tf.constant(
                batch_shape.as_list(), dtype=tf.int32)
        else:
            dynamic_shape = tf.shape(logits)[:-1]
        self._dynamic_batch_shape = dynamic_shape

        # the number of categories is the size of the last axis; fall
        # back to a dynamic tensor when it is not statically known
        n_categories = logits_shape[-1].value
        if n_categories is None:
            n_categories = tf.shape(logits)[-1]
        self._n_categories = n_categories