def inference_ref2(input, is_train, use_bnorm=False):
    with tf.name_scope('conv1'):
        x = tf.layers.conv2d(
            inputs=input, filters=32, kernel_size=3, padding="same",
            activation=tf.nn.relu, use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.conv2d(
            inputs=x, filters=32, kernel_size=3, padding="same",
            activation=tf.nn.relu, use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('conv2'):
        x = tf.layers.conv2d(
            inputs=x, filters=64, kernel_size=3, padding="same",
            activation=tf.nn.relu, use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.conv2d(
            inputs=x, filters=64, kernel_size=3, padding="same",
            activation=tf.nn.relu, use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('dense1'):
        # x = tf.reshape(x, [-1, np.prod(x.get_shape()[1:])])
        x = tf.reshape(x, [x.get_shape()[0].value, -1])
        x = tf.layers.dense(inputs=x, units=512, activation=tf.nn.relu)

    with tf.name_scope('dense2'):
        x = tf.layers.dense(inputs=x, units=cifar10.NB_CLASSES)

    return x
def test_glorot_normal_initializer(self):
    tensor_shape = (5, 6, 4, 2)
    with self.cached_session():
        fan_in, fan_out = init_ops._compute_fans(tensor_shape)
        std = np.sqrt(2. / (fan_in + fan_out))
        self._runner(
            init_ops.glorot_normal_initializer(seed=123),
            tensor_shape,
            target_mean=0.,
            target_std=std)
def __init__(self, num_units, depth, forget_bias=1.0,
             state_is_tuple=True, use_peepholes=False,
             activation=None, gate_activation=None, cell_activation=None,
             initializer=None, input_gate_initializer=None,
             use_bias=True, reuse=None, name=None):
    """Initialize the basic NLSTM cell.

    Args:
      num_units: `int`, The number of hidden units of each cell state
        and hidden state.
      depth: `int`, The number of layers in the nest.
      forget_bias: `float`, The bias added to forget gates.
      state_is_tuple: If `True`, accepted and returned states are tuples of
        the `h_state` and `c_state`s. If `False`, they are concatenated along
        the column axis. The latter behavior will soon be deprecated.
      use_peepholes: `bool` (optional).
      activation: Activation function of the update values, including new
        inputs and new cell states. Default: `tanh`.
      gate_activation: Activation function of the gates, including the
        input, output, and forget gate. Default: `sigmoid`.
      cell_activation: Activation function of the first cell gate.
        Default: `identity`. Note that in the paper only the first
        cell_activation is identity.
      initializer: Initializer of kernel. Default: `orthogonal_initializer`.
      input_gate_initializer: Initializer of input gates.
        Default: `glorot_normal_initializer`.
      use_bias: `bool`. Default: `True`.
      reuse: `bool` (optional), Python boolean describing whether to reuse
        variables in an existing scope. If not `True`, and the existing scope
        already has the given variables, an error is raised.
      name: `str`, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.
    """
    super(NLSTMCell, self).__init__(_reuse=reuse, name=name)
    if not state_is_tuple:
        logging.warn("%s: Using a concatenated state is slower and will soon "
                     "be deprecated. Use state_is_tuple=True.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._use_peepholes = use_peepholes
    self._depth = depth
    self._activation = activation or math_ops.tanh
    self._gate_activation = gate_activation or math_ops.sigmoid
    self._cell_activation = cell_activation or array_ops.identity
    self._initializer = initializer or init_ops.orthogonal_initializer()
    self._input_gate_initializer = (input_gate_initializer
                                    or init_ops.glorot_normal_initializer())
    self._use_bias = use_bias
    self._kernels = None
    self._biases = None
    self.built = False
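A minimal usage sketch for the cell above, assuming it follows the standard RNNCell interface; the batch size, sequence length, and unit counts here are illustrative, not from the original source.

# Illustrative only: drive the nested cell like any other RNNCell.
batch_size, num_steps, input_dim = 32, 50, 128     # assumed sizes
inputs = tf.placeholder(tf.float32, [batch_size, num_steps, input_dim])
cell = NLSTMCell(num_units=64, depth=2)            # depth-2 nest of cell states
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)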
def test_glorot_normal_initializer(self):
    tensor_shape = (5, 6, 4, 2)
    with self.cached_session():
        fan_in, fan_out = init_ops._compute_fans(tensor_shape)
        std = np.sqrt(2. / (fan_in + fan_out))
        self._runner(
            init_ops.glorot_normal_initializer(seed=123),
            tensor_shape,
            target_mean=0.,
            target_std=std)
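For reference, the target_std used by the tests above can be reproduced by hand; this is a small sketch assuming the usual fan computation for 4-D kernels (spatial receptive-field size times the input/output channel counts).

import numpy as np

# By-hand check of the Glorot-normal std for a (5, 6, 4, 2) kernel shape.
receptive_field = 5 * 6                     # product of the spatial dims: 30
fan_in = receptive_field * 4                # 120
fan_out = receptive_field * 2               # 60
std = np.sqrt(2. / (fan_in + fan_out))      # ~0.105
print(std)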
def suanet_v2_arg_scope(_weight_decay=weight_decay):
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected],
        activation_fn=nn_ops.relu,
        biases_initializer=init_ops.glorot_normal_initializer(),
        weights_regularizer=regularizers.l2_regularizer(_weight_decay)):
        with arg_scope([layers.conv2d], padding='SAME'):
            with arg_scope([layers_lib.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
def clone_model_fn(features, labels, mode):
    """Model function for DN."""
    # Input Layer
    input_layer = tf.convert_to_tensor(features['x'])

    hidden_layer1 = tf.layers.dense(
        inputs=input_layer,
        activation=tf.nn.tanh,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        bias_initializer=init_ops.glorot_normal_initializer(),
        units=64)
    drop1 = tf.layers.dropout(inputs=hidden_layer1, rate=0.2)

    output_layer = tf.layers.dense(
        inputs=drop1,
        activation=tf.nn.tanh,
        bias_initializer=init_ops.glorot_uniform_initializer(),
        units=17)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "actions": output_layer,
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(output_layer, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.mean_squared_error(labels=labels, predictions=output_layer)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.ProximalAdagradOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "mse": tf.metrics.mean_squared_error(
            labels=labels, predictions=predictions["actions"])
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def MADE_layer(inputs, units, mask, name, activation):
    kernel_initializer = init_ops.glorot_normal_initializer()

    def masked_initializer(shape=(units, inputs), dtype=None,
                           partition_info=None):
        return mask.T * kernel_initializer(shape, dtype, partition_info)

    with ops.name_scope(name, "masked_dense", [inputs, units]):
        layer = layers.Dense(
            units,
            activation=activation,
            kernel_initializer=masked_initializer,
            kernel_constraint=lambda x: mask.T * x,
            name=name)  # , _reuse=tf.AUTO_REUSE
        return layer.apply(inputs)
def _deep_architecture(input_layer, params, mode):
    """Return the output operation following the deep part of the network.

    :param input_layer: (Tensor) Input.
    :param params: (HParams) Hyperparameters (needs to have "hidden_units",
        "dnn_dropout", "batch_norm", "l2_reg", "dnn_activation_fn").
    :param mode: (ModeKeys) Specifies if training, evaluation or prediction.
        Dropout and BN need it.
    :return: Output Op for the deep part.
    """
    hidden_units = params.hidden_units
    dropout = params.dnn_dropout
    is_bn = params.batch_norm
    l2_reg = params.l2_reg
    net = input_layer

    # Regularizer for the deep part.
    if l2_reg:
        regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
    else:
        regularizer = None

    for layer_id, num_hidden in enumerate(hidden_units):
        with tf.variable_scope('hidden_layer_%d' % layer_id) as hidden_layer_scope:
            net = tf.layers.dense(
                inputs=net,
                units=num_hidden,
                activation=params.dnn_activation_fn,
                kernel_initializer=glorot_normal_initializer(),
                kernel_regularizer=regularizer,
                name=hidden_layer_scope)
            # Batch normalization after ReLU; the last layer uses no BN.
            if is_bn and layer_id < len(hidden_units) - 1:
                is_training = mode == tf.estimator.ModeKeys.TRAIN
                net = _batch_normalization(input=net, is_training=is_training,
                                           scope='bn_%d' % layer_id)
            # Dropout, applied only during training.
            if dropout is not None and mode == tf.estimator.ModeKeys.TRAIN:
                with tf.name_scope('dropout'):
                    net = tf.layers.dropout(net, rate=dropout, training=True,
                                            name="dropout")
        _add_layer_summary(net, 'hidden_layer_%d' % layer_id)
    return net
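A hypothetical call to the helper above (not from the original source); the HParams fields mirror the names the function reads, and the input tensor is assumed to come from tf.feature_column.input_layer or similar.

# Illustrative only: the field names follow what _deep_architecture reads.
params = tf.contrib.training.HParams(
    hidden_units=[256, 128, 64],
    dnn_dropout=0.5,
    batch_norm=True,
    l2_reg=1e-4,
    dnn_activation_fn=tf.nn.relu)
deep_out = _deep_architecture(input_layer, params,
                              mode=tf.estimator.ModeKeys.TRAIN)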
def FC_layer(inputs, output_dim, nonlinearity=tf.nn.relu, is_training=None,
             dropout_prob=0., name='FC_layer'):
    if dropout_prob > 0.:
        inputs = tf.layers.dropout(inputs, dropout_prob, training=is_training)
    outputs = tf.layers.dense(
        inputs, output_dim, nonlinearity,
        kernel_initializer=init_ops.glorot_normal_initializer(),
        bias_initializer=init_ops.zeros_initializer(),
        name=name)
    return outputs
def get(identifier, **kwargs):
    if identifier is None or isinstance(identifier, init_ops.Initializer):
        return identifier
    if np.isscalar(identifier) and identifier == 0.:
        identifier = 'zeros'
    # TODO: ...
    if callable(identifier):
        return identifier
    elif isinstance(identifier, six.string_types):
        # If identifier is a string
        identifier = identifier.lower()
        if identifier in ['random_uniform']:
            rng = kwargs.get('range', None)
            low, high = checker.get_range(rng)
            return init_ops.RandomUniform(minval=low, maxval=high)
        elif identifier in ['random_norm', 'random_normal']:
            mean = kwargs.get('mean', 0.)
            stddev = kwargs.get('stddev', 1.)
            return init_ops.truncated_normal_initializer(mean=mean, stddev=stddev)
        elif identifier in ['glorot_uniform', 'xavier_uniform']:
            return glorot_uniform()
        elif identifier in ['glorot_normal', 'xavier_normal']:
            return init_ops.glorot_normal_initializer()
        elif identifier in ['id', 'identity']:
            return identity()
        else:
            # Find initializer in tensorflow.python.ops.init_ops
            initializer = (init_ops.__dict__.get(identifier, None) or
                           init_ops.__dict__.get(
                               '{}_initializer'.format(identifier), None))
            # If nothing is found
            if initializer is None:
                raise ValueError('Can not resolve "{}"'.format(identifier))
            # Return initializer with default parameters
            return initializer
    elif np.isscalar(identifier):
        # Note string is scalar
        return tf.initializers.constant(value=identifier)
    else:
        raise TypeError('identifier must be an Initializer or a string')
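A short sketch of how the resolver above might be used; these calls are illustrative and not from the original source.

# Hypothetical calls against the get() helper defined above.
init_a = get('glorot_normal')   # resolves to init_ops.glorot_normal_initializer()
init_b = get('orthogonal')      # found by name lookup as init_ops.orthogonal_initializer
init_c = get(0.1)               # any non-zero scalar becomes a constant initializer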
def masked_dense(inputs: tf.Tensor,
                 units: int,
                 mask: np.ndarray,
                 activation=None,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,
                 **kwargs) -> tf.Tensor:
    """This code has been copied from the masked_dense implementation in
    TensorFlow. See the TF documentation:
    https://www.tensorflow.org/api_docs/python/tf/contrib/distributions/bijectors/masked_dense
    """
    input_depth = inputs.shape.with_rank_at_least(1)[-1].value
    if input_depth is None:
        raise NotImplementedError(
            "Rightmost dimension must be known prior to graph execution.")

    if kernel_initializer is None:
        kernel_initializer = init_ops.glorot_normal_initializer()

    def masked_initializer(shape, dtype=None, partition_info=None):
        return mask * kernel_initializer(shape, dtype, partition_info)

    with ops.name_scope(name, "masked_dense", [inputs, units]):
        layer = layers.Dense(
            units,
            activation=activation,
            kernel_initializer=masked_initializer,
            kernel_constraint=lambda x: mask * x,
            name=name,
            dtype=inputs.dtype.base_dtype,
            _scope=name,
            _reuse=reuse,
            *args,
            **kwargs)
        return layer.apply(inputs)
def _base_model(features, mode, params):
    """base model is DNN"""
    hidden_units = params.hidden_units
    dropout = params.dnn_dropout

    net = tf.feature_column.input_layer(features=features,
                                        feature_columns=params.feature_columns)
    for layer_id, num_hidden in enumerate(hidden_units):
        with tf.variable_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
            net = tf.layers.dense(
                inputs=net,
                units=num_hidden,
                activation=params.dnn_activation_fn,
                kernel_initializer=glorot_normal_initializer(),
                name=hidden_layer_scope)
            if dropout is not None and mode == tf.estimator.ModeKeys.TRAIN:
                net = tf.layers.dropout(net, rate=dropout, training=True,
                                        name='dropout')

    # logits
    logits = tf.layers.dense(net, 1, activation=None)
    return logits
def masked_dense(inputs,
                 units,
                 num_blocks=None,
                 exclusive=False,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,
                 **kwargs):
    """An autoregressively masked dense layer. Analogous to `tf.layers.dense`.

    See [1] for a detailed explanation.

    [1]: "MADE: Masked Autoencoder for Distribution Estimation."
         Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
         https://arxiv.org/abs/1502.03509

    Arguments:
      inputs: Tensor input.
      units: Python `int` scalar representing the dimensionality of the output
        space.
      num_blocks: Python `int` scalar representing the number of blocks for the
        MADE masks.
      exclusive: Python `bool` scalar representing whether to zero the diagonal
        of the mask, used for the first layer of a MADE.
      kernel_initializer: Initializer function for the weight matrix. If `None`
        (default), weights are initialized using
        `tf.glorot_normal_initializer`.
      reuse: Python `bool` scalar representing whether to reuse the weights of
        a previous layer by the same name.
      name: Python `str` used to describe ops managed by this function.
      *args: `tf.layers.dense` arguments.
      **kwargs: `tf.layers.dense` keyword arguments.

    Returns:
      Output tensor.

    Raises:
      NotImplementedError: if rightmost dimension of `inputs` is unknown prior
        to graph execution.
    """
    # TODO(b/67594795): Better support of dynamic shape.
    input_depth = inputs.shape.with_rank_at_least(1)[-1].value
    if input_depth is None:
        raise NotImplementedError(
            "Rightmost dimension must be known prior to graph execution.")

    mask = _gen_mask(num_blocks, input_depth, units,
                     MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T

    if kernel_initializer is None:
        kernel_initializer = init_ops.glorot_normal_initializer()

    def masked_initializer(shape, dtype=None, partition_info=None):
        return mask * kernel_initializer(shape, dtype, partition_info)

    with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
        layer = layers.Dense(
            units,
            kernel_initializer=masked_initializer,
            kernel_constraint=lambda x: mask * x,
            name=name,
            dtype=inputs.dtype.base_dtype,
            _scope=name,
            _reuse=reuse,
            *args,
            **kwargs)
        return layer.apply(inputs)
def masked_dense(inputs,
                 units,
                 num_blocks=None,
                 exclusive=False,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,
                 **kwargs):
    """An autoregressively masked dense layer. Analogous to `tf.layers.dense`.

    See [Germain et al. (2015)][1] for a detailed explanation.

    Arguments:
      inputs: Tensor input.
      units: Python `int` scalar representing the dimensionality of the output
        space.
      num_blocks: Python `int` scalar representing the number of blocks for the
        MADE masks.
      exclusive: Python `bool` scalar representing whether to zero the diagonal
        of the mask, used for the first layer of a MADE.
      kernel_initializer: Initializer function for the weight matrix. If `None`
        (default), weights are initialized using
        `tf.glorot_normal_initializer`.
      reuse: Python `bool` scalar representing whether to reuse the weights of
        a previous layer by the same name.
      name: Python `str` used to describe ops managed by this function.
      *args: `tf.layers.dense` arguments.
      **kwargs: `tf.layers.dense` keyword arguments.

    Returns:
      Output tensor.

    Raises:
      NotImplementedError: if rightmost dimension of `inputs` is unknown prior
        to graph execution.

    #### References

    [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle.
         MADE: Masked Autoencoder for Distribution Estimation. In
         _International Conference on Machine Learning_, 2015.
         https://arxiv.org/abs/1502.03509
    """
    # TODO(b/67594795): Better support of dynamic shape.
    input_depth = inputs.shape.with_rank_at_least(1)[-1].value
    if input_depth is None:
        raise NotImplementedError(
            "Rightmost dimension must be known prior to graph execution.")

    mask = _gen_mask(num_blocks, input_depth, units,
                     MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T

    if kernel_initializer is None:
        kernel_initializer = init_ops.glorot_normal_initializer()

    def masked_initializer(shape, dtype=None, partition_info=None):
        return mask * kernel_initializer(shape, dtype, partition_info)

    with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
        layer = layers.Dense(
            units,
            kernel_initializer=masked_initializer,
            kernel_constraint=lambda x: mask * x,
            name=name,
            dtype=inputs.dtype.base_dtype,
            _scope=name,
            _reuse=reuse,
            *args,
            **kwargs)
        return layer.apply(inputs)
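A minimal sketch of stacking the layer above into a small MADE block; the dimensions are assumptions (not from the original source), with num_blocks kept equal to the input depth and hidden sizes divisible by it.

# Illustrative only: a two-layer autoregressive stack over 4 input dimensions.
x = tf.placeholder(tf.float32, [None, 4])
h = masked_dense(inputs=x, units=16, num_blocks=4, exclusive=True,
                 activation=tf.nn.relu)               # first layer zeroes the diagonal blocks
out = masked_dense(inputs=h, units=4, num_blocks=4)   # later layers keep the ordering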
def inference_bin(input, is_train, stochastic=False, use_bnorm=False):
    with tf.name_scope('128C3-128C3-P2'):
        x = conv2d_bin(stochastic=stochastic, inputs=input, filters=128,
                       kernel_size=3, padding="same", activation=tf.nn.relu,
                       is_train=is_train, use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = conv2d_bin(stochastic=stochastic, inputs=x, filters=128,
                       kernel_size=3, padding="same", activation=tf.nn.relu,
                       is_train=is_train, use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('256C3-256C3-P2'):
        x = conv2d_bin(stochastic=stochastic, inputs=x, filters=256,
                       kernel_size=3, padding="same", activation=tf.nn.relu,
                       is_train=is_train, use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = conv2d_bin(stochastic=stochastic, inputs=x, filters=256,
                       kernel_size=3, padding="same", activation=tf.nn.relu,
                       is_train=is_train, use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('512C3-512C3-P2'):
        x = conv2d_bin(stochastic=stochastic, inputs=x, filters=512,
                       kernel_size=3, padding="same", activation=tf.nn.relu,
                       is_train=is_train, use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = conv2d_bin(stochastic=stochastic, inputs=x, filters=512,
                       kernel_size=3, padding="same", activation=tf.nn.relu,
                       is_train=is_train, use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('1024FC-1024FC-10FC'):
        x = tf.reshape(x, [x.get_shape()[0].value, -1])
        x = dense_bin(inputs=x, units=1024, stochastic=stochastic,
                      is_train=is_train, activation=tf.nn.relu,
                      use_bias=not use_bnorm)
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = dense_bin(inputs=x, units=1024, stochastic=stochastic,
                      is_train=is_train, activation=tf.nn.relu,
                      use_bias=not use_bnorm)
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = dense_bin(inputs=x, units=cifar10.NB_CLASSES, stochastic=stochastic,
                      is_train=is_train, use_bias=not use_bnorm)
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)

    return x
def _initializer(shape, dtype, partition_info=None):
    if len(shape) == 2 and (shape[0] == shape[1]):
        return tf.orthogonal_initializer()(shape, dtype, partition_info)
    else:
        return glorot_normal_initializer()(shape, dtype, partition_info)
def ave_model_fn(features, labels, mode):
    """The variational autoencoder model function.

    The input should be the following:
    features -- a dict containing two keys, 'x' and 'y'. The 'x' value should
        contain the time series of the area around the point of interest. The
        format should be 9*9*696; however, the correct format is not enforced.
        'y' should contain the position of the example. We might want to
        change it into a 'lat' and a 'lon' parameter.
    labels -- this is meaningless and should be set to a constant zero.
        Labels are not needed for a variational autoencoder.
    mode -- the mode the network is used in; should be one of the following:
        tf.estimator.ModeKeys.TRAIN - if the network is trained at the moment
        tf.estimator.ModeKeys.PREDICT - if the network is used to get
            information on the examples
        tf.estimator.ModeKeys.EVAL - if the network is used to get general
            information about the performance of the network, like a mean
            error on all data samples
        If the value equals none of the above, the network will use the
        EVAL key.
    """
    # The input layer reshapes the input into the desired form.
    input_layer = tf.reshape(features['x'], [-1, time_size, 1, 1, 1])  # HAS TO BE CHANGED
    input_layer = tf.transpose(input_layer, [0, 2, 3, 1, 4])
    input_slice_center = input_layer

    # All of the tensors used in the encoding part will start with 'encoding/'.
    with tf.variable_scope('encoding'):
        # We have to include the affected layers depending on their size.
        # Let's start with 696.
        if time_size == 696:
            # We use a batch norm at the beginning. This sets the mean of the
            # input data in every step to 0 and the biggest variance to one.
            # The idea is to approximate a normal distribution. We should be
            # careful here if the batch size is too low.
            batch_norm_1 = tf.layers.batch_normalization(
                inputs=input_slice_center,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm1')

            # The first convolution layer. We use strided convolution as it
            # leads to better reproducibility compared to max pooling,
            # according to 'Unsupervised Representation Learning with Deep
            # Convolutional Generative Adversarial Networks' by Radford et al.
            # We also followed their advice to put batch normalization in
            # between every layer and to use ReLU activations.
            conv_1 = tf.layers.conv3d(
                inputs=batch_norm_1,
                filters=128,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv1')
            pass  # End of the if 696

        if time_size == 1392:
            batch_norm_0 = tf.layers.batch_normalization(
                inputs=input_slice_center,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm0')

            # The first convolution layer. We use strided convolution as it
            # leads to better reproducibility compared to max pooling,
            # according to 'Unsupervised Representation Learning with Deep
            # Convolutional Generative Adversarial Networks' by Radford et al.
            # We also followed their advice to put batch normalization in
            # between every layer and to use ReLU activations.
            conv_0 = tf.layers.conv3d(
                inputs=batch_norm_0,
                filters=128,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv0')
            batch_norm_1 = tf.layers.batch_normalization(
                inputs=conv_0,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm1')
            conv_1 = tf.layers.conv3d(
                inputs=batch_norm_1,
                filters=128,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv1')
            pass  # End of the if 1392

        # Batch normalization makes the activation almost standard normally
        # distributed in every layer.
        batch_norm_2 = tf.layers.batch_normalization(
            inputs=conv_1,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm2')
        # A second convolutional layer.
        conv_2 = tf.layers.conv3d(
            inputs=batch_norm_2,
            filters=64 * 3,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            name='conv2')
        # Again a batch normalization layer.
        batch_norm_3 = tf.layers.batch_normalization(
            inputs=conv_2,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm3')
        # Third convolutional layer.
        conv_3 = tf.layers.conv3d(
            inputs=batch_norm_3,
            filters=64 * 4,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            name='conv3')
        # Batch normalization in between every layer.
        batch_norm_4 = tf.layers.batch_normalization(
            inputs=conv_3,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm4')
        # Fourth convolutional layer.
        conv_4 = tf.layers.conv3d(
            inputs=batch_norm_4,
            filters=64 * 4,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            name='conv4')
        # Batch normalization in between every layer.
        batch_norm_5 = tf.layers.batch_normalization(
            inputs=conv_4,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm5')
        # 5. convolutional layer.
        conv_5 = tf.layers.conv3d(
            inputs=batch_norm_5,
            filters=64 * 4,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            name='conv5')
        # Batch normalization in between every layer.
        batch_norm_6 = tf.layers.batch_normalization(
            inputs=conv_5,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm6')
        # 6. convolutional layer.
        conv_6 = tf.layers.conv3d(
            inputs=batch_norm_6,
            filters=64 * 6,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            name='conv6')
        # Batch normalization in between every layer.
        batch_norm_7 = tf.layers.batch_normalization(
            inputs=conv_6,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm7')

        # The tensor is flattened with reshape instead of flatten to allow for
        # older versions of tf. The drawback is that we need to put the size
        # of the vector in manually.
        flatten = tf.contrib.layers.flatten(batch_norm_7)

        # Against the advice of Radford et al., we use a fully connected layer
        # here. We calculate the means of the latent distributions.
        # Since the mean of such a distribution could as well be negative, we
        # use no ReLU activation but instead a linear activation.
        means = tf.layers.dense(
            inputs=flatten,
            units=encoding_size,
            activation=None,
            kernel_initializer=glorot_normal_initializer(),
            name='fullyconnected_means')

        # For the standard deviations we again use a fully connected layer.
        # Since these should not be negative, we use ReLU activations here.
        deviations = tf.layers.dense(
            inputs=flatten,
            units=encoding_size,
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            name='fullyconnected_dev')
        # This completes the encoding part, since the latent distributions are
        # assumed to be normal and are therefore completely defined by their
        # mean and deviation.

    # Here we begin the middle part, where we draw the encoding from the
    # latent distributions. All tensors from the middle part start with 'vae/'.
    with tf.variable_scope('vae'):
        # Draw numbers from a standard normal distribution. The numbers have
        # the same shape as means, and as seed we use the seed set above. If
        # no seed is set, the time will be used, but the experiment will not
        # be reproducible.
        random_numbers = tf.random_normal(
            shape=tf.shape(means), mean=0.0, stddev=1.0, seed=random_seed,
            name='random_number_generator')
        # Since drawing from a normal distribution with standard deviation σ
        # is the same as drawing from a standard normal distribution and
        # multiplying by σ, we multiply the drawn values by the deviations.
        scaled_random = tf.multiply(x=random_numbers, y=deviations,
                                    name='adjust_variance')
        # Since drawing from a normal distribution with mean μ is the same as
        # drawing from a normal distribution with mean 0 and adding μ, we add
        # the means.
        encoding = tf.add(x=scaled_random, y=means, name='adjust_means')
        # Now the encodings contain values which are normally distributed with
        # means 'means' and deviations 'deviations', but we can still use
        # backpropagation, since we do not need to propagate back through the
        # random number generator.

    # Here begins the decoding part of the network. All tensors here will
    # begin with 'decoding/'.
    with tf.variable_scope('decoding'):
        # We start with a fully connected layer. This gives the network a
        # chance to rearrange the features and makes the architecture of the
        # decoder somewhat independent of the compression factor ϑ, since the
        # dimensionality will be the same after this step.
        fc_decoding = tf.layers.dense(
            inputs=encoding,
            units=20 * 64 * 4,
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            name='fullyconnected')
        # We reverse the flattening of the tensors and regain a shape suitable
        # for an inverse convolution.
        input_layer_decoding = tf.reshape(fc_decoding, [-1, 1, 1, 20, 64 * 4])
        # Since the centering is a big part of the encoding, we do not use a
        # batch normalization at this position in the network.
        # For reconstruction we use transposed convolutional layers. These
        # produce an inverse of a convolutional layer.
        deconv_1 = tf.layers.conv3d_transpose(
            inputs=input_layer_decoding,
            filters=64 * 6,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            use_bias=True,
            name='deconv1')
        # From here on, again batch normalization in between every layer.
        deconv_1_bn = tf.layers.batch_normalization(
            inputs=deconv_1,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm2')
        # Since we encoded using three convolutions, we decode using 3
        # transposed convolutions.
        deconv_2 = tf.layers.conv3d_transpose(
            inputs=deconv_1_bn,
            filters=64 * 4,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            use_bias=True,
            name='deconv2')
        # Batch normalization in between every layer.
        deconv_2_bn = tf.layers.batch_normalization(
            inputs=deconv_2,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm3')
        # The third deconvolution should lead to the exact same size as the input.
        deconv_3 = tf.layers.conv3d_transpose(
            inputs=deconv_2_bn,
            filters=64 * 3,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            use_bias=True,
            name='deconv3')
        # Batch normalization in between every layer.
        deconv_3_bn = tf.layers.batch_normalization(
            inputs=deconv_3,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm4')
        # The 4. deconvolution should lead to the exact same size as the input.
        deconv_4 = tf.layers.conv3d_transpose(
            inputs=deconv_3_bn,
            filters=128,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=glorot_normal_initializer(),
            use_bias=True,
            name='deconv4')
        # Batch normalization in between every layer.
        deconv_4_bn = tf.layers.batch_normalization(
            inputs=deconv_4,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            name='batchnorm5')
        # The 5. deconvolution should lead to the exact same size as the input.
        deconv_5 = tf.layers.conv3d_transpose(
            inputs=deconv_4_bn,
            filters=1,
            kernel_size=(1, 1, 4),
            strides=(1, 1, 2),
            padding='valid',
            activation=None,
            kernel_initializer=glorot_normal_initializer(),
            use_bias=True,
            name='deconv5')

        # Add extra convolutions for the 1392 size.
        if time_size == 1392:
            # Batch normalization in between every layer.
            deconv_5_bn = tf.layers.batch_normalization(
                inputs=deconv_5,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm6')
            # The 6. deconvolution should lead to the exact same size as the input.
            deconv_6 = tf.layers.conv3d_transpose(
                inputs=deconv_5_bn,
                filters=1,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=None,
                kernel_initializer=glorot_normal_initializer(),
                use_bias=True,
                name='deconv6')
            deconv_slice = tf.slice(deconv_6, [0, 0, 0, 0, 0], [-1, 1, 1, 1392, 1])
            pass  # End of the 1392 if

        # Start the other if.
        if time_size == 696:
            deconv_slice = tf.slice(deconv_5, [0, 0, 0, 0, 0], [-1, 1, 1, 696, 1])

    # The network can report the following quantities:
    #   - the reconstructed time series, mostly for comparison to the original
    #   - the position, given as the 'y' component.
    #     This should maybe be substituted for 'lat' and 'lon'.
    #   - the latent distributions.
    predictions = {
        'timeseries': deconv_slice,
        'position': features['y'],
        'encoding_mean': means,
        'encoding_dev': deviations,
        'input': input_slice_center
    }

    # If the mode was to predict, the above quantities are reported back.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # If the mode was not set to predict, the loss gets calculated.
    # The epsilon is a parameter to avoid numerical problems if the deviation
    # of one variable goes to zero.
    epsilon = 0.0000001
    # The reconstruction error is the mean squared error of the original time
    # series of the middle point and the reconstructed time series of the
    # middle point.
    reconstruction_error = tf.losses.mean_squared_error(
        labels=input_slice_center, predictions=deconv_slice)
    # The latent loss is the KL divergence between the latent distributions
    # and a multivariate normal distribution.
    latent_loss = tf.reduce_mean(0.5 * tf.reduce_sum(
        tf.square(means) + tf.square(deviations)
        - tf.log(tf.square(deviations) + epsilon) - 1, 1))
    # As loss we use the sum of both losses above, weighted by a factor λ.
    # If λ = 1, both losses are weighted equally; if λ = 0, only the
    # reconstruction loss is taken into account; and if λ is big, the latent
    # loss is much more important than the reconstruction loss.
    lamb = 0.0000001
    loss = reconstruction_error + lamb * latent_loss

    # If the mode is set to train, the network now uses a minimizer to
    # minimize the loss.
    if mode == tf.estimator.ModeKeys.TRAIN:
        # As an optimizer we use Adam.
        optimizer = tf.train.AdamOptimizer(learning_rate=l_rate)
        # Since the training error does not depend on the sliding averages
        # used in the batch normalizations, they are not automatically
        # updated. Therefore, they have to be updated by hand.
        batch_norm_update = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Update the weights and the batch norm parameters.
        with tf.control_dependencies(batch_norm_update):
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # If the mode is neither predict nor train, we assume it to be eval.
    # The evaluation metrics are calculated. Here we calculate the mean
    # squared error and the mean absolute error on the whole region, not just
    # on the center.
    eval_metric_ops = {
        'squared_error': tf.metrics.mean_squared_error(
            labels=input_slice_center, predictions=deconv_slice),
        'absolute_error': tf.metrics.mean_absolute_error(
            labels=input_slice_center, predictions=deconv_slice)
    }
    # The metrics are reported back.
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)
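A minimal driver sketch for the model function above (illustrative, not from the original source); it assumes the module-level settings (time_size, encoding_size, l_rate, random_seed) are defined and that the example arrays are already loaded.

# Hypothetical training driver; the array names are placeholders.
vae = tf.estimator.Estimator(model_fn=ave_model_fn, model_dir='/tmp/vae_model')
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': series_array, 'y': position_array},       # assumed numpy inputs
    y=np.zeros(len(series_array), dtype=np.float32),   # labels are unused by the model_fn
    batch_size=32, num_epochs=None, shuffle=True)
vae.train(input_fn=train_input_fn, steps=10000)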
def make_trainable(net, val):
    net.trainable = val
    for l in net.layers:
        l.trainable = val


shp = X_train.shape[1:]
dropout_rate = 0.25
opt = Adam(lr=1e-4)
dopt = Adam(lr=1e-3)

# Build Generative model ...
nch = 200
g_input = Input(shape=[100])
H = Dense(nch * 14 * 14,
          kernel_initializer=init_ops.glorot_normal_initializer())(g_input)
H = BatchNormalization()(H)
H = Activation('relu')(H)
H = Reshape([14, 14, nch])(H)
H = UpSampling2D(size=(2, 2))(H)
H = Conv2D(nch / 2, kernel_size=(3, 3), padding='same',
           kernel_initializer=init_ops.glorot_normal_initializer(),
           name='Convolution_1')(H)
H = BatchNormalization()(H)
H = Activation('relu')(H)
H = Conv2D(nch / 4, kernel_size=(3, 3), padding='same',
           kernel_initializer=init_ops.glorot_normal_initializer(),