def initializer():
    """Return a variable initializer selected by ``hparams.initializer``.

    Supported values are 'orthogonal', 'uniform', 'normal_unit_scaling'
    and 'uniform_unit_scaling'; any other value raises ValueError.
    """
    kind = hparams.initializer
    gain = hparams.initializer_gain
    if kind == "orthogonal":
        return tf.orthogonal_initializer(gain=gain)
    if kind == "uniform":
        bound = 0.1 * gain
        return tf.random_uniform_initializer(-bound, bound)
    if kind == "normal_unit_scaling":
        return init_ops.variance_scaling_initializer(
            gain, mode="fan_avg", distribution="normal")
    if kind == "uniform_unit_scaling":
        return init_ops.variance_scaling_initializer(
            gain, mode="fan_avg", distribution="uniform")
    raise ValueError("Unrecognized initializer: %s" % kind)
def prepare_training(self):
    """Create the global step, learning rate, optimizer and initializer.

    Reads the optimizer choice and base learning rate from
    ``self._config.train`` and stores the results on ``self``.

    Raises:
        ValueError: if ``self._config.train.optimizer`` is not one of
            'adam', 'adam_decay', 'sgd' or 'mom'.
    """
    with self.graph.as_default():
        # Optimizer
        self.global_step = tf.get_variable(
            name='global_step', dtype=tf.int64, shape=[],
            trainable=False, initializer=tf.zeros_initializer)

        self.learning_rate = tf.convert_to_tensor(
            self._config.train.learning_rate, dtype=tf.float32)
        if self._config.train.optimizer == 'adam':
            self._optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate)
        elif self._config.train.optimizer == 'adam_decay':
            # Scale the base rate by the step-dependent decay schedule.
            self.learning_rate *= learning_rate_decay(
                self._config, self.global_step)
            self._optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate,
                beta1=0.9, beta2=0.98, epsilon=1e-9)
        elif self._config.train.optimizer == 'sgd':
            self._optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.learning_rate)
        elif self._config.train.optimizer == 'mom':
            self._optimizer = tf.train.MomentumOptimizer(
                self.learning_rate, momentum=0.9)
        else:
            # Fail fast on unknown optimizers instead of silently leaving
            # self._optimizer undefined (which surfaces later as an
            # AttributeError far from the misconfiguration).
            raise ValueError(
                "Unrecognized optimizer: %s" % self._config.train.optimizer)

        # Uniform scaling initializer.
        self._initializer = init_ops.variance_scaling_initializer(
            scale=1.0, mode='fan_avg', distribution='uniform')
def __init__(self, config, num_gpus):
    """Set up per-device placeholders and training bookkeeping.

    Args:
        config: configuration object providing encoder/decoder scopes.
        num_gpus: number of GPUs; 0 falls back to a single CPU device.
    """
    self._config = config
    if num_gpus > 0:
        self._devices = ['/gpu:%d' % i for i in range(num_gpus)]
    else:
        self._devices = ['/cpu:0']

    # Placeholders and saver.
    src_pls, dst_pls = [], []
    for idx, dev in enumerate(self._devices):
        with tf.device(dev):
            src_pls.append(tf.placeholder(
                dtype=tf.int32, shape=[None, None],
                name='src_pl_{}'.format(idx)))
            dst_pls.append(tf.placeholder(
                dtype=tf.int32, shape=[None, None],
                name='dst_pl_{}'.format(idx)))
    self.src_pls = tuple(src_pls)
    self.dst_pls = tuple(dst_pls)

    self.encoder_scope = self._config.encoder_scope or 'encoder'
    self.decoder_scope = self._config.decoder_scope or 'decoder'

    self.losses = defaultdict(list)          # self.losses[name][device]
    self.grads_and_vars = defaultdict(list)  # self.grads_and_vars[name][device]

    # Uniform scaling initializer.
    self._initializer = init_ops.variance_scaling_initializer(
        scale=1.0, mode='fan_avg', distribution='uniform')

    self.prepare_shared_weights()

    self._use_cache = True
    self._use_daisy_chain_getter = True
def testUniformDistribution(self):
    """Samples from a 'uniform' variance-scaling init should have
    mean ~0 and variance ~1/fan_avg for a square 100x100 shape."""
    shape = [100, 100]
    target_mean = 0.
    target_var = 1. / shape[0]
    init = init_ops.variance_scaling_initializer(distribution='uniform')
    with self.test_session(use_gpu=True):
        sample = init(shape).eval()
        self.assertNear(np.mean(sample), target_mean, err=1e-2)
        self.assertNear(np.var(sample), target_var, err=1e-2)
def bbb_conv3d(inputs,
               filters,
               kernel_size,
               stochastic=True,
               strides=1,
               padding='valid',
               data_format='channels_last',
               dilation_rate=1,
               activation=None,
               use_bias=True,
               clip_std=None,
               prior_pi=0.2,
               prior_logsigma_1=-2.0,
               prior_logsigma_2=-5.0,
               kernel_mu_initializer=init.variance_scaling_initializer(),
               kernel_rho_initializer=init.random_normal_initializer(
                   -9., 1e-3),
               bias_mu_initializer=init.random_normal_initializer(0., 1e-3),
               bias_rho_initializer=init.random_normal_initializer(-9., 1e-4),
               local_reparametrization=False,
               flipout=False,
               trainable=True,
               seed=None,
               name=None,
               reuse=None):
    """Functional interface for a Bayes-by-Backprop 3D convolution.

    Thin wrapper that builds a ``BayesBackpropConv3D`` layer from the
    given arguments and applies it to ``inputs``.

    Args:
        inputs: input tensor; its base dtype is forwarded to the layer.
        filters: number of output filters.
        kernel_size: convolution kernel size.
        stochastic: forwarded to ``layer.apply``; presumably toggles
            stochastic (sampled) vs. deterministic weights — confirm
            against ``BayesBackpropConv3D.apply``.
        strides/padding/data_format/dilation_rate/activation/use_bias:
            standard convolution-layer options.
        clip_std: optional clip value for the weight std-dev.
        prior_pi, prior_logsigma_1, prior_logsigma_2: scale-mixture
            prior parameters.
        kernel_mu_initializer, kernel_rho_initializer,
        bias_mu_initializer, bias_rho_initializer: initializers for the
            posterior mean (mu) and pre-softplus scale (rho) variables.
            NOTE(review): these defaults are initializer *objects*
            created once at import time and shared across calls;
            harmless only if the initializers are stateless — confirm.
        local_reparametrization, flipout: variance-reduction options.
        trainable, seed, name, reuse: standard layer plumbing.

    Returns:
        The output tensor of the applied layer.
    """
    layer = BayesBackpropConv3D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        clip_std=clip_std,
        prior_pi=prior_pi,
        prior_logsigma_1=prior_logsigma_1,
        prior_logsigma_2=prior_logsigma_2,
        kernel_mu_initializer=kernel_mu_initializer,
        kernel_rho_initializer=kernel_rho_initializer,
        bias_mu_initializer=bias_mu_initializer,
        bias_rho_initializer=bias_rho_initializer,
        local_reparametrization=local_reparametrization,
        flipout=flipout,
        trainable=trainable,
        seed=seed,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _reuse=reuse,
        _scope=name)
    return layer.apply(inputs, stochastic)
def __init__(
        self,
        rank,
        filters,
        kernel_size,
        strides=1,
        padding='valid',
        data_format='channels_last',
        dilation_rate=1,
        activation=None,
        use_bias=True,
        clip_std=None,
        prior_pi=0.2,
        prior_logsigma_1=-2.0,
        prior_logsigma_2=-5.0,
        kernel_mu_initializer=init.variance_scaling_initializer(),
        kernel_rho_initializer=init.random_normal_initializer(-9., 1e-3),
        bias_mu_initializer=init.random_normal_initializer(0., 1e-3),
        bias_rho_initializer=init.random_normal_initializer(-9., 1e-4),
        local_reparametrization=False,
        flipout=False,
        trainable=True,
        seed=None,
        name=None,
        **kwargs):
    """Initialize a Bayes-by-Backprop N-D convolution base layer.

    Forwards the standard convolution arguments to the parent class and
    stores the Bayes-by-Backprop-specific state (prior parameters,
    posterior initializers, std-dev clipping) on the instance.

    Args:
        rank: spatial rank of the convolution (1, 2 or 3).
        filters, kernel_size, strides, padding, data_format,
        dilation_rate, activation, use_bias: standard conv options,
            passed through to the superclass.
        clip_std: optional clip value for the weight std-dev.
        prior_pi, prior_logsigma_1, prior_logsigma_2: scale-mixture
            prior parameters.
        kernel_mu_initializer, kernel_rho_initializer,
        bias_mu_initializer, bias_rho_initializer: initializers for the
            posterior mean (mu) and pre-softplus scale (rho) variables.
            NOTE(review): defaults are initializer objects shared across
            all instances created from this signature — confirm the
            initializers are stateless.
        local_reparametrization, flipout: variance-reduction options,
            handled by the superclass.
        trainable, seed, name, **kwargs: standard layer plumbing.
    """
    super(_BayesBackpropConv, self).__init__(
        rank=rank,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        local_reparametrization=local_reparametrization,
        flipout=flipout,
        trainable=trainable,
        seed=seed,
        name=name,
        **kwargs)
    self.clip_std = clip_std
    self.prior_pi = prior_pi
    self.prior_logsigma_1 = prior_logsigma_1
    self.prior_logsigma_2 = prior_logsigma_2
    self.kernel_mu_initializer = kernel_mu_initializer
    self.kernel_rho_initializer = kernel_rho_initializer
    self.bias_mu_initializer = bias_mu_initializer
    self.bias_rho_initializer = bias_rho_initializer
    # Expect inputs of shape rank + 2 (batch and channel dims included).
    self.input_spec = base.InputSpec(ndim=self.rank + 2)
def testNormalDistribution(self):
    """Variance-scaling init with 'normal' should sample via
    truncated_normal and yield mean ~0, variance ~1/fan_avg."""
    shape = [100, 100]
    expect_mean = 0.
    expect_var = 1. / shape[0]
    init = init_ops.variance_scaling_initializer(distribution='normal')
    # Wrap (not replace) truncated_normal so real sampling still happens
    # while recording that the initializer used it.
    with self.test_session(use_gpu=True), \
        test.mock.patch.object(
            random_ops, 'truncated_normal',
            wraps=random_ops.truncated_normal) \
            as mock_truncated_normal:
        x = init(shape).eval()
        self.assertTrue(mock_truncated_normal.called)
    self.assertNear(np.mean(x), expect_mean, err=1e-2)
    self.assertNear(np.var(x), expect_var, err=1e-2)
def prepare(self, is_training):
    """Select devices and, for training, build the optimizer state.

    Args:
        is_training: whether to prepare for training (builds global
            step, learning rate, optimizer and initializer) or testing
            (device selection only).

    Raises:
        ValueError: if the configured optimizer is not one of
            'adam', 'adam_decay', 'sgd' or 'mom'.
    """
    assert not self._prepared
    self.is_training = is_training
    # Select devices according to running is_training flag.
    devices = self.config.train.devices if is_training else self.config.test.devices
    # BUGFIX: ''.split(',') yields [''], which used to produce the bogus
    # device '/gpu:' and made the `or ['/cpu:0']` fallback unreachable.
    # Filter out empty ids so an empty device string falls back to CPU.
    device_ids = [d for d in devices.split(',') if d]
    self.devices = ['/gpu:' + d for d in device_ids] or ['/cpu:0']
    # If we have multiple devices (typically GPUs), we set /cpu:0 as the sync device.
    self.sync_device = self.devices[0] if len(
        self.devices) == 1 else '/cpu:0'
    if is_training:
        with self.graph.as_default():
            with tf.device(self.sync_device):
                # Preparing optimizer.
                self.global_step = tf.get_variable(
                    name='global_step', dtype=INT_TYPE, shape=[],
                    trainable=False, initializer=tf.zeros_initializer)
                self.learning_rate = tf.convert_to_tensor(
                    self.config.train.learning_rate)
                if self.config.train.optimizer == 'adam':
                    self.optimizer = tf.train.AdamOptimizer(
                        learning_rate=self.learning_rate)
                elif self.config.train.optimizer == 'adam_decay':
                    self.learning_rate = learning_rate_decay(
                        self.config, self.global_step)
                    self.optimizer = tf.train.AdamOptimizer(
                        learning_rate=self.learning_rate,
                        beta1=0.9, beta2=0.98, epsilon=1e-9)
                elif self.config.train.optimizer == 'sgd':
                    self.optimizer = tf.train.GradientDescentOptimizer(
                        learning_rate=self.learning_rate)
                elif self.config.train.optimizer == 'mom':
                    self.optimizer = tf.train.MomentumOptimizer(
                        self.learning_rate, momentum=0.9)
                else:
                    logging.info("No optimizer is defined for the model")
                    # Include the offending value so the misconfiguration
                    # is diagnosable from the traceback.
                    raise ValueError(
                        "No optimizer is defined for the model: %s"
                        % self.config.train.optimizer)
                self._initializer = init_ops.variance_scaling_initializer(
                    scale=1, mode='fan_avg', distribution='uniform')
                # self._initializer = tf.uniform_unit_scaling_initializer()
    self._prepared = True
def conv2d_decoder(inputs,
                   encoder,
                   shapes,
                   strides,
                   scope=None,
                   activation=None,
                   weight_sharing=False,
                   reuse=False):
    """Build a stack of transposed convolutions mirroring an encoder.

    Walks the encoder's layers deepest-first, applying a
    ``conv2d_transpose`` per layer that restores that layer's input
    shape.

    Args:
        inputs: tensor to decode.
        encoder: list of encoder kernel variables (shallowest first).
        shapes: list of per-layer encoder input shapes (shallowest first).
        strides: per-layer stride ints (shallowest first).
        scope: optional variable scope name (default "decoder").
        activation: optional activation applied after each layer.
        weight_sharing: if True, reuse the encoder kernels instead of
            creating new decoder kernel variables.
        reuse: whether to reuse variables in the scope.

    Returns:
        The decoded output tensor.
    """
    with variable_scope.variable_scope(scope or "decoder",
                                       reuse=reuse) as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        if not context.executing_eagerly():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        # BUGFIX: work on reversed *copies*; the original reversed the
        # caller's lists in place, mutating shared arguments as a side
        # effect.
        encoder = list(reversed(encoder))
        shapes = list(reversed(shapes))
        strides = list(reversed(strides))

        for idx, shape in enumerate(shapes):
            encoder_W = encoder[idx]
            dtype = encoder_W.dtype
            # Share the encoder kernel, or create a fresh decoder kernel
            # of the same shape.
            W = encoder_W if weight_sharing else variable_scope.get_variable(
                'w_{}'.format(idx),
                encoder_W.get_shape().as_list(),
                dtype,
                initializer=init_ops.variance_scaling_initializer())
            # Bias sized by the transpose-conv output channels (dim 2 of
            # the kernel for conv2d_transpose filters [h, w, out, in]).
            b = variable_scope.get_variable(
                'b_decoder_{}'.format(idx),
                [W.get_shape().as_list()[2]],
                dtype,
                initializer=init_ops.zeros_initializer())
            outputs = math_ops.add(
                nn_ops.conv2d_transpose(
                    inputs, W,
                    array_ops.stack([
                        array_ops.shape(inputs)[0], shape[1], shape[2],
                        shape[3]
                    ]),
                    strides=[1, strides[idx], strides[idx], 1],
                    padding='SAME'),
                b)
            if activation:
                outputs = activation(outputs)
            inputs = outputs
        return inputs
def __init__(self, hp, num_gpu):
    """Initialize model state, device list and training ops.

    Args:
        hp: hyper-parameter object.
        num_gpu: number of GPUs to use; 0 falls back to a single CPU.
    """
    super(Model, self).__init__()
    self._hp = hp
    self.num_gpu = num_gpu
    self.graph = tf.Graph()
    # One device string per GPU, or CPU-only when num_gpu == 0.
    self._devices = ['/gpu:%d' % i for i in range(num_gpu)
                     ] if num_gpu > 0 else ['/cpu:0']
    # Per-device input placeholders, filled in later.
    self.src_pls = tuple()
    self.dst_pls = tuple()
    # Graph outputs, populated when the graph is built.
    self.preds, self.istarget = None, None
    self.mean_loss, self.train_op = None, None
    # Running evaluation accumulators.
    self.test_distance, self.test_length = 0, 0
    self.distance, self.length = 0, 0
    # NOTE(review): prepare_training is expected to return the 3-tuple
    # (global_step, learning_rate, optimizer) — confirm against its
    # definition in this class.
    self.global_step, self.learning_rate, self._optimizer = self.prepare_training(
    )
    # Uniform scaling initializer.
    self._initializer = init_ops.variance_scaling_initializer(
        scale=1.0, mode='fan_avg', distribution='uniform')
def conv2d_encoder(inputs,
                   filters,
                   kernel_sizes,
                   strides,
                   scope=None,
                   activation=None,
                   reuse=False):
    """Build a stack of 2-D convolutions and record kernels and shapes.

    Args:
        inputs: input tensor of shape [batch, height, width, channels].
        filters: per-layer output channel counts.
        kernel_sizes: per-layer (square) kernel sizes.
        strides: per-layer stride ints.
        scope: optional variable scope name (default "encoder").
        activation: optional activation applied after each layer.
        reuse: whether to reuse variables in the scope.

    Returns:
        A tuple (outputs, encoder, shapes) where ``encoder`` is the list
        of kernel variables and ``shapes`` the per-layer input shapes —
        both consumable by ``conv2d_decoder``.
    """
    with variable_scope.variable_scope(scope or "encoder",
                                       reuse=reuse) as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        if not context.executing_eagerly():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        encoder = []
        shapes = []
        for idx, n_outputs in enumerate(filters):
            n_input = inputs.get_shape().as_list()[3]
            shapes.append(inputs.get_shape().as_list())
            # BUGFIX: pass inputs.dtype explicitly (as the bias already
            # does); otherwise W defaults to float32 and mismatches
            # non-float32 inputs inside conv2d.
            W = variable_scope.get_variable(
                'w_{}'.format(idx),
                [kernel_sizes[idx], kernel_sizes[idx], n_input, n_outputs],
                inputs.dtype,
                initializer=init_ops.variance_scaling_initializer())
            b = variable_scope.get_variable(
                'b_encoder_{}'.format(idx),
                [n_outputs],
                inputs.dtype,
                initializer=init_ops.zeros_initializer())
            encoder.append(W)
            outputs = math_ops.add(
                nn_ops.conv2d(inputs, W,
                              strides=[1, strides[idx], strides[idx], 1],
                              padding='SAME'),
                b)
            if activation:
                outputs = activation(outputs)
            inputs = outputs
        return inputs, encoder, shapes