Example #1
 def initializer():
   if hparams.initializer == "orthogonal":
     return tf.orthogonal_initializer(gain=hparams.initializer_gain)
   elif hparams.initializer == "uniform":
     max_val = 0.1 * hparams.initializer_gain
     return tf.random_uniform_initializer(-max_val, max_val)
   elif hparams.initializer == "normal_unit_scaling":
     return init_ops.variance_scaling_initializer(
         hparams.initializer_gain, mode="fan_avg", distribution="normal")
   elif hparams.initializer == "uniform_unit_scaling":
     return init_ops.variance_scaling_initializer(
         hparams.initializer_gain, mode="fan_avg", distribution="uniform")
   else:
     raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
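A minimal usage sketch (assuming a TF1-style graph and an hparams object in scope, as above): the selected initializer is typically installed as the default for a variable scope, so plain tf.get_variable calls pick it up.

def build_body(features):
  # Hypothetical model body: variables created with tf.get_variable inside
  # this scope use the initializer chosen by initializer() above.
  with tf.variable_scope("body", initializer=initializer()):
    w = tf.get_variable("proj", shape=[512, 512])
    return tf.matmul(features, w)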
Example #2
    def prepare_training(self):
        with self.graph.as_default():
            # Optimizer
            self.global_step = tf.get_variable(
                name='global_step',
                dtype=tf.int64,
                shape=[],
                trainable=False,
                initializer=tf.zeros_initializer)

            self.learning_rate = tf.convert_to_tensor(
                self._config.train.learning_rate, dtype=tf.float32)
            if self._config.train.optimizer == 'adam':
                self._optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate)
            elif self._config.train.optimizer == 'adam_decay':
                self.learning_rate *= learning_rate_decay(
                    self._config, self.global_step)
                self._optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=0.9,
                    beta2=0.98,
                    epsilon=1e-9)
            elif self._config.train.optimizer == 'sgd':
                self._optimizer = tf.train.GradientDescentOptimizer(
                    learning_rate=self.learning_rate)
            elif self._config.train.optimizer == 'mom':
                self._optimizer = tf.train.MomentumOptimizer(
                    self.learning_rate, momentum=0.9)

            # Uniform scaling initializer.
            self._initializer = init_ops.variance_scaling_initializer(
                scale=1.0, mode='fan_avg', distribution='uniform')
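A short follow-up sketch (the loss tensor and the helper name are hypothetical) of how the optimizer and global step prepared above are usually turned into a training op:

    def build_train_op(self, loss):
        # Hypothetical helper: compute and apply gradients with the optimizer
        # created in prepare_training, advancing global_step on each update.
        with self.graph.as_default():
            grads_and_vars = self._optimizer.compute_gradients(loss)
            return self._optimizer.apply_gradients(
                grads_and_vars, global_step=self.global_step)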
Example #3
    def __init__(self, config, num_gpus):
        self._config = config

        self._devices = ['/gpu:%d' % i for i in range(num_gpus)] if num_gpus > 0 else ['/cpu:0']

        # Placeholders and saver.
        src_pls = []
        dst_pls = []
        for i, device in enumerate(self._devices):
            with tf.device(device):
                src_pls.append(tf.placeholder(dtype=tf.int32, shape=[None, None], name='src_pl_{}'.format(i)))
                dst_pls.append(tf.placeholder(dtype=tf.int32, shape=[None, None], name='dst_pl_{}'.format(i)))
        self.src_pls = tuple(src_pls)
        self.dst_pls = tuple(dst_pls)

        self.encoder_scope = self._config.encoder_scope or 'encoder'
        self.decoder_scope = self._config.decoder_scope or 'decoder'

        self.losses = defaultdict(list)  # self.losses[name][device]
        self.grads_and_vars = defaultdict(list)  # self.grads_and_vars[name][device]

        # Uniform scaling initializer.
        self._initializer = init_ops.variance_scaling_initializer(scale=1.0, mode='fan_avg', distribution='uniform')

        self.prepare_shared_weights()

        self._use_cache = True
        self._use_daisy_chain_getter = True
Example #4
  def testUniformDistribution(self):
    shape = [100, 100]
    expect_mean = 0.
    expect_var = 1. / shape[0]
    init = init_ops.variance_scaling_initializer(distribution='uniform')

    with self.test_session(use_gpu=True):
      x = init(shape).eval()

    self.assertNear(np.mean(x), expect_mean, err=1e-2)
    self.assertNear(np.var(x), expect_var, err=1e-2)
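For reference, expect_var = 1. / shape[0] follows from the initializer defaults (scale=1.0, mode='fan_in'): the uniform branch samples from U(-limit, limit) with limit = sqrt(3 * scale / fan_in), so the variance is limit**2 / 3 = scale / fan_in = 1/100 for this shape. A quick numpy sketch of that arithmetic:

import numpy as np

fan_in, scale = 100, 1.0
limit = np.sqrt(3.0 * scale / fan_in)   # half-width of the uniform range
samples = np.random.uniform(-limit, limit, size=(100, 100))
print(np.var(samples))                  # ~0.01, i.e. scale / fan_in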
Example #5
def bbb_conv3d(inputs,
               filters,
               kernel_size,
               stochastic=True,
               strides=1,
               padding='valid',
               data_format='channels_last',
               dilation_rate=1,
               activation=None,
               use_bias=True,
               clip_std=None,
               prior_pi=0.2,
               prior_logsigma_1=-2.0,
               prior_logsigma_2=-5.0,
               kernel_mu_initializer=init.variance_scaling_initializer(),
               kernel_rho_initializer=init.random_normal_initializer(
                   -9., 1e-3),
               bias_mu_initializer=init.random_normal_initializer(0., 1e-3),
               bias_rho_initializer=init.random_normal_initializer(-9., 1e-4),
               local_reparametrization=False,
               flipout=False,
               trainable=True,
               seed=None,
               name=None,
               reuse=None):
    layer = BayesBackpropConv3D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        clip_std=clip_std,
        prior_pi=prior_pi,
        prior_logsigma_1=prior_logsigma_1,
        prior_logsigma_2=prior_logsigma_2,
        kernel_mu_initializer=kernel_mu_initializer,
        kernel_rho_initializer=kernel_rho_initializer,
        bias_mu_initializer=bias_mu_initializer,
        bias_rho_initializer=bias_rho_initializer,
        local_reparametrization=local_reparametrization,
        flipout=flipout,
        trainable=trainable,
        seed=seed,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _reuse=reuse,
        _scope=name)
    return layer.apply(inputs, stochastic)
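A hypothetical call (shapes, names, and hyperparameters are illustrative) applying the wrapper above to a batch of single-channel volumes:

# Assumed 5-D input: [batch, depth, height, width, channels].
volumes = tf.placeholder(tf.float32, shape=[None, 16, 64, 64, 1], name='volumes')
features = bbb_conv3d(volumes,
                      filters=32,
                      kernel_size=3,
                      strides=2,
                      padding='same',
                      activation=tf.nn.relu,
                      name='bbb_conv3d_1')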
Example #6
 def __init__(
         self,
         rank,
         filters,
         kernel_size,
         strides=1,
         padding='valid',
         data_format='channels_last',
         dilation_rate=1,
         activation=None,
         use_bias=True,
         clip_std=None,
         prior_pi=0.2,
         prior_logsigma_1=-2.0,
         prior_logsigma_2=-5.0,
         kernel_mu_initializer=init.variance_scaling_initializer(),
         kernel_rho_initializer=init.random_normal_initializer(-9., 1e-3),
         bias_mu_initializer=init.random_normal_initializer(0., 1e-3),
         bias_rho_initializer=init.random_normal_initializer(-9., 1e-4),
         local_reparametrization=False,
         flipout=False,
         trainable=True,
         seed=None,
         name=None,
         **kwargs):
     super(_BayesBackpropConv,
           self).__init__(rank=rank,
                          filters=filters,
                          kernel_size=kernel_size,
                          strides=strides,
                          padding=padding,
                          data_format=data_format,
                          dilation_rate=dilation_rate,
                          activation=activation,
                          use_bias=use_bias,
                          local_reparametrization=local_reparametrization,
                          flipout=flipout,
                          trainable=trainable,
                          seed=seed,
                          name=name,
                          **kwargs)
     self.clip_std = clip_std
     self.prior_pi = prior_pi
     self.prior_logsigma_1 = prior_logsigma_1
     self.prior_logsigma_2 = prior_logsigma_2
     self.kernel_mu_initializer = kernel_mu_initializer
     self.kernel_rho_initializer = kernel_rho_initializer
     self.bias_mu_initializer = bias_mu_initializer
     self.bias_rho_initializer = bias_rho_initializer
     self.input_spec = base.InputSpec(ndim=self.rank + 2)
Example #7
  def testNormalDistribution(self):
    shape = [100, 100]
    expect_mean = 0.
    expect_var = 1. / shape[0]
    init = init_ops.variance_scaling_initializer(distribution='normal')

    with self.test_session(use_gpu=True), \
      test.mock.patch.object(
          random_ops, 'truncated_normal', wraps=random_ops.truncated_normal) \
          as mock_truncated_normal:
      x = init(shape).eval()
      self.assertTrue(mock_truncated_normal.called)

    self.assertNear(np.mean(x), expect_mean, err=1e-2)
    self.assertNear(np.var(x), expect_var, err=1e-2)
Example #8
    def prepare(self, is_training):
        assert not self._prepared
        self.is_training = is_training
        # Select devices according to the is_training flag.
        devices = self.config.train.devices if is_training else self.config.test.devices
        self.devices = ['/gpu:' + i for i in devices.split(',') if i] or ['/cpu:0']
        # If we have multiple devices (typically GPUs), we set /cpu:0 as the sync device.
        self.sync_device = self.devices[0] if len(
            self.devices) == 1 else '/cpu:0'

        if is_training:
            with self.graph.as_default():
                with tf.device(self.sync_device):
                    # Preparing optimizer.
                    self.global_step = tf.get_variable(
                        name='global_step',
                        dtype=INT_TYPE,
                        shape=[],
                        trainable=False,
                        initializer=tf.zeros_initializer)
                    self.learning_rate = tf.convert_to_tensor(
                        self.config.train.learning_rate)
                    if self.config.train.optimizer == 'adam':
                        self.optimizer = tf.train.AdamOptimizer(
                            learning_rate=self.learning_rate)
                    elif self.config.train.optimizer == 'adam_decay':
                        self.learning_rate = learning_rate_decay(
                            self.config, self.global_step)
                        self.optimizer = tf.train.AdamOptimizer(
                            learning_rate=self.learning_rate,
                            beta1=0.9,
                            beta2=0.98,
                            epsilon=1e-9)
                    elif self.config.train.optimizer == 'sgd':
                        self.optimizer = tf.train.GradientDescentOptimizer(
                            learning_rate=self.learning_rate)
                    elif self.config.train.optimizer == 'mom':
                        self.optimizer = tf.train.MomentumOptimizer(
                            self.learning_rate, momentum=0.9)
                    else:
                        logging.error("No optimizer is defined for the model")
                        raise ValueError("No optimizer is defined for the model")
        self._initializer = init_ops.variance_scaling_initializer(
            scale=1, mode='fan_avg', distribution='uniform')
        # self._initializer = tf.uniform_unit_scaling_initializer()
        self._prepared = True
Example #9
def conv2d_decoder(inputs,
                   encoder,
                   shapes,
                   strides,
                   scope=None,
                   activation=None,
                   weight_sharing=False,
                   reuse=False):
    with variable_scope.variable_scope(scope or "decoder",
                                       reuse=reuse) as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        # Variable on the same device as the op that uses it.
        if not context.executing_eagerly():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        encoder.reverse()
        shapes.reverse()
        strides.reverse()
        for idx, shape in enumerate(shapes):
            encoder_W = encoder[idx]
            dtype = encoder_W.dtype
            W = encoder_W if weight_sharing else variable_scope.get_variable(
                'w_{}'.format(idx),
                encoder_W.get_shape().as_list(),
                dtype,
                initializer=init_ops.variance_scaling_initializer())
            b = variable_scope.get_variable(
                'b_decoder_{}'.format(idx), [W.get_shape().as_list()[2]],
                dtype,
                initializer=init_ops.zeros_initializer())
            outputs = math_ops.add(
                nn_ops.conv2d_transpose(
                    inputs,
                    W,
                    array_ops.stack([
                        array_ops.shape(inputs)[0], shape[1], shape[2],
                        shape[3]
                    ]),
                    strides=[1, strides[idx], strides[idx], 1],
                    padding='SAME'), b)
            if activation:
                outputs = activation(outputs)
            inputs = outputs
        return inputs
Example #10
    def __init__(self, hp, num_gpu):
        super(Model, self).__init__()
        self._hp = hp
        self.num_gpu = num_gpu
        self.graph = tf.Graph()

        self._devices = (['/gpu:%d' % i for i in range(num_gpu)]
                         if num_gpu > 0 else ['/cpu:0'])
        self.src_pls = tuple()
        self.dst_pls = tuple()

        self.preds, self.istarget = None, None
        self.mean_loss, self.train_op = None, None
        self.test_distance, self.test_length = 0, 0
        self.distance, self.length = 0, 0

        self.global_step, self.learning_rate, self._optimizer = (
            self.prepare_training())
        self._initializer = init_ops.variance_scaling_initializer(
            scale=1.0, mode='fan_avg', distribution='uniform')
Example #11
def conv2d_encoder(inputs,
                   filters,
                   kernel_sizes,
                   strides,
                   scope=None,
                   activation=None,
                   reuse=False):
    with variable_scope.variable_scope(scope or "encoder",
                                       reuse=reuse) as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        # Variable on the same device as the op that uses it.
        if not context.executing_eagerly():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        encoder = []
        shapes = []

        for idx, n_outputs in enumerate(filters):
            n_input = inputs.get_shape().as_list()[3]
            shapes.append(inputs.get_shape().as_list())
            W = variable_scope.get_variable(
                'w_{}'.format(idx),
                [kernel_sizes[idx], kernel_sizes[idx], n_input, n_outputs],
                initializer=init_ops.variance_scaling_initializer())
            b = variable_scope.get_variable(
                'b_encoder_{}'.format(idx), [n_outputs],
                inputs.dtype,
                initializer=init_ops.zeros_initializer())
            encoder.append(W)
            outputs = math_ops.add(
                nn_ops.conv2d(inputs,
                              W,
                              strides=[1, strides[idx], strides[idx], 1],
                              padding='SAME'), b)
            if activation:
                outputs = activation(outputs)

            inputs = outputs
        return inputs, encoder, shapes
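A minimal sketch (28x28 single-channel inputs, filter counts, and the loss are assumed) chaining conv2d_encoder and conv2d_decoder above into a tied-weight convolutional autoencoder:

images = tf.placeholder(tf.float32, [None, 28, 28, 1], name='images')
# The encoder returns its output, the kernels it created, and the per-layer input shapes.
latent, enc_weights, enc_shapes = conv2d_encoder(images,
                                                 filters=[16, 32],
                                                 kernel_sizes=[3, 3],
                                                 strides=[2, 2],
                                                 activation=tf.nn.relu)
# Reuse the encoder kernels in transposed convolutions on the way back up.
reconstruction = conv2d_decoder(latent,
                                encoder=enc_weights,
                                shapes=enc_shapes,
                                strides=[2, 2],
                                activation=tf.nn.relu,
                                weight_sharing=True)
loss = tf.reduce_mean(tf.square(reconstruction - images))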