def set_input_shape(self, input_shape):
    """Create conv kernel (and optional bias) variables and infer output shape.

    Two init modes are supported:
      * "norm"     — Gaussian kernels L2-normalised per output filter, then
                     scaled by ``self.init_scale``.
      * "inv_sqrt" — Gaussian kernels with stddev sqrt(2 / fan_out).
    The output shape is discovered by pushing a single dummy example through
    ``self.fprop`` rather than computed analytically.
    """
    _, _, _, input_channels = input_shape
    kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                               self.output_channels)
    assert len(kernel_shape) == 4
    assert all(isinstance(e, int) for e in kernel_shape), kernel_shape
    if self.init_mode == "norm":
        raw = tf.random_normal(kernel_shape, dtype=tf.float32)
        # Per-filter L2 norm over (height, width, in_channels); eps avoids
        # division by zero for a (vanishingly unlikely) all-zero draw.
        filter_norms = tf.sqrt(
            1e-7 + tf.reduce_sum(tf.square(raw), axis=(0, 1, 2)))
        init = self.init_scale * raw / filter_norms
    elif self.init_mode == "inv_sqrt":
        fan_out = (self.kernel_shape[0] * self.kernel_shape[1]
                   * self.output_channels)
        init = tf.random_normal(kernel_shape, dtype=tf.float32,
                                stddev=np.sqrt(2.0 / fan_out))
    else:
        raise ValueError(self.init_mode)
    self.kernels = PV(init, name=self.name + "_kernels")
    if self.use_bias:
        self.b = PV(np.zeros((self.output_channels,)).astype('float32'))
    # Probe fprop with a batch of one to learn the output spatial dims,
    # then restore the caller's batch size in the recorded shape.
    probe_shape = list(input_shape)
    real_batch = probe_shape[0]
    probe_shape[0] = 1
    probe_out = self.fprop(tf.zeros(probe_shape))
    out_shape = [int(e) for e in probe_out.get_shape()]
    out_shape[0] = real_batch
    self.output_shape = tuple(out_shape)
# Example #2
 def set_input_shape(self, input_shape):
     """Create the weight matrix ``W`` (and optional bias ``b``) of a dense layer.

     ``self.init_mode`` selects the initializer:
       * "norm"                 — column-wise L2-normalised Gaussian scaled by
                                  ``self.init_scale``.
       * "uniform_unit_scaling" — uniform in ±sqrt(3 / dim).
       * "he"                   — Gaussian with stddev 1/sqrt(dim + eps).
     Raises ValueError for any other mode.
     """
     batch_size, dim = input_shape
     self.input_shape = [batch_size, dim]
     self.output_shape = [batch_size, self.num_hid]
     w_shape = [dim, self.num_hid]
     if self.init_mode == "norm":
         raw = tf.random_normal(w_shape, dtype=tf.float32)
         # Normalise each output column; eps guards the sqrt.
         col_norms = tf.sqrt(
             1e-7 + tf.reduce_sum(tf.square(raw), axis=0, keep_dims=True))
         init = raw / col_norms * self.init_scale
     elif self.init_mode == "uniform_unit_scaling":
         limit = np.sqrt(3. / dim)
         init = tf.random_uniform(w_shape,
                                  dtype=tf.float32,
                                  minval=-limit,
                                  maxval=limit)
     elif self.init_mode == 'he':
         std = tf.rsqrt(tf.cast(tf.reduce_prod(dim), tf.float32) + 1e-7)
         init = tf.random_normal(w_shape, stddev=std, dtype=tf.float32)
     else:
         raise ValueError(self.init_mode)
     self.W = PV(init)
     if self.use_bias:
         bias0 = np.zeros((self.num_hid,)) + self.init_b
         self.b = PV(bias0.astype('float32'))
# Example #3
 def set_input_shape(self, shape):
     """Create per-channel gamma/beta variables; the shape passes through."""
     self.input_shape = shape
     self.output_shape = shape
     num_channels = shape[-1]
     # gamma starts at init_gamma, beta at zero — one scalar per channel.
     gamma0 = self.init_gamma * np.ones((num_channels,), dtype="float32")
     self.gamma = PV(gamma0, name=self.name + "_gamma")
     beta0 = np.zeros((num_channels,), dtype="float32")
     self.beta = PV(beta0, name=self.name + "_beta")
 def set_input_shape(self, shape):
     """Create gamma/beta via the ``initializer=`` form of PV; shape passes through."""
     self.input_shape = shape
     self.output_shape = shape
     num_channels = shape[-1]
     # Per-channel scale (init_gamma) and shift (zero).
     gamma0 = np.ones((num_channels,), dtype='float32') * self.init_gamma
     self.gamma = PV(name=self.name + "_gamma", initializer=gamma0)
     self.beta = PV(name=self.name + "_beta",
                    initializer=np.zeros((num_channels,), dtype='float32'))
# Example #5
 def set_input_shape(self, input_shape):
     """Create a dense layer's W (column-normalised Gaussian * init_scale) and b."""
     batch_size, dim = input_shape
     self.input_shape = [batch_size, dim]
     self.output_shape = [batch_size, self.num_hid]
     raw = tf.random_normal([dim, self.num_hid], dtype=tf.float32)
     # Normalise each output column to unit L2 norm (eps avoids div-by-zero),
     # then rescale by init_scale.
     col_norms = tf.sqrt(
         1e-7 + tf.reduce_sum(tf.square(raw), axis=0, keep_dims=True))
     self.W = PV(raw / col_norms * self.init_scale)
     # Bias starts at the constant init_b broadcast over num_hid entries.
     self.b = PV((self.init_b + np.zeros((self.num_hid,))).astype('float32'))
 def set_input_shape(self, shape):
   """Record shapes for group normalisation and create gamma/beta.

   Caps the group count at the channel count and precomputes
   ``expanded_shape`` — (rows, cols, channels_per_group, groups) — used to
   split the channel axis into groups.
   """
   self.input_shape = shape
   self.output_shape = shape
   self.channels = shape[-1]
   self.actual_num_groups = min(self.channels, self.num_groups)
   per_group = self.channels // self.actual_num_groups
   self.expanded_shape = (tuple(shape[1:3])
                          + (per_group, self.actual_num_groups))
   # Per-channel scale (init_gamma) and shift (zero) parameters.
   self.gamma = PV(self.init_gamma * np.ones((self.channels,), dtype='float32'),
                   name=self.name + "_gamma")
   self.beta = PV(np.zeros((self.channels,), dtype='float32'),
                  name=self.name + "_beta")
# Example #7
 def set_input_shape(self, input_shape):
     """Create per-filter-normalised conv kernels and infer the output shape.

     Kernels are Gaussian draws L2-normalised over (height, width,
     in_channels) per output filter, scaled by ``self.init_scale``. The
     output shape is obtained by running one dummy example through
     ``self.fprop``.
     """
     _, _, _, input_channels = input_shape
     kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                                self.output_channels)
     assert len(kernel_shape) == 4
     assert all(isinstance(e, int) for e in kernel_shape), kernel_shape
     raw = tf.random_normal(kernel_shape, dtype=tf.float32)
     # eps keeps the sqrt finite for a degenerate all-zero draw.
     filter_norms = tf.sqrt(
         1e-7 + tf.reduce_sum(tf.square(raw), axis=(0, 1, 2)))
     self.kernels = PV(self.init_scale * raw / filter_norms)
     if self.use_bias:
         self.b = PV(np.zeros((self.output_channels,)).astype('float32'))
     # Probe with batch size 1, then restore the caller's batch size.
     probe_shape = list(input_shape)
     real_batch = probe_shape[0]
     probe_shape[0] = 1
     probe_out = self.fprop(tf.zeros(probe_shape))
     out_shape = [int(e) for e in probe_out.get_shape()]
     out_shape[0] = real_batch
     self.output_shape = tuple(out_shape)
 def set_input_shape(self, input_shape):
     """Create named W/b variables for a dense layer.

     ``self.init_mode`` selects the initializer:
       * "norm"                 — column-wise L2-normalised Gaussian scaled
                                  by ``self.init_scale``.
       * "uniform_unit_scaling" — uniform in ±sqrt(3 / dim).
     Raises ValueError for any other mode.
     """
     batch_size, dim = input_shape
     self.input_shape = [batch_size, dim]
     self.output_shape = [batch_size, self.num_hid]
     w_shape = [dim, self.num_hid]
     if self.init_mode == "norm":
         raw = tf.random_normal(w_shape, dtype=tf.float32)
         # Per-column L2 norm; eps guards the sqrt.
         col_norms = tf.sqrt(
             1e-7 + tf.reduce_sum(tf.square(raw), axis=0, keep_dims=True))
         init = raw / col_norms * self.init_scale
     elif self.init_mode == "uniform_unit_scaling":
         limit = np.sqrt(3. / dim)
         init = tf.random_uniform(w_shape,
                                  dtype=tf.float32,
                                  minval=-limit,
                                  maxval=limit)
     else:
         raise ValueError(self.init_mode)
     self.W = PV(name=self.name + 'linear_weight', initializer=init)
     if self.use_bias:
         bias0 = (np.zeros((self.num_hid,)) + self.init_b).astype('float32')
         self.b = PV(name=self.name + 'linear_bias', initializer=bias0)