def set_input_shape(self, input_shape):
    batch_size, rows, cols, input_channels = input_shape
    kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                               self.output_channels)
    assert len(kernel_shape) == 4
    assert all(isinstance(e, int) for e in kernel_shape), kernel_shape
    if self.init_mode == "norm":
        # Rescale each output channel's filter so its norm is init_scale.
        init = tf.random_normal(kernel_shape, dtype=tf.float32)
        squared_norms = tf.reduce_sum(tf.square(init), axis=(0, 1, 2))
        denom = tf.sqrt(1e-7 + squared_norms)
        init = self.init_scale * init / denom
    elif self.init_mode == "inv_sqrt":
        # He-style init: Gaussian with stddev sqrt(2 / fan_out).
        fan_out = self.kernel_shape[0] * \
            self.kernel_shape[1] * self.output_channels
        init = tf.random_normal(kernel_shape, dtype=tf.float32,
                                stddev=np.sqrt(2.0 / fan_out))
    else:
        raise ValueError(self.init_mode)
    self.kernels = PV(init, name=self.name + "_kernels")
    if self.use_bias:
        self.b = PV(np.zeros((self.output_channels,)).astype('float32'))
    # Infer the output shape by running one dummy example through fprop,
    # then restore the requested batch size.
    input_shape = list(input_shape)
    orig_batch_size = input_shape[0]
    input_shape[0] = 1
    dummy_batch = tf.zeros(input_shape)
    dummy_output = self.fprop(dummy_batch)
    output_shape = [int(e) for e in dummy_output.get_shape()]
    output_shape[0] = orig_batch_size
    self.output_shape = tuple(output_shape)
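# Hypothetical standalone sketch (not part of the layer above): the two kernel
# init modes reproduced with NumPy so their scaling can be checked directly.
# The shapes and init_scale value below are illustrative assumptions.
import numpy as np

kernel_shape = (3, 3, 16, 32)  # (rows, cols, input_channels, output_channels)
init_scale = 1.0
rng = np.random.default_rng(0)

# "norm" mode: rescale each output channel's filter to norm ~= init_scale.
init = rng.standard_normal(kernel_shape).astype("float32")
squared_norms = np.sum(np.square(init), axis=(0, 1, 2))
init_norm = init_scale * init / np.sqrt(1e-7 + squared_norms)
print(np.sum(np.square(init_norm), axis=(0, 1, 2))[:3])  # each ~= init_scale ** 2

# "inv_sqrt" mode: Gaussian scaled by the fan-out of each kernel position.
fan_out = kernel_shape[0] * kernel_shape[1] * kernel_shape[3]
init_he = (rng.standard_normal(kernel_shape) * np.sqrt(2.0 / fan_out)).astype("float32")
print(init_he.std())  # ~= sqrt(2 / fan_out)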
def set_input_shape(self, input_shape): batch_size, dim = input_shape self.input_shape = [batch_size, dim] self.output_shape = [batch_size, self.num_hid] if self.init_mode == "norm": init = tf.random_normal([dim, self.num_hid], dtype=tf.float32) init = init / tf.sqrt( 1e-7 + tf.reduce_sum(tf.square(init), axis=0, keep_dims=True)) init = init * self.init_scale elif self.init_mode == "uniform_unit_scaling": scale = np.sqrt(3. / dim) init = tf.random_uniform([dim, self.num_hid], dtype=tf.float32, minval=-scale, maxval=scale) elif self.init_mode == 'he': std = tf.rsqrt(tf.cast(tf.reduce_prod(dim), tf.float32) + 1e-7) init = tf.random_normal([dim, self.num_hid], stddev=std, dtype=tf.float32) else: raise ValueError(self.init_mode) self.W = PV(init) if self.use_bias: self.b = PV((np.zeros( (self.num_hid, )) + self.init_b).astype('float32'))
def set_input_shape(self, shape): self.input_shape = shape self.output_shape = shape channels = shape[-1] init_value = np.ones((channels,), dtype="float32") * self.init_gamma self.gamma = PV(init_value, name=self.name + "_gamma") self.beta = PV(np.zeros((channels,), dtype="float32"), name=self.name + "_beta")
def set_input_shape(self, shape): self.input_shape = shape self.output_shape = shape channels = shape[-1] init_value = np.ones((channels, ), dtype='float32') * self.init_gamma self.gamma = PV(name=self.name + "_gamma", initializer=init_value) self.beta = PV(name=self.name + "_beta", initializer=np.zeros((channels, ), dtype='float32'))
def set_input_shape(self, input_shape):
    batch_size, dim = input_shape
    self.input_shape = [batch_size, dim]
    self.output_shape = [batch_size, self.num_hid]
    init = tf.random_normal([dim, self.num_hid], dtype=tf.float32)
    init = init / tf.sqrt(1e-7 + tf.reduce_sum(tf.square(init),
                                               axis=0, keep_dims=True))
    init = init * self.init_scale
    self.W = PV(init)
    self.b = PV((np.zeros((self.num_hid,)) + self.init_b).astype('float32'))
def set_input_shape(self, shape):
    self.input_shape = shape
    self.output_shape = shape
    channels = shape[-1]
    self.channels = channels
    # Never use more groups than there are channels.
    self.actual_num_groups = min(self.channels, self.num_groups)
    extra_dims = (self.channels // self.actual_num_groups,
                  self.actual_num_groups)
    # Spatial dims followed by (channels_per_group, num_groups).
    self.expanded_shape = tuple(shape[1:3]) + tuple(extra_dims)
    init_value = np.ones((channels,), dtype='float32') * self.init_gamma
    self.gamma = PV(init_value, name=self.name + "_gamma")
    self.beta = PV(np.zeros((self.channels,), dtype='float32'),
                   name=self.name + "_beta")
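# Hypothetical sketch of how an expanded shape like the one above is typically
# used for group normalization; the layer's fprop is not shown, and the sizes
# and the reduction axes here are illustrative assumptions. Channels are
# reshaped into (channels_per_group, num_groups) and statistics are shared
# across space and across the channels inside each group.
import numpy as np

batch, rows, cols, channels, num_groups = 2, 4, 4, 16, 4
actual_num_groups = min(channels, num_groups)
expanded_shape = (rows, cols, channels // actual_num_groups, actual_num_groups)

x = np.random.default_rng(0).standard_normal(
    (batch, rows, cols, channels)).astype("float32")
x_grouped = x.reshape((batch,) + expanded_shape)
mean = x_grouped.mean(axis=(1, 2, 3), keepdims=True)
var = x_grouped.var(axis=(1, 2, 3), keepdims=True)
x_hat = ((x_grouped - mean) / np.sqrt(var + 1e-5)).reshape(x.shape)
print(x_hat.mean(), x_hat.std())  # ~= 0 and ~= 1 overall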
def set_input_shape(self, input_shape):
    batch_size, rows, cols, input_channels = input_shape
    kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                               self.output_channels)
    assert len(kernel_shape) == 4
    assert all(isinstance(e, int) for e in kernel_shape), kernel_shape
    # Rescale each output channel's filter so its norm is init_scale.
    init = tf.random_normal(kernel_shape, dtype=tf.float32)
    init = self.init_scale * init / tf.sqrt(
        1e-7 + tf.reduce_sum(tf.square(init), axis=(0, 1, 2)))
    self.kernels = PV(init)
    if self.use_bias:
        self.b = PV(np.zeros((self.output_channels,)).astype('float32'))
    # Infer the output shape by running one dummy example through fprop,
    # then restore the requested batch size.
    input_shape = list(input_shape)
    orig_batch_size = input_shape[0]
    input_shape[0] = 1
    dummy_batch = tf.zeros(input_shape)
    dummy_output = self.fprop(dummy_batch)
    output_shape = [int(e) for e in dummy_output.get_shape()]
    output_shape[0] = orig_batch_size
    self.output_shape = tuple(output_shape)
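# Hypothetical sketch of the dummy-batch trick used above: run a single example
# through the forward function and splice the real batch size back into the
# resulting shape. infer_output_shape and fake_conv_fprop are stand-ins for
# illustration, not part of the layer code.
import numpy as np

def infer_output_shape(fprop, input_shape):
    orig_batch_size = input_shape[0]
    dummy_batch = np.zeros((1,) + tuple(input_shape[1:]), dtype="float32")
    dummy_output = fprop(dummy_batch)
    output_shape = list(dummy_output.shape)
    output_shape[0] = orig_batch_size
    return tuple(output_shape)

def fake_conv_fprop(x):
    # Mimic a "valid" 3x3 convolution: lose a 1-pixel border, project to 32 channels.
    cropped = x[:, 1:-1, 1:-1, :]
    return cropped @ np.zeros((x.shape[-1], 32), dtype="float32")

print(infer_output_shape(fake_conv_fprop, (128, 28, 28, 3)))  # (128, 26, 26, 32)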
def set_input_shape(self, input_shape): batch_size, dim = input_shape self.input_shape = [batch_size, dim] self.output_shape = [batch_size, self.num_hid] if self.init_mode == "norm": init = tf.random_normal([dim, self.num_hid], dtype=tf.float32) init = init / tf.sqrt( 1e-7 + tf.reduce_sum(tf.square(init), axis=0, keep_dims=True)) init = init * self.init_scale elif self.init_mode == "uniform_unit_scaling": scale = np.sqrt(3. / dim) init = tf.random_uniform([dim, self.num_hid], dtype=tf.float32, minval=-scale, maxval=scale) else: raise ValueError(self.init_mode) self.W = PV(name=self.name + 'linear_weight', initializer=init) if self.use_bias: self.b = PV(name=self.name + 'linear_bias', initializer=(np.zeros( (self.num_hid, )) + self.init_b).astype('float32'))