def __init__(self,
             filters,
             kernel_size,
             strides=(1, 1),
             padding='valid',
             data_format=None,
             dilation_rate=(1, 1),
             activation='tanh',
             recurrent_activation='hard_sigmoid',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             unit_forget_bias=True,
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             return_sequences=False,
             go_backwards=False,
             stateful=False,
             dropout=0.,
             recurrent_dropout=0.,
             **kwargs):
    """Set up the layer's configuration.

    The convolution / recurrence options are handed to the parent
    constructor; everything else is resolved through the matching `get`
    helper and stored on the instance.

    Args:
        filters, kernel_size, strides, padding, data_format, dilation_rate,
            return_sequences, go_backwards, stateful: forwarded unchanged to
            the parent constructor.
        activation, recurrent_activation: resolved with `activations.get`.
        use_bias, unit_forget_bias: stored as given.
        kernel_initializer, recurrent_initializer, bias_initializer:
            resolved with `initializers.get`.
        kernel_regularizer, recurrent_regularizer, bias_regularizer:
            resolved with `regularizers.get`.
        activity_regularizer: resolved with `regularizers.get` and passed to
            the parent constructor.
        kernel_constraint, recurrent_constraint, bias_constraint: resolved
            with `constraints.get`.
        dropout, recurrent_dropout: clamped into the range [0, 1].
        **kwargs: forwarded to the parent constructor.
    """
    resolved_activity_regularizer = regularizers.get(activity_regularizer)
    super(ConvLSTM2D, self).__init__(
        filters,
        kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        return_sequences=return_sequences,
        go_backwards=go_backwards,
        stateful=stateful,
        activity_regularizer=resolved_activity_regularizer,
        **kwargs)

    # Activations.
    self.activation = activations.get(activation)
    self.recurrent_activation = activations.get(recurrent_activation)
    self.use_bias = use_bias

    # Initializers.
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.unit_forget_bias = unit_forget_bias

    # Regularizers.
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)

    # Constraints.
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.recurrent_constraint = constraints.get(recurrent_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    # Dropout rates are clamped rather than rejected when out of range.
    lower_bounded_dropout = max(0., dropout)
    self.dropout = min(1., lower_bounded_dropout)
    lower_bounded_recurrent = max(0., recurrent_dropout)
    self.recurrent_dropout = min(1., lower_bounded_recurrent)

    # Two rank-4 states are declared.
    self.state_spec = [InputSpec(ndim=4), InputSpec(ndim=4)]
def build(self, input_shape):
    """Build the wrapped cell and set (or validate) `state_spec`.

    Args:
        input_shape: shape of the input, or a list whose first entry is the
            input shape when initial states / constants were passed in
            `__call__`.

    Raises:
        ValueError: if an existing `state_spec` does not match
            `cell.state_size` along the channel dimension.
    """
    # Note input_shape will be list of shapes of initial states and
    # constants if these are passed in __call__.
    constants_shape = None
    if self._num_constants is not None:
        constants_shape = input_shape[-self._num_constants:]

    if isinstance(input_shape, list):
        input_shape = input_shape[0]

    batch_size = None
    if self.stateful:
        batch_size = input_shape[0]
    self.input_spec[0] = InputSpec(
        shape=(batch_size, None) + input_shape[2:5])

    # allow cell (if layer) to build before we set or validate state_spec
    if isinstance(self.cell, Layer):
        # Shape of a single time step: drop the time axis (index 1).
        step_input_shape = (input_shape[0],) + input_shape[2:]
        if constants_shape is None:
            self.cell.build(step_input_shape)
        else:
            self.cell.build([step_input_shape] + constants_shape)

    # set or validate state_spec
    cell_state_size = self.cell.state_size
    if hasattr(cell_state_size, '__len__'):
        state_size = list(cell_state_size)
    else:
        state_size = [cell_state_size]

    if self.state_spec is None:
        if self.cell.data_format == 'channels_first':
            self.state_spec = [
                InputSpec(shape=(None, dim, None, None))
                for dim in state_size
            ]
        elif self.cell.data_format == 'channels_last':
            self.state_spec = [
                InputSpec(shape=(None, None, None, dim))
                for dim in state_size
            ]
    else:
        # initial_state was passed in call, check compatibility
        if self.cell.data_format == 'channels_first':
            ch_dim = 1
        elif self.cell.data_format == 'channels_last':
            ch_dim = 3
        spec_channels = [spec.shape[ch_dim] for spec in self.state_spec]
        if spec_channels != state_size:
            raise ValueError(
                'An initial_state was passed that is not compatible with '
                '`cell.state_size`. Received `state_spec`={}; '
                'However `cell.state_size` is '
                '{}'.format([spec.shape for spec in self.state_spec],
                            self.cell.state_size))

    if self.stateful:
        self.reset_states()
    self.built = True
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """Call the layer, optionally with an initial state and/or constants.

    When `initial_state` or `constants` are Keras tensors they are appended
    to the inputs and `self.input_spec` is temporarily extended to cover
    them for the duration of the parent call.

    Args:
        inputs: layer input(s); normalised by `self._standardize_args`.
        initial_state: optional list of initial state tensors.
        constants: optional list of constant tensors.
        **kwargs: forwarded to the parent `__call__`.

    Returns:
        Whatever the parent `__call__` returns.

    Raises:
        ValueError: if the extra inputs mix Keras and non-Keras tensors.
    """
    inputs, initial_state, constants = self._standardize_args(
        inputs, initial_state, constants)

    if initial_state is None and constants is None:
        return super(ConvRNN2D, self).__call__(inputs, **kwargs)

    # If any of `initial_state` or `constants` are specified and are Keras
    # tensors, then add them to the inputs and temporarily modify the
    # input_spec to include them.
    additional_inputs = []
    additional_specs = []

    if initial_state is not None:
        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        self.state_spec = [
            InputSpec(shape=K.int_shape(state)) for state in initial_state
        ]
        additional_specs += self.state_spec

    if constants is not None:
        kwargs['constants'] = constants
        additional_inputs += constants
        self.constants_spec = [
            InputSpec(shape=K.int_shape(constant)) for constant in constants
        ]
        self._num_constants = len(constants)
        additional_specs += self.constants_spec

    # at this point additional_inputs cannot be empty
    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state or constants of an RNN'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors')

    if not is_keras_tensor:
        return super(ConvRNN2D, self).__call__(inputs, **kwargs)

    # Compute the full input spec, including state and constants
    full_input = [inputs] + additional_inputs
    full_input_spec = self.input_spec + additional_specs
    # Perform the call with temporarily replaced input_spec. The original
    # spec is restored in `finally` so that a failing call does not leave
    # the layer with the extended spec.
    original_input_spec = self.input_spec
    self.input_spec = full_input_spec
    try:
        return super(ConvRNN2D, self).__call__(full_input, **kwargs)
    finally:
        self.input_spec = original_input_spec
def __init__(self,
             units,
             activation=None,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             **kwargs):
    """Store the layer's configuration.

    Args:
        units: converted with `int()` and stored as `self.units`.
        activation: resolved with `activations.get`.
        use_bias: stored as given.
        kernel_initializer, bias_initializer: resolved with
            `initializers.get`.
        kernel_regularizer, bias_regularizer: resolved with
            `regularizers.get`.
        activity_regularizer: resolved with `regularizers.get` and passed to
            the parent constructor.
        kernel_constraint, bias_constraint: resolved with `constraints.get`.
        **kwargs: forwarded to the parent constructor. An `input_dim` entry
            is rewritten to `input_shape=(input_dim,)` when `input_shape` is
            not already present.
    """
    has_legacy_dim = 'input_dim' in kwargs
    if has_legacy_dim and 'input_shape' not in kwargs:
        legacy_dim = kwargs.pop('input_dim')
        kwargs['input_shape'] = (legacy_dim,)

    resolved_activity_regularizer = regularizers.get(activity_regularizer)
    super(Dense, self).__init__(
        activity_regularizer=resolved_activity_regularizer, **kwargs)

    self.units = int(units)
    self.activation = activations.get(activation)
    self.use_bias = use_bias

    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)

    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)

    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    self.supports_masking = True
    # Inputs need at least two dimensions.
    self.input_spec = InputSpec(min_ndim=2)
def build(self, input_shape):
    """Create the `kernel` (and optionally `bias`) variables.

    Args:
        input_shape: anything accepted by `tensor_shape.TensorShape`.

    Raises:
        ValueError: if the last dimension of `input_shape` is unknown.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    last_dim = input_shape[-1].value
    if last_dim is None:
        raise ValueError('The last dimension of the inputs to `Dense` '
                         'should be defined. Found `None`.')

    # Pin the size of the last axis now that it is known.
    self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})

    self.kernel = self.add_variable(
        'kernel',
        shape=[last_dim, self.units],
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        dtype=self.dtype,
        trainable=True)

    if not self.use_bias:
        self.bias = None
    else:
        self.bias = self.add_variable(
            'bias',
            shape=[self.units],
            initializer=self.bias_initializer,
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint,
            dtype=self.dtype,
            trainable=True)

    self.built = True
def __init__(self,
             filters,
             kernel_size,
             strides=(1, 1),
             padding='valid',
             data_format=None,
             dilation_rate=(1, 1),
             return_sequences=False,
             go_backwards=False,
             stateful=False,
             **kwargs):
    """Store normalised convolution options and recurrence flags.

    Args:
        filters: stored as given.
        kernel_size, strides, dilation_rate: normalised to 2-tuples with
            `conv_utils.normalize_tuple`.
        padding: normalised with `conv_utils.normalize_padding`.
        data_format: normalised with `conv_utils.normalize_data_format`.
        return_sequences, go_backwards, stateful: stored as given.
        **kwargs: forwarded to the parent constructor.
    """
    super(ConvRecurrent2D, self).__init__(**kwargs)

    def _as_pair(value, label):
        # Every spatial option of this layer is a pair.
        return conv_utils.normalize_tuple(value, 2, label)

    self.filters = filters
    self.kernel_size = _as_pair(kernel_size, 'kernel_size')
    self.strides = _as_pair(strides, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.dilation_rate = _as_pair(dilation_rate, 'dilation_rate')

    self.return_sequences = return_sequences
    self.go_backwards = go_backwards
    self.stateful = stateful

    # Rank-5 input is required; the state spec is left unset here.
    self.input_spec = [InputSpec(ndim=5)]
    self.state_spec = None
def build(self, input_shape):
    """Create the kernel and optional bias weights.

    Args:
        input_shape: indexable shape; entries 1 and 2 are read.

    Raises:
        ValueError: if `input_shape[2]` is `None`.
    """
    input_dim = input_shape[2]
    if input_dim is None:
        raise ValueError(
            'Axis 2 of input should be fully-defined. '
            'Found shape:', input_shape)

    kernel_width = self.kernel_size[0]
    output_length = conv_utils.conv_output_length(
        input_shape[1], kernel_width, self.padding, self.strides[0])

    # One (kernel_width * input_dim, filters) matrix per output position.
    self.kernel_shape = (output_length, kernel_width * input_dim,
                         self.filters)
    self.kernel = self.add_weight(
        shape=self.kernel_shape,
        initializer=self.kernel_initializer,
        name='kernel',
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint)

    if not self.use_bias:
        self.bias = None
    else:
        self.bias = self.add_weight(
            shape=(output_length, self.filters),
            initializer=self.bias_initializer,
            name='bias',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint)

    self.input_spec = InputSpec(ndim=3, axes={2: input_dim})
    self.built = True
def __init__(self,
             filters,
             kernel_size,
             strides=1,
             padding='valid',
             data_format=None,
             activation=None,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             **kwargs):
    """Store the layer's configuration.

    Args:
        filters: stored as given.
        kernel_size, strides: normalised to 1-tuples with
            `conv_utils.normalize_tuple`.
        padding: normalised with `conv_utils.normalize_padding`; only
            `'valid'` is accepted.
        data_format: normalised with `conv_utils.normalize_data_format`.
        activation: resolved with `activations.get`.
        use_bias: stored as given.
        kernel_initializer, bias_initializer: resolved with
            `initializers.get`.
        kernel_regularizer, bias_regularizer, activity_regularizer:
            resolved with `regularizers.get`.
        kernel_constraint, bias_constraint: resolved with `constraints.get`.
        **kwargs: forwarded to the parent constructor.

    Raises:
        ValueError: if the normalised padding is not `'valid'`.
    """
    super(LocallyConnected1D, self).__init__(**kwargs)

    self.filters = filters
    self.kernel_size = conv_utils.normalize_tuple(
        kernel_size, 1, 'kernel_size')
    self.strides = conv_utils.normalize_tuple(strides, 1, 'strides')

    normalized_padding = conv_utils.normalize_padding(padding)
    self.padding = normalized_padding
    if normalized_padding != 'valid':
        raise ValueError('Invalid border mode for LocallyConnected1D '
                         '(only "valid" is supported): ' + padding)

    self.data_format = conv_utils.normalize_data_format(data_format)
    self.activation = activations.get(activation)
    self.use_bias = use_bias

    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)

    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)

    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    self.input_spec = InputSpec(ndim=3)
def build(self, input_shape):
    """Create the kernel and optional bias weights.

    Args:
        input_shape: anything accepted by `tensor_shape.TensorShape`.

    Raises:
        ValueError: if either spatial dimension is `None`.
    """
    input_shape = tensor_shape.TensorShape(input_shape).as_list()

    if self.data_format == 'channels_last':
        input_row, input_col = input_shape[1:-1]
        input_filter = input_shape[3]
    else:
        input_row, input_col = input_shape[2:]
        input_filter = input_shape[1]

    spatial_dims_known = input_row is not None and input_col is not None
    if not spatial_dims_known:
        raise ValueError('The spatial dimensions of the inputs to '
                         ' a LocallyConnected2D layer '
                         'should be fully-defined, but layer received '
                         'the inputs shape ' + str(input_shape))

    ksize = self.kernel_size
    stride = self.strides
    output_row = conv_utils.conv_output_length(
        input_row, ksize[0], self.padding, stride[0])
    output_col = conv_utils.conv_output_length(
        input_col, ksize[1], self.padding, stride[1])
    self.output_row = output_row
    self.output_col = output_col

    # One flattened patch-to-filters matrix per output location.
    self.kernel_shape = (output_row * output_col,
                         ksize[0] * ksize[1] * input_filter,
                         self.filters)
    self.kernel = self.add_weight(
        shape=self.kernel_shape,
        initializer=self.kernel_initializer,
        name='kernel',
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint)

    if not self.use_bias:
        self.bias = None
    else:
        self.bias = self.add_weight(
            shape=(output_row, output_col, self.filters),
            initializer=self.bias_initializer,
            name='bias',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint)

    # Pin the channel axis size in the input spec.
    channel_axis = 1 if self.data_format == 'channels_first' else -1
    self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_filter})
    self.built = True
def __init__(self, rate, data_format=None, **kwargs):
    """Store the data format and require rank-5 input.

    Args:
        rate: forwarded to the parent constructor.
        data_format: `'channels_last'` or `'channels_first'`; when `None`,
            `K.image_data_format()` is used.
        **kwargs: forwarded to the parent constructor.

    Raises:
        ValueError: if the resolved data format is not one of the two
            accepted values.
    """
    super(SpatialDropout3D, self).__init__(rate, **kwargs)

    resolved_format = (K.image_data_format()
                       if data_format is None else data_format)
    if resolved_format not in {'channels_last', 'channels_first'}:
        raise ValueError('data_format must be in '
                         '{"channels_last", "channels_first"}')

    self.data_format = resolved_format
    self.input_spec = InputSpec(ndim=5)
def build(self, input_shape):
    """Build the wrapped layer on the input shape with axis 1 removed.

    Args:
        input_shape: anything accepted by `tensor_shape.TensorShape`; must
            have at least three dimensions.
    """
    input_shape = tensor_shape.TensorShape(input_shape).as_list()
    assert len(input_shape) >= 3
    self.input_spec = InputSpec(shape=input_shape)

    # Shape seen by the wrapped layer: everything except axis 1.
    per_step_shape = input_shape[:1] + input_shape[2:]
    wrapped = self.layer
    if not wrapped.built:
        wrapped.build(per_step_shape)
        wrapped.built = True

    super(TimeDistributed, self).build()
    self.built = True
def __call__(self, inputs, initial_state=None, **kwargs):
    """Call the wrapper, optionally with initial states for both directions.

    When `initial_state` consists of Keras tensors, the states are appended
    to the inputs and `self.input_spec` is temporarily extended to cover
    them for the duration of the parent call.

    Args:
        inputs: input tensor, or a list whose first element is the input and
            whose remaining elements are taken as `initial_state`.
        initial_state: optional tensor, tuple or list of state tensors. Its
            length must be even: the first half is assigned to the forward
            layer's `state_spec`, the second half to the backward layer's.
        **kwargs: forwarded to the parent `__call__`.

    Returns:
        Whatever the parent `__call__` returns.

    Raises:
        ValueError: if the number of states is odd, or the states mix Keras
            and non-Keras tensors.
    """
    if isinstance(inputs, list):
        if len(inputs) > 1:
            initial_state = inputs[1:]
        inputs = inputs[0]

    if initial_state is None:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Standardize `initial_state` into list
    if isinstance(initial_state, tuple):
        initial_state = list(initial_state)
    elif not isinstance(initial_state, list):
        initial_state = [initial_state]

    # Check if `initial_state` can be split in half
    num_states = len(initial_state)
    if num_states % 2 > 0:
        raise ValueError(
            'When passing `initial_state` to a Bidirectional RNN, the state '
            'should be a list containing the states of the underlying RNNs. '
            'Found: ' + str(initial_state))

    # Applies the same workaround as in `RNN.__call__`, without handling
    # constants
    kwargs['initial_state'] = initial_state
    additional_inputs = initial_state
    additional_specs = [
        InputSpec(shape=K.int_shape(state)) for state in initial_state
    ]
    half = num_states // 2
    self.forward_layer.state_spec = additional_specs[:half]
    self.backward_layer.state_spec = additional_specs[half:]

    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state of a Bidirectional'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors'
                             ' (a "Keras tensor" is a tensor that was'
                             ' returned by a Keras layer, or by `Input`)')

    if not is_keras_tensor:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Compute the full input spec, including state
    full_input = [inputs] + additional_inputs
    full_input_spec = self.input_spec + additional_specs
    # Perform the call with temporarily replaced input_spec. The original
    # spec is restored in `finally` so that a failing call does not leave
    # the layer with the extended spec.
    original_input_spec = self.input_spec
    self.input_spec = full_input_spec
    try:
        return super(Bidirectional, self).__call__(full_input, **kwargs)
    finally:
        self.input_spec = original_input_spec
def __init__(self,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             **kwargs):
    """Store the recurrence flags and derive `state_spec` from the cell.

    Args:
        return_sequences, return_state, go_backwards, stateful: stored as
            given.
        **kwargs: forwarded to the constructor of `RNN`'s parent class.
    """
    # We invoke the base layer's initializer directly here because we do not
    # want to create RNN cell instance.
    super(RNN, self).__init__(**kwargs)  # pylint: disable=bad-super-call

    self.return_sequences = return_sequences
    self.return_state = return_state
    self.go_backwards = go_backwards
    self.stateful = stateful
    self.supports_masking = False
    self.input_spec = [InputSpec(ndim=3)]

    # `state_size` may be a single value or a sized collection.
    cell_state_size = self.cell.state_size
    if not hasattr(cell_state_size, '__len__'):
        cell_state_size = [cell_state_size]
    self.state_spec = [
        InputSpec(shape=(None, dim)) for dim in cell_state_size
    ]

    self.constants_spec = None
    self._states = None
    self._num_constants = None
def __init__(self,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             **kwargs):
    """Store the recurrence flags and derive `state_spec` from the cell.

    Args:
        return_sequences, return_state, go_backwards, stateful: stored as
            given.
        **kwargs: forwarded to the constructor of `RNN`'s parent class.

    Raises:
        RuntimeError: if `K.backend()` is not `'tensorflow'`.
    """
    backend_is_tensorflow = K.backend() == 'tensorflow'
    if not backend_is_tensorflow:
        raise RuntimeError('CuDNN RNNs are only available '
                           'with the TensorFlow backend.')

    # Note: this calls the initializer of `RNN`'s parent, not `RNN` itself.
    super(RNN, self).__init__(**kwargs)

    self.return_sequences = return_sequences
    self.return_state = return_state
    self.go_backwards = go_backwards
    self.stateful = stateful
    self.supports_masking = False
    self.input_spec = [InputSpec(ndim=3)]

    # `state_size` may be a single value or a sized collection.
    cell_state_size = self.cell.state_size
    if hasattr(cell_state_size, '__len__'):
        state_dims = cell_state_size
    else:
        state_dims = [cell_state_size]
    self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_dims]

    self.constants_spec = None
    self._states = None
    self._num_constants = None
def __init__(self,
             pool_function,
             pool_size,
             strides,
             padding='valid',
             data_format=None,
             name=None,
             **kwargs):
    """Store the pooling function and its normalised options.

    Args:
        pool_function: stored as given.
        pool_size: normalised to a 1-tuple with
            `conv_utils.normalize_tuple`.
        strides: normalised the same way; `None` falls back to `pool_size`.
        padding: normalised with `conv_utils.normalize_padding`.
        data_format: normalised with `conv_utils.normalize_data_format`;
            `None` falls back to `backend.image_data_format()`.
        name: forwarded to the parent constructor.
        **kwargs: forwarded to the parent constructor.
    """
    super(Pooling1D, self).__init__(name=name, **kwargs)

    effective_format = (backend.image_data_format()
                        if data_format is None else data_format)
    effective_strides = pool_size if strides is None else strides

    self.pool_function = pool_function
    self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size')
    self.strides = conv_utils.normalize_tuple(
        effective_strides, 1, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    self.data_format = conv_utils.normalize_data_format(effective_format)
    self.input_spec = InputSpec(ndim=3)
def build(self, input_shape):
    """Create the `alpha` weight and the input spec.

    Args:
        input_shape: indexable, sized shape; axis 0 is excluded from the
            parameter shape.
    """
    param_shape = list(input_shape[1:])
    self.param_broadcast = [False] * len(param_shape)

    # Shared axes collapse to size 1. Axis numbers count from the full input
    # shape, hence the `- 1` offset into `param_shape`.
    if self.shared_axes is not None:
        for axis in self.shared_axes:
            offset = axis - 1
            param_shape[offset] = 1
            self.param_broadcast[offset] = True

    self.alpha = self.add_weight(
        shape=param_shape,
        name='alpha',
        initializer=self.alpha_initializer,
        regularizer=self.alpha_regularizer,
        constraint=self.alpha_constraint)

    # Set input spec
    rank = len(input_shape)
    axes = {}
    if self.shared_axes:
        axes = {
            axis: input_shape[axis]
            for axis in range(1, rank)
            if axis not in self.shared_axes
        }
    self.input_spec = InputSpec(ndim=rank, axes=axes)
    self.built = True
def __init__(self,
             cell,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             unroll=False,
             **kwargs):
    """Validate the arguments and initialise the layer.

    Args:
        cell: the cell to wrap; a list or tuple of cells is rejected.
        return_sequences, return_state, go_backwards, stateful: forwarded
            positionally to the parent constructor.
        unroll: must be falsy.
        **kwargs: forwarded to the parent constructor.

    Raises:
        TypeError: if `unroll` is truthy, or `cell` is a list or tuple.
    """
    if unroll:
        raise TypeError('Unrolling isn\'t possible with '
                        'convolutional RNNs.')
    if isinstance(cell, (list, tuple)):
        # The StackedConvRNN2DCells isn't implemented yet.
        # The trailing space after "to" keeps the two message fragments
        # from running together.
        raise TypeError('It is not possible at the moment to '
                        'stack convolutional cells.')
    super(ConvRNN2D, self).__init__(cell, return_sequences, return_state,
                                    go_backwards, stateful, unroll, **kwargs)
    self.input_spec = [InputSpec(ndim=5)]
    self.states = None
def build(self, input_shape):
    """Create the depthwise kernel and optional bias.

    Args:
        input_shape: indexable, sized shape with at least four entries.

    Raises:
        ValueError: if `input_shape` has fewer than four entries, or the
            channel dimension is `None`.
    """
    if len(input_shape) < 4:
        raise ValueError(
            'Inputs to `DepthwiseConv2D` should have rank 4. '
            'Received input shape:', str(input_shape))

    channel_axis = 1 if self.data_format == 'channels_first' else 3
    channels = input_shape[channel_axis]
    if channels is None:
        raise ValueError('The channel dimension of the inputs to '
                         '`DepthwiseConv2D` '
                         'should be defined. Found `None`.')
    input_dim = int(channels)

    kernel_rows = self.kernel_size[0]
    kernel_cols = self.kernel_size[1]
    depthwise_kernel_shape = (kernel_rows, kernel_cols, input_dim,
                              self.depth_multiplier)
    self.depthwise_kernel = self.add_weight(
        shape=depthwise_kernel_shape,
        initializer=self.depthwise_initializer,
        name='depthwise_kernel',
        regularizer=self.depthwise_regularizer,
        constraint=self.depthwise_constraint)

    if not self.use_bias:
        self.bias = None
    else:
        self.bias = self.add_weight(
            shape=(input_dim * self.depth_multiplier,),
            initializer=self.bias_initializer,
            name='bias',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint)

    # Set input spec.
    self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
    self.built = True
def build(self, input_shape):
    """Create the weights and expose per-gate slices of them.

    The kernel, recurrent kernel and bias each hold four gates' parameters
    along the last axis; they are sliced into `*_i`, `*_f`, `*_c` and `*_o`
    attributes in that order.

    Args:
        input_shape: shape of the input, or a list whose first entry is the
            input shape.

    Raises:
        ValueError: if the channel dimension of the input is `None`.
    """
    if isinstance(input_shape, list):
        input_shape = input_shape[0]
    input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())

    batch_size = None
    if self.stateful:
        batch_size = input_shape[0]
    self.input_spec[0] = InputSpec(shape=(batch_size, None) + input_shape[2:])

    if self.stateful:
        self.reset_states()
    else:
        # initial states: 2 all-zero tensor of shape (filters)
        self.states = [None, None]

    channel_axis = 2 if self.data_format == 'channels_first' else -1
    input_dim = input_shape[channel_axis]
    if input_dim is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')

    state_dims = [None] * 4
    state_dims[channel_axis] = input_dim
    state_shape = tuple(state_dims)
    self.state_spec = [
        InputSpec(shape=state_shape),
        InputSpec(shape=state_shape),
    ]

    n = self.filters
    kernel_shape = self.kernel_size + (input_dim, n * 4)
    self.kernel_shape = kernel_shape
    recurrent_kernel_shape = self.kernel_size + (n, n * 4)

    self.kernel = self.add_weight(
        shape=kernel_shape,
        initializer=self.kernel_initializer,
        name='kernel',
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint)
    self.recurrent_kernel = self.add_weight(
        shape=recurrent_kernel_shape,
        initializer=self.recurrent_initializer,
        name='recurrent_kernel',
        regularizer=self.recurrent_regularizer,
        constraint=self.recurrent_constraint)

    if not self.use_bias:
        self.bias = None
    else:
        self.bias = self.add_weight(
            shape=(n * 4,),
            initializer=self.bias_initializer,
            name='bias',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint)
        if self.unit_forget_bias:
            # Set the second quarter of the bias (the slice exposed as
            # `bias_f` below) to one.
            bias_value = np.zeros((n * 4,))
            bias_value[n:n * 2] = 1.
            K.set_value(self.bias, bias_value)

    # Per-gate views, created in the order i, f, c, o with the kernel slice
    # preceding the recurrent slice for each gate.
    kernel = self.kernel
    recurrent = self.recurrent_kernel
    self.kernel_i = kernel[:, :, :, :n]
    self.recurrent_kernel_i = recurrent[:, :, :, :n]
    self.kernel_f = kernel[:, :, :, n:n * 2]
    self.recurrent_kernel_f = recurrent[:, :, :, n:n * 2]
    self.kernel_c = kernel[:, :, :, n * 2:n * 3]
    self.recurrent_kernel_c = recurrent[:, :, :, n * 2:n * 3]
    self.kernel_o = kernel[:, :, :, n * 3:]
    self.recurrent_kernel_o = recurrent[:, :, :, n * 3:]

    if self.use_bias:
        bias = self.bias
        self.bias_i = bias[:n]
        self.bias_f = bias[n:n * 2]
        self.bias_c = bias[n * 2:n * 3]
        self.bias_o = bias[n * 3:]
    else:
        self.bias_i = self.bias_f = self.bias_c = self.bias_o = None

    self.built = True
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """`Bidirectional.__call__` implements the same API as the wrapped `RNN`.

    When `initial_state` or `constants` are Keras tensors they are appended
    to the inputs and `self.input_spec` is temporarily extended to cover
    them for the duration of the parent call.

    Args:
        inputs: input tensor, or a list whose first element is the input and
            whose remaining elements are taken as `initial_state`.
        initial_state: optional list of state tensors. Its length must be
            even: the first half is assigned to the forward layer's
            `state_spec`, the second half to the backward layer's.
        constants: optional list of constant tensors; their specs and count
            are recorded on this wrapper and on both wrapped layers.
        **kwargs: forwarded to the parent `__call__`.

    Returns:
        Whatever the parent `__call__` returns.

    Raises:
        ValueError: if the number of states is odd, or the extra inputs mix
            Keras and non-Keras tensors.
    """
    inputs, initial_state, constants = _standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if isinstance(inputs, list):
        if len(inputs) > 1:
            initial_state = inputs[1:]
        inputs = inputs[0]

    if initial_state is None and constants is None:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Applies the same workaround as in `RNN.__call__`
    additional_inputs = []
    additional_specs = []

    if initial_state is not None:
        # Check if `initial_state` can be split in half
        num_states = len(initial_state)
        if num_states % 2 > 0:
            raise ValueError(
                'When passing `initial_state` to a Bidirectional RNN, '
                'the state should be a list containing the states of '
                'the underlying RNNs. '
                'Found: ' + str(initial_state))

        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        state_specs = [
            InputSpec(shape=K.int_shape(state)) for state in initial_state
        ]
        half = num_states // 2
        self.forward_layer.state_spec = state_specs[:half]
        self.backward_layer.state_spec = state_specs[half:]
        additional_specs += state_specs

    if constants is not None:
        kwargs['constants'] = constants
        additional_inputs += constants
        constants_spec = [
            InputSpec(shape=K.int_shape(constant)) for constant in constants
        ]
        self.forward_layer.constants_spec = constants_spec
        self.backward_layer.constants_spec = constants_spec
        additional_specs += constants_spec

        self._num_constants = len(constants)
        self.forward_layer._num_constants = self._num_constants
        self.backward_layer._num_constants = self._num_constants

    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state of a Bidirectional'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors'
                             ' (a "Keras tensor" is a tensor that was'
                             ' returned by a Keras layer, or by `Input`)')

    if not is_keras_tensor:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Compute the full input spec, including state
    full_input = [inputs] + additional_inputs
    full_input_spec = self.input_spec + additional_specs
    # Perform the call with temporarily replaced input_spec. The original
    # spec is restored in `finally` so that a failing call does not leave
    # the layer with the extended spec.
    original_input_spec = self.input_spec
    self.input_spec = full_input_spec
    try:
        return super(Bidirectional, self).__call__(full_input, **kwargs)
    finally:
        self.input_spec = original_input_spec
def __init__(self, data_format=None, **kwargs):
    """Store the normalised data format and require at least rank-2 input.

    Args:
        data_format: normalised with `conv_utils.normalize_data_format`.
        **kwargs: forwarded to the parent constructor.
    """
    super(Flatten, self).__init__(**kwargs)
    normalized_format = conv_utils.normalize_data_format(data_format)
    self.data_format = normalized_format
    self.input_spec = InputSpec(min_ndim=2)
def build(self, input_shape):
    """Validate the configuration and create the layer's variables.

    Steps, in order: normalise and validate `self.axis`, validate the
    virtual-batch options, decide whether the fused implementation can be
    used, derive the parameter dtype and shape, then create gamma / beta
    (or constant stand-ins when fused), the moving statistics and, if
    `self.renorm` is set, the renorm variables.

    Note that `self.axis` is rewritten in place: it is converted to a list,
    negative entries are resolved, and every entry is shifted by one when
    `virtual_batch_size` is set.

    Args:
        input_shape: anything accepted by `tensor_shape.TensorShape`.

    Raises:
        ValueError: if the input rank is undefined, an axis is out of range
            or duplicated, the virtual-batch options are inconsistent, or a
            normalised axis has an unknown dimension.
        TypeError: if `self.axis` is neither an int nor a list.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    if not input_shape.ndims:
        raise ValueError('Input has undefined rank:', input_shape)
    ndims = len(input_shape)

    # Convert axis to list and resolve negatives
    if isinstance(self.axis, int):
        self.axis = [self.axis]

    if not isinstance(self.axis, list):
        raise TypeError('axis must be int or list, type given: %s' %
                        type(self.axis))

    for idx, x in enumerate(self.axis):
        if x < 0:
            self.axis[idx] = ndims + x

    # Validate axes
    for x in self.axis:
        if x < 0 or x >= ndims:
            raise ValueError('Invalid axis: %d' % x)
    if len(self.axis) != len(set(self.axis)):
        raise ValueError('Duplicate axis: %s' % self.axis)

    if self.virtual_batch_size is not None:
        if self.virtual_batch_size <= 0:
            raise ValueError(
                'virtual_batch_size must be a positive integer that '
                'divides the true batch size of the input Tensor')
        # If using virtual batches, the first dimension must be the batch
        # dimension and cannot be the batch norm axis
        if 0 in self.axis:
            raise ValueError(
                'When using virtual_batch_size, the batch dimension '
                'must be 0 and thus axis cannot include 0')
        if self.adjustment is not None:
            raise ValueError(
                'When using virtual_batch_size, adjustment cannot '
                'be specified')

    if self.fused:
        # Currently fused batch norm doesn't support renorm. It also only supports
        # an input tensor of rank 4 and a channel dimension on axis 1 or 3.
        # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the
        # output back to its original shape accordingly.
        self.fused = (not self.renorm and ndims == 4
                      and self.axis in [[1], [3]]
                      and self.virtual_batch_size is None
                      and self.adjustment is None)
        # TODO(chrisying): fused batch norm is currently not supported for
        # multi-axis batch norm and by extension virtual batches. In some cases,
        # it might be possible to use fused batch norm but would require reshaping
        # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is
        # particularly tricky. A compromise might be to just support the most
        # common use case (turning 5D w/ virtual batch to NCHW)

    # `self.fused` may have just been downgraded above, so it is re-tested.
    if self.fused:
        if self.axis == [1]:
            self._data_format = 'NCHW'
        elif self.axis == [3]:
            self._data_format = 'NHWC'
        else:
            raise ValueError(
                'Unsupported axis, fused batch norm only supports '
                'axis == [1] or axis == [3]')

    # Raise parameters of fp16 batch norm to fp32
    if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16:
        param_dtype = dtypes.float32
    else:
        param_dtype = self.dtype or dtypes.float32

    # Map each normalised axis to its static size; all must be known.
    axis_to_dim = {x: input_shape[x].value for x in self.axis}
    for x in axis_to_dim:
        if axis_to_dim[x] is None:
            raise ValueError(
                'Input has undefined `axis` dimension. Input shape: ',
                input_shape)
    self.input_spec = InputSpec(ndim=ndims, axes=axis_to_dim)

    if len(axis_to_dim) == 1 and self.virtual_batch_size is None:
        # Single axis batch norm (most common/default use-case)
        param_shape = (list(axis_to_dim.values())[0], )
    else:
        # Parameter shape is the original shape but with 1 in all non-axis dims
        param_shape = [
            axis_to_dim[i] if i in axis_to_dim else 1 for i in range(ndims)
        ]
        if self.virtual_batch_size is not None:
            # When using virtual batches, add an extra dim at index 1
            param_shape.insert(1, 1)
            for idx, x in enumerate(self.axis):
                self.axis[idx] = x + 1  # Account for added dimension

    if self.scale:
        self.gamma = self.add_variable(name='gamma',
                                       shape=param_shape,
                                       dtype=param_dtype,
                                       initializer=self.gamma_initializer,
                                       regularizer=self.gamma_regularizer,
                                       constraint=self.gamma_constraint,
                                       trainable=True)
    else:
        self.gamma = None
        # With no gamma variable, the fused path gets a constant of ones.
        if self.fused:
            self._gamma_const = array_ops.constant(1.0,
                                                   dtype=param_dtype,
                                                   shape=param_shape)

    if self.center:
        self.beta = self.add_variable(name='beta',
                                      shape=param_shape,
                                      dtype=param_dtype,
                                      initializer=self.beta_initializer,
                                      regularizer=self.beta_regularizer,
                                      constraint=self.beta_constraint,
                                      trainable=True)
    else:
        self.beta = None
        # With no beta variable, the fused path gets a constant of zeros.
        if self.fused:
            self._beta_const = array_ops.constant(0.0,
                                                  dtype=param_dtype,
                                                  shape=param_shape)

    try:
        # Disable variable partitioning when creating the moving mean and variance
        if hasattr(self, '_scope') and self._scope:
            partitioner = self._scope.partitioner
            self._scope.set_partitioner(None)
        else:
            partitioner = None
        self.moving_mean = self._add_tower_local_variable(
            name='moving_mean',
            shape=param_shape,
            dtype=param_dtype,
            initializer=self.moving_mean_initializer,
            trainable=False)

        self.moving_variance = self._add_tower_local_variable(
            name='moving_variance',
            shape=param_shape,
            dtype=param_dtype,
            initializer=self.moving_variance_initializer,
            trainable=False)

        if self.renorm:
            # Create variables to maintain the moving mean and standard deviation.
            # These are used in training and thus are different from the moving
            # averages above. The renorm variables are colocated with moving_mean
            # and moving_variance.
            # NOTE: below, the outer `with device` block causes the current device
            # stack to be cleared. The nested ones use a `lambda` to set the desired
            # device and ignore any devices that may be set by the custom getter.
            # NOTE(review): no `with device` block is visible in this method;
            # colocation is done via `colocate_vars_with` below. The note above
            # looks stale -- confirm.
            def _renorm_variable(name, shape):
                # Zero-initialised, non-trainable variable of `param_dtype`.
                var = self._add_tower_local_variable(
                    name=name,
                    shape=shape,
                    dtype=param_dtype,
                    initializer=init_ops.zeros_initializer(),
                    trainable=False)
                return var

            with distribute_lib.get_distribution_strategy(
            ).colocate_vars_with(self.moving_mean):
                self.renorm_mean = _renorm_variable('renorm_mean',
                                                    param_shape)
                self.renorm_mean_weight = _renorm_variable(
                    'renorm_mean_weight', ())
            # We initialize renorm_stddev to 0, and maintain the (0-initialized)
            # renorm_stddev_weight. This allows us to (1) mix the average
            # stddev with the minibatch stddev early in training, and (2) compute
            # the unbiased average stddev by dividing renorm_stddev by the weight.
            with distribute_lib.get_distribution_strategy(
            ).colocate_vars_with(self.moving_variance):
                self.renorm_stddev = _renorm_variable('renorm_stddev',
                                                      param_shape)
                self.renorm_stddev_weight = _renorm_variable(
                    'renorm_stddev_weight', ())
    finally:
        # Put back the partitioner that was removed above, if there was one.
        if partitioner:
            self._scope.set_partitioner(partitioner)
    self.built = True
def __init__(self, n, **kwargs):
    """Store `n` and require rank-2 input.

    Args:
        n: stored as `self.n`.
        **kwargs: forwarded to the parent constructor.
    """
    super(RepeatVector, self).__init__(**kwargs)
    self.input_spec = InputSpec(ndim=2)
    self.n = n
def __init__(self, **kwargs):
    """Initialise the layer and require at least rank-2 input.

    Args:
        **kwargs: forwarded to the parent constructor.
    """
    super(Flatten, self).__init__(**kwargs)
    minimum_rank_spec = InputSpec(min_ndim=2)
    self.input_spec = minimum_rank_spec
def __init__(self, dims, **kwargs):
    """Store the permutation and derive the required input rank.

    Args:
        dims: iterable, stored as a tuple in `self.dims`.
        **kwargs: forwarded to the parent constructor.
    """
    super(Permute, self).__init__(**kwargs)
    self.dims = tuple(dims)
    # The spec's rank is one more than the number of entries in `dims`.
    expected_rank = len(self.dims) + 1
    self.input_spec = InputSpec(ndim=expected_rank)
def __init__(self, rate, **kwargs):
    """Initialise the layer and require rank-3 input.

    Args:
        rate: forwarded to the parent constructor.
        **kwargs: forwarded to the parent constructor.
    """
    super(SpatialDropout1D, self).__init__(rate, **kwargs)
    rank3_spec = InputSpec(ndim=3)
    self.input_spec = rank3_spec
def __init__(self, data_format=None, **kwargs):
    """Store the normalised data format and require rank-5 input.

    Args:
        data_format: normalised with `conv_utils.normalize_data_format`.
        **kwargs: forwarded to the parent constructor.
    """
    super(_GlobalPooling3D, self).__init__(**kwargs)
    normalized_format = conv_utils.normalize_data_format(data_format)
    self.data_format = normalized_format
    self.input_spec = InputSpec(ndim=5)
def __init__(self, **kwargs):
    """Initialise the layer and require rank-3 input.

    Args:
        **kwargs: forwarded to the parent constructor.
    """
    super(_GlobalPooling1D, self).__init__(**kwargs)
    rank3_spec = InputSpec(ndim=3)
    self.input_spec = rank3_spec