def compute_output_shape(self, input_shape):
    """Compute the output shape of the layer for a single input shape.

    The strategy depends on `self._output_shape`:
      * `None`: `self.call` is traced on a placeholder and the static shape
        of its result is returned (not available when executing eagerly).
      * tuple/list: the first entry of `input_shape` (the batch dimension,
        or `None` when the input shape is empty) is prepended to it.
      * anything else: it is called with `input_shape` and must return a
        tuple or a list.

    Arguments:
        input_shape: anything accepted by `tensor_shape.TensorShape`.

    Returns:
        A `TensorShape`, or a list of `TensorShape`s when tracing
        `self.call` yields a list.

    Raises:
        NotImplementedError: if `self._output_shape` is `None` while
            executing eagerly.
        ValueError: if the `output_shape` callable returns something other
            than a tuple or a list.
    """
    input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())

    if self._output_shape is None:
        if context.executing_eagerly():
            raise NotImplementedError(
                'The output shape cannot be inferred automatically when '
                'executing eagerly; please specify `output_shape`.')
        traced = self.call(K.placeholder(shape=input_shape))
        if isinstance(traced, list):
            return [tensor_shape.TensorShape(K.int_shape(t)) for t in traced]
        return tensor_shape.TensorShape(K.int_shape(traced))

    if isinstance(self._output_shape, (tuple, list)):
        # `input_shape` was coerced to a tuple above, so it can never be a
        # list here; the former list branch was unreachable and is removed.
        num_samples = input_shape[0] if input_shape else None
        return tensor_shape.TensorShape(
            (num_samples,) + tuple(self._output_shape))

    shape = self._output_shape(input_shape)
    if not isinstance(shape, (list, tuple)):
        raise ValueError(
            '`output_shape` function must return a tuple or a list of tuples.')
    # A flat list of ints/None describes a single output: normalise to tuple.
    if isinstance(shape, list) and (isinstance(shape[0], int)
                                    or shape[0] is None):
        shape = tuple(shape)
    return tensor_shape.TensorShape(shape)
def set_model(self, model):
    """Sets Keras model and creates summary ops.

    Stores the model and the backend session on the callback, creates
    histogram / image / gradient summaries (only when `histogram_freq` is
    set and no merged summary exists yet), merges all summaries and finally
    instantiates the summary writer.

    Arguments:
        model: the model whose layers, weights and optimizer are summarised.
    """
    self.model = model
    self.sess = K.get_session()

    def _is_indexed_slices(candidate):
        # Name-based check, so no direct import of the class is required.
        return type(candidate).__name__ == 'IndexedSlices'

    # only make histogram summary op if it hasn't already been made
    if self.histogram_freq and self.merged is None:
        for layer in self.model.layers:
            for weight in layer.weights:
                summary_name = weight.name.replace(':', '_')
                tf_summary.histogram(summary_name, weight)
                if not self.write_images:
                    continue

                image = array_ops.squeeze(weight)
                dims = K.int_shape(image)
                rank = len(dims)
                if rank == 2:
                    # dense layer kernel case
                    if dims[0] > dims[1]:
                        image = array_ops.transpose(image)
                        dims = K.int_shape(image)
                    image = array_ops.reshape(image, [1, dims[0], dims[1], 1])
                elif rank == 3:
                    # convnet case
                    if K.image_data_format() == 'channels_last':
                        # switch to channels_first to display
                        # every kernel as a separate image
                        image = array_ops.transpose(image, perm=[2, 0, 1])
                        dims = K.int_shape(image)
                    image = array_ops.reshape(
                        image, [dims[0], dims[1], dims[2], 1])
                elif rank == 1:
                    # bias case
                    image = array_ops.reshape(image, [1, dims[0], 1, 1])
                else:
                    # not possible to handle 3D convnets etc.
                    continue

                dims = K.int_shape(image)
                assert len(dims) == 4 and dims[-1] in [1, 3, 4]
                tf_summary.image(summary_name, image)

            if self.write_grads:
                for weight in layer.trainable_weights:
                    summary_name = weight.name.replace(':', '_')
                    raw_grads = model.optimizer.get_gradients(
                        model.total_loss, weight)
                    # Sparse gradients are summarised through their values.
                    dense_grads = [
                        grad.values if _is_indexed_slices(grad) else grad
                        for grad in raw_grads
                    ]
                    tf_summary.histogram(
                        '{}_grad'.format(summary_name), dense_grads)

            if hasattr(layer, 'output'):
                tf_summary.histogram(
                    '{}_out'.format(layer.name), layer.output)
    self.merged = tf_summary.merge_all()

    # The graph is only handed to the writer when `write_graph` is set.
    if self.write_graph:
        writer_args = (self.log_dir, self.sess.graph)
    else:
        writer_args = (self.log_dir,)
    self.writer = self._writer_class(*writer_args)
def sparse_categorical_accuracy(y_true, y_pred):
    """Element-wise accuracy of arg-max predictions against sparse labels.

    Arguments:
        y_true: tensor of label indices; a trailing axis is squeezed away
            when `y_true` has the same rank as `y_pred`.
        y_pred: tensor of per-class scores; the class axis is the last one.

    Returns:
        A `K.floatx()` tensor that is 1 where the arg-max of `y_pred` equals
        `y_true` and 0 elsewhere.
    """
    true_shape = K.int_shape(y_true)
    pred_shape = K.int_shape(y_pred)
    # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,).
    # `K.int_shape` yields None for tensors of unknown rank, so only compare
    # ranks when both are statically known.
    if (true_shape is not None and pred_shape is not None
            and len(true_shape) == len(pred_shape)):
        y_true = array_ops.squeeze(y_true, [-1])
    y_pred = math_ops.argmax(y_pred, axis=-1)
    # `equal` requires matching dtypes. Casting only for float labels left
    # other label dtypes (e.g. int32 vs. the arg-max result) mismatched, so
    # cast whenever the two dtypes differ.
    if K.dtype(y_pred) != K.dtype(y_true):
        y_pred = math_ops.cast(y_pred, K.dtype(y_true))
    return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def sparse_categorical_accuracy(y_true, y_pred):
    """Element-wise accuracy of arg-max predictions against sparse labels.

    Arguments:
        y_true: tensor of label indices; a trailing axis is squeezed away
            when `y_true` has the same rank as `y_pred`.
        y_pred: tensor of per-class scores; the class axis is the last one.

    Returns:
        A `K.floatx()` tensor that is 1 where the arg-max of `y_pred` equals
        `y_true` and 0 elsewhere.
    """
    true_shape = K.int_shape(y_true)
    pred_shape = K.int_shape(y_pred)
    # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,).
    # `K.int_shape` yields None for tensors of unknown rank; calling `len` on
    # it would raise, so only compare ranks when both are statically known.
    if (true_shape is not None and pred_shape is not None
            and len(true_shape) == len(pred_shape)):
        y_true = array_ops.squeeze(y_true, [-1])
    y_pred = math_ops.argmax(y_pred, axis=-1)
    # If the predicted output and actual output types don't match, force cast
    # them to match.
    if K.dtype(y_pred) != K.dtype(y_true):
        y_pred = math_ops.cast(y_pred, K.dtype(y_true))
    return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """Call the layer, optionally with an initial state and/or constants.

    When `initial_state` or `constants` are Keras tensors they are appended
    to the inputs and `self.input_spec` is temporarily extended to cover
    them; otherwise they are forwarded through `kwargs` only.

    Arguments:
        inputs: input tensor (or list, as handled by `_standardize_args`).
        initial_state: optional list of initial state tensors.
        constants: optional list of constant tensors.
        **kwargs: forwarded to the parent `__call__`.

    Returns:
        Whatever the parent `__call__` returns.

    Raises:
        ValueError: if Keras and non-Keras tensors are mixed among the
            initial states and constants.
    """
    inputs, initial_state, constants = _standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if initial_state is None and constants is None:
        return super(ConvRNN2D, self).__call__(inputs, **kwargs)

    # If any of `initial_state` or `constants` are specified and are Keras
    # tensors, then add them to the inputs and temporarily modify the
    # input_spec to include them.
    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        self.state_spec = [InputSpec(shape=K.int_shape(state))
                           for state in initial_state]
        additional_specs += self.state_spec
    if constants is not None:
        kwargs['constants'] = constants
        additional_inputs += constants
        self.constants_spec = [InputSpec(shape=K.int_shape(constant))
                               for constant in constants]
        self._num_constants = len(constants)
        additional_specs += self.constants_spec

    # at this point additional_inputs cannot be empty
    first_is_keras = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != first_is_keras:
            raise ValueError('The initial state or constants of an RNN'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors')

    if not first_is_keras:
        return super(ConvRNN2D, self).__call__(inputs, **kwargs)

    # Compute the full input spec, including state and constants
    full_input = [inputs] + additional_inputs
    full_input_spec = self.input_spec + additional_specs
    # Perform the call with temporarily replaced input_spec. The original
    # spec is restored even if the call raises, so a failed call cannot
    # leave the layer with the extended spec.
    original_input_spec = self.input_spec
    self.input_spec = full_input_spec
    try:
        return super(ConvRNN2D, self).__call__(full_input, **kwargs)
    finally:
        self.input_spec = original_input_spec
def compute_output_shape(self, input_shape):
    """Compute the output shape(s) of the Lambda layer.

    The strategy depends on `self._output_shape`:
      * `None`: in eager mode the parent implementation is used; otherwise
        `self.call` is traced on placeholder(s) built from `input_shape`.
      * tuple: the batch dimension of the input is prepended to it.
      * list: treated as multiple outputs, the batch dimension is prepended
        to each entry.
      * anything else: it is called with `input_shape` and must return a
        tuple or a list.

    Arguments:
        input_shape: a shape, or a list of shapes for multiple inputs.

    Returns:
        A `TensorShape` or a list of `TensorShape`s.

    Raises:
        NotImplementedError: if the parent implementation cannot infer the
            shape in eager mode.
        ValueError: if the `output_shape` callable returns something other
            than a tuple or a list.
    """
    declared = self._output_shape

    if declared is None:
        if context.executing_eagerly():
            # Make use of existing autocomputation for Eager mode but provide
            # Lambda-specific error message.
            try:
                return super(Lambda, self).compute_output_shape(input_shape)
            except NotImplementedError:
                raise NotImplementedError('We could not automatically infer '
                                          'the static shape of the Lambda\'s output.'
                                          ' Please specify the `output_shape` for'
                                          ' this Lambda.')
        if isinstance(input_shape, list):
            placeholders = [K.placeholder(shape=one_shape)
                            for one_shape in input_shape]
        else:
            placeholders = K.placeholder(shape=input_shape)
        traced = self.call(placeholders)
        if isinstance(traced, list):
            return [tensor_shape.TensorShape(K.int_shape(t)) for t in traced]
        return tensor_shape.TensorShape(K.int_shape(traced))

    if isinstance(declared, (tuple, list)):
        if isinstance(input_shape, list):
            num_samples = input_shape[0][0]
        elif input_shape:
            num_samples = input_shape[0]
        else:
            num_samples = None
        batch_prefix = (num_samples,)
        # List here represents multiple outputs.
        if isinstance(declared, list):
            return [
                tensor_shape.TensorShape(batch_prefix + tuple(single_shape))
                for single_shape in declared
            ]
        return tensor_shape.TensorShape(batch_prefix + declared)

    shape = declared(input_shape)
    if not isinstance(shape, (list, tuple)):
        raise ValueError(
            '`output_shape` function must return a tuple or a list of tuples.')
    # List here can represent multiple outputs or single output.
    if isinstance(shape, list):
        if not isinstance(shape[0], (int, type(None))):
            return [tensor_shape.TensorShape(single_shape)
                    for single_shape in shape]
        # Convert list representing single output into a tuple.
        shape = tuple(shape)
    return tensor_shape.TensorShape(shape)
def get_updates(self, loss, params):
    """Build the update ops for gradient descent with a momentum buffer.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: the iteration-counter increment followed, for each
        parameter, by the velocity assignment and the parameter assignment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    step_size = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        iteration = math_ops.cast(self.iterations, K.dtype(self.decay))
        step_size = step_size * (1. / (1. + self.decay * iteration))

    # momentum: one zero-initialised velocity slot per parameter
    velocities = [K.zeros(K.int_shape(param)) for param in params]
    self.weights = [self.iterations] + velocities

    for param, grad, velocity in zip(params, gradients, velocities):
        new_velocity = self.momentum * velocity - step_size * grad
        self.updates.append(state_ops.assign(velocity, new_velocity))

        if self.nesterov:
            updated = param + self.momentum * new_velocity - step_size * grad
        else:
            updated = param + new_velocity

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(state_ops.assign(param, updated))
    return self.updates
def get_updates(self, loss, params):
    """Build update ops using running averages of squared gradients/updates.

    Two zero-initialised slots are kept per parameter: a `rho`-weighted
    average of squared gradients and one of squared updates.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: the iteration-counter increment followed, for each
        parameter, by the accumulator, parameter and delta-accumulator
        assignments.
    """
    gradients = self.get_gradients(loss, params)
    param_shapes = [K.int_shape(param) for param in params]
    sq_grad_avgs = [K.zeros(shape) for shape in param_shapes]
    sq_update_avgs = [K.zeros(shape) for shape in param_shapes]
    self.weights = sq_grad_avgs + sq_update_avgs
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    step_size = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        iteration = math_ops.cast(self.iterations, K.dtype(self.decay))
        step_size = step_size * (1. / (1. + self.decay * iteration))

    slots = zip(params, gradients, sq_grad_avgs, sq_update_avgs)
    for param, grad, sq_grad_avg, sq_update_avg in slots:
        # update accumulator
        new_sq_grad_avg = (self.rho * sq_grad_avg
                           + (1. - self.rho) * math_ops.square(grad))
        self.updates.append(state_ops.assign(sq_grad_avg, new_sq_grad_avg))

        # use the new accumulator and the *old* delta_accumulator
        scaled_grad = (grad * K.sqrt(sq_update_avg + self.epsilon)
                       / K.sqrt(new_sq_grad_avg + self.epsilon))
        updated = param - step_size * scaled_grad

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(state_ops.assign(param, updated))

        # update delta_accumulator
        new_sq_update_avg = (self.rho * sq_update_avg
                             + (1 - self.rho) * math_ops.square(scaled_grad))
        self.updates.append(
            state_ops.assign(sq_update_avg, new_sq_update_avg))
    return self.updates
def opt_variable(value, dtype=None, name=None, constraint=None):
    """Instantiates a variable and returns it.

    One `ResourceVariable` is created per replica (`num_replicas` comes from
    the enclosing scope), each under its own "device:TPU:<i>" device scope,
    and the set is wrapped in a `ReplicatedVariable`.

    Arguments:
        value: initial value of the variable.
        dtype: variable dtype; defaults to `backend.floatx()`.
        name: optional name given to each per-replica variable.
        constraint: optional constraint passed to each per-replica variable.

    Returns:
        The `ReplicatedVariable`, tracked by the Keras backend.
    """
    if dtype is None:
        dtype = backend.floatx()

    def _create_replica(replica_index):
        # Keras holds the variables in optimizer class instance , so the name
        # does not matter here. ResourceVariable constructor will find a
        # unique name (including name=None) for each replica.
        with ops.device("device:TPU:{}".format(replica_index)):
            return resource_variable_ops.ResourceVariable(
                value,
                dtype=dtypes_module.as_dtype(dtype),
                name=name,
                constraint=constraint)

    replicas = [_create_replica(index) for index in range(num_replicas)]

    base_name = "variable" if name is None else name
    replicated_name = "replicate_{}_{}".format(base_name, ops.uid())
    replicated = ReplicatedVariable(replicated_name, replicas)

    # pylint: disable=protected-access
    if isinstance(value, np.ndarray):
        replicated._keras_shape = value.shape
    elif hasattr(value, "shape"):
        replicated._keras_shape = backend.int_shape(value)
    replicated._uses_learning_phase = False
    backend.track_variable(replicated)
    return replicated
def get_updates(self, loss, params):
    """Build update ops that scale each gradient by its accumulated squares.

    One zero-initialised accumulator is kept per parameter and grows by the
    squared gradient at every step.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: the iteration-counter increment followed, for each
        parameter, by the accumulator assignment and the parameter
        assignment.
    """
    gradients = self.get_gradients(loss, params)
    param_shapes = [K.int_shape(param) for param in params]
    sq_grad_sums = [K.zeros(shape) for shape in param_shapes]
    self.weights = sq_grad_sums
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    step_size = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        iteration = math_ops.cast(self.iterations, K.dtype(self.decay))
        step_size = step_size * (1. / (1. + self.decay * iteration))

    for param, grad, sq_grad_sum in zip(params, gradients, sq_grad_sums):
        # update accumulator
        new_sum = sq_grad_sum + math_ops.square(grad)
        self.updates.append(state_ops.assign(sq_grad_sum, new_sum))
        updated = param - step_size * grad / (K.sqrt(new_sum) + self.epsilon)

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(state_ops.assign(param, updated))
    return self.updates
def _get_shape_tuple(self, init_tuple, tensor, start_idx, int_shape=None):
    """Builds a shape tuple, filling unknown static dims with dynamic ones.

    Arguments:
        init_tuple: a tuple, the leading part of the returned shape.
        tensor: the tensor whose static (and, if needed, dynamic) shape
            provides the trailing part of the returned shape.
        start_idx: int, index of the first dimension of `tensor` to use.
        int_shape: optional static shape to use for the trailing part
            instead of the static shape of `tensor`.

    Returns:
        `init_tuple` followed by the trailing dimensions, where every falsy
        entry (e.g. `None`) is replaced by the matching entry of
        `K.shape(tensor)`.
    """
    if int_shape is None:
        int_shape = K.int_shape(tensor)[start_idx:]

    # Fully specified static shape: no dynamic lookup required.
    if all(int_shape):
        return init_tuple + tuple(int_shape)

    # replace all None in int_shape by K.shape
    dynamic_shape = K.shape(tensor)
    resolved = tuple(
        dim if dim else dynamic_shape[start_idx + offset]
        for offset, dim in enumerate(int_shape))
    return init_tuple + resolved
def call(self, inputs, training=None, mask=None):
    """Apply the wrapped layer to every step along axis 1 of `inputs`.

    With a known batch size the wrapped layer is applied step by step via
    `K.rnn`; otherwise the first two axes are merged, the wrapped layer is
    called once, and the result is reshaped back.

    Arguments:
        inputs: input tensor; axis 0 is treated as the batch axis and axis 1
            as the axis to distribute over.
        training: forwarded to the wrapped layer if its `call` accepts it.
        mask: optional mask, reshaped and forwarded to the wrapped layer if
            its `call` accepts a `mask` argument (reshape branch only).

    Returns:
        The output tensor, flagged with `_uses_learning_phase = True` when
        the wrapped layer's output carried that flag.
    """
    kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
        kwargs['training'] = training
    # Single-element list so the nested `step` can update the flag. The
    # former `global` statement addressed the module namespace instead of
    # this local, so the flag set inside `step` never reached the check at
    # the end of this method (and could raise NameError when read).
    uses_learning_phase = [False]

    input_shape = K.int_shape(inputs)
    if input_shape[0]:
        # batch size matters, use rnn-based implementation
        def step(x, _):
            output = self.layer.call(x, **kwargs)
            if hasattr(output, '_uses_learning_phase'):
                uses_learning_phase[0] = (output._uses_learning_phase or
                                          uses_learning_phase[0])
            return output, []

        _, outputs, _ = K.rnn(
            step,
            inputs,
            initial_states=[],
            input_length=input_shape[1],
            unroll=False)
        y = outputs
    else:
        # No batch size specified, therefore the layer will be able
        # to process batches of any size.
        # We can go with reshape-based implementation for performance.
        input_length = input_shape[1]
        if not input_length:
            input_length = array_ops.shape(inputs)[1]
        inner_input_shape = self._get_shape_tuple((-1,), inputs, 2)
        # Shape: (num_samples * timesteps, ...). And track the
        # transformation in self._input_map.
        input_uid = generic_utils.object_list_uid(inputs)
        inputs = array_ops.reshape(inputs, inner_input_shape)
        self._input_map[input_uid] = inputs
        # (num_samples * timesteps, ...)
        if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
            inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
            kwargs['mask'] = K.reshape(mask, inner_mask_shape)
        y = self.layer.call(inputs, **kwargs)
        if hasattr(y, '_uses_learning_phase'):
            uses_learning_phase[0] = y._uses_learning_phase
        # Shape: (num_samples, timesteps, ...)
        output_shape = self.compute_output_shape(input_shape).as_list()
        output_shape = self._get_shape_tuple(
            (-1, input_length), y, 1, output_shape[2:])
        y = array_ops.reshape(y, output_shape)

    # Apply activity regularizer if any:
    if (hasattr(self.layer, 'activity_regularizer') and
            self.layer.activity_regularizer is not None):
        regularization_loss = self.layer.activity_regularizer(y)
        self.add_loss(regularization_loss, inputs)

    if uses_learning_phase[0]:
        y._uses_learning_phase = True
    return y
def get_updates(self, loss, params):
    """Build update ops from first/second moment estimates of the gradient.

    Keeps `beta_1`- and `beta_2`-weighted averages of the gradient and of
    its square per parameter; with `self.amsgrad` the running maximum of
    the second moment is used in the denominator instead.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: for each parameter the slot assignments followed by
        the parameter assignment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = []

    base_lr = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        iteration = math_ops.cast(self.iterations, K.dtype(self.decay))
        base_lr = base_lr * (1. / (1. + self.decay * iteration))

    # `t` is read only after the iteration counter has been incremented.
    increment = state_ops.assign_add(self.iterations, 1)
    with ops.control_dependencies([increment]):
        t = math_ops.cast(self.iterations, K.floatx())
    lr_t = base_lr * (
        K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
        (1. - math_ops.pow(self.beta_1, t)))

    def _zero_slot(param):
        return K.zeros(K.int_shape(param), dtype=K.dtype(param))

    first_moments = [_zero_slot(param) for param in params]
    second_moments = [_zero_slot(param) for param in params]
    if self.amsgrad:
        max_second_moments = [_zero_slot(param) for param in params]
    else:
        # Size-1 stand-ins keep the weight list layout uniform.
        max_second_moments = [K.zeros(1) for _ in params]
    self.weights = ([self.iterations] + first_moments + second_moments
                    + max_second_moments)

    slots = zip(params, gradients, first_moments, second_moments,
                max_second_moments)
    for param, grad, m, v, vhat in slots:
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(grad)
        if self.amsgrad:
            vhat_t = math_ops.maximum(vhat, v_t)
            updated = param - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(state_ops.assign(vhat, vhat_t))
        else:
            updated = param - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

        self.updates.append(state_ops.assign(m, m_t))
        self.updates.append(state_ops.assign(v, v_t))

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(state_ops.assign(param, updated))
    return self.updates
def call(self, inputs, mask=None, training=None, initial_state=None):
    """Run the GRU over `inputs`.

    Uses the generic `K.rnn` loop around `self.cell.call` when
    `self.could_use_cudnn` is false, and `self._defun_gru_call` otherwise.

    Arguments:
        inputs: input tensor; the time axis is 0 when `self.time_major` is
            set, 1 otherwise.
        mask: optional mask tensor (the first entry is used if it is a list).
        training: forwarded to the cell / the defun call.
        initial_state: optional initial state(s).

    Returns:
        `[output] + states` when `self.return_state` is set,
        `(output, runtime)` when `self._return_runtime` is set, else
        `output`; `output` is the full sequence or the last step depending
        on `self.return_sequences`.
    """
    # GRU does not support constants. Ignore it during process.
    inputs, initial_state, _ = self._process_inputs(
        inputs, initial_state, None)

    if isinstance(mask, list):
        mask = mask[0]

    time_axis = 0 if self.time_major else 1
    timesteps = K.int_shape(inputs)[time_axis]

    if self.could_use_cudnn:
        last_output, outputs, runtime, states = self._defun_gru_call(
            inputs, initial_state, training, mask)
    else:
        # CuDNN does not support masking, fall back to use the normal GRU.
        cell_kwargs = {'training': training}

        def step(cell_inputs, cell_states):
            return self.cell.call(cell_inputs, cell_states, **cell_kwargs)

        last_output, outputs, states = K.rnn(
            step,
            inputs,
            initial_state,
            constants=None,
            go_backwards=self.go_backwards,
            mask=mask,
            unroll=self.unroll,
            input_length=timesteps,
            time_major=self.time_major,
            zero_output_for_mask=self.zero_output_for_mask)
        # This is a dummy tensor for testing purpose.
        runtime = _runtime('unknown')

    if self.stateful:
        state_update = state_ops.assign(self.states[0], states[0])
        self.add_update([state_update], inputs)

    output = outputs if self.return_sequences else last_output

    if self.return_state:
        return [output] + list(states)
    if self._return_runtime:
        return output, runtime
    return output
def get_updates(self, loss, params):
    """Build update ops combining moment estimates with a momentum schedule.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: the `(m_schedule, new value)` pair followed, for
        each parameter, by the two moment assignments and the parameter
        assignment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = []

    # `t` is read only after the iteration counter has been incremented.
    increment = state_ops.assign_add(self.iterations, 1)
    with ops.control_dependencies([increment]):
        t = math_ops.cast(self.iterations, K.floatx())

    # Due to the recommendations in [2], i.e. warming momentum schedule
    decay_now = math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)
    momentum_cache_t = self.beta_1 * (1. - 0.5 * (decay_now))
    decay_next = math_ops.pow(
        K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)
    momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (decay_next))
    m_schedule_new = self.m_schedule * momentum_cache_t
    m_schedule_next = (self.m_schedule * momentum_cache_t
                       * momentum_cache_t_1)
    self.updates.append((self.m_schedule, m_schedule_new))

    param_shapes = [K.int_shape(param) for param in params]
    first_moments = [K.zeros(shape) for shape in param_shapes]
    second_moments = [K.zeros(shape) for shape in param_shapes]

    self.weights = ([self.iterations, self.m_schedule]
                    + first_moments + second_moments)

    slots = zip(params, gradients, first_moments, second_moments)
    for param, grad, m, v in slots:
        # the following equations given in [1]
        g_prime = grad / (1. - m_schedule_new)
        m_t = self.beta_1 * m + (1. - self.beta_1) * grad
        m_t_prime = m_t / (1. - m_schedule_next)
        v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(grad)
        v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
        m_t_bar = ((1. - momentum_cache_t) * g_prime
                   + momentum_cache_t_1 * m_t_prime)

        self.updates.append(state_ops.assign(m, m_t))
        self.updates.append(state_ops.assign(v, v_t))

        updated = param - self.lr * m_t_bar / (
            K.sqrt(v_t_prime) + self.epsilon)

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(state_ops.assign(param, updated))
    return self.updates
def transition_block(x, reduction, name):
    """A transition block.

    Applies batch normalisation, ReLU, a 1x1 convolution whose filter count
    is the input channel count scaled by `reduction`, and 2x2 average
    pooling with stride 2.

    Arguments:
        x: input tensor.
        reduction: float, compression rate at transition layers.
        name: string, block label.

    Returns:
        output tensor for the block.
    """
    channel_axis = 3 if K.image_data_format() == 'channels_last' else 1

    normalized = BatchNormalization(
        axis=channel_axis, epsilon=1.001e-5, name=name + '_bn')(x)
    activated = Activation('relu', name=name + '_relu')(normalized)

    # Number of channels kept after compression.
    kept_filters = int(K.int_shape(activated)[channel_axis] * reduction)
    compressed = Conv2D(
        kept_filters, 1, use_bias=False, name=name + '_conv')(activated)

    return AveragePooling2D(2, strides=2, name=name + '_pool')(compressed)
def get_updates(self, loss, params):
    """Build update ops using a first moment and an infinity-norm estimate.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: for each parameter the two slot assignments followed
        by the parameter assignment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = []

    base_lr = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        iteration = math_ops.cast(self.iterations, K.dtype(self.decay))
        base_lr = base_lr * (1. / (1. + self.decay * iteration))

    # `t` is read only after the iteration counter has been incremented.
    increment = state_ops.assign_add(self.iterations, 1)
    with ops.control_dependencies([increment]):
        t = math_ops.cast(self.iterations, K.floatx())
    lr_t = base_lr / (1. - math_ops.pow(self.beta_1, t))

    param_shapes = [K.int_shape(param) for param in params]
    # zero init of 1st moment
    first_moments = [K.zeros(shape) for shape in param_shapes]
    # zero init of exponentially weighted infinity norm
    inf_norms = [K.zeros(shape) for shape in param_shapes]
    self.weights = [self.iterations] + first_moments + inf_norms

    for param, grad, m, u in zip(params, gradients, first_moments, inf_norms):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad
        u_t = math_ops.maximum(self.beta_2 * u, math_ops.abs(grad))
        updated = param - lr_t * m_t / (u_t + self.epsilon)

        self.updates.append(state_ops.assign(m, m_t))
        self.updates.append(state_ops.assign(u, u_t))

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(state_ops.assign(param, updated))
    return self.updates
def normal_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, units,
                activation, recurrent_activation):
    """Run an LSTM over `inputs` with the generic `K.rnn` loop.

    Arguments:
        inputs: input tensor; axis 1 is used as the number of timesteps.
        init_h: initial memory (hidden) state.
        init_c: initial carry state.
        kernel: weights applied to the step inputs.
        recurrent_kernel: weights applied to the previous memory state.
        bias: bias weights; split in two and only the second half is used.
        units: int, width of each of the four gate slices.
        activation: callable applied to the candidate and the carry state.
        recurrent_activation: callable applied to the three gates.

    Returns:
        `(outputs, new_states, runtime)` where `runtime` is a string
        constant tensor holding 'cpu'.
    """
    timesteps = K.int_shape(inputs)[1]

    def step(cell_inputs, cell_states):
        prev_h, prev_c = cell_states[0], cell_states[1]

        # Only use the second half of the bias weights.
        _, real_bias = array_ops.split(bias, 2)

        projected = K.dot(cell_inputs, kernel)
        projected = projected + K.dot(prev_h, recurrent_kernel)
        projected = K.bias_add(projected, real_bias)

        # Four consecutive slices of width `units`: i, f, candidate, o.
        input_gate = recurrent_activation(projected[:, :units])
        forget_gate = recurrent_activation(projected[:, units:2 * units])
        carry = (forget_gate * prev_c
                 + input_gate * activation(projected[:, 2 * units:3 * units]))
        output_gate = recurrent_activation(projected[:, 3 * units:])

        hidden = output_gate * activation(carry)
        return hidden, [hidden, carry]

    _, outputs, new_states = K.rnn(
        step,
        inputs, [init_h, init_c],
        constants=None,
        unroll=False,
        input_length=timesteps)
    runtime = constant_op.constant('cpu', dtype=dtypes.string, name='runtime')
    return outputs, new_states, runtime
def _eager_metrics_fn(model, outputs, targets):
    """Calculates the metrics for each output of the given model.

    For multi-output models every metric name is prefixed with the output
    name and registered in `model.metrics_names` if not already present.

    Arguments:
        model: The model on which metrics are being calculated.
        outputs: The outputs (predictions) of the given model; a single
            value is wrapped into a list.
        targets: The targets for the given model; a single value is wrapped
            into a list.

    Returns:
        A flat list with the mean of each metric, ordered by model output
        and then by the metrics attached to that output. Metric names are
        not returned.
    """
    if not isinstance(outputs, list):
        outputs = [outputs]
    if not isinstance(targets, list):
        targets = [targets]

    multi_output = len(model.output_names) > 1
    metric_results = []
    for i, model_output in enumerate(model.outputs):
        for nested_output_metric in model.nested_metrics[i]:
            metric_name, metric_fn = _get_metrics_info(
                nested_output_metric, backend.int_shape(model_output),
                model.loss_functions[i])

            if multi_output:
                metric_name = model.output_names[i] + '_' + metric_name
                if metric_name not in model.metrics_names:
                    model.metrics_names.append(metric_name)

            with backend.name_scope(metric_name):
                metric_result = metric_fn(targets[i], outputs[i])
                metric_results.append(backend.mean(metric_result))

    return metric_results
def call(self, inputs, training=None):
    """Normalise `inputs` by mean and standard deviation, then scale/shift.

    Statistics are reduced over every axis except axis 0 and, when set,
    `self.axis`. `self.gamma` / `self.beta` are reshaped so that they
    broadcast along `self.axis`.

    Arguments:
        inputs: input tensor with a statically known rank.
        training: unused.

    Returns:
        The normalised tensor.
    """
    input_shape = K.int_shape(inputs)
    rank = len(input_shape)

    # Remove `self.axis` first and the leading axis second, in that order.
    reduction_axes = list(range(0, rank))
    if self.axis is not None:
        reduction_axes.pop(self.axis)
    reduction_axes.pop(0)

    mean = K.mean(inputs, reduction_axes, keepdims=True)
    stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
    result = (inputs - mean) / stddev

    # Shape that is 1 everywhere except (optionally) along `self.axis`.
    broadcast_shape = [1] * rank
    if self.axis is not None:
        broadcast_shape[self.axis] = input_shape[self.axis]

    if self.scale:
        result = result * K.reshape(self.gamma, broadcast_shape)
    if self.center:
        result = result + K.reshape(self.beta, broadcast_shape)
    return result
def test_get_complex_value_gradients(input_layer, batch_size,
                                     conjugate_gradients):
    """Check optimizer complex-value gradients against a manual computation.

    Builds a `Linear` machine on `input_layer`, takes the squared
    predictions as loss, and compares the gradients produced by
    `ComplexValuesOptimizer` with gradients assembled from the machine's
    manual jacobian on a random batch with entries in {-1, 1}.

    Arguments:
        input_layer: Keras input layer the machine is built on.
        batch_size: number of random samples to evaluate.
        conjugate_gradients: whether conjugated gradients are compared.
    """
    with DEFAULT_TF_GRAPH.as_default():
        machine = Linear(input_layer)
        model = Model(inputs=[input_layer], outputs=machine.predictions)

        # Compare (major, minor) numerically: as strings, e.g. '1.9' would
        # sort after '1.14' and take the wrong branch.
        tf_version = tuple(
            int(part) for part in tensorflow.__version__.split('.')[:2])
        if tf_version >= (1, 14):
            optimizer = ComplexValuesOptimizer(
                model, machine.predictions_jacobian, name='optimizer')
        else:
            optimizer = ComplexValuesOptimizer(
                model, machine.predictions_jacobian)

        loss = Multiply()([machine.predictions, machine.predictions])
        manual_gradients_layer = Lambda(
            lambda x: tensorflow.reshape(
                tensorflow.reduce_sum(2.0 * x[0] * x[1], axis=0),
                machine.dense_layer.kernel.shape))(
                    [machine.predictions, machine.manual_jacobian])
        if conjugate_gradients:
            manual_gradients_layer = Lambda(lambda x: tensorflow.conj(x))(
                manual_gradients_layer)
        manual_gradients_function = K.function(
            inputs=[input_layer], outputs=[manual_gradients_layer])

        complex_value_gradients_layer = Lambda(
            lambda x: optimizer.get_model_parameters_complex_value_gradients(
                tensorflow.real(x),
                conjugate_gradients=conjugate_gradients))(loss)
        complex_value_gradients_function = K.function(
            inputs=[input_layer], outputs=[complex_value_gradients_layer])

        # Random batch with entries in {-1, 1}.
        sample = numpy.random.choice(
            2, (batch_size,) + K.int_shape(input_layer)[1:]) * 2 - 1
        complex_value_gradients = complex_value_gradients_function(
            [sample])[0]
        manual_gradients = manual_gradients_function([sample])[0]

        diff_norm = numpy.linalg.norm(
            complex_value_gradients - manual_gradients)
        gradients_norm = numpy.linalg.norm(manual_gradients)
        assert (diff_norm / gradients_norm) < 1e-5
def normal_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, units):
    """Run an LSTM over `inputs` with the generic `K.rnn` loop.

    The gates use the 'hard_sigmoid' activation and the candidate / carry
    state use 'tanh'.

    Arguments:
        inputs: input tensor; axis 1 is used as the number of timesteps.
        init_h: initial memory (hidden) state.
        init_c: initial carry state.
        kernel: weights applied to the step inputs.
        recurrent_kernel: weights applied to the previous memory state.
        bias: bias weights; split in two and only the second half is used.
        units: int, width of each of the four gate slices.

    Returns:
        `(outputs, new_states, runtime)` where `runtime` is a string
        constant tensor holding 'cpu'.
    """
    timesteps = K.int_shape(inputs)[1]

    def step(cell_inputs, cell_states):
        gate_fn = activations.get('hard_sigmoid')
        state_fn = activations.get('tanh')
        prev_h, prev_c = cell_states[0], cell_states[1]

        # Only use the second half of the bias weights.
        _, real_bias = array_ops.split(bias, 2)

        projected = K.dot(cell_inputs, kernel)
        projected = projected + K.dot(prev_h, recurrent_kernel)
        projected = K.bias_add(projected, real_bias)

        # Four consecutive slices of width `units`: i, f, candidate, o.
        input_gate = gate_fn(projected[:, :units])
        forget_gate = gate_fn(projected[:, units:2 * units])
        carry = (forget_gate * prev_c
                 + input_gate * state_fn(projected[:, 2 * units:3 * units]))
        output_gate = gate_fn(projected[:, 3 * units:])

        hidden = output_gate * state_fn(carry)
        return hidden, [hidden, carry]

    _, outputs, new_states = K.rnn(
        step,
        inputs, [init_h, init_c],
        constants=None,
        unroll=False,
        input_length=timesteps)
    runtime = constant_op.constant('cpu', dtype=dtypes.string, name='runtime')
    return outputs, new_states, runtime
def transition_block(x, reduction, name):
    """A transition block.

    Applies batch normalisation, ReLU, a 1x1 convolution whose filter count
    is the input channel count scaled by `reduction`, and 2x2 average
    pooling with stride 2.

    Arguments:
        x: input tensor.
        reduction: float, compression rate at transition layers.
        name: string, block label.

    Returns:
        output tensor for the block.
    """
    channels_last = backend.image_data_format() == 'channels_last'
    channel_axis = 3 if channels_last else 1

    normalized = layers.BatchNormalization(
        axis=channel_axis, epsilon=1.001e-5, name=name + '_bn')(x)
    activated = layers.Activation('relu', name=name + '_relu')(normalized)

    # Number of channels kept after compression.
    kept_filters = int(backend.int_shape(activated)[channel_axis] * reduction)
    compressed = layers.Conv2D(
        kept_filters, 1, use_bias=False, name=name + '_conv')(activated)

    pooled = layers.AveragePooling2D(
        2, strides=2, name=name + '_pool')(compressed)
    return pooled
def call(self, inputs):
    """Run the convolution with a spectrally normalised kernel.

    The kernel is divided by an estimate of its largest singular value
    (computed either on the convolution itself or on the kernel reshaped to
    2-D), `self.u` is scheduled to be updated with the new estimate vector,
    and the parent `call` is executed with the normalised kernel swapped in.

    Arguments:
        inputs: input tensor, forwarded to the parent `call`.

    Returns:
        The output of the parent `call`.
    """
    if self.conv_singular:
        sigma, u_bar = max_singular_val_for_convolution(
            self.kernel,
            self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations,
            padding=self.padding,
            strides=self.strides,
            data_format=self.data_format)
        kernel_sn = self.kernel / sigma
    else:
        kernel_shape = K.int_shape(self.kernel)
        # Flatten all but the last kernel axis into rows.
        flat_kernel = K.reshape(
            self.kernel,
            (kernel_shape[0] * kernel_shape[1] * kernel_shape[2],
             kernel_shape[3]))
        sigma, u_bar = max_singular_val(
            flat_kernel,
            self.u,
            fully_differentiable=self.fully_diff_spectral,
            ip=self.spectral_iterations)
        kernel_sn = K.reshape(flat_kernel / sigma, kernel_shape)
    self.add_update(K.update(self.u, u_bar))

    # Temporarily swap in the normalised kernel; restore the real one even
    # if the parent call raises, so the layer is never left holding the
    # normalised tensor in place of its weight.
    original_kernel = self.kernel
    self.kernel = kernel_sn
    try:
        return super(SNConv2D, self).call(inputs)
    finally:
        self.kernel = original_kernel
def transition_block(x, reduction, name):
    """A transition block.

    Applies batch normalisation on axis 3, ReLU, an L2-regularised 1x1
    convolution whose filter count is the input channel count scaled by
    `reduction`, and 2x2 average pooling with stride 2.

    # Arguments
        x: input tensor.
        reduction: float, compression rate at transition layers.
        name: string, block label.

    # Returns
        A pair `(conv_before_pool, output)`: the convolution output before
        pooling and the pooled output tensor for the block.
    """
    channel_axis = 3

    normalized = layers.BatchNormalization(
        axis=channel_axis, epsilon=1.001e-5, name=name + '_bn')(x)
    activated = layers.Activation('relu', name=name + '_relu')(normalized)

    conv_before_pool = layers.Conv2D(
        int(backend.int_shape(activated)[channel_axis] * reduction),
        1,
        use_bias=False,
        kernel_regularizer=regularizers.l2(l2_reg),
        name=name + '_conv')(activated)

    pooled = layers.AveragePooling2D(
        2, strides=2, name=name + '_pool')(conv_before_pool)
    return conv_before_pool, pooled
def call(self, inputs, **kwargs):
    """Apply the residual block: stacked convolutions plus a shortcut.

    Each of the `self.depth` convolutions is optionally preceded by a bias;
    all but the last are followed by an (optionally biased) activation. The
    branch is scaled by `self.residual_multiplier`, optionally shifted by
    `self.residual_bias`, added to the (optionally projected) input and
    passed through the activation.

    Arguments:
        inputs: input tensor.
        **kwargs: accepted for API compatibility; unused.

    Returns:
        The output tensor of the block.
    """
    outputs = inputs
    last_index = self.depth - 1
    for i in range(self.depth):
        if self.use_bias:
            outputs = outputs + self.conv_biases[i]
        outputs = self.conv_layers[i](outputs)
        if i < last_index and self.activation is not None:
            if self.use_bias:
                outputs = outputs + self.activation_biases[i]
            outputs = self.activation(outputs)

    if self.use_projection(backend.int_shape(inputs)):
        inputs = self.projection_layer(inputs)

    # x_k+1 = x_k + a*f(x_k) + b
    outputs = outputs * self.residual_multiplier
    if self.use_bias:
        outputs = outputs + self.residual_bias
    outputs = inputs + outputs

    # The loop above treats `activation` as optional; honour that here too
    # instead of failing with a TypeError when it is None.
    if self.activation is not None:
        outputs = self.activation(outputs)
    return outputs
def call(self, x):
    """Feedforward

    Encodes the residual between the targets and the deterministic
    prediction into a latent embedding and adds it (broadcast) to the
    encoding of the inputs.

    :param x: [inputs, targets]
    :return: hidden tensor
    """
    inputs = x[0]
    targets = x[1]
    pred_g = self.g_network_decoder(self.g_network_encoder(inputs))

    # residual
    r = Lambda((lambda pair: pair[1] - pair[0]))([pred_g, targets])

    # Run the conv stack once and reuse the result for both the static
    # shape and the value (it used to be invoked twice).
    phi_features = self.phi_network_conv(r)
    out_dim = K.int_shape(phi_features)  # shape=(?, 64, 7, 7)
    flat_features = K.reshape(
        phi_features,
        (self.opt.batch_size, out_dim[1] * out_dim[2] * out_dim[3]))

    z = self.phi_network_fc(flat_features)
    z = K.reshape(z, (self.opt.batch_size, self.opt.n_latent))
    z_emb = self.encoder_latent(z)
    z_emb = K.reshape(z_emb, (self.opt.batch_size, self.opt.nfeature, 1, 1))

    s = self.f_network_encoder(inputs)
    # tf.math.add : broadcast
    return Lambda((lambda pair: tf.math.add(pair[0], pair[1])))([s, z_emb])
def bregnet_module(X, num_filters, block_name, module_name, alpha=1.0):
    """Build one module: a conv/ELU/conv/BN branch added to a shortcut.

    The shortcut is `residual_connection` applied to `X`; when the last
    dimension of `X` differs from `num_filters`, `X` is first passed through
    a 1x1 convolution with `num_filters` filters.

    Arguments:
        X: input tensor.
        num_filters: number of filters of every convolution in the module.
        block_name: label of the enclosing block, used in layer names.
        module_name: label of this module, used in layer names.
        alpha: `alpha` argument of the ELU activation.

    Returns:
        The sum of the main branch and the shortcut.
    """
    def _conv(kernel_size, layer_name):
        # All convolutions share padding and initializer.
        return Conv2D(filters=num_filters,
                      kernel_size=kernel_size,
                      padding='same',
                      kernel_initializer='he_normal',
                      name=layer_name)

    branch = _conv((3, 3),
                   'conv_{}_{}_1'.format(block_name, module_name))(X)
    branch = ELU(alpha=alpha,
                 name='elu_{}_{}_1'.format(block_name, module_name))(branch)
    branch = _conv((3, 3),
                   'conv_{}_{}_2'.format(block_name, module_name))(branch)
    branch = BatchNormalization(
        name='bn_{}_{}'.format(block_name, module_name))(branch)

    # Match the channel count of the shortcut to the main branch.
    if K.int_shape(X)[-1] != num_filters:
        X = _conv((1, 1),
                  'conv_{}_{}_skip'.format(block_name, module_name))(X)

    shortcut = Lambda(residual_connection)(X)
    return Add()([branch, shortcut])
def call(self, inputs):
    """Apply the kernel function to image patches of `inputs`.

    Patches are extracted with `tf.image.extract_patches`, combined with the
    kernel (reshaped to `(-1, self.filters)`) through `self.kernel_function`,
    then optionally permuted to channels-first, biased and activated.

    Arguments:
        inputs: input tensor.

    Returns:
        The output tensor.
    """
    bias_data_format = conv_utils.convert_data_format(
        self.data_format, self.rank + 2)
    # The second return value (the TF data format) is not needed here.
    inputs, _ = K._preprocess_conv2d_input(inputs, self.data_format)

    patch_sizes = (1,) + K.int_shape(self.kernel)[:2] + (1,)
    patches = tf.image.extract_patches(
        inputs,
        sizes=patch_sizes,
        strides=(1,) + self.strides + (1,),
        rates=(1,) + self.dilation_rate + (1,),
        padding=self.padding.upper(),
    )

    flat_kernel = K.reshape(self.kernel, (-1, self.filters))
    result = self.kernel_function([patches, flat_kernel])

    if self.data_format == 'channels_first':
        result = K.permute_dimensions(result, (0, 3, 1, 2))
    if self.use_bias:
        result = nn.bias_add(
            result, self.bias, data_format=bias_data_format)
    if self.activation is not None:
        result = self.activation(result)
    return result
def get_updates(self, loss, params):
    """Build update ops that divide each gradient by its accumulated norm.

    A zero-initialised accumulator per parameter sums the squared gradients
    seen so far; the step is the gradient divided by the square root of
    that sum plus `self.epsilon`.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: the iteration-counter increment followed, for each
        parameter, by the accumulator assignment and the parameter
        assignment.
    """
    gradients = self.get_gradients(loss, params)
    static_shapes = [K.int_shape(variable) for variable in params]
    history = [K.zeros(static_shape) for static_shape in static_shapes]
    self.weights = history
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    rate = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        step_count = math_ops.cast(self.iterations, K.dtype(self.decay))
        rate = rate * (1. / (1. + self.decay * step_count))

    for variable, gradient, past in zip(params, gradients, history):
        # update accumulator
        accumulated = past + math_ops.square(gradient)
        self.updates.append(state_ops.assign(past, accumulated))
        candidate = variable - rate * gradient / (
            K.sqrt(accumulated) + self.epsilon)

        # Apply constraints.
        if getattr(variable, 'constraint', None) is not None:
            candidate = variable.constraint(candidate)

        self.updates.append(state_ops.assign(variable, candidate))
    return self.updates
def get_updates(self, loss, params):
    """Build update ops using a first moment and an infinity-norm estimate.

    The time step `t` is taken as the current iteration count plus one.

    Arguments:
        loss: tensor to minimise; forwarded to `self.get_gradients`.
        params: variables to update.

    Returns:
        `self.updates`: the iteration-counter increment followed, for each
        parameter, by the two slot assignments and the parameter assignment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    base_lr = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        iteration = math_ops.cast(self.iterations, K.dtype(self.decay))
        base_lr = base_lr * (1. / (1. + self.decay * iteration))

    t = math_ops.cast(self.iterations, K.floatx()) + 1
    lr_t = base_lr / (1. - math_ops.pow(self.beta_1, t))

    param_shapes = [K.int_shape(param) for param in params]
    # zero init of 1st moment
    first_moments = [K.zeros(shape) for shape in param_shapes]
    # zero init of exponentially weighted infinity norm
    inf_norms = [K.zeros(shape) for shape in param_shapes]
    self.weights = [self.iterations] + first_moments + inf_norms

    for param, grad, m, u in zip(params, gradients, first_moments, inf_norms):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad
        u_t = math_ops.maximum(self.beta_2 * u, math_ops.abs(grad))
        updated = param - lr_t * m_t / (u_t + self.epsilon)

        self.updates.append(state_ops.assign(m, m_t))
        self.updates.append(state_ops.assign(u, u_t))

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(state_ops.assign(param, updated))
    return self.updates
def call(self, x, training=None):
    """Interpolate `x` around the tracked moving mean (truncation trick).

    A per-layer coefficient `beta` of shape `(1, num_layers, 1)` is built,
    where `num_layers` is the static size of axis 1 of `x`. It equals
    `self.psi` for layer indices below `self.cutoff` and 1 elsewhere (all
    ones when `self.cutoff` is `None`).

    Arguments:
        x: Input tensor.
            # assumes x is (batch, num_layers, dim) -- TODO confirm
        training: Forwarded to `K.in_train_phase` to choose between the
            training and inference outputs.

    Returns:
        The tensor selected by `K.in_train_phase`.
    """

    def truncation_beta():
        # Per-layer interpolation coefficient; see the docstring above.
        num_layers = K.int_shape(x)[1]
        ones = np.ones(shape=(1, num_layers, 1), dtype=np.float32)
        if self.cutoff is None:
            return ones
        layer_index = np.arange(num_layers)[np.newaxis, :, np.newaxis]
        return Ke.where(layer_index < self.cutoff, self.psi * ones, ones)

    def outputs_inference():
        # Inference only reads the stored moving mean.
        beta = truncation_beta()
        return self.moving_mean + (x - self.moving_mean) * beta

    # Training path: refresh the moving mean from the first layer slice.
    batch_mean = K.mean(x[:, 0], axis=0)
    # NOTE(review): the update op's result is used directly in the output
    # below rather than being registered through `add_update`; the original
    # author marked this as uncertain -- confirm this is intended.
    updated_mean = K.moving_average_update(
        self.moving_mean, batch_mean, self.momentum)

    beta = truncation_beta()
    # NOTE(review): the offset is taken from the updated mean while the
    # difference uses `self.moving_mean`; also marked uncertain upstream.
    training_outputs = updated_mean + (x - self.moving_mean) * beta

    return K.in_train_phase(
        training_outputs, outputs_inference, training=training)
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """`Bidirectional.__call__` implements the same API as the wrapped `RNN`.

    When `initial_state` and/or `constants` are given they are forwarded via
    `kwargs`, and matching `InputSpec`s are recorded on the forward and
    backward layers. If they are Keras tensors, they are also appended to the
    layer inputs and `self.input_spec` is temporarily extended for the
    duration of the parent call.

    Arguments:
        inputs: Input tensor, or a list whose first element is the input and
            whose remaining elements are taken as the initial state.
        initial_state: Optional list of initial state tensors. The first half
            is used for the forward layer's spec and the second half for the
            backward layer's.
        constants: Optional list of constant tensors shared by both layers.
        **kwargs: Forwarded to the parent `__call__`.

    Returns:
        The result of the parent class `__call__`.

    Raises:
        ValueError: If `initial_state` has an odd number of elements, or if
            the additional inputs mix Keras and non-Keras tensors.
    """
    inputs, initial_state, constants = _standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if isinstance(inputs, list):
        if len(inputs) > 1:
            initial_state = inputs[1:]
        inputs = inputs[0]

    if initial_state is None and constants is None:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Applies the same workaround as in `RNN.__call__`
    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
        # Check that `initial_state` can be split into two halves.
        num_states = len(initial_state)
        if num_states % 2 > 0:
            raise ValueError(
                'When passing `initial_state` to a Bidirectional RNN, '
                'the state should be a list containing the states of '
                'the underlying RNNs. '
                'Found: ' + str(initial_state))

        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        state_specs = [
            InputSpec(shape=K.int_shape(state)) for state in initial_state
        ]
        self.forward_layer.state_spec = state_specs[:num_states // 2]
        self.backward_layer.state_spec = state_specs[num_states // 2:]
        additional_specs += state_specs

    if constants is not None:
        kwargs['constants'] = constants
        additional_inputs += constants
        constants_spec = [
            InputSpec(shape=K.int_shape(constant)) for constant in constants
        ]
        self.forward_layer.constants_spec = constants_spec
        self.backward_layer.constants_spec = constants_spec
        additional_specs += constants_spec

        self._num_constants = len(constants)
        self.forward_layer._num_constants = self._num_constants
        self.backward_layer._num_constants = self._num_constants

    # All additional inputs must agree on being Keras tensors or not.
    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state of a Bidirectional'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors'
                             ' (a "Keras tensor" is a tensor that was'
                             ' returned by a Keras layer, or by `Input`)')

    if not is_keras_tensor:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Compute the full input spec, including state
    full_input = [inputs] + additional_inputs
    full_input_spec = self.input_spec + additional_specs

    # Perform the call with temporarily replaced input_spec. The `finally`
    # clause restores the original spec even when the parent call raises, so
    # a failed call does not leave the layer with the extended spec.
    original_input_spec = self.input_spec
    self.input_spec = full_input_spec
    try:
        return super(Bidirectional, self).__call__(full_input, **kwargs)
    finally:
        self.input_spec = original_input_spec
def preprocess_weights_for_loading(layer, weights, original_keras_version=None,
                                   original_backend=None):
    """Preprocess layer weights between different Keras formats.

    Converts layers weights from Keras 1 format to Keras 2 and also weights of
    CuDNN layers in Keras 2.

    Layers are recognised by class name. Wrapper and container layers
    (`Bidirectional`, `TimeDistributed`, `Model`, `Sequential`) are handled by
    recursing into the layers they hold. Several conversions assign into
    `weights` by index, so the list passed in by the caller may be modified.

    Arguments:
        layer: Layer instance.
        weights: List of weights values (Numpy arrays).
        original_keras_version: Keras version for the weights, as a string.
        original_backend: Keras backend the weights were trained with,
            as a string.

    Returns:
        A list of weights values (Numpy arrays).
    """

    def convert_nested_bidirectional(weights):
        """Converts layers nested in `Bidirectional` wrapper.

        This function uses `preprocess_weights_for_loading()` for converting
        layers.

        Arguments:
            weights: List of weights values (Numpy arrays).

        Returns:
            A list of weights values (Numpy arrays).
        """
        # The first half of the list goes to the forward layer and the rest
        # to the backward layer.
        num_weights_per_layer = len(weights) // 2
        forward_weights = preprocess_weights_for_loading(
            layer.forward_layer, weights[:num_weights_per_layer],
            original_keras_version, original_backend)
        backward_weights = preprocess_weights_for_loading(
            layer.backward_layer, weights[num_weights_per_layer:],
            original_keras_version, original_backend)
        return forward_weights + backward_weights

    def convert_nested_time_distributed(weights):
        """Converts layers nested in `TimeDistributed` wrapper.

        This function uses `preprocess_weights_for_loading()` for converting
        nested layers.

        Arguments:
            weights: List of weights values (Numpy arrays).

        Returns:
            A list of weights values (Numpy arrays).
        """
        return preprocess_weights_for_loading(
            layer.layer, weights, original_keras_version, original_backend)

    def convert_nested_model(weights):
        """Converts layers nested in `Model` or `Sequential`.

        This function uses `preprocess_weights_for_loading()` for converting
        nested layers.

        Arguments:
            weights: List of weights values (Numpy arrays).

        Returns:
            A list of weights values (Numpy arrays).
        """
        new_weights = []
        # trainable weights: consume the list front-to-back, one sublayer at
        # a time, in `layer.layers` order.
        for sublayer in layer.layers:
            num_weights = len(sublayer.trainable_weights)
            if num_weights > 0:
                new_weights.extend(preprocess_weights_for_loading(
                    layer=sublayer,
                    weights=weights[:num_weights],
                    original_keras_version=original_keras_version,
                    original_backend=original_backend))
                weights = weights[num_weights:]

        # non-trainable weights: whatever remains is consumed the same way,
        # counting each sublayer's weights that are not trainable.
        for sublayer in layer.layers:
            num_weights = len([l for l in sublayer.weights
                               if l not in sublayer.trainable_weights])
            if num_weights > 0:
                new_weights.extend(preprocess_weights_for_loading(
                    layer=sublayer,
                    weights=weights[:num_weights],
                    original_keras_version=original_keras_version,
                    original_backend=original_backend))
                weights = weights[num_weights:]
        return new_weights

    # Convert layers nested in Bidirectional/TimeDistributed/Model/Sequential.
    # These transformations run both for the Keras 1->2 conversion
    # and for the conversion of CuDNN layers.
    if layer.__class__.__name__ == 'Bidirectional':
        weights = convert_nested_bidirectional(weights)
    if layer.__class__.__name__ == 'TimeDistributed':
        weights = convert_nested_time_distributed(weights)
    elif layer.__class__.__name__ in ['Model', 'Sequential']:
        weights = convert_nested_model(weights)

    if original_keras_version == '1':
        # NOTE(review): for a `TimeDistributed` wrapper the inner layer's
        # weights were already passed through this function above with the
        # same version/backend arguments; this applies the conversion a
        # second time -- confirm that is intended.
        if layer.__class__.__name__ == 'TimeDistributed':
            weights = preprocess_weights_for_loading(
                layer.layer, weights, original_keras_version, original_backend)

        if layer.__class__.__name__ == 'Conv1D':
            shape = weights[0].shape
            # Handle Keras 1.1 format
            if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters:
                # Legacy shape:
                # (filters, input_dim, filter_length, 1)
                assert shape[0] == layer.filters and shape[2:] == (layer.kernel_size[0], 1)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
            # Drop the singleton second axis of the 4D kernel.
            weights[0] = weights[0][:, 0, :, :]

        if layer.__class__.__name__ == 'Conv2D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

        if layer.__class__.__name__ == 'Conv2DTranspose':
            if layer.data_format == 'channels_last':
                # old: (kernel_rows, kernel_cols, stack_size, filters)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (0, 1, 3, 2))
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (2, 3, 0, 1))

        if layer.__class__.__name__ == 'Conv3D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, ...)
                # new: (..., stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))

        if layer.__class__.__name__ == 'GRU':
            if len(weights) == 9:
                # Nine separate arrays are merged into kernel / recurrent
                # kernel / bias by concatenating every third entry along the
                # last axis.
                kernel = np.concatenate([weights[0], weights[3], weights[6]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[4], weights[7]], axis=-1)
                bias = np.concatenate([weights[2], weights[5], weights[8]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'LSTM':
            if len(weights) == 12:
                # old: i, c, f, o
                # new: i, f, c, o
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'ConvLSTM2D':
            if len(weights) == 12:
                # Same index reordering as the LSTM case above.
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                if layer.data_format == 'channels_first':
                    # old: (filters, stack_size, kernel_rows, kernel_cols)
                    # new: (kernel_rows, kernel_cols, stack_size, filters)
                    kernel = np.transpose(kernel, (2, 3, 1, 0))
                    recurrent_kernel = np.transpose(recurrent_kernel, (2, 3, 1, 0))
                weights = [kernel, recurrent_kernel, bias]

    # Backend-related kernel fixes; these run regardless of the original
    # Keras version.
    conv_layers = ['Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D']
    if layer.__class__.__name__ in conv_layers:
        if original_backend == 'theano':
            weights[0] = conv_utils.convert_kernel(weights[0])
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = conv_utils.convert_kernel(weights[1])
        # If the stored kernel's shape still differs from the layer's kernel
        # shape, transpose it with the permutation (3, 2, 0, 1).
        if K.int_shape(layer.weights[0]) != weights[0].shape:
            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))

    # convert CuDNN layers
    return _convert_rnn_weights(layer, weights)
def preprocess_weights_for_loading(layer,
                                   weights,
                                   original_keras_version=None,
                                   original_backend=None):
    """Preprocess layer weights between different Keras formats.

    Converts layers weights from Keras 1 format to Keras 2 and also weights of
    CuDNN layers in Keras 2.

    Layers are recognised by class name. Wrapper and container layers
    (`Bidirectional`, `TimeDistributed`, `Model`, `Sequential`) are handled by
    recursing into the layers they hold. Several conversions assign into
    `weights` by index, so the list passed in by the caller may be modified.

    Arguments:
        layer: Layer instance.
        weights: List of weights values (Numpy arrays).
        original_keras_version: Keras version for the weights, as a string.
        original_backend: Keras backend the weights were trained with,
            as a string.

    Returns:
        A list of weights values (Numpy arrays).
    """

    def convert_nested_bidirectional(weights):
        """Converts layers nested in `Bidirectional` wrapper.

        This function uses `preprocess_weights_for_loading()` for converting
        layers.

        Arguments:
            weights: List of weights values (Numpy arrays).

        Returns:
            A list of weights values (Numpy arrays).
        """
        # The first half of the list goes to the forward layer and the rest
        # to the backward layer.
        num_weights_per_layer = len(weights) // 2
        forward_weights = preprocess_weights_for_loading(
            layer.forward_layer, weights[:num_weights_per_layer],
            original_keras_version, original_backend)
        backward_weights = preprocess_weights_for_loading(
            layer.backward_layer, weights[num_weights_per_layer:],
            original_keras_version, original_backend)
        return forward_weights + backward_weights

    def convert_nested_time_distributed(weights):
        """Converts layers nested in `TimeDistributed` wrapper.

        This function uses `preprocess_weights_for_loading()` for converting
        nested layers.

        Arguments:
            weights: List of weights values (Numpy arrays).

        Returns:
            A list of weights values (Numpy arrays).
        """
        return preprocess_weights_for_loading(layer.layer, weights,
                                              original_keras_version,
                                              original_backend)

    def convert_nested_model(weights):
        """Converts layers nested in `Model` or `Sequential`.

        This function uses `preprocess_weights_for_loading()` for converting
        nested layers.

        Arguments:
            weights: List of weights values (Numpy arrays).

        Returns:
            A list of weights values (Numpy arrays).
        """
        # The incoming list is split at `len(layer.trainable_weights)`:
        # trainable values first, non-trainable values after.
        trainable_weights = weights[:len(layer.trainable_weights)]
        non_trainable_weights = weights[len(layer.trainable_weights):]

        new_trainable_weights = []
        new_non_trainable_weights = []

        for sublayer in layer.layers:
            num_trainable_weights = len(sublayer.trainable_weights)
            num_non_trainable_weights = len(sublayer.non_trainable_weights)
            if sublayer.weights:
                # Convert the sublayer's trainable and non-trainable values
                # together, then split the result back at the trainable count.
                preprocessed = preprocess_weights_for_loading(
                    layer=sublayer,
                    weights=(
                        trainable_weights[:num_trainable_weights] +
                        non_trainable_weights[:num_non_trainable_weights]),
                    original_keras_version=original_keras_version,
                    original_backend=original_backend)
                new_trainable_weights.extend(
                    preprocessed[:num_trainable_weights])
                new_non_trainable_weights.extend(
                    preprocessed[num_trainable_weights:])

                # Advance both queues past the values just consumed.
                trainable_weights = trainable_weights[num_trainable_weights:]
                non_trainable_weights = non_trainable_weights[
                    num_non_trainable_weights:]

        return new_trainable_weights + new_non_trainable_weights

    # Convert layers nested in Bidirectional/TimeDistributed/Model/Sequential.
    # These transformations run both for the Keras 1->2 conversion
    # and for the conversion of CuDNN layers.
    if layer.__class__.__name__ == 'Bidirectional':
        weights = convert_nested_bidirectional(weights)
    if layer.__class__.__name__ == 'TimeDistributed':
        weights = convert_nested_time_distributed(weights)
    elif layer.__class__.__name__ in ['Model', 'Sequential']:
        weights = convert_nested_model(weights)

    if original_keras_version == '1':
        # NOTE(review): for a `TimeDistributed` wrapper the inner layer's
        # weights were already passed through this function above with the
        # same version/backend arguments; this applies the conversion a
        # second time -- confirm that is intended.
        if layer.__class__.__name__ == 'TimeDistributed':
            weights = preprocess_weights_for_loading(layer.layer, weights,
                                                     original_keras_version,
                                                     original_backend)

        if layer.__class__.__name__ == 'Conv1D':
            shape = weights[0].shape
            # Handle Keras 1.1 format
            if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters:
                # Legacy shape:
                # (filters, input_dim, filter_length, 1)
                assert shape[0] == layer.filters and shape[2:] == (
                    layer.kernel_size[0], 1)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
            # Drop the singleton second axis of the 4D kernel.
            weights[0] = weights[0][:, 0, :, :]

        if layer.__class__.__name__ == 'Conv2D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

        if layer.__class__.__name__ == 'Conv2DTranspose':
            if layer.data_format == 'channels_last':
                # old: (kernel_rows, kernel_cols, stack_size, filters)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (0, 1, 3, 2))
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (2, 3, 0, 1))

        if layer.__class__.__name__ == 'Conv3D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, ...)
                # new: (..., stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))

        if layer.__class__.__name__ == 'GRU':
            if len(weights) == 9:
                # Nine separate arrays are merged into kernel / recurrent
                # kernel / bias by concatenating every third entry along the
                # last axis.
                kernel = np.concatenate([weights[0], weights[3], weights[6]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[4], weights[7]], axis=-1)
                bias = np.concatenate([weights[2], weights[5], weights[8]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'LSTM':
            if len(weights) == 12:
                # old: i, c, f, o
                # new: i, f, c, o
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'ConvLSTM2D':
            if len(weights) == 12:
                # Same index reordering as the LSTM case above.
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                if layer.data_format == 'channels_first':
                    # old: (filters, stack_size, kernel_rows, kernel_cols)
                    # new: (kernel_rows, kernel_cols, stack_size, filters)
                    kernel = np.transpose(kernel, (2, 3, 1, 0))
                    recurrent_kernel = np.transpose(recurrent_kernel, (2, 3, 1, 0))
                weights = [kernel, recurrent_kernel, bias]

    # Backend-related kernel fixes; these run regardless of the original
    # Keras version.
    conv_layers = [
        'Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D'
    ]
    if layer.__class__.__name__ in conv_layers:
        if original_backend == 'theano':
            weights[0] = conv_utils.convert_kernel(weights[0])
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = conv_utils.convert_kernel(weights[1])
        # If the stored kernel's shape still differs from the layer's kernel
        # shape, transpose it with the permutation (3, 2, 0, 1).
        if K.int_shape(layer.weights[0]) != weights[0].shape:
            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))

    # convert CuDNN layers
    return _convert_rnn_weights(layer, weights)
def call(self, inputs, training=None, mask=None):
    """Apply the wrapped layer to every step along axis 1 of `inputs`.

    Three strategies are used:

    * static batch size known and `self._always_use_reshape` false: the layer
      is stepped over the sequence with `K.rnn` (ragged inputs are converted
      first and the result converted back);
    * `RaggedTensor` input otherwise: the layer runs on `inputs.values` and
      the result is re-wrapped with the first nested row lengths;
    * anything else: the first two axes are merged with a reshape, the layer
      is applied once, and the result is reshaped back.

    Arguments:
        inputs: Input tensor (dense or ragged).
        training: Forwarded to the wrapped layer only if its `call` accepts a
            `training` argument.
        mask: Optional mask. Passed to `K.rnn` in the stepping path; in the
            reshape path it is reshaped and forwarded only if the wrapped
            layer's `call` accepts a `mask` argument.

    Returns:
        The wrapped layer's output for all steps.
    """
    layer_kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
        layer_kwargs['training'] = training

    static_shape = K.int_shape(inputs)

    if static_shape[0] and not self._always_use_reshape:
        # batch size matters, use rnn-based implementation
        inputs, row_lengths = K.convert_inputs_if_ragged(inputs)
        ragged = row_lengths is not None

        def apply_layer(step_input, _):
            return self.layer(step_input, **layer_kwargs), []

        sequence_length = row_lengths[0] if ragged else static_shape[1]
        _, stepped_outputs, _ = K.rnn(
            apply_layer,
            inputs,
            initial_states=[],
            input_length=sequence_length,
            mask=mask,
            unroll=False)
        return K.maybe_convert_to_ragged(ragged, stepped_outputs, row_lengths)

    # No batch size specified, therefore the layer will be able
    # to process batches of any size.
    # We can go with reshape-based implementation for performance.
    if isinstance(inputs, ragged_tensor.RaggedTensor):
        flat_result = self.layer(inputs.values, **layer_kwargs)
        return ragged_tensor.RaggedTensor.from_row_lengths(
            flat_result, inputs.nested_row_lengths()[0])

    # Fall back to the dynamic length when the static one is missing.
    input_length = static_shape[1] or array_ops.shape(inputs)[1]

    # Shape: (num_samples * timesteps, ...).
    merged_shape = self._get_shape_tuple((-1,), inputs, 2)
    inputs = array_ops.reshape(inputs, merged_shape)

    if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
        merged_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        layer_kwargs['mask'] = K.reshape(mask, merged_mask_shape)

    result = self.layer(inputs, **layer_kwargs)

    # Shape: (num_samples, timesteps, ...)
    static_output_shape = self.compute_output_shape(static_shape).as_list()
    restored_shape = self._get_shape_tuple(
        (-1, input_length), result, 1, static_output_shape[2:])
    result = array_ops.reshape(result, restored_shape)

    if not context.executing_eagerly():
        # Set the static shape for the result since it might be lost during
        # array_ops reshape, eg, some `None` dim in the result could be
        # inferred.
        result.set_shape(self.compute_output_shape(static_shape))

    return result
def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
    """Mean of `in_top_k` hits for integer targets.

    Arguments:
        y_true: Target tensor. When it has the same rank as `y_pred` its last
            axis is squeezed away first.
            # presumably (num_samples,) or (num_samples, 1) -- TODO confirm
        y_pred: Prediction tensor passed to `nn.in_top_k`.
        k: Number of top predictions a target may fall into. Defaults to 5.

    Returns:
        `K.mean` over the last axis of the `nn.in_top_k` result.
    """
    true_rank = len(K.int_shape(y_true))
    pred_rank = len(K.int_shape(y_pred))

    # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
    if true_rank == pred_rank:
        y_true = array_ops.squeeze(y_true, [-1])

    labels = math_ops.cast(y_true, 'int32')
    hits = nn.in_top_k(y_pred, labels, k)
    return K.mean(hits, axis=-1)
def standard_gru(inputs, init_h, kernel, recurrent_kernel, bias, activation,
                 recurrent_activation, mask, time_major, go_backwards):
    """GRU with standard kernel implementation.

    The layer weights are passed in as plain function parameters so that the
    signature lines up with the CuDNN counterpart. The step function is the
    bare GRU recurrence with no dropout; the recurrence is driven by `K.rnn`,
    to which `mask` is forwarded.

    Arguments:
        inputs: Input tensor of GRU layer.
        init_h: Initial state tensor for the cell output.
        kernel: Weights for cell kernel.
        recurrent_kernel: Weights for cell recurrent kernel.
        bias: Weights for cell kernel bias and recurrent bias. It is unstacked
            into exactly two parts: the input bias and the recurrent bias.
        activation: Activation function applied to the candidate state.
        recurrent_activation: Activation function applied to the update and
            reset gates.
        mask: Binary tensor of shape `(samples, timesteps)` indicating whether
            a given timestep should be masked.
        time_major: Boolean, whether the inputs are in the format of
            [time, batch, feature] or [batch, time, feature].
        go_backwards: Boolean. If True, process the input sequence backwards
            and return the reversed sequence.

    Returns:
        last_output: output tensor for the last timestep.
            # presumably [batch, units] -- TODO confirm
        outputs: output tensor for all timesteps.
            # presumably [batch, time, units] unless time_major -- TODO confirm
        state_0: the cell output, the first state returned by `K.rnn`.
        runtime: the value of `_runtime('cpu')`, indicating the runtime
            hardware. This value is for testing purposes.
            # NOTE(review): the earlier docstring said it "should be used by
            # user"; presumably "should not" was meant -- confirm.
    """
    static_shape = K.int_shape(inputs)
    timesteps = static_shape[0 if time_major else 1]

    input_bias, recurrent_bias = array_ops.unstack(bias)

    def step(cell_inputs, cell_states):
        """Step function that will be used by Keras RNN backend."""
        previous_h = cell_states[0]

        # Project the inputs through all three gate matrices at once.
        input_proj = K.bias_add(K.dot(cell_inputs, kernel), input_bias)
        in_z, in_r, in_h = array_ops.split(input_proj, 3, axis=1)

        # Project the previous hidden state the same way.
        hidden_proj = K.bias_add(
            K.dot(previous_h, recurrent_kernel), recurrent_bias)
        rec_z, rec_r, rec_h = array_ops.split(hidden_proj, 3, axis=1)

        update_gate = recurrent_activation(in_z + rec_z)
        reset_gate = recurrent_activation(in_r + rec_r)
        candidate = activation(in_h + reset_gate * rec_h)

        # Blend previous and candidate state with the update gate.
        new_h = update_gate * previous_h + (1 - update_gate) * candidate
        return new_h, [new_h]

    last_output, outputs, final_states = K.rnn(
        step,
        inputs,
        [init_h],
        constants=None,
        unroll=False,
        time_major=time_major,
        mask=mask,
        go_backwards=go_backwards,
        input_length=timesteps)

    return last_output, outputs, final_states[0], _runtime('cpu')
def call(self, inputs, mask=None, training=None, initial_state=None):
    """Run the LSTM, choosing between the generic and CuDNN-capable paths.

    When `self.could_use_cudnn` is false the cell is stepped with `K.rnn`.
    Otherwise the standard and CuDNN implementations are set up with matching
    tensor arguments: in eager mode one of them is called directly, and in
    graph mode the standard implementation is called while the CuDNN one is
    registered as an alternative under the same API name.

    Arguments:
        inputs: Input tensor(s), normalised by `self._process_inputs`.
        mask: Optional mask; if a list, only its first element is used.
        training: Forwarded to the cell / dropout-mask generation.
        initial_state: Optional initial state, normalised by
            `self._process_inputs`.

    Returns:
        `[output] + states` if `self.return_state`; else `(output, runtime)`
        if `self.return_runtime`; else `output`. `output` is the full
        sequence when `self.return_sequences` is set, otherwise the last
        output.
    """
    # LSTM does not support constants. Ignore it during process.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
        mask = mask[0]

    static_shape = K.int_shape(inputs)
    timesteps = static_shape[0 if self.time_major else 1]

    if self.could_use_cudnn:
        # Use the new defun approach for backend implementation swap.
        # Note that different implementations need to have same function
        # signature, eg, the tensor parameters need to have same shape and
        # dtypes. Since the CuDNN has an extra set of bias, those bias will be
        # passed to both normal and CuDNN implementations.
        self.reset_dropout_mask()
        dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
        if dropout_mask is not None:
            inputs *= dropout_mask[0]

        cudnn_kwargs = {
            'inputs': inputs,
            'init_h': initial_state[0],
            'init_c': initial_state[1],
            'kernel': self.cell.kernel,
            'recurrent_kernel': self.cell.recurrent_kernel,
            'bias': self.cell.bias,
            'mask': mask,
            'time_major': self.time_major,
            'go_backwards': self.go_backwards
        }
        # The standard implementation takes the same arguments plus the two
        # activation functions.
        standard_kwargs = dict(
            cudnn_kwargs,
            activation=self.activation,
            recurrent_activation=self.recurrent_activation)

        if context.executing_eagerly():
            device_type = _get_context_device_type()
            # Either user specified GPU or unspecified but GPU is available.
            gpu_selected = (
                device_type == _GPU_DEVICE_NAME or
                (device_type is None and context.num_gpus() > 0))
            can_use_gpu = gpu_selected and (
                mask is None or
                is_sequence_right_padded(mask, self.time_major))

            # Under eager context, check the device placement and prefer the
            # GPU implementation when GPU is available.
            if can_use_gpu:
                lstm_impl, impl_kwargs = cudnn_lstm, cudnn_kwargs
            else:
                lstm_impl, impl_kwargs = standard_lstm, standard_kwargs
            last_output, outputs, new_h, new_c, runtime = lstm_impl(
                **impl_kwargs)
        else:
            # Each time a `tf.function` is called, we will give it a unique
            # identifiable API name, so that Grappler won't get confused when
            # it sees multiple LSTM layers added into same graph, and it will
            # be able to pair up the different implementations across them.
            api_name = 'lstm_' + str(uuid.uuid4())
            defun_standard_lstm = _generate_defun_backend(
                api_name, _CPU_DEVICE_NAME, standard_lstm)
            defun_cudnn_lstm = _generate_defun_backend(
                api_name, _GPU_DEVICE_NAME, cudnn_lstm)

            # Call the normal LSTM impl and register the CuDNN impl function.
            # The grappler will kick in during session execution to optimize
            # the graph.
            last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(
                **standard_kwargs)

            def register_cudnn_defun():
                function.register(defun_cudnn_lstm, **cudnn_kwargs)
                # return some dummy value since the tf.cond require some
                # return value.
                return 0

            if mask is None:
                register_cudnn_defun()
            else:
                # Only when seq_right_padded=True, CuDNN kernel can support
                # that properly.
                control_flow_ops.cond(
                    is_sequence_right_padded(mask, self.time_major),
                    true_fn=register_cudnn_defun,
                    false_fn=lambda: 0)

        states = [new_h, new_c]
    else:
        # Fall back to use the normal LSTM.
        cell_kwargs = {'training': training}

        def step(cell_inputs, cell_states):
            return self.cell.call(cell_inputs, cell_states, **cell_kwargs)

        last_output, outputs, states = K.rnn(
            step,
            inputs,
            initial_state,
            constants=None,
            go_backwards=self.go_backwards,
            mask=mask,
            unroll=self.unroll,
            input_length=timesteps,
            time_major=self.time_major,
            zero_output_for_mask=self.zero_output_for_mask)
        runtime = _runtime('unknown')

    if self.stateful:
        # Write each new state back into the matching layer state variable.
        updates = [
            state_ops.assign(self.states[index], new_state)
            for index, new_state in enumerate(states)
        ]
        self.add_update(updates, inputs)

    output = outputs if self.return_sequences else last_output

    if self.return_state:
        return [output] + list(states)
    if self.return_runtime:
        return output, runtime
    return output
def compute_mask(self, inputs, mask=None):
    """Computes an output mask tensor for the TimeDistributed layer.

    This is based on the inputs, mask, and the inner layer.

    If the batch size is statically known and the layer is not forced to use
    the reshape approach, or if any input is a ragged tensor:
      Simply return the input `mask`. (An rnn-based implementation with more
      than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise `compute_mask` of the inner layer is called on the inputs (and
    mask) reshaped with their first two dimensions merged.
      If the inner layer's output mask is not `None`:
      (E.g., inner layer is Masking or RNN)
        Reshape it back to [batch size, timesteps, ...] and return it.
      If the inner layer's output mask is `None` and the input mask is not
      `None`: (E.g., inner layer is Dense)
        Reduce the input mask to 2 dimensions and return it.
      Otherwise (both the output mask and the input mask are `None`):
      (E.g., `mask` is not used at all)
        Return `None`.

    Args:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
        input to TimeDistributed. If static shape information is available
        for "batch size" (and the reshape approach is not forced), `mask` is
        returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
        input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either None (no masking), or a [batch size, timesteps, ...] Tensor with
      an output mask for the TimeDistributed layer. When the inner layer
      produced no mask, this is the input mask reduced to two dimensions.
      When it produced one, the trailing dimensions come from that mask's
      static shape, or from the input mask's shape / the computed output
      shape when the static shape is unavailable.
    """
    # cases need to call the layer.compute_mask when input_mask is None:
    # Masking layer and Embedding layer with mask_zero
    input_shape = nest.map_structure(
        lambda x: tensor_shape.TensorShape(backend.int_shape(x)), inputs)
    input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
    # The static batch size is the first flattened entry of the converted
    # shapes.
    batch_size = tf_utils.convert_shapes(input_shape)
    batch_size = nest.flatten(batch_size)[0]
    is_ragged_input = nest.map_structure(
        lambda x: isinstance(x, ragged_tensor.RaggedTensor), inputs)
    is_ragged_input = generic_utils.to_list(nest.flatten(is_ragged_input))
    if batch_size and not self._always_use_reshape or any(is_ragged_input):
        # The batch size matters (and the reshape approach is not forced), or
        # the input is a ragged tensor: the mask is not handled explicitly
        # and is passed through unchanged.
        return mask

    inner_mask = mask
    if inner_mask is not None:
        # Merge the first two dimensions of the mask, mirroring the reshape
        # applied to the inputs below.
        inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
        inner_mask = backend.reshape(inner_mask, inner_mask_shape)
    inner_input_shape = nest.map_structure(
        lambda tensor: self._get_shape_tuple((-1, ), tensor, 2), inputs)
    inner_inputs = nest.map_structure_up_to(inputs, array_ops.reshape, inputs,
                                            inner_input_shape)
    output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
    if output_mask is None:
        if mask is None:
            return None
        # input_mask is not None, and output_mask is None:
        # we should return a not-None mask
        output_mask = mask
        # Collapse every dimension beyond the second with a logical "any".
        for _ in range(2, len(backend.int_shape(mask))):
            output_mask = backend.any(output_mask, axis=-1)
    else:
        # output_mask is not None. We need to reshape it
        input_length = tf_utils.convert_shapes(input_shape)
        input_length = nest.flatten(input_length)[1]
        if not input_length:
            # Static length unknown: fall back to the dynamic shape.
            input_length = nest.map_structure(
                lambda x: backend.shape(x)[1], inputs)
            input_length = nest.flatten(input_length)[0]
        output_mask_int_shape = backend.int_shape(output_mask)
        if output_mask_int_shape is None:
            # if the output_mask does not have a static shape,
            # its shape must be the same as mask's
            if mask is not None:
                output_mask_int_shape = backend.int_shape(mask)
            else:
                input_shape = generic_utils.to_list(
                    nest.flatten(input_shape))[0]
                # NOTE(review): this calls `compute_output_shape` on the
                # `backend` module rather than on `self` -- confirm that
                # `backend` actually provides such a function.
                output_mask_int_shape = backend.compute_output_shape(
                    input_shape)[:-1]
        output_mask_shape = self._get_shape_tuple(
            (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
        output_mask = backend.reshape(output_mask, output_mask_shape)
    return output_mask
def _adjust_block(p, ip, filters, block_id=None):
    """Adjusts the input `previous path` to match the shape of the `input`.

    Used in situations where the output number of filters needs to be changed.

    * If `p` is `None`, `ip` is returned.
    * If the image dimension of `p` differs from that of `ip`, `p` is reduced
      through two stride-2 pooling + 1x1 convolution branches (the second one
      shifted by one pixel via padding and cropping) that are concatenated
      and batch-normalised.
    * Otherwise, if the channel dimension of `p` differs from `filters`, `p`
      is projected with a 1x1 convolution and batch-normalised.
    * Otherwise `p` is returned unchanged.

    Arguments:
        p: Input tensor which needs to be modified
        ip: Input tensor whose shape needs to be matched
        filters: Number of output filters to be matched
        block_id: String block_id

    Returns:
        Adjusted Keras tensor
    """
    channels_first = K.image_data_format() == 'channels_first'
    channel_dim, img_dim = (1, 2) if channels_first else (-1, -2)

    ip_shape = K.int_shape(ip)
    # Only consulted in the branches below where `p` is known to be set.
    p_shape = K.int_shape(p) if p is not None else None

    with K.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p_shape[img_dim] != ip_shape[img_dim]:
            with K.name_scope('adjust_reduction_block_%s' % block_id):
                p = Activation('relu', name='adjust_relu_1_%s' % block_id)(p)

                # First branch: subsample, then 1x1 convolution.
                pool_a = AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_1_%s' % block_id)
                conv_a = Conv2D(
                    filters // 2, (1, 1),
                    padding='same',
                    use_bias=False,
                    name='adjust_conv_1_%s' % block_id,
                    kernel_initializer='he_normal')
                branch_a = conv_a(pool_a(p))

                # Second branch: pad bottom/right and crop top/left by one
                # pixel before subsampling.
                shifted = ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                shifted = Cropping2D(cropping=((1, 0), (1, 0)))(shifted)
                pool_b = AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_2_%s' % block_id)
                conv_b = Conv2D(
                    filters // 2, (1, 1),
                    padding='same',
                    use_bias=False,
                    name='adjust_conv_2_%s' % block_id,
                    kernel_initializer='he_normal')
                branch_b = conv_b(pool_b(shifted))

                p = concatenate([branch_a, branch_b], axis=channel_dim)
                p = BatchNormalization(
                    axis=channel_dim,
                    momentum=0.9997,
                    epsilon=1e-3,
                    name='adjust_bn_%s' % block_id)(p)

        elif p_shape[channel_dim] != filters:
            with K.name_scope('adjust_projection_block_%s' % block_id):
                p = Activation('relu')(p)
                projection = Conv2D(
                    filters, (1, 1),
                    strides=(1, 1),
                    padding='same',
                    name='adjust_conv_projection_%s' % block_id,
                    use_bias=False,
                    kernel_initializer='he_normal')
                p = projection(p)
                p = BatchNormalization(
                    axis=channel_dim,
                    momentum=0.9997,
                    epsilon=1e-3,
                    name='adjust_bn_%s' % block_id)(p)

    return p
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """`Bidirectional.__call__` implements the same API as the wrapped `RNN`.

    When `initial_state` and/or `constants` are Keras tensors they are
    appended to the layer inputs, and `input_spec` is temporarily replaced so
    that the parent `__call__` accepts the extended input list.

    Arguments:
        inputs: Input tensor, or a list whose first element is the input and
            whose remaining elements are used as `initial_state`.
        initial_state: Optional list of state tensors. Must have an even
            length; the first half is assigned to the forward layer and the
            second half to the backward layer.
        constants: Optional list of constant tensors shared by both layers.
        **kwargs: Forwarded to the parent `__call__`.

    Returns:
        The output of the parent `__call__`.

    Raises:
        ValueError: If `initial_state` has an odd number of elements, or if
            the additional inputs mix Keras and non-Keras tensors.
    """
    inputs, initial_state, constants = _standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if isinstance(inputs, list):
        if len(inputs) > 1:
            initial_state = inputs[1:]
        inputs = inputs[0]

    if initial_state is None and constants is None:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Applies the same workaround as in `RNN.__call__`
    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
        # Check if `initial_state` can be split into two halves
        num_states = len(initial_state)
        if num_states % 2 > 0:
            raise ValueError(
                'When passing `initial_state` to a Bidirectional RNN, '
                'the state should be a list containing the states of '
                'the underlying RNNs. '
                'Found: ' + str(initial_state))

        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        state_specs = [InputSpec(shape=K.int_shape(state))
                       for state in initial_state]
        self.forward_layer.state_spec = state_specs[:num_states // 2]
        self.backward_layer.state_spec = state_specs[num_states // 2:]
        additional_specs += state_specs
    if constants is not None:
        kwargs['constants'] = constants
        additional_inputs += constants
        constants_spec = [InputSpec(shape=K.int_shape(constant))
                          for constant in constants]
        self.forward_layer.constants_spec = constants_spec
        self.backward_layer.constants_spec = constants_spec
        additional_specs += constants_spec

        self._num_constants = len(constants)
        self.forward_layer._num_constants = self._num_constants
        self.backward_layer._num_constants = self._num_constants

    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state of a Bidirectional'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors'
                             ' (a "Keras tensor" is a tensor that was'
                             ' returned by a Keras layer, or by `Input`)')

    if not is_keras_tensor:
        return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Compute the full input spec, including state
    full_input = [inputs] + additional_inputs
    # The original input_spec is None since there could be a nested tensor
    # input. Update the input_spec to match the inputs.
    full_input_spec = [None for _ in range(len(nest.flatten(inputs)))
                      ] + additional_specs

    # Perform the call with temporarily replaced input_spec. The original
    # spec is restored even if the call raises, so a failed call does not
    # leave the layer with a spec that rejects ordinary inputs afterwards.
    original_input_spec = self.input_spec
    self.input_spec = full_input_spec
    try:
        return super(Bidirectional, self).__call__(full_input, **kwargs)
    finally:
        self.input_spec = original_input_spec
def compute_mask(self, inputs, mask=None):
    """Computes an output mask tensor for the TimeDistributed layer.

    This is based on the inputs, mask, and the inner layer.

    If the batch size is statically known, the input `mask` is returned
    unchanged. Otherwise `compute_mask` of the inner layer is called with the
    mask reshaped so that its leading dimensions are merged:

    * If the inner layer returns a mask, it is reshaped to
      `(-1, timesteps, ...)` and returned.
    * If the inner layer returns `None` but `mask` is not `None`, `mask` is
      reduced with `any` over its trailing axes until it is 2-D.
    * If both are `None`, `None` is returned.

    Arguments:
        inputs: Tensor with shape [batch size, timesteps, ...] indicating the
            input to TimeDistributed. If static shape information is
            available for "batch size", `mask` is returned unmodified.
        mask: Either None (indicating no masking) or a Tensor indicating the
            input mask for TimeDistributed. The shape can be static or
            dynamic.

    Returns:
        Either None (no masking), or a mask tensor as described above.
    """
    # cases need to call the layer.compute_mask when input_mask is None:
    # Masking layer and Embedding layer with mask_zero
    input_shape = K.int_shape(inputs)
    if input_shape[0]:
        # batch size matters, we currently do not handle mask explicitly
        return mask

    inner_mask = mask
    if inner_mask is not None:
        inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        inner_mask = K.reshape(inner_mask, inner_mask_shape)

    input_uid = generic_utils.object_list_uid(inputs)
    inner_inputs = self._input_map.get(input_uid, inputs)
    output_mask = self.layer.compute_mask(inner_inputs, inner_mask)

    if output_mask is None:
        if mask is None:
            return None
        # input_mask is not None, and output_mask is None:
        # we should return a not-None mask
        output_mask = mask
        for _ in range(2, len(K.int_shape(mask))):
            output_mask = K.any(output_mask, axis=-1)
        return output_mask

    # output_mask is not None. We need to reshape it
    input_length = input_shape[1]
    if not input_length:
        input_length = K.shape(inputs)[1]

    output_mask_int_shape = K.int_shape(output_mask)
    if output_mask_int_shape is None:
        # if the output_mask does not have a static shape,
        # its shape must be the same as mask's
        if mask is not None:
            output_mask_int_shape = K.int_shape(mask)
        else:
            # The backend module has no `compute_output_shape`; the layer's
            # own method is the one that knows the output shape. Its result
            # may be a TensorShape, so normalise to a plain tuple first.
            computed_shape = self.compute_output_shape(input_shape)
            if hasattr(computed_shape, 'as_list'):
                computed_shape = computed_shape.as_list()
            output_mask_int_shape = tuple(computed_shape)[:-1]

    output_mask_shape = self._get_shape_tuple(
        (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
    return K.reshape(output_mask, output_mask_shape)
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio,
                        activation, block_id):
    """Builds one inverted residual block.

    The block consists of an optional 1x1 expansion (skipped when `block_id`
    is falsy), a depthwise convolution, an optional squeeze-and-excite stage
    (when `se_ratio` is truthy) and a 1x1 projection. A residual connection
    is added when `stride == 1` and the input channel count equals `filters`.

    Arguments:
        x: Input tensor.
        expansion: Multiplier applied to the input channel count for the
            expansion stage.
        filters: Number of output channels of the projection.
        kernel_size: Depthwise kernel size.
        stride: Depthwise stride; explicit zero padding is inserted and
            'valid' padding is used when it is 2.
        se_ratio: Squeeze-and-excite ratio, or a falsy value to disable it.
        activation: Callable applied after the expansion and depthwise stages.
        block_id: Block identifier used in layer names.

    Returns:
        Output tensor of the block.
    """
    if backend.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    def batch_norm(layer_name):
        # All normalisation layers in the block share the same settings.
        return layers.BatchNormalization(
            axis=channel_axis,
            epsilon=1e-3,
            momentum=0.999,
            name=layer_name)

    block_input = x
    infilters = backend.int_shape(x)[channel_axis]

    if block_id:
        # Expand
        prefix = 'expanded_conv_{}/'.format(block_id)
        expand_conv = layers.Conv2D(
            _depth(infilters * expansion),
            kernel_size=1,
            padding='same',
            use_bias=False,
            name=prefix + 'expand')
        x = expand_conv(x)
        x = batch_norm(prefix + 'expand/BatchNorm')(x)
        x = activation(x)
    else:
        prefix = 'expanded_conv/'

    # Depthwise stage
    downsample = stride == 2
    if downsample:
        pad_layer = layers.ZeroPadding2D(
            padding=imagenet_utils.correct_pad(x, kernel_size),
            name=prefix + 'depthwise/pad')
        x = pad_layer(x)
    if stride == 1:
        depthwise_padding = 'same'
    else:
        depthwise_padding = 'valid'
    depthwise_conv = layers.DepthwiseConv2D(
        kernel_size,
        strides=stride,
        padding=depthwise_padding,
        use_bias=False,
        name=prefix + 'depthwise')
    x = depthwise_conv(x)
    x = batch_norm(prefix + 'depthwise/BatchNorm')(x)
    x = activation(x)

    if se_ratio:
        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

    # Projection stage
    project_conv = layers.Conv2D(
        filters,
        kernel_size=1,
        padding='same',
        use_bias=False,
        name=prefix + 'project')
    x = project_conv(x)
    x = batch_norm(prefix + 'project/BatchNorm')(x)

    if stride == 1 and infilters == filters:
        x = layers.Add(name=prefix + 'Add')([block_input, x])
    return x
def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias,
                  activation, recurrent_activation, mask, time_major,
                  go_backwards):
    """LSTM with standard kernel implementation.

    All layer weights are passed in as function parameters and the recurrence
    is driven by the Keras backend `rnn` loop (not unrolled).

    Args:
        inputs: input tensor of LSTM layer.
        init_h: initial state tensor for the cell output.
        init_c: initial state tensor for the cell hidden state.
        kernel: weights for cell kernel.
        recurrent_kernel: weights for cell recurrent kernel.
        bias: bias added to the summed input and recurrent projections.
        activation: Activation function to use for output.
        recurrent_activation: Activation function to use for hidden recurrent
            state.
        mask: Boolean tensor for mask out the steps within sequence; forwarded
            to the backend `rnn` call.
        time_major: boolean, whether the inputs are in the format of
            [time, batch, feature] or [batch, time, feature].
        go_backwards: Boolean. If True, process the input sequence backwards.

    Returns:
        last_output: output tensor for the last timestep.
        outputs: output tensor for all timesteps.
        state_0: the cell output, which has same shape as init_h.
        state_1: the cell hidden state, which has same shape as init_c.
        runtime: the value of `_runtime('cpu')`. This value is for testing
            purpose and should not be used by user.
    """
    time_axis = 0 if time_major else 1
    timesteps = K.int_shape(inputs)[time_axis]

    def step(cell_inputs, cell_states):
        """Step function that will be used by Keras RNN backend."""
        h_tm1, c_tm1 = cell_states[0], cell_states[1]

        # Joint pre-activation for all four gates.
        z = K.dot(cell_inputs, kernel)
        z = z + K.dot(h_tm1, recurrent_kernel)
        z = K.bias_add(z, bias)
        z_in, z_forget, z_cand, z_out = array_ops.split(z, 4, axis=1)

        in_gate = recurrent_activation(z_in)
        forget_gate = recurrent_activation(z_forget)
        kept = forget_gate * c_tm1
        written = in_gate * activation(z_cand)
        c = kept + written
        out_gate = recurrent_activation(z_out)

        h = out_gate * activation(c)
        return h, [h, c]

    rnn_result = K.rnn(
        step,
        inputs, [init_h, init_c],
        constants=None,
        unroll=False,
        time_major=time_major,
        mask=mask,
        go_backwards=go_backwards,
        input_length=timesteps)
    last_output, outputs, new_states = rnn_result
    final_h = new_states[0]
    final_c = new_states[1]
    return last_output, outputs, final_h, final_c, _runtime('cpu')
def _main(args):
    """Convert a Darknet config + weights pair into a Keras model file.

    Reads the Darknet `.weights` binary sequentially while walking the
    sections of the `.cfg` file, builds the equivalent Keras layers, and
    saves either the full model or only its weights to an `.h5` file.

    Arguments:
        args: Namespace with the attributes `config_path`, `weights_path`,
            `output_path`, `weights_only` and `plot_model`.

    Raises:
        AssertionError: If a path has an unexpected extension, or a
            `shortcut`/`upsample` section uses an unsupported setting.
        ValueError: If a section type or activation is not supported.
    """
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    # The context manager guarantees the weights file is closed even when
    # parsing fails part-way through.
    with open(weights_path, 'rb') as weights_file:
        major, minor, revision = np.ndarray(
            shape=(3, ), dtype='int32', buffer=weights_file.read(12))
        # The width of the `seen` header field depends on the format version.
        if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
            seen = np.ndarray(
                shape=(1,), dtype='int64', buffer=weights_file.read(8))
        else:
            seen = np.ndarray(
                shape=(1,), dtype='int32', buffer=weights_file.read(4))
        print('Weights Header: ', major, minor, revision, seen)

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        print('Creating Keras model.')
        input_layer = Input(shape=(None, None, 3))
        prev_layer = input_layer
        all_layers = []

        weight_decay = float(cfg_parser['net_0']['decay']
                             ) if 'net_0' in cfg_parser.sections() else 5e-4
        count = 0
        out_index = []
        for section in cfg_parser.sections():
            print('Parsing section {}'.format(section))
            if section.startswith('convolutional'):
                filters = int(cfg_parser[section]['filters'])
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                pad = int(cfg_parser[section]['pad'])
                activation = cfg_parser[section]['activation']
                batch_normalize = 'batch_normalize' in cfg_parser[section]

                padding = 'same' if pad == 1 and stride == 1 else 'valid'

                # Setting weights.
                # Darknet serializes convolutional weights as:
                # [bias/beta, [gamma, mean, variance], conv_weights]
                prev_layer_shape = K.int_shape(prev_layer)

                weights_shape = (size, size, prev_layer_shape[-1], filters)
                darknet_w_shape = (filters, weights_shape[2], size, size)
                # `np.product` was removed in NumPy 2.0; `np.prod` is the
                # supported spelling.
                weights_size = np.prod(weights_shape)

                print('conv2d', 'bn'
                      if batch_normalize else '  ', activation, weights_shape)

                conv_bias = np.ndarray(
                    shape=(filters, ),
                    dtype='float32',
                    buffer=weights_file.read(filters * 4))
                count += filters

                if batch_normalize:
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_file.read(filters * 12))
                    count += 3 * filters

                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,  # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2]  # running var
                    ]

                conv_weights = np.ndarray(
                    shape=darknet_w_shape,
                    dtype='float32',
                    buffer=weights_file.read(weights_size * 4))
                count += weights_size

                # DarkNet conv_weights are serialized Caffe-style:
                # (out_dim, in_dim, height, width)
                # We would like to set these to Tensorflow order:
                # (height, width, in_dim, out_dim)
                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                conv_weights = [conv_weights] if batch_normalize else [
                    conv_weights, conv_bias
                ]

                # Handle activation. 'leaky' is added as a separate layer
                # after the (optional) batch normalisation below.
                act_fn = None
                if activation not in ('leaky', 'linear'):
                    raise ValueError(
                        'Unknown activation function `{}` in section {}'.format(
                            activation, section))

                # Create Conv2D layer
                if stride > 1:
                    # Darknet uses left and top padding instead of 'same' mode
                    prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
                conv_layer = (Conv2D(
                    filters, (size, size),
                    strides=(stride, stride),
                    kernel_regularizer=l2(weight_decay),
                    use_bias=not batch_normalize,
                    weights=conv_weights,
                    activation=act_fn,
                    padding=padding))(prev_layer)

                if batch_normalize:
                    conv_layer = (BatchNormalization(
                        weights=bn_weight_list))(conv_layer)
                prev_layer = conv_layer

                if activation == 'linear':
                    all_layers.append(prev_layer)
                elif activation == 'leaky':
                    act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                    prev_layer = act_layer
                    all_layers.append(act_layer)

            elif section.startswith('route'):
                ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
                layers = [all_layers[i] for i in ids]
                if len(layers) > 1:
                    print('Concatenating route layers:', layers)
                    concatenate_layer = Concatenate()(layers)
                    all_layers.append(concatenate_layer)
                    prev_layer = concatenate_layer
                else:
                    skip_layer = layers[0]  # only one layer to route
                    all_layers.append(skip_layer)
                    prev_layer = skip_layer

            elif section.startswith('maxpool'):
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    MaxPooling2D(
                        pool_size=(size, size),
                        strides=(stride, stride),
                        padding='same')(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('shortcut'):
                index = int(cfg_parser[section]['from'])
                activation = cfg_parser[section]['activation']
                assert activation == 'linear', 'Only linear activation supported.'
                all_layers.append(Add()([all_layers[index], prev_layer]))
                prev_layer = all_layers[-1]

            elif section.startswith('upsample'):
                stride = int(cfg_parser[section]['stride'])
                assert stride == 2, 'Only stride=2 supported.'
                all_layers.append(UpSampling2D(stride)(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('yolo'):
                # The layer preceding a yolo section is a model output; a
                # placeholder keeps `all_layers` aligned with section indices.
                out_index.append(len(all_layers) - 1)
                all_layers.append(None)
                prev_layer = all_layers[-1]

            elif section.startswith('net'):
                pass

            else:
                raise ValueError(
                    'Unsupported section header type: {}'.format(section))

        # Count whatever is left while the file is still open. Integer
        # division keeps the reported totals as whole numbers.
        remaining_weights = len(weights_file.read()) // 4

    # Create and save model.
    if not out_index:
        out_index.append(len(all_layers) - 1)
    model = Model(inputs=input_layer,
                  outputs=[all_layers[i] for i in out_index])
    # `summary()` prints by itself; wrapping it in `print` would emit an
    # extra `None` line.
    model.summary()
    if args.weights_only:
        model.save_weights(output_path)
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save(output_path)
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
# 将图像转换到np数组里面 img = np.array(img) # 归一化像素 img = img / 255.0 # 将2-dim灰度数组转换成3-dimRGB数组 if (len(img.shape) == 2): img = np.repeat(img[:, :, np.newaxis], 3, axis=2) return img image_model = VGG16(include_top=True, weights='imagenet') #加载预训练VGG16模型 transfer_layer = image_model.get_layer('fc2') #将VGG16的最后一层fc2层去掉,替换成我们自己创建的传输层 image_model_transfer = Model( inputs=image_model.input, #重新创建一个模型,这个模型没有最后fc2层,但是有传输层 outputs=transfer_layer.output) img_size = K.int_shape(image_model.input)[1:3] #定义输入图像的大小 transfer_values_size = K.int_shape(transfer_layer.output)[1] #定义输出的传输值向量的大小 def print_progress(count, max_count): #打印处理进度函数 # 完成的百分比 pct_complete = count / max_count #打印处理进度 msg = "\r- Progress: {0:.1%}".format(pct_complete) sys.stdout.write(msg) sys.stdout.flush() def process_images(data_dir, filenames, batch_size=32): #定义处理图片函数
def get_updates(self, loss, params):
    """Builds the list of update ops for one optimisation step.

    For every parameter an Adam-style step and an SGD-style step are
    computed, and a per-parameter boolean variable selects which of the two
    is applied. (NOTE(review): this looks like an Adam-to-SGD switching
    scheme -- confirm against the optimizer's documentation.)

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to update.

    Returns:
        The list of update ops, also stored on `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        lr = lr * (1. / (1. + self.decay * math_ops.cast(
            self.iterations, K.dtype(self.decay))))

    # Read the iteration counter only after it has been incremented.
    with ops.control_dependencies(
        [state_ops.assign_add(self.iterations, 1)]):
        t = math_ops.cast(self.iterations, K.floatx())
    # Bias-correction factor built from the two beta decay rates.
    lr_bc = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (
        1. - math_ops.pow(self.beta_1, t))

    # Per-parameter slots: first/second moment accumulators, a running
    # average `lam`, and the boolean switch `cond`.
    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    lams = [K.zeros(1, dtype=K.dtype(p)) for p in params]
    conds = [K.variable(False, dtype='bool') for p in params]
    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        # Placeholders so that the weight list keeps the same layout.
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats + lams + conds

    for p, g, m, v, vhat, lam, cond in zip(params, grads, ms, vs, vhats,
                                           lams, conds):
        # The gradient coefficient depends on the current switch state.
        beta_g = m_switch(cond, 1.0, 1.0 - self.beta_1)
        m_t = (self.beta_1 * m) + beta_g * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
        if self.amsgrad:
            vhat_t = math_ops.maximum(vhat, v_t)
            p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(vhat_t) + self.epsilon)
            self.updates.append(state_ops.assign(vhat, vhat_t))
        else:
            p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(v_t) + self.epsilon)

        # Scalar ratio derived from the adaptive step and the gradient;
        # epsilon guards the division and the sign term.
        gamma_den = math_ops.reduce_sum(p_t_ada * g)
        gamma = math_ops.reduce_sum(gen_math_ops.square(p_t_ada)) / (
            math_ops.abs(gamma_den) + self.epsilon) * (
                gen_math_ops.sign(gamma_den) + self.epsilon)
        # Running average of gamma and its bias-corrected versions.
        lam_t = (self.beta_2 * lam) + (1. - self.beta_2) * gamma
        lam_prime = lam / (1. - math_ops.pow(self.beta_2, t))
        lam_t_prime = lam_t / (1. - math_ops.pow(self.beta_2, t))
        lg_err = math_ops.abs(lam_t_prime - gamma)
        # The switch turns on once the average has converged (after the
        # first iteration, error below 1e-5, positive average) and then
        # stays on because it is OR-ed with its previous value.
        cond_update = gen_math_ops.logical_or(
            gen_math_ops.logical_and(
                gen_math_ops.logical_and(self.iterations > 1,
                                         lg_err < 1e-5), lam_t > 0),
            cond)[0]
        # `lam` stops being updated once the switch is on.
        lam_update = m_switch(cond_update, lam, lam_t)
        self.updates.append(state_ops.assign(lam, lam_update))
        self.updates.append(state_ops.assign(cond, cond_update))

        p_t_sgd = (1. - self.beta_1) * lam_prime * m_t

        self.updates.append(state_ops.assign(m, m_t))
        self.updates.append(state_ops.assign(v, v_t))

        # The step applied is selected by the switch value from before
        # this iteration's update.
        new_p = m_switch(cond, p - lr * p_t_sgd, p - lr * p_t_ada)

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(state_ops.assign(p, new_p))
    return self.updates
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
    """Adds a Inception-ResNet block.

    This function builds 3 types of Inception-ResNet blocks, selected by the
    `block_type` argument:
        - Inception-ResNet-A: `block_type='block35'`
        - Inception-ResNet-B: `block_type='block17'`
        - Inception-ResNet-C: `block_type='block8'`

    Arguments:
        x: input tensor.
        scale: scaling factor applied to the residual branch before it is
            added to the shortcut. With `r` the residual branch output, the
            block computes `x + scale * r`.
        block_type: `'block35'`, `'block17'` or `'block8'`, determines the
            network structure in the residual branch.
        block_idx: an `int` used for generating layer names. For example,
            `block_type='block35', block_idx=0` yields layer names with the
            common prefix `'block35_0'`.
        activation: activation function to use at the end of the block.
            When `activation=None`, no activation is applied.

    Returns:
        Output tensor for the block.

    Raises:
        ValueError: if `block_type` is not one of `'block35'`, `'block17'`
            or `'block8'`.
    """
    # Each tower is a chain of (filters, kernel_size) convolutions applied
    # to `x`; towers are built in the order listed.
    if block_type == 'block35':
        tower_specs = [
            [(32, 1)],
            [(32, 1), (32, 3)],
            [(32, 1), (48, 3), (64, 3)],
        ]
    elif block_type == 'block17':
        tower_specs = [
            [(192, 1)],
            [(128, 1), (160, [1, 7]), (192, [7, 1])],
        ]
    elif block_type == 'block8':
        tower_specs = [
            [(192, 1)],
            [(192, 1), (224, [1, 3]), (256, [3, 1])],
        ]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "block35", "block17" or "block8", '
                         'but got: ' + str(block_type))

    branches = []
    for tower_spec in tower_specs:
        tower = x
        for n_filters, kernel in tower_spec:
            tower = conv2d_bn(tower, n_filters, kernel)
        branches.append(tower)

    block_name = block_type + '_' + str(block_idx)
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 3

    merge = Concatenate(axis=channel_axis, name=block_name + '_mixed')
    mixed = merge(branches)

    # Linear 1x1 convolution bringing the residual back to the input depth.
    input_channels = K.int_shape(x)[channel_axis]
    up = conv2d_bn(
        mixed,
        input_channels,
        1,
        activation=None,
        use_bias=True,
        name=block_name + '_conv')

    x = Lambda(
        lambda inputs, scale: inputs[0] + inputs[1] * scale,
        output_shape=K.int_shape(x)[1:],
        arguments={'scale': scale},
        name=block_name)([x, up])

    if activation is None:
        return x
    return Activation(activation, name=block_name + '_ac')(x)
def get_updates(self, loss, params):
    """Builds the list of updates for one optimisation step.

    For every parameter both an adaptive step (scaled by the second-moment
    estimate) and a plain momentum step (scaled by `self.lr_boost`) are
    computed from a Nesterov-style momentum estimate; `self.switch_flag`
    selects which of the two is applied.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to update.

    Returns:
        The list of updates, also stored on `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        lr = lr * (1. / (1. + self.decay * math_ops.cast(
            self.iterations, K.dtype(self.decay))))

    # 1-based step index (the counter is read without depending on the
    # increment above, hence the explicit + 1).
    t = math_ops.cast(self.iterations, K.floatx()) + 1

    # Due to the recommendations in [2], i.e. warming momentum schedule
    momentum_cache_t = self.beta_1 * (
        1. - 0.5 *
        (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
    momentum_cache_t_1 = self.beta_1 * (
        1. - 0.5 *
        (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
    m_schedule_new = self.m_schedule * momentum_cache_t
    m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
    # NOTE(review): this update is a (variable, value) tuple while the
    # others are assign ops -- confirm the consumer accepts both forms.
    self.updates.append((self.m_schedule, m_schedule_new))

    # Per-parameter first/second moment accumulators.
    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        # Placeholders so that the weight list keeps the same layout.
        vhats = [K.zeros(1) for _ in params]

    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        # the following equations given in [1]
        g_prime = g / (1. - m_schedule_new)
        m_t = self.beta_1 * m + self.beta_g * g
        m_t_prime = m_t / (1. - m_schedule_next)
        v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
        if self.amsgrad:
            # Use the running maximum of the second moment.
            vhat_t = math_ops.maximum(vhat, v_t)
            self.updates.append(state_ops.assign(vhat, vhat_t))
            v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t))
        else:
            v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
        # Nesterov-style blend of the corrected gradient and momentum.
        m_t_bar = (self.beta_g / (1. - self.beta_1)) * (
            1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

        self.updates.append(state_ops.assign(m, m_t))
        self.updates.append(state_ops.assign(v, v_t))

        # Candidate steps: adaptive and plain (boosted) momentum.
        p_t_ada = p - lr * m_t_bar / (gen_math_ops.sqrt(v_t_prime) +
                                      self.epsilon)
        p_t_sgd = p - self.lr_boost * lr * m_t_bar

        new_p = m_switch(self.switch_flag, p_t_sgd, p_t_ada)

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(state_ops.assign(p, new_p))
    return self.updates
def _adjust_block(p, ip, filters, block_id=None):
    """Make the `previous path` tensor compatible with the current input.

    If `p` is missing, `ip` is used in its place. If the spatial size of `p`
    differs from that of `ip`, `p` is reduced through two stride-2 average
    pooling paths (the second one shifted by one pixel) whose 1x1 convolutions
    are concatenated and batch-normalised. Otherwise, if only the channel
    count differs from `filters`, a 1x1 projection is applied.

    Arguments:
        p: Input tensor which needs to be modified (may be `None`).
        ip: Input tensor whose shape needs to be matched.
        filters: Number of output filters to be matched.
        block_id: String block_id, used in layer and scope names.

    Returns:
        Adjusted Keras tensor.
    """
    channels_first = backend.image_data_format() == 'channels_first'
    channel_dim = 1 if channels_first else -1
    img_dim = 2 if channels_first else -2

    ip_shape = backend.int_shape(ip)
    p_shape = None if p is None else backend.int_shape(p)

    with backend.name_scope('adjust_block'):
        if p is None:
            # Nothing to adjust: the current input doubles as previous path.
            return ip

        if p_shape[img_dim] != ip_shape[img_dim]:
            # Spatial mismatch: factorised reduction.
            with backend.name_scope('adjust_reduction_block_%s' % block_id):
                relu = layers.Activation(
                    'relu', name='adjust_relu_1_%s' % block_id)
                p = relu(p)

                # First path: plain stride-2 sub-sampling + 1x1 conv.
                pool_a = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_1_%s' % block_id)
                path_a = pool_a(p)
                conv_a = layers.Conv2D(
                    filters // 2, (1, 1),
                    padding='same',
                    use_bias=False,
                    name='adjust_conv_1_%s' % block_id,
                    kernel_initializer='he_normal')
                path_a = conv_a(path_a)

                # Second path: shift by one pixel (pad bottom/right, crop
                # top/left) before the same sub-sampling + 1x1 conv.
                path_b = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                path_b = layers.Cropping2D(cropping=((1, 0), (1, 0)))(path_b)
                pool_b = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_2_%s' % block_id)
                path_b = pool_b(path_b)
                conv_b = layers.Conv2D(
                    filters // 2, (1, 1),
                    padding='same',
                    use_bias=False,
                    name='adjust_conv_2_%s' % block_id,
                    kernel_initializer='he_normal')
                path_b = conv_b(path_b)

                p = layers.concatenate([path_a, path_b], axis=channel_dim)
                norm = layers.BatchNormalization(
                    axis=channel_dim,
                    momentum=0.9997,
                    epsilon=1e-3,
                    name='adjust_bn_%s' % block_id)
                p = norm(p)

        elif p_shape[channel_dim] != filters:
            # Channel mismatch only: 1x1 projection.
            with backend.name_scope('adjust_projection_block_%s' % block_id):
                p = layers.Activation('relu')(p)
                projection = layers.Conv2D(
                    filters, (1, 1),
                    strides=(1, 1),
                    padding='same',
                    name='adjust_conv_projection_%s' % block_id,
                    use_bias=False,
                    kernel_initializer='he_normal')
                p = projection(p)
                norm = layers.BatchNormalization(
                    axis=channel_dim,
                    momentum=0.9997,
                    epsilon=1e-3,
                    name='adjust_bn_%s' % block_id)
                p = norm(p)
    return p
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                classifier_activation='softmax'):
    """Builds a MobileNetV3 Keras model around a caller-supplied block stack.

    Arguments:
        stack_fn: Callable invoked as `stack_fn(x, kernel, activation,
            se_ratio)` that builds the main body and returns its output.
        last_point_ch: Number of filters of the `Conv_2` layer; scaled by
            `alpha` when `alpha > 1.0`.
        input_shape: Optional shape tuple without the batch dimension. When
            omitted it is inferred from `input_tensor` if that is a Keras
            tensor, else `(None, None, 3)` is used when no tensor is given.
        alpha: Width multiplier. Within this function it scales
            `last_point_ch`, is validated against the ImageNet weights and is
            part of the weights file name.
        model_type: String used in the model name and weights file name.
        minimalistic: If true, uses kernel size 3, `relu` and no
            squeeze-and-excite; otherwise kernel size 5, `hard_swish` and a
            squeeze-and-excite ratio of 0.25.
        include_top: Whether to add the dropout/logits/activation head.
        weights: `None`, `'imagenet'`, or a path to a weights file.
        input_tensor: Optional tensor to use as the model input.
        classes: Number of filters of the `Logits` layer.
        pooling: `'avg'` or `'max'` global pooling applied when
            `include_top` is false; any other value adds no pooling.
        dropout_rate: Dropout rate before the logits; dropout is only added
            when it is greater than 0.
        classifier_activation: Activation of the `Predictions` layer.

    Returns:
        A `models.Model` instance.

    Raises:
        ValueError: For invalid `weights`, for `classes != 1000` with
            ImageNet weights and `include_top`, for an `input_tensor` that
            is not a Keras tensor or that disagrees with `input_shape`, for
            inputs smaller than 32x32, or for an `alpha` that has no
            ImageNet weights.
    """
    if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    layer_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not type input_tensor')
        if is_input_t_tensor:
            # Only a single dimension of the two shapes is compared.
            # NOTE(review): the channels_last branch compares tensor axis 2
            # with `input_shape[1]` -- confirm these are the intended axes.
            if backend.image_data_format() == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        # NOTE(review): if `input_tensor` is not a Keras tensor,
        # `input_shape` stays None here and the indexing further down will
        # fail -- confirm whether that case is reachable.
        if backend.is_keras_tensor(input_tensor):
            # NOTE(review): the inferred shape is assembled as (cols, rows)
            # rather than (rows, cols) -- confirm this ordering is intended.
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
                input_shape = (3, cols, rows)
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]
                input_shape = (cols, rows, 3)
    # If input_shape is None and input_tensor is None, use the standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]
    # Unknown (None) dimensions skip the minimum-size check.
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')
    if weights == 'imagenet':
        if (not minimalistic and alpha not in [0.75, 1.0]
                or minimalistic and alpha != 1.0):
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            logging.warning('`input_shape` is undefined or non-square, '
                            'or `rows` is not 224.'
                            ' Weights for input shape (224, 224) will be'
                            ' loaded as the default.')

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    x = img_input
    # Map inputs from [0, 255] to [-1, 1].
    x = layers.Rescaling(scale=1. / 127.5, offset=-1.)(x)
    x = layers.Conv2D(16,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name='Conv')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv/BatchNorm')(x)
    x = activation(x)

    x = stack_fn(x, kernel, activation, se_ratio)

    last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)
    x = layers.Conv2D(last_conv_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1/BatchNorm')(x)
    x = activation(x)
    # keepdims=True keeps the tensor 4-D so the 1x1 convolutions below apply.
    x = layers.GlobalAveragePooling2D(keepdims=True)(x)
    x = layers.Conv2D(last_point_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=True,
                      name='Conv_2')(x)
    x = activation(x)

    if include_top:
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate)(x)
        x = layers.Conv2D(classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits')(x)
        x = layers.Flatten()(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(activation=classifier_activation,
                              name='Predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = '{}{}_224_{}_float'.format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        # WEIGHTS_HASHES entries are indexed 0 for the full model and 1 for
        # the no-top variant.
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHT_PATH + file_name,
                                           cache_subdir='models',
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def compute_mask(self, inputs, mask=None):
    """Computes an output mask tensor for the TimeDistributed layer.

    This is based on the inputs, mask, and the inner layer.
    If batch size is specified:
    Simply return the input `mask`. (An rnn-based implementation with
    more than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise we call `compute_mask` of the inner layer at each time step.
    If the output mask at each time step is not `None`:
    (E.g., inner layer is Masking or RNN)
    Concatenate all of them and return the concatenation.
    If the output mask at each time step is `None` and the input mask is not
    `None`: (E.g., inner layer is Dense)
    Reduce the input_mask to 2 dimensions and return it.
    Otherwise (both the output mask and the input mask are `None`):
    (E.g., `mask` is not used at all)
    Return `None`.

    Arguments:
        inputs: Tensor with shape [batch size, timesteps, ...] indicating the
            input to TimeDistributed. If static shape information is
            available for "batch size", `mask` is returned unmodified.
        mask: Either None (indicating no masking) or a Tensor indicating the
            input mask for TimeDistributed. The shape can be static or
            dynamic.

    Returns:
        Either None (no masking), or a mask Tensor. When the batch size is
        statically known this is the input `mask` itself. When the inner
        layer produces no mask, it is the input mask reduced with `K.any`
        over its trailing axes until two dimensions remain. Otherwise it is
        the inner layer's mask reshaped to lead with (-1, timesteps).
    """
    # cases need to call the layer.compute_mask when input_mask is None:
    # Masking layer and Embedding layer with mask_zero
    input_shape = K.int_shape(inputs)
    if input_shape[0]:
        # batch size matters, we currently do not handle mask explicitly
        return mask

    inner_mask = mask
    if inner_mask is not None:
        inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        inner_mask = K.reshape(inner_mask, inner_mask_shape)

    input_uid = generic_utils.object_list_uid(inputs)
    inner_inputs = self._input_map.get(input_uid, inputs)
    output_mask = self.layer.compute_mask(inner_inputs, inner_mask)

    if output_mask is None:
        if mask is None:
            return None
        # input_mask is not None, and output_mask is None:
        # we should return a not-None mask
        output_mask = mask
        for _ in range(2, len(K.int_shape(mask))):
            output_mask = K.any(output_mask, axis=-1)
    else:
        # output_mask is not None. We need to reshape it
        input_length = input_shape[1]
        if not input_length:
            input_length = K.shape(inputs)[1]
        output_mask_int_shape = K.int_shape(output_mask)
        if output_mask_int_shape is None:
            # if the output_mask does not have a static shape,
            # its shape must be the same as mask's
            if mask is not None:
                output_mask_int_shape = K.int_shape(mask)
            else:
                # The backend module has no `compute_output_shape`; the
                # layer's own method is the one that knows the output shape.
                # It may return a TensorShape-like object or a plain
                # tuple/list, so normalise to a tuple before slicing.
                computed_shape = self.compute_output_shape(input_shape)
                if hasattr(computed_shape, 'as_list'):
                    computed_shape = computed_shape.as_list()
                output_mask_int_shape = tuple(computed_shape)[:-1]
        output_mask_shape = self._get_shape_tuple(
            (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
        output_mask = K.reshape(output_mask, output_mask_shape)
    return output_mask
def _create_all_weights(self, params):
    """Create one zero-initialised slot per parameter and register them.

    Arguments:
        params: iterable of parameters; `backend.int_shape` is queried for
            each one to size its slot.

    Returns:
        The list of zero-filled slots ("moments"), in the order of `params`.
        As a side effect `self.weights` is set to `self.iterations` followed
        by these slots.
    """
    # Collect every shape first, then allocate, mirroring the two-pass
    # order in which the backend is called.
    param_shapes = []
    for param in params:
        param_shapes.append(backend.int_shape(param))

    moments = []
    for param_shape in param_shapes:
        moments.append(backend.zeros(param_shape))

    self.weights = [self.iterations, *moments]
    return moments
def load_weights_from_hdf5_group_by_name(f, layers, skip_mismatch=False):
    """Implements name-based weight loading.

    (instead of topological weight loading).

    Layers that have no matching name are skipped.

    Arguments:
        f: A pointer to a HDF5 group.
        layers: a list of target layers.
        skip_mismatch: Boolean, whether to skip loading of layers
            where there is a mismatch in the number of weights,
            or a mismatch in the shape of the weights. A count mismatch
            skips the whole layer; a shape mismatch skips only the
            offending weight.

    Raises:
        ValueError: in case of mismatch between provided layers
            and weights file and skip_mismatch=False.
    """
    # Depending on the h5py version, string attributes come back either as
    # bytes or as str; only decode when the value is actually bytes-like.
    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version']
        if hasattr(original_keras_version, 'decode'):
            original_keras_version = original_keras_version.decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend']
        if hasattr(original_backend, 'decode'):
            original_backend = original_backend.decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = load_attributes_from_hdf5_group(f, 'layer_names')

    # Reverse index of layer name to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
        saved_weight_values = [
            np.asarray(g[weight_name]) for weight_name in weight_names
        ]

        for layer in index.get(name, []):
            symbolic_weights = _legacy_weights(layer)
            # Preprocess a fresh copy for every layer: preprocessing may
            # reassign list entries in place, and several layers can share
            # one name, so the saved values must not carry over conversions
            # done for a previous layer.
            weight_values = preprocess_weights_for_loading(
                layer, list(saved_weight_values), original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                if skip_mismatch:
                    logging.warning(
                        'Skipping loading of weights for '
                        'layer {}'.format(layer.name) + ' due to mismatch '
                        'in number of weights ({} vs {}).'.format(
                            len(symbolic_weights), len(weight_values)))
                    continue
                raise ValueError('Layer #' + str(k) + ' (named "' +
                                 layer.name + '") expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weight(s), but the saved weights' +
                                 ' have ' + str(len(weight_values)) +
                                 ' element(s).')

            # Set values. The lengths are equal at this point, so zip pairs
            # every symbolic weight with its saved value.
            for symbolic_weight, weight_value in zip(symbolic_weights,
                                                     weight_values):
                symbolic_shape = K.int_shape(symbolic_weight)
                if symbolic_shape != weight_value.shape:
                    if skip_mismatch:
                        logging.warning(
                            'Skipping loading of weights for '
                            'layer {}'.format(layer.name) + ' due to '
                            'mismatch in shape ({} vs {}).'.format(
                                symbolic_weight.shape, weight_value.shape))
                        continue
                    raise ValueError('Layer #' + str(k) + ' (named "' +
                                     layer.name + '"), weight ' +
                                     str(symbolic_weight) +
                                     ' has shape {}'.format(symbolic_shape) +
                                     ', but the saved weight has shape ' +
                                     str(weight_value.shape) + '.')
                weight_value_tuples.append((symbolic_weight, weight_value))
    K.batch_set_value(weight_value_tuples)
def _create_all_weights(self, params):
    """Create two zero-initialised slots per parameter and register them.

    Arguments:
        params: iterable of parameters; `backend.int_shape` is queried for
            each one to size its slots.

    Returns:
        A tuple `(accumulators, delta_accumulators)`, each a list of
        zero-filled slots in the order of `params`. As a side effect
        `self.weights` is set to the accumulators followed by the delta
        accumulators.
    """
    param_shapes = []
    for param in params:
        param_shapes.append(backend.int_shape(param))

    # Allocate the full accumulator list before any delta accumulator so
    # the backend sees the allocations in the same order as before.
    accumulators = []
    for param_shape in param_shapes:
        accumulators.append(backend.zeros(param_shape))

    delta_accumulators = []
    for param_shape in param_shapes:
        delta_accumulators.append(backend.zeros(param_shape))

    self.weights = [*accumulators, *delta_accumulators]
    return accumulators, delta_accumulators
def load_weights_from_hdf5_group_by_name(f, layers):
    """Implements name-based weight loading.

    (instead of topological weight loading).

    Layers that have no matching name are skipped.

    Arguments:
        f: A pointer to a HDF5 group.
        layers: a list of target layers.

    Raises:
        ValueError: in case of mismatch between provided layers
            and weights file.
    """
    # Depending on the h5py version, string attributes come back either as
    # bytes or as str; only decode when the value is actually bytes-like.
    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version']
        if hasattr(original_keras_version, 'decode'):
            original_keras_version = original_keras_version.decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend']
        if hasattr(original_backend, 'decode'):
            original_backend = original_backend.decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = load_attributes_from_hdf5_group(f, 'layer_names')

    # Reverse index of layer name to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
        saved_weight_values = [
            np.asarray(g[weight_name]) for weight_name in weight_names
        ]

        for layer in index.get(name, []):
            symbolic_weights = layer.weights
            # Preprocess a fresh copy for every layer: preprocessing may
            # reassign list entries in place, and several layers can share
            # one name, so the saved values must not carry over conversions
            # done for a previous layer.
            weight_values = preprocess_weights_for_loading(
                layer, list(saved_weight_values), original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) + ' (named "' +
                                 layer.name + '") expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weight(s), but the saved weights' +
                                 ' have ' + str(len(weight_values)) +
                                 ' element(s).')

            # Set values. The lengths are equal at this point, so zip pairs
            # every symbolic weight with its saved value.
            for symbolic_weight, weight_value in zip(symbolic_weights,
                                                     weight_values):
                symbolic_shape = K.int_shape(symbolic_weight)
                if symbolic_shape != weight_value.shape:
                    raise ValueError('Layer #' + str(k) + ' (named "' +
                                     layer.name + '"), weight ' +
                                     str(symbolic_weight) +
                                     ' has shape {}'.format(symbolic_shape) +
                                     ', but the saved weight has shape ' +
                                     str(weight_value.shape) + '.')
                weight_value_tuples.append((symbolic_weight, weight_value))
    K.batch_set_value(weight_value_tuples)
def preprocess_weights_for_loading(layer, weights, original_keras_version=None, original_backend=None):
    """Converts layers weights from Keras 1 format to Keras 2.

    The conversion is keyed on the layer's class name. Wrapper layers
    (`Bidirectional`, and for Keras 1 files `TimeDistributed`, `Model`
    and `Sequential`) recurse into the layers they contain. Convolution
    kernels are additionally converted when the weights come from the
    'theano' backend or when their stored shape differs from the shape of
    the layer's first weight.

    Note that entries of `weights` may be reassigned by index, so the list
    passed in can be modified in place; callers should use the returned
    list.

    Arguments:
        layer: Layer instance.
        weights: List of weights values (Numpy arrays).
        original_keras_version: Keras version for the weights, as a string.
        original_backend: Keras backend the weights were trained with,
            as a string.

    Returns:
        A list of weights values (Numpy arrays).
    """
    if layer.__class__.__name__ == 'Bidirectional':
        # First half of the list goes to the forward layer, second half to
        # the backward layer; each half is converted recursively.
        num_weights_per_layer = len(weights) // 2
        forward_weights = preprocess_weights_for_loading(
            layer.forward_layer, weights[:num_weights_per_layer],
            original_keras_version, original_backend)
        backward_weights = preprocess_weights_for_loading(
            layer.backward_layer, weights[num_weights_per_layer:],
            original_keras_version, original_backend)
        weights = forward_weights + backward_weights

    if original_keras_version == '1':
        if layer.__class__.__name__ == 'TimeDistributed':
            # Convert on behalf of the wrapped layer.
            weights = preprocess_weights_for_loading(layer.layer, weights, original_keras_version, original_backend)

        if layer.__class__.__name__ == 'Conv1D':
            shape = weights[0].shape
            # Handle Keras 1.1 format
            if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters:
                # Legacy shape:
                # (filters, input_dim, filter_length, 1)
                assert shape[0] == layer.filters and shape[2:] == (layer.kernel_size[0], 1)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
            # Drop the second axis, leaving a 3-D kernel.
            weights[0] = weights[0][:, 0, :, :]

        if layer.__class__.__name__ == 'Conv2D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

        if layer.__class__.__name__ == 'Conv2DTranspose':
            if layer.data_format == 'channels_last':
                # old: (kernel_rows, kernel_cols, stack_size, filters)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (0, 1, 3, 2))
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (2, 3, 0, 1))

        if layer.__class__.__name__ == 'Conv3D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, ...)
                # new: (..., stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))

        if layer.__class__.__name__ == 'GRU':
            if len(weights) == 9:
                # Nine separate arrays are fused into three by
                # concatenating every third array along the last axis.
                kernel = np.concatenate([weights[0], weights[3], weights[6]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[4], weights[7]], axis=-1)
                bias = np.concatenate([weights[2], weights[5], weights[8]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'LSTM':
            if len(weights) == 12:
                # old: i, c, f, o
                # new: i, f, c, o
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'ConvLSTM2D':
            if len(weights) == 12:
                # Same index reordering as the LSTM case above.
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                if layer.data_format == 'channels_first':
                    # old: (filters, stack_size, kernel_rows, kernel_cols)
                    # new: (kernel_rows, kernel_cols, stack_size, filters)
                    kernel = np.transpose(kernel, (2, 3, 1, 0))
                    recurrent_kernel = np.transpose(recurrent_kernel, (2, 3, 1, 0))
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ in ['Model', 'Sequential']:
            # The flat list is consumed in two passes over the sublayers:
            # first each sublayer's trainable weights, then the rest.
            new_weights = []
            # trainable weights
            for sublayer in layer.layers:
                num_weights = len(sublayer.trainable_weights)
                if num_weights > 0:
                    new_weights.extend(
                        preprocess_weights_for_loading(
                            layer=sublayer,
                            weights=weights[:num_weights],
                            original_keras_version=original_keras_version,
                            original_backend=original_backend))
                    weights = weights[num_weights:]

            # non-trainable weights
            for sublayer in layer.layers:
                num_weights = len([
                    l for l in sublayer.weights if l not in sublayer.trainable_weights
                ])
                if num_weights > 0:
                    new_weights.extend(
                        preprocess_weights_for_loading(
                            layer=sublayer,
                            weights=weights[:num_weights],
                            original_keras_version=original_keras_version,
                            original_backend=original_backend))
                    weights = weights[num_weights:]
            weights = new_weights

    # The checks below run for every Keras version, not only version '1'.
    conv_layers = [
        'Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D'
    ]
    if layer.__class__.__name__ in conv_layers:
        if original_backend == 'theano':
            weights[0] = conv_utils.convert_kernel(weights[0])
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = conv_utils.convert_kernel(weights[1])
        # If the stored kernel shape still disagrees with the layer's first
        # weight, move the first two axes to the end.
        if K.int_shape(layer.weights[0]) != weights[0].shape:
            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))

    # NOTE(review): `_convert_rnn_weights` is defined elsewhere; presumably
    # it adapts recurrent-layer weight layouts -- confirm against its
    # definition.
    return _convert_rnn_weights(layer, weights)
def block3(x, filters, kernel_size=3, stride=1, groups=32, conv_shortcut=True, name=None):
    """Builds a residual block using a grouped 3x3-style bottleneck.

    The grouped convolution is emulated with a depthwise convolution whose
    outputs are reshaped into groups and summed (see the `_2_conv` /
    `_2_reduce` layers).

    Arguments:
        x: input tensor.
        filters: integer, filters of the bottleneck layer.
        kernel_size: default 3, kernel size of the bottleneck layer.
        stride: default 1, stride of the first layer.
        groups: default 32, group size for grouped convolution.
        conv_shortcut: default True, use convolution shortcut if True,
            otherwise identity shortcut.
        name: string, block label. It is used as the prefix of every named
            layer, so a string must be supplied.

    Returns:
        Output tensor for the residual block.
    """
    if backend.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1

    def _sync_bn(suffix):
        # Every normalisation layer in the block shares the same axis and
        # only differs in its name suffix.
        return tf.keras.layers.experimental.SyncBatchNormalization(
            axis=bn_axis, name=name + suffix)

    # Shortcut branch: identity, or a strided 1x1 projection plus BN.
    residual = x
    if conv_shortcut:
        residual = layers.Conv2D(
            (64 // groups) * filters,
            1,
            strides=stride,
            use_bias=False,
            name=name + '_0_conv')(residual)
        residual = _sync_bn('_0_bn')(residual)

    # 1x1 reduction.
    y = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x)
    y = _sync_bn('_1_bn')(y)
    y = layers.Activation('relu', name=name + '_1_relu')(y)

    # Grouped convolution emulated via depthwise conv + reshape + sum.
    c = filters // groups
    y = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(y)
    y = layers.DepthwiseConv2D(
        kernel_size,
        strides=stride,
        depth_multiplier=c,
        use_bias=False,
        name=name + '_2_conv')(y)
    # NOTE(review): taking dims [1:-1] as the spatial dims assumes the
    # channel axis is last -- confirm for 'channels_first'.
    spatial_dims = backend.int_shape(y)[1:-1]
    y = layers.Reshape(spatial_dims + (groups, c, c))(y)
    y = layers.Lambda(lambda x: sum(x[:, :, :, :, i] for i in range(c)),
                      name=name + '_2_reduce')(y)
    y = layers.Reshape(spatial_dims + (filters,))(y)
    y = _sync_bn('_2_bn')(y)
    y = layers.Activation('relu', name=name + '_2_relu')(y)

    # 1x1 expansion back to the shortcut's channel count.
    y = layers.Conv2D(
        (64 // groups) * filters, 1, use_bias=False,
        name=name + '_3_conv')(y)
    y = _sync_bn('_3_bn')(y)

    y = layers.Add(name=name + '_add')([residual, y])
    return layers.Activation('relu', name=name + '_out')(y)
def call(self, inputs, mask=None, training=None, initial_state=None, constants=None):
    """Runs the wrapped cell over the input sequence via `K.rnn`.

    Arguments:
        inputs: input tensor, or a list whose first element is used.
        mask: optional mask, or a list whose first element is used.
        training: forwarded to the cell's `call` only if it accepts a
            `training` argument.
        initial_state: optional list of initial states. When omitted,
            `self.states` is used for stateful layers and
            `self.get_initial_state(inputs)` otherwise.
        constants: optional constants forwarded to `K.rnn` and to the cell.

    Returns:
        The full output sequence if `self.return_sequences` is set,
        otherwise the last output. If `self.return_state` is set, a list
        with that output followed by the final states.

    Raises:
        ValueError: if the number of initial states differs from the number
            of layer states, or if constants are given but the cell's
            `call` does not accept `constants`.
    """
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if isinstance(inputs, list):
        inputs = inputs[0]

    if initial_state is None:
        if self.stateful:
            initial_state = self.states
        else:
            initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
        mask = mask[0]

    num_given = len(initial_state)
    num_expected = len(self.states)
    if num_given != num_expected:
        raise ValueError(''.join([
            'Layer has ', str(num_expected),
            ' states but was passed ', str(num_given),
            ' initial states.',
        ]))

    timesteps = K.int_shape(inputs)[1]

    # Only hand `training` to cells whose call signature accepts it.
    if generic_utils.has_arg(self.cell.call, 'training'):
        kwargs = {'training': training}
    else:
        kwargs = {}

    if constants:
        if not generic_utils.has_arg(self.cell.call, 'constants'):
            raise ValueError('RNN cell does not support constants')

        def step(step_inputs, step_states):
            # The trailing `_num_constants` entries of the state list are
            # the constants; split them off before calling the cell.
            cell_constants = step_states[-self._num_constants:]
            cell_states = step_states[:-self._num_constants]
            return self.cell.call(
                step_inputs, cell_states, constants=cell_constants, **kwargs)
    else:

        def step(step_inputs, step_states):
            return self.cell.call(step_inputs, step_states, **kwargs)

    last_output, outputs, states = K.rnn(
        step,
        inputs,
        initial_state,
        constants=constants,
        go_backwards=self.go_backwards,
        mask=mask,
        input_length=timesteps)

    if self.stateful:
        # Write the final states back into the layer's state variables.
        updates = [
            K.update(self.states[i], states[i]) for i in range(len(states))
        ]
        self.add_update(updates, inputs=True)

    output = outputs if self.return_sequences else last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
        output._uses_learning_phase = True

    if not self.return_state:
        return output

    if isinstance(states, (list, tuple)):
        states = list(states)
    else:
        states = [states]
    return [output] + states