def vat_loss(self, eps, xi, ip):
    """Build the virtual adversarial training (VAT) loss term.

    One random direction per model input is refined `ip` times: the inputs
    are shifted by `xi` along the normalized direction, the summed KL
    divergence against the unperturbed outputs is computed, and its
    gradient with respect to the directions becomes the new directions.
    The inputs are finally shifted by `eps` along the normalized refined
    directions and the per-output mean KL divergences are added together.

    # Arguments
        eps: scale applied to the final perturbation.
        xi: scale applied to the probing perturbation during refinement.
        ip: number of refinement iterations.

    # Returns
        Sum over the model outputs of the mean KL divergence.
    """
    # The unperturbed predictions act as constants in the loss.
    reference_outputs = [
        K.stop_gradient(out) for out in to_list(self.outputs)
    ]
    directions = [K.random_normal(K.shape(inp)) for inp in self.inputs]

    for _ in range(ip):
        probed_inputs = [
            inp + self.normalize_vector(direction) * xi
            for inp, direction in zip(self.inputs, directions)
        ]
        probed_outputs = to_list(self.call(probed_inputs))
        divergences = [
            K.sum(self.kld(reference, probed))
            for reference, probed in zip(reference_outputs, probed_outputs)
        ]
        total_divergence = sum(divergences)
        # The gradient is only used as a direction, never back-propagated.
        directions = [
            K.stop_gradient(grad)
            for grad in K.gradients(total_divergence, directions)
        ]

    adversarial_inputs = [
        inp + self.normalize_vector(direction) * eps
        for inp, direction in zip(self.inputs, directions)
    ]
    adversarial_outputs = to_list(self.call(adversarial_inputs))
    divergences = [
        K.mean(self.kld(reference, perturbed))
        for reference, perturbed in zip(reference_outputs,
                                        adversarial_outputs)
    ]
    return sum(divergences)
def test_Bidirectional_dropout(merge_mode):
    """Check learning-phase flags of a `Bidirectional` LSTM using dropout.

    With `training=True` passed at call time no output may be flagged as
    using the learning phase. Without it every output, and the model built
    from them, must be flagged, and two `predict` calls must agree.
    """
    rnn = layers.LSTM
    samples, timesteps, dim, units = 2, 3, 5, 3
    X = [np.random.rand(samples, timesteps, dim)]

    # Explicit training flag: outputs do not depend on the learning phase.
    inputs = Input((timesteps, dim))
    inner = rnn(units, dropout=0.2, recurrent_dropout=0.2)
    wrapped = wrappers.Bidirectional(inner, merge_mode=merge_mode)
    outputs = to_list(wrapped(inputs, training=True))
    assert not any(x._uses_learning_phase for x in outputs)

    # No training flag: outputs depend on the learning phase.
    inputs = Input((timesteps, dim))
    inner = rnn(units, dropout=0.2, return_state=True)
    wrapped = wrappers.Bidirectional(inner, merge_mode=merge_mode)
    outputs = to_list(wrapped(inputs))
    assert all(x._uses_learning_phase for x in outputs)

    model = Model(inputs, outputs)
    assert model.uses_learning_phase

    first_run = to_list(model.predict(X))
    second_run = to_list(model.predict(X))
    for first, second in zip(first_run, second_run):
        assert_allclose(first, second, atol=1e-5)
def is_same_tensor(x, y):
    """Tell whether `x` and `y` hold tensors with pairwise equal names.

    Both arguments are normalized with `to_list`, so a single tensor and a
    list of tensors are both accepted.

    # Returns
        `False` when the two lists differ in length, otherwise `True` only
        if every pair of corresponding elements has the same `.name`.
    """
    xs, ys = to_list(x), to_list(y)
    if len(xs) != len(ys):
        return False
    # Every pair is compared before reducing (no short-circuit).
    name_matches = [left.name == right.name for left, right in zip(xs, ys)]
    return all(name_matches)
def test_Bidirectional_merged_value(merge_mode):
    """Check `Bidirectional` outputs against manually merged directions.

    The merged output must equal the forward and (re-reversed) backward
    outputs combined according to `merge_mode`. With `return_state=True`
    the trailing outputs must be the forward states followed by the
    backward states.
    """
    rnn = layers.LSTM
    samples, timesteps, dim, units = 2, 3, 5, 3
    X = [np.random.rand(samples, timesteps, dim)]

    mergers = {
        'sum': lambda y, y_rev: y + y_rev,
        'mul': lambda y, y_rev: y * y_rev,
        'ave': lambda y, y_rev: (y + y_rev) / 2,
        'concat': lambda y, y_rev: np.concatenate((y, y_rev), axis=-1),
    }
    # Any other mode keeps the two directions as separate outputs.
    merge_func = mergers.get(merge_mode, lambda y, y_rev: [y, y_rev])

    # basic case
    inputs = Input((timesteps, dim))
    layer = wrappers.Bidirectional(rnn(units, return_sequences=True),
                                   merge_mode=merge_mode)
    f_merged = K.function([inputs], to_list(layer(inputs)))
    f_forward = K.function([inputs], [layer.forward_layer.call(inputs)])
    f_backward = K.function(
        [inputs], [K.reverse(layer.backward_layer.call(inputs), 1)])

    y_merged = f_merged(X)
    y_expected = to_list(merge_func(f_forward(X)[0], f_backward(X)[0]))
    assert len(y_merged) == len(y_expected)
    for actual, expected in zip(y_merged, y_expected):
        assert_allclose(actual, expected, atol=1e-5)

    # test return_state
    inputs = Input((timesteps, dim))
    layer = wrappers.Bidirectional(rnn(units, return_state=True),
                                   merge_mode=merge_mode)
    f_merged = K.function([inputs], layer(inputs))
    f_forward = K.function([inputs], layer.forward_layer.call(inputs))
    f_backward = K.function([inputs], layer.backward_layer.call(inputs))
    n_states = len(layer.layer.states)

    y_merged = f_merged(X)
    y_forward = f_forward(X)
    y_backward = f_backward(X)
    y_expected = to_list(merge_func(y_forward[0], y_backward[0]))
    assert len(y_merged) == len(y_expected) + n_states * 2
    for actual, expected in zip(y_merged, y_expected):
        assert_allclose(actual, expected, atol=1e-5)

    # test if the state of a BiRNN is the concatenation of the underlying RNNs
    merged_states = y_merged[-n_states * 2:]
    inner_states = y_forward[-n_states:] + y_backward[-n_states:]
    for state_birnn, state_inner in zip(merged_states, inner_states):
        assert_allclose(state_birnn, state_inner, atol=1e-5)
def build(self, nodes=None, adjacency=None):
    """Validate the inputs and (re)build the GraphWrapper from them.

    When `nodes` or `adjacency` is None, the value already stored on the
    wrapper (`self._nodes` / `self._adjacency`) is used instead, which
    lets the setters rebuild the wrapper after changing a single member.

    Args:
        - nodes: a (..., N, F) tensor,
        - adjacency: a (..., N, N) tensor, or a list of such tensors

    Returns:
        'self'

    Raises:
        ValueError: if the nodes or any adjacency entry is not a tensor.
    """
    if nodes is None and self._nodes is not None:
        nodes = self._nodes
    elif not K.is_tensor(nodes):
        raise ValueError("Nodes must be a tensor.")

    if adjacency is None and self._adjacency is not None:
        adjacency = self._adjacency
    elif not all(K.is_tensor(a) for a in to_list(adjacency)):
        raise ValueError("Adjacency must be a tensor.")

    nodes_shape = K.int_shape(nodes)
    adjacency_shape = (
        [K.int_shape(a) for a in adjacency]
        if isinstance(adjacency, list)
        else K.int_shape(adjacency)
    )

    # Creating a GraphShape object will handle shape checking
    if self._keras_shape is not None:
        self._keras_shape.build(nodes_shape=nodes_shape,
                                adjacency_shape=adjacency_shape)
    else:
        self._keras_shape = GraphShape(nodes_shape=nodes_shape,
                                       adjacency_shape=adjacency_shape)

    self._nodes = nodes
    self._adjacency = adjacency
    self._n_features = nodes_shape[-1]
    self._n_nodes = nodes_shape[-2]

    # Refresh the underlying container: nodes first, then adjacencies.
    super(GraphWrapper, self)._clear()
    super(GraphWrapper, self)._extend(
        [self.nodes] + to_list(self.adjacency))
    self._built = True
    return self
def data_generator(x, y, batch_size):
    """Yield `(x_batch, y_batch)` slices of `x` and `y` forever.

    `x` and `y` may each be a single array or a list of arrays; a
    single-element batch list is unpacked again before being yielded.
    Batches are consecutive slices of length `batch_size`, and the batch
    index wraps around after `len(x[0]) // batch_size` batches.

    # Arguments
        x: input array or list of input arrays.
        y: target array or list of target arrays.
        batch_size: number of samples per batch.

    # Yields
        Tuples `(x_batch, y_batch)`.
    """
    x = to_list(x)
    y = to_list(y)
    # Keep at least one batch: with fewer samples than `batch_size` the
    # integer division gives 0 and the wrap-around below would raise
    # ZeroDivisionError after the first batch.
    max_batch_index = max(1, len(x[0]) // batch_size)
    i = 0
    while True:
        start, stop = i * batch_size, (i + 1) * batch_size
        x_batch = unpack_singleton([array[start:stop] for array in x])
        y_batch = unpack_singleton([array[start:stop] for array in y])
        yield x_batch, y_batch
        i = (i + 1) % max_batch_index
def data_generator(x, y, batch_size):
    """Yield `(x_batch, y_batch)` slices of `x` and `y` forever.

    `x` and `y` may each be a single array or a list of arrays; a
    single-element batch list is unpacked again before being yielded.
    Batches are consecutive slices of length `batch_size`, and the batch
    index wraps around after `len(x[0]) // batch_size` batches.

    # Arguments
        x: input array or list of input arrays.
        y: target array or list of target arrays.
        batch_size: number of samples per batch.

    # Yields
        Tuples `(x_batch, y_batch)`.
    """
    x = to_list(x)
    y = to_list(y)
    # Keep at least one batch: with fewer samples than `batch_size` the
    # integer division gives 0 and the wrap-around below would raise
    # ZeroDivisionError after the first batch.
    max_batch_index = max(1, len(x[0]) // batch_size)
    i = 0
    while True:
        start, stop = i * batch_size, (i + 1) * batch_size
        x_batch = unpack_singleton([array[start:stop] for array in x])
        y_batch = unpack_singleton([array[start:stop] for array in y])
        yield x_batch, y_batch
        i = (i + 1) % max_batch_index
def score(self, X, y, **kwargs):
    """Return the accuracy of the fitted model on `X`, `y`.

    The data is fed through a batch generator: `self.predict_batch_generator`
    when set, otherwise `self.train_batch_generator`.

    # Arguments
        X: test samples; validated with `check_array`.
        y: true labels; mapped to class indices via `self.classes_` and
            one-hot encoded when the loss is categorical crossentropy.
        **kwargs: extra arguments, checked against `Model.evaluate` and
            forwarded to `evaluate_generator`.

    # Returns
        The value of the accuracy metric.

    # Raises
        ValueError: if the model does not report an accuracy metric.
    """
    X = check_array(X, accept_sparse=['csc', 'csr'], allow_nd=True)
    y = np.searchsorted(self.classes_, y)
    check_params(kwargs, Model.evaluate)

    if self.loss == 'categorical_crossentropy' and len(y.shape) != 2:
        y = to_categorical(y)

    predict_batch_generator = self.predict_batch_generator
    if predict_batch_generator is None:
        predict_batch_generator = self.train_batch_generator

    n_jobs = self.n_jobs
    outputs = self.model_.evaluate_generator(
        predict_batch_generator.flow(X, y, batch_size=self.batch_size),
        n_jobs=n_jobs,
        use_multiprocessing=bool(n_jobs > 1),
        **kwargs)

    for name, output in zip(self.model_.metrics_names, to_list(outputs)):
        # The metric may be reported under its short or its full name.
        if name in ('acc', 'accuracy'):
            return output
    raise ValueError('The model is not configured to compute accuracy. '
                     'You should pass `metrics=["accuracy"]` to '
                     'the `model.compile()` method.')
def layer_shapes(image_shape, model):
    """Compute layer shapes given input image shape and the model.

    Each model input starts from its declared input shape. If the first
    input has an undefined non-batch dimension, `(None,) + image_shape`
    is used instead; an undefined dimension on any later input is an
    error. The shapes are then propagated through `model.layers[1:]`
    with each layer's `compute_output_shape`.

    Args
        image_shape: The shape of the image.
        model: The model to use for computing how the image shape is
            transformed in the pyramid.

    Returns
        A dictionary mapping layer names to image shapes.

    Raises
        Exception: if an input other than the first one has a variable
            image size.
    """
    shape = {}
    input_shapes = to_list(model.input_shape)
    for i, input_name in enumerate(model.input_names):
        input_shape = input_shapes[i]
        shape[input_name] = input_shape
        if None in input_shape[1:]:
            if i > 0:
                # Implicit concatenation keeps the message free of the
                # stray whitespace a backslash continuation would embed.
                raise Exception(
                    "Variable image size unsupported when multiple "
                    "inputs are active.")
            shape[input_name] = (None, ) + image_shape

    for layer in model.layers[1:]:
        for node in layer._inbound_nodes:
            inputs = [shape[lr.name] for lr in node.inbound_layers]
            if not inputs:
                continue
            shape[layer.name] = layer.compute_output_shape(
                inputs[0] if len(inputs) == 1 else inputs)
    return shape
def build(self, input_shape):
    """Create the layer weights from a `GraphShape`.

    One kernel of shape `(n_features, filters)` is created per basis,
    where the number of bases is capped by the number of adjacency shapes
    (or equals it when `self.basis` is -1). A bias of shape `(filters,)`
    is added when `self.use_bias` is set.

    Args:
        input_shape: a `GraphShape` carrying `nodes_shape` and
            `adjacency_shape`.
    """
    assert isinstance(input_shape, GraphShape)
    x_shape = input_shape.nodes_shape
    adj_shape = to_list(input_shape.adjacency_shape)

    # Nodes and adjacencies must have 2 or 3 dimensions.
    assert 4 > len(x_shape) >= 2, (
        "Expected at least than 2 dims, get %s" % (x_shape, ))
    for a in adj_shape:
        assert 4 > len(a) >= 2, (
            "Expected at least than 2 dims, get %s" % (a, ))

    n_adjacency = len(adj_shape)
    if self.basis == -1:
        self.basis = n_adjacency
    else:
        self.basis = min(self.basis, n_adjacency)

    kernel_shape = (x_shape[-1], self.filters)
    kernels = [
        self.add_weight(shape=kernel_shape,
                        initializer=self.kernel_initializer,
                        name='kernel',
                        regularizer=self.kernel_regularizer,
                        constraint=self.kernel_constraint,
                        trainable=True)
        for _ in range(self.basis)
    ]
    self.kernel = kernels

    if self.use_bias:
        self.bias = self.add_weight(shape=(self.filters, ),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
def reset_state(self):
    """Reset the four confusion-count variables to zeros.

    Each variable receives a fresh zero vector with one entry per
    threshold in `self.thresholds`.
    """
    num_thresholds = len(to_list(self.thresholds))
    counters = (
        self.true_positives,
        self.false_negatives,
        self.true_negatives,
        self.false_positives,
    )
    assignments = [
        (counter, np.zeros((num_thresholds, ))) for counter in counters
    ]
    backend.batch_set_value(assignments)
def build(self, nodes_shape, adjacency_shape):
    """Validate and store the nodes and adjacency shapes.

    After `assert_graph_shape` accepts the pair, both shapes are saved and
    the container content is replaced by the nodes shape followed by the
    adjacency shape(s).
    """
    self.assert_graph_shape(nodes_shape, adjacency_shape)
    self._nodes_shape, self._adjacency_shape = nodes_shape, adjacency_shape

    self._clear()
    members = [self._nodes_shape] + to_list(self._adjacency_shape)
    self._extend(members)
def compute_output_shape(self, input_shape):
    """Compute the output shape(s) of the RNN layer.

    Returns the output shape alone, or, when `self.return_state` is set,
    a list with the output shape(s) followed by one shape per state.
    """
    if isinstance(input_shape, list):
        input_shape = input_shape[0]
    # Check whether the input shape contains any nested shapes. It could be
    # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from
    # numpy inputs.
    try:
        input_shape = tf.TensorShape(input_shape)
    except (ValueError, TypeError):
        # A nested tensor input
        input_shape = tf.nest.flatten(input_shape)[0]

    # The first two dimensions are (batch, time), or (time, batch) when
    # the layer is time-major.
    if self.time_major:
        time_step, batch = input_shape[0], input_shape[1]
    else:
        batch, time_step = input_shape[0], input_shape[1]

    state_size = self.cell.state_size
    if not rnn_utils.is_multiple_state(state_size):
        state_size = [state_size]

    def _get_output_shape(flat_output_size):
        output_dim = tf.TensorShape(flat_output_size).as_list()
        if not self.return_sequences:
            leading_dims = [batch]
        elif self.time_major:
            leading_dims = [time_step, batch]
        else:
            leading_dims = [batch, time_step]
        return tf.TensorShape(leading_dims + output_dim)

    cell_output_size = getattr(self.cell, 'output_size', None)
    if cell_output_size is None:
        # Note that state_size[0] could be a tensor_shape or int.
        output_shape = _get_output_shape(state_size[0])
    else:
        # cell.output_size could be nested structure.
        output_shape = tf.nest.flatten(
            tf.nest.map_structure(_get_output_shape, cell_output_size))
        if len(output_shape) == 1:
            output_shape = output_shape[0]

    if not self.return_state:
        return output_shape

    def _get_state_shape(flat_state):
        return tf.TensorShape([batch] + tf.TensorShape(flat_state).as_list())

    state_shape = tf.nest.map_structure(_get_state_shape, state_size)
    return generic_utils.to_list(output_shape) + tf.nest.flatten(state_shape)
def on_batch_end(self, batch, logs=None):
    """Log one CSV row per batch and advance the learning-rate sweep.

    The current learning rate and epoch are added to `logs`. Every
    `self.val_period` batches, and once the learning rate exceeds
    `self.lr_max`, the model is evaluated on the validation generator and
    the results are added under `val_<metric>` keys. The row is written
    to `self.csv_file`, then the learning rate is multiplied by
    `self.lr_increment` and training is stopped once it exceeds
    `self.lr_max`.

    # Arguments
        batch: index of the batch within the epoch (unused here; the
            callback keeps its own `self.batch_no` counter).
        logs: dict of metrics for this batch; updated in place.
    """
    logs = logs or {}
    logs['lr'] = self.lr
    logs['epoch'] = self.epoch

    if (self.batch_no % self.val_period == 0) or (self.lr > self.lr_max):
        val_outs = self.model.evaluate_generator(self.val_enqueuer_gen,
                                                 self.validation_steps,
                                                 workers=0)
        val_outs = to_list(val_outs)
        # Same labels assumed.
        for label, value in zip(self.model.metrics_names, val_outs):
            logs['val_' + label] = value

    def handle_value(k):
        """Render a log value as a CSV cell."""
        is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
        if isinstance(k, six.string_types):
            return k
        if isinstance(k, Iterable) and not is_zero_dim_ndarray:
            return '"[%s]"' % (', '.join(map(str, k)))
        return k

    # The columns are fixed by the first batch that gets logged.
    if self.keys is None:
        self.keys = sorted(logs.keys())

    if not self.writer:

        class CustomDialect(csv.excel):
            delimiter = self.sep

        fieldnames = ['batch_no'] + self.keys
        if six.PY2:
            fieldnames = [unicode(x) for x in fieldnames]
        self.writer = csv.DictWriter(self.csv_file,
                                     fieldnames=fieldnames,
                                     dialect=CustomDialect)
        if self.append_header:
            self.writer.writeheader()

    # Always fall back to 'NA' for a column missing from this batch's
    # logs (e.g. `val_*` on a batch without validation, or the last batch
    # after training was stopped) instead of failing with a KeyError, so
    # that csv parsers do not choke on the row.
    row_dict = OrderedDict({'batch_no': self.batch_no})
    row_dict.update(
        (key, handle_value(logs.get(key, 'NA'))) for key in self.keys)
    self.writer.writerow(row_dict)
    self.csv_file.flush()

    self.lr *= self.lr_increment
    self.batch_no += 1
    if self.lr > self.lr_max:
        self.model.stop_training = True
def draw(self, outputs, num):
    """Write one scalar summary per metric value and flush the writer.

    # Arguments
        outputs: a metric value or a list of metric values, paired in
            order with `model.metrics_names`.
        num: step passed to `add_summary` for every written value.
    """
    # NOTE(review): `model` is neither a parameter nor a local here; it
    # must be resolvable from the enclosing/module scope - confirm.
    logs = dict(zip(model.metrics_names, to_list(outputs)))

    for tag, scalar in logs.items():
        summary = tf.Summary()
        entry = summary.value.add()
        entry.simple_value = scalar
        entry.tag = tag
        self.writer.add_summary(summary, num)
        self.writer.flush()
def call(self, inputs, states, constants, training=None):
    """Complete attentive cell transformation.

    Splits `states` into the wrapped cell's states and the attention
    states, then delegates to the attend-after or attend-before
    implementation depending on `self.attend_after`.
    """
    # NOTE: `K.rnn` will pass constants as a tuple and
    # `_collect_previous_mask` returns `None` if passed a tuple of tensors,
    # hence the `to_list` conversion. `attended` and `attended_mask` are
    # also always kept as lists for uniformity.
    attended = to_list(constants, allow_tuple=True)
    attended_mask = to_list(_collect_previous_mask(attended))

    n_wrapped = self._num_wrapped_states
    transform = (self._call_attend_after
                 if self.attend_after else self._call_attend_before)
    return transform(inputs=inputs,
                     cell_states=states[:n_wrapped],
                     attended=attended,
                     attention_states=states[n_wrapped:],
                     attended_mask=attended_mask,
                     training=training)
def call(self, inputs):
    """Apply the graph convolution to a GraphWrapper.

    inputs: GraphWrapper holding
        - the adjacency matrix (shape: (n_nodes, n_nodes)), or a list of
          them
        - the nodes features (shape: (n_nodes, n_features))

    Each kernel projects the node features, the matching adjacency
    propagates them, and the per-adjacency results are summed. Bias and
    activation are applied when configured.

    TODO: implement here the renormalization trick
    """
    if not isinstance(inputs, GraphWrapper):
        raise ValueError()
    nodes = inputs.nodes
    adjacency = inputs.adjacency
    adj_ls = to_list(adjacency)

    supports = [K.dot(nodes, kernel) for kernel in self.kernel]

    features = []
    for support, adj in zip(supports, adj_ls):
        if len(K.int_shape(support)) == 3:
            support = K.permute_dimensions(support, (1, 0, 2))
            propagated = K.batch_dot(adj, support)
            propagated = K.reshape(
                propagated, [-1, inputs.n_nodes, self.filters])
        else:
            propagated = K.dot(adj, support)
        features.append(propagated)

    if len(adj_ls) > 1:
        # Add a leading axis so the results can be stacked and summed.
        features = [K.expand_dims(f, axis=0) for f in features]

    if self.use_attention:
        raise NotImplementedError()
    if len(features) > 1:
        features = K.sum(K.concatenate(features, axis=0), axis=0)
    else:
        features = features[0]

    if self.use_bias:
        features = K.bias_add(features,
                              self.bias,
                              data_format='channels_last')
    if self.activation is not None:
        features = self.activation(features)
    return self.make_output_graph(adjacency=adjacency, nodes=features)
def _collect_input_shape(input_tensors):
    """Gathers the static shape of each given Keras tensor.

    # Arguments
        input_tensors: list of input tensors (or single input tensor).

    # Returns
        List of shape tuples (or single tuple), one tuple per input.
        An entry is `None` when `K.int_shape` raises `TypeError` for
        that input.
    """
    shapes = []
    for tensor in to_list(input_tensors):
        try:
            shape = K.int_shape(tensor)
        except TypeError:
            shape = None
        shapes.append(shape)
    return unpack_singleton(shapes)
def score(self, X, y, **kwargs):
    """Return the accuracy of the fitted model on `X`, `y`.

    # Arguments
        X: test samples; validated with `check_array`.
        y: true labels; mapped to class indices via `self.classes_` and
            one-hot encoded when the loss is categorical crossentropy.
        **kwargs: extra arguments, checked against `Model.evaluate` and
            forwarded to it.

    # Returns
        The value of the accuracy metric.

    # Raises
        ValueError: if the model does not report an accuracy metric.
    """
    X = check_array(X, accept_sparse=['csc', 'csr'], allow_nd=True)
    y = np.searchsorted(self.classes_, y)
    check_params(kwargs, Model.evaluate)

    if self.loss == 'categorical_crossentropy' and len(y.shape) != 2:
        y = to_categorical(y)

    outputs = to_list(self.model_.evaluate(X, y, **kwargs))
    for name, output in zip(self.model_.metrics_names, outputs):
        # The metric may be reported under its short or its full name.
        if name in ('acc', 'accuracy'):
            return output
    raise ValueError('The model is not configured to compute accuracy. '
                     'You should pass `metrics=["accuracy"]` to '
                     'the `model.compile()` method.')
def compute_output_shape(self, input_shape):
    """Compute the output shape: the input shape plus `output_dim`.

    When `self.input_length` is set, it must be compatible with the
    non-batch dimensions of `input_shape`; its `None` entries are filled
    in from `input_shape`.

    # Raises
        ValueError: if `input_length` does not match `input_shape` in
            rank or in any known dimension.
    """
    if self.input_length is None:
        return input_shape + (self.output_dim, )

    def _mismatch_error():
        return ValueError(
            '"input_length" is %s, but received input has shape %s' %
            (str(self.input_length), str(input_shape)))

    # input_length can be tuple if input is 3D or higher
    in_lens = to_list(self.input_length, allow_tuple=True)
    if len(in_lens) != len(input_shape) - 1:
        raise _mismatch_error()

    for i, (expected, actual) in enumerate(zip(in_lens, input_shape[1:])):
        if expected is not None and actual is not None and expected != actual:
            raise _mismatch_error()
        if expected is None:
            # Unknown configured length: take it from the input shape.
            in_lens[i] = actual
    return (input_shape[0], ) + tuple(in_lens) + (self.output_dim, )
def _collect_previous_mask(input_tensors):
    """Looks up the output mask(s) of the node that produced each tensor.

    # Arguments
        input_tensors: A tensor or list of tensors.

    # Returns
        A mask tensor or list of mask tensors. The mask is `None` for
        any tensor without a `_keras_history` attribute.
    """
    masks = []
    for tensor in to_list(input_tensors):
        if not hasattr(tensor, '_keras_history'):
            masks.append(None)
            continue
        inbound_layer, node_index, tensor_index = tensor._keras_history
        node = inbound_layer._inbound_nodes[node_index]
        masks.append(node.output_masks[tensor_index])
    return unpack_singleton(masks)
def call(self, inputs):
    """Apply dropout to the nodes and, optionally, to the adjacency.

    Args:
        inputs: a `GraphWrapper`; anything else raises `ValueError`.

    Returns:
        The output graph built from the dropped-out nodes and either the
        dropped-out adjacency (when `self.use_adjacency_dropout` is set)
        or the untouched input adjacency.
    """
    if not isinstance(inputs, GraphWrapper):
        raise ValueError()
    nodes = inputs.nodes
    adjacency = inputs.adjacency

    node_dropout = Dropout(rate=self.nodes_rate,
                           noise_shape=self.nodes_noise_shape,
                           seed=self.nodes_seed)
    new_nodes = node_dropout(nodes)

    if self.use_adjacency_dropout:
        # NOTE(review): the adjacency dropout is seeded with
        # `self.nodes_seed`; confirm this is intended rather than a
        # dedicated adjacency seed.
        dropped = [
            Dropout(rate=self.adjacency_rate,
                    noise_shape=self.adjacency_noise_shape,
                    seed=self.nodes_seed)(a)
            for a in to_list(adjacency)
        ]
        new_adj = unpack_singleton(dropped)
    else:
        new_adj = adjacency
    return self.make_output_graph(adjacency=new_adj, nodes=new_nodes)
def score(self, x, y, **kwargs):
    """Computes the mean accuracy on the given test data and labels.

    # Arguments
        x : array-like, shape `(n_samples, n_features)`
            Test samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `x`.
        **kwargs : dictionary arguments
            Legal arguments are the arguments of `Sequential.evaluate`.

    # Returns
        score : float
            Mean accuracy of predictions on `x` wrt. `y`.

    # Raises
        ValueError : If the underlying model isn't configured to
            compute accuracy. You should pass `metrics=["accuracy"]` to
            the `.compile()` method of the model.
    """
    y = np.searchsorted(self.classes_, y)
    kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)

    # The loss may be given as a string or as a function.
    loss_name = self.model_.loss
    loss_name = getattr(loss_name, '__name__', loss_name)
    if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
        y = to_categorical(y)

    outputs = to_list(self.model_.evaluate(x, y, **kwargs))
    for name, output in zip(self.model_.metrics_names, outputs):
        if name in ('acc', 'accuracy'):
            return output
    raise ValueError('The model is not configured to compute accuracy. '
                     'You should pass `metrics=["accuracy"]` to '
                     'the `model.compile()` method.')
async def fit_generator(model,
                        generator,
                        steps_per_epoch=None,
                        epochs=1,
                        verbose=1,
                        callbacks=None,
                        validation_data=None,
                        validation_steps=None,
                        class_weight=None,
                        shuffle=True,
                        initial_epoch=0):
    """See docstring for `Model.fit_generator`.

    Asynchronous variant: batches are awaited from `generator.async_next`
    and validation is awaited from `evaluate_generator`.
    """
    epoch = initial_epoch
    do_validation = bool(validation_data)

    model._make_train_function()
    if do_validation:
        model._make_test_function()

    if steps_per_epoch is None:
        steps_per_epoch = len(generator)

    # Prepare display labels.
    out_labels = model.metrics_names
    callback_metrics = out_labels + ['val_' + n for n in out_labels]

    # prepare callbacks
    model.history = cbks.History()
    _callbacks = [
        cbks.BaseLogger(stateful_metrics=model.stateful_metric_names)
    ]
    if verbose:
        _callbacks.append(
            cbks.ProgbarLogger(count_mode='steps',
                               stateful_metrics=model.stateful_metric_names))
    _callbacks += (callbacks or []) + [model.history]
    callbacks = cbks.CallbackList(_callbacks)

    # it's possible to callback a different model than self:
    callback_model = getattr(model, 'callback_model', None) or model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    def _bad_output_error(generator_output):
        return ValueError('Output of generator should be '
                          'a tuple `(x, y, sample_weight)` '
                          'or `(x, y)`. Found: ' + str(generator_output))

    def _batch_size_of(x):
        if x is None or len(x) == 0:
            # Handle data tensors support when no input given
            # step-size = 1 for data tensors
            return 1
        if isinstance(x, list):
            return x[0].shape[0]
        if isinstance(x, dict):
            return list(x.values())[0].shape[0]
        return x.shape[0]

    output_generator = generator.async_next
    callback_model.stop_training = False

    # Construct epoch logs.
    epoch_logs = {}
    while epoch < epochs:
        for metric in model.stateful_metric_functions:
            metric.reset_states()
        callbacks.on_epoch_begin(epoch)
        steps_done = 0
        batch_index = 0
        while steps_done < steps_per_epoch:
            generator_output = await output_generator()

            if not hasattr(generator_output, '__len__'):
                raise _bad_output_error(generator_output)
            n_items = len(generator_output)
            if n_items == 2:
                x, y = generator_output
                sample_weight = None
            elif n_items == 3:
                x, y, sample_weight = generator_output
            else:
                raise _bad_output_error(generator_output)

            # build batch logs
            batch_logs = {
                'batch': batch_index,
                'size': _batch_size_of(x),
            }
            callbacks.on_batch_begin(batch_index, batch_logs)

            outs = to_list(
                model.train_on_batch(x,
                                     y,
                                     sample_weight=sample_weight,
                                     class_weight=class_weight))
            for label, value in zip(out_labels, outs):
                batch_logs[label] = value

            callbacks.on_batch_end(batch_index, batch_logs)

            batch_index += 1
            steps_done += 1

            # Epoch finished.
            if steps_done >= steps_per_epoch and do_validation:
                val_outs = to_list(await evaluate_generator(
                    model, validation_data, validation_steps))
                # Same labels assumed.
                for label, value in zip(out_labels, val_outs):
                    epoch_logs['val_' + label] = value

            if callback_model.stop_training:
                break

        generator.on_epoch_end()
        callbacks.on_epoch_end(epoch, epoch_logs)
        epoch += 1
        if callback_model.stop_training:
            break

    callbacks.on_train_end()
    return model.history
def parse_init_thresholds(thresholds, default_threshold=0.5):
    """Normalize `thresholds` into a list.

    Args:
        thresholds: a threshold, a list of thresholds, or None.
        default_threshold: value used when `thresholds` is None.

    Returns:
        `to_list(default_threshold)` when `thresholds` is None; otherwise
        `to_list(thresholds)` after `assert_thresholds_range` has checked
        the values.
    """
    if thresholds is None:
        return to_list(default_threshold)
    assert_thresholds_range(to_list(thresholds))
    return to_list(thresholds)
def _clone_sequential_model(model, input_tensors=None, layer_fn=_clone_layer):
    """Clone a `Sequential` model instance.

    Model cloning is similar to calling a model on new inputs,
    except that it creates new layers (and thus new weights) instead
    of sharing the weights of the existing layers.

    Args:
        model: Instance of `Sequential`.
        input_tensors: optional list of input tensors
            to build the model upon. If not provided,
            placeholders will be created.
        layer_fn: callable to be applied on non-input layers in the model.
            By default it clones the layer. Another example is to preserve
            the layer to share the weights. This is required when we create
            a per-replica copy of the model with distribution strategy; we
            want the weights to be shared but still feed inputs separately
            so we create new input layers.

    Returns:
        An instance of `Sequential` reproducing the behavior
        of the original model, on top of new inputs tensors,
        using newly instantiated weights.

    Raises:
        ValueError: in case of invalid `model` argument value or
            `layer_fn` argument value.
    """
    if not isinstance(model, Sequential):
        raise ValueError(
            'Expected `model` argument '
            'to be a `Sequential` model instance, '
            'but got:', model)

    if not callable(layer_fn):
        raise ValueError('Expected `layer_fn` argument to be a callable.')

    layers = []  # Layers needed to compute the model's outputs.
    layer_map = {}
    # Ensure that all layers are cloned. The model's layers
    # property will exclude the initial InputLayer (if it exists) in the
    # model, resulting in a different Sequential model structure.
    for layer in model._flatten_layers(include_self=False, recursive=False):
        is_input_layer = isinstance(layer, InputLayer)
        if is_input_layer and input_tensors is not None:
            # If input tensors are provided, the original model's
            # InputLayer is overwritten with a different InputLayer.
            continue
        cloned_layer = _clone_layer(layer) if is_input_layer else layer_fn(
            layer)
        layers.append(cloned_layer)
        layer_map[layer] = cloned_layer
    layers, ancillary_layers = _remove_ancillary_layers(
        model, layer_map, layers)

    if input_tensors is None:
        cloned_model = Sequential(layers=layers, name=model.name)
    elif len(generic_utils.to_list(input_tensors)) != 1:
        raise ValueError('To clone a `Sequential` model, we expect '
                         ' at most one tensor '
                         'as part of `input_tensors`.')
    else:
        # Overwrite the original model's input layer.
        if isinstance(input_tensors, tuple):
            input_tensors = list(input_tensors)
        x = generic_utils.to_list(input_tensors)[0]
        if backend.is_keras_tensor(x):
            origin_layer = x._keras_history.layer
            if not isinstance(origin_layer, InputLayer):
                raise ValueError('Cannot clone a `Sequential` model on top '
                                 'of a tensor that comes from a Keras layer '
                                 'other than an `InputLayer`. '
                                 'Use the functional API instead.')
            first_layer = origin_layer
        else:
            input_tensor = Input(tensor=x,
                                 name='input_wrapper_for_' + str(x.name))
            first_layer = input_tensor._keras_history.layer
        cloned_model = Sequential(layers=[first_layer] + layers,
                                  name=model.name)

    if not ancillary_layers:
        return cloned_model

    tensor_map = {}  # Maps tensors from `model` to those in `cloned_model`.
    for depth, cloned_nodes in cloned_model._nodes_by_depth.items():
        nodes = model._nodes_by_depth[depth]
        # This should be safe in a Sequential model. In an arbitrary
        # network, you need to sort using the outbound layer of the node as
        # a key.
        for cloned_node, node in zip(cloned_nodes, nodes):
            cloned_outputs = cloned_node.output_tensors
            if isinstance(cloned_outputs, list):
                for j, output_tensor in enumerate(cloned_outputs):
                    tensor_map[node.output_tensors[j]] = output_tensor
            else:
                tensor_map[node.output_tensors] = cloned_outputs

    # Ancillary nodes have negative depth.
    ancillary_nodes_by_depth = {
        depth: nodes
        for depth, nodes in model._nodes_by_depth.items()
        if depth < 0
    }
    new_nodes = _make_new_nodes(ancillary_nodes_by_depth, layer_fn,
                                layer_map, tensor_map)
    _insert_ancillary_layers(cloned_model, ancillary_layers,
                             model.metrics_names, new_nodes)
    return cloned_model
def call(self,
         inputs,
         mask=None,
         training=None,
         initial_state=None,
         constants=None):
    """Run the cell over the time dimension of `inputs`.

    Returns the last output (or the full sequence when
    `self.return_sequences` is set), followed by the final states when
    `self.return_state` is set.

    Raises:
        ValueError: if `self.unroll` is set but the time dimension is
            undefined, or if constants are passed to a cell whose `call`
            has no `constants` argument.
    """
    # The input should be dense, padded with zeros. If a ragged input is fed
    # into the layer, it is padded and the row lengths are used for masking.
    inputs, row_lengths = backend.convert_inputs_if_ragged(inputs)
    is_ragged_input = row_lengths is not None
    self._validate_args_if_ragged(is_ragged_input, mask)

    inputs, initial_state, constants = self._process_inputs(
        inputs, initial_state, constants)

    self._maybe_reset_cell_dropout_mask(self.cell)
    if isinstance(self.cell, StackedRNNCells):
        for cell in self.cell.cells:
            self._maybe_reset_cell_dropout_mask(cell)

    if mask is not None:
        # Time step masks must be the same for each input.
        # TODO(scottzhu): Should we accept multiple different masks?
        mask = tf.nest.flatten(mask)[0]

    # In the case of nested input, use the first element for shape check.
    if tf.nest.is_nested(inputs):
        reference_input = tf.nest.flatten(inputs)[0]
    else:
        reference_input = inputs
    input_shape = backend.int_shape(reference_input)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if self.unroll and timesteps is None:
        raise ValueError('Cannot unroll a RNN if the '
                         'time dimension is undefined. \n'
                         '- If using a Sequential model, '
                         'specify the time dimension by passing '
                         'an `input_shape` or `batch_input_shape` '
                         'argument to your first layer. If your '
                         'first layer is an Embedding, you can '
                         'also use the `input_length` argument.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a `shape` '
                         'or `batch_shape` argument to your Input layer.')

    kwargs = {}
    if generic_utils.has_arg(self.cell.call, 'training'):
        kwargs['training'] = training

    # TF RNN cells expect single tensor as state instead of list wrapped
    # tensor.
    is_tf_rnn_cell = getattr(self.cell, '_is_tf_rnn_cell', None) is not None
    # Use the __call__ function for callable objects, eg layers, so that it
    # will have the proper name scopes for the ops, etc.
    if callable(self.cell):
        cell_call_fn = self.cell.__call__
    else:
        cell_call_fn = self.cell.call

    def _unwrap_single_state(states):
        # Hand a lone state to TF RNN cells as a bare tensor.
        if len(states) == 1 and is_tf_rnn_cell:
            return states[0]
        return states

    def _as_state_list(new_states):
        if tf.nest.is_nested(new_states):
            return new_states
        return [new_states]

    if constants:
        if not generic_utils.has_arg(self.cell.call, 'constants'):
            raise ValueError(
                f'RNN cell {self.cell} does not support constants. '
                f'Received: constants={constants}')

        def step(inputs, states):
            # The constants travel at the tail of the state list.
            step_constants = states[-self._num_constants:]  # pylint: disable=invalid-unary-operand-type
            cell_states = states[:-self._num_constants]  # pylint: disable=invalid-unary-operand-type
            cell_states = _unwrap_single_state(cell_states)
            output, new_states = cell_call_fn(inputs,
                                              cell_states,
                                              constants=step_constants,
                                              **kwargs)
            return output, _as_state_list(new_states)
    else:

        def step(inputs, states):
            cell_states = _unwrap_single_state(states)
            output, new_states = cell_call_fn(inputs, cell_states, **kwargs)
            return output, _as_state_list(new_states)

    last_output, outputs, states = backend.rnn(
        step,
        inputs,
        initial_state,
        constants=constants,
        go_backwards=self.go_backwards,
        mask=mask,
        unroll=self.unroll,
        input_length=row_lengths if is_ragged_input else timesteps,
        time_major=self.time_major,
        zero_output_for_mask=self.zero_output_for_mask)

    if self.stateful:
        state_pairs = zip(tf.nest.flatten(self.states),
                          tf.nest.flatten(states))
        updates = [
            tf.compat.v1.assign(self_state,
                                tf.cast(state, self_state.dtype))
            for self_state, state in state_pairs
        ]
        self.add_update(updates)

    if self.return_sequences:
        output = backend.maybe_convert_to_ragged(
            is_ragged_input,
            outputs,
            row_lengths,
            go_backwards=self.go_backwards)
    else:
        output = last_output

    if not self.return_state:
        return output

    if isinstance(states, (list, tuple)):
        states = list(states)
    else:
        states = [states]
    return generic_utils.to_list(output) + states
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """Call the layer, routing `initial_state` and `constants` correctly.

    When they are Keras tensors they are appended to the layer inputs
    (with a temporarily extended `input_spec`); otherwise they are passed
    through as keyword arguments.

    Raises:
        ValueError: if the initial state and constants mix Keras tensors
            and non-Keras tensors.
    """
    inputs, initial_state, constants = rnn_utils.standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if initial_state is None and constants is None:
        return super(RNN, self).__call__(inputs, **kwargs)

    # If any of `initial_state` or `constants` are specified and are Keras
    # tensors, then add them to the inputs and temporarily modify the
    # input_spec to include them.
    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
        additional_inputs += initial_state
        self.state_spec = tf.nest.map_structure(
            lambda s: InputSpec(shape=backend.int_shape(s)), initial_state)
        additional_specs += self.state_spec
    if constants is not None:
        additional_inputs += constants
        self.constants_spec = [
            InputSpec(shape=backend.int_shape(constant))
            for constant in constants
        ]
        self._num_constants = len(constants)
        additional_specs += self.constants_spec

    # additional_inputs can be empty if initial_state or constants are
    # provided but empty (e.g. the cell is stateless).
    flat_additional_inputs = tf.nest.flatten(additional_inputs)
    if flat_additional_inputs:
        is_keras_tensor = backend.is_keras_tensor(flat_additional_inputs[0])
    else:
        is_keras_tensor = True
    for tensor in flat_additional_inputs:
        if backend.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError(
                'The initial state or constants of an RNN layer cannot be '
                'specified via a mix of Keras tensors and non-Keras tensors '
                '(a "Keras tensor" is a tensor that was returned by a Keras layer '
                ' or by `Input` during Functional model construction). '
                f'Received: initial_state={initial_state}, constants={constants}'
            )

    if not is_keras_tensor:
        # Plain tensors/values: hand them over as keyword arguments.
        if initial_state is not None:
            kwargs['initial_state'] = initial_state
        if constants is not None:
            kwargs['constants'] = constants
        return super(RNN, self).__call__(inputs, **kwargs)

    # Compute the full input spec, including state and constants
    full_input = [inputs] + additional_inputs
    if self.built:
        # Keep the input_spec since it has been populated in build() method.
        full_input_spec = self.input_spec + additional_specs
    else:
        # The original input_spec is None since there could be a nested
        # tensor input. Update the input_spec to match the inputs.
        placeholder_spec = tf.nest.map_structure(lambda _: None, inputs)
        full_input_spec = (generic_utils.to_list(placeholder_spec) +
                           additional_specs)

    # Perform the call with temporarily replaced input_spec
    self.input_spec = full_input_spec
    output = super(RNN, self).__call__(full_input, **kwargs)
    # Remove the additional_specs from input spec and keep the rest. It is
    # important to keep since the input spec was populated by build(), and
    # will be reused in the stateful=True.
    self.input_spec = self.input_spec[:-len(additional_specs)]
    return output
def build(self, input_shape):
    """Populate input/state specs and build the wrapped cell.

    Steps, in order: derive `self.input_spec` from `input_shape`, build
    `self.cell` (when it is a Layer that is not yet built) with the
    per-step input shape, create or validate `self.state_spec`, reset the
    states if the layer is stateful, and finally mark the layer as built.

    Args:
        input_shape: Shape of the input. May be a list (only its first
            element is used), a single shape, or a nested structure of
            shapes.
    """
    if isinstance(input_shape, list):
        # Only the first entry is used; it may itself be a nested structure.
        input_shape = input_shape[0]

    def _to_input_spec(shape):
        """Convert one input shape to an InputSpec."""
        if isinstance(shape, tf.TensorShape):
            dims = shape.as_list()
        else:
            dims = list(shape)
        if self.time_major:
            batch_axis, time_axis = 1, 0
        else:
            batch_axis, time_axis = 0, 1
        # The batch dimension stays pinned only for stateful layers; the
        # time dimension is always left unconstrained.
        if not self.stateful:
            dims[batch_axis] = None
        dims[time_axis] = None
        return InputSpec(shape=tuple(dims))

    def _drop_time_axis(shape):
        """Return `shape` with the timestep dimension removed."""
        if isinstance(shape, tf.TensorShape):
            shape = tuple(shape.as_list())
        if self.time_major:
            return shape[1:]
        return (shape[0], ) + shape[2:]

    def _to_state_spec(shape):
        """Convert one state size to an InputSpec with a leading batch dim."""
        dims = [None] + tf.TensorShape(shape).as_list()
        return InputSpec(shape=tuple(dims))

    # Check whether the input shape contains any nested shapes. It could be
    # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from
    # numpy inputs. A failed conversion means a nested tensor input.
    try:
        input_shape = tf.TensorShape(input_shape)
    except (ValueError, TypeError):
        pass

    if tf.nest.is_nested(input_shape):
        nested_spec = tf.nest.map_structure(_to_input_spec, input_shape)
        if self.input_spec is None:
            self.input_spec = generic_utils.to_list(nested_spec)
        else:
            self.input_spec[0] = nested_spec
        step_input_shape = tf.nest.map_structure(_drop_time_axis, input_shape)
    else:
        # A single (non-nested) input.
        single_spec = _to_input_spec(input_shape)
        if self.input_spec is None:
            self.input_spec = [single_spec]
        else:
            self.input_spec[0] = single_spec
        step_input_shape = _drop_time_axis(input_shape)

    # Let the cell (if it is a layer) build before state_spec is set or
    # validated.
    if isinstance(self.cell, base_layer.Layer) and not self.cell.built:
        with backend.name_scope(self.cell.name):
            self.cell.build(step_input_shape)
            self.cell.built = True

    # Normalise the cell's state size to a list.
    state_size = (
        list(self.cell.state_size)
        if rnn_utils.is_multiple_state(self.cell.state_size)
        else [self.cell.state_size]
    )

    if self.state_spec is None:
        # No initial_state was passed in call: derive the spec from the cell.
        if tf.nest.is_nested(state_size):
            self.state_spec = tf.nest.map_structure(_to_state_spec, state_size)
        else:
            self.state_spec = [
                InputSpec(shape=[None] + tf.TensorShape(dim).as_list())
                for dim in state_size
            ]
    # Whether pre-existing or freshly generated, the spec must be
    # compatible with the cell's state size.
    self._validate_state_spec(state_size, self.state_spec)

    if self.stateful:
        self.reset_states()
    self.built = True
def call(self, inputs, mask=None, training=None, initial_state=None,
         constants=None):
    """Run the cell over the input via `K.rnn`.

    Note that the `.build()` method of subclasses MUST define
    `self.input_spec` and `self.state_spec` with complete input shapes.

    Args:
        inputs: Input tensor, or a list whose first element is used.
        mask: Optional mask, or a list whose first element is used.
        training: Forwarded to the cell's `call` when it accepts a
            `training` argument.
        initial_state: Optional initial states. When omitted, the layer's
            own states are used if it is stateful, otherwise
            `self.get_initial_state(inputs)`.
        constants: Optional constants passed through to `K.rnn` and the
            cell.

    Returns:
        The output tensor (full sequence if `self.return_sequences`, else
        the last output), or a list `[output] + states` when
        `self.return_state` is set.

    Raises:
        ValueError: If the number of initial states differs from the
            number of layer states, or if constants are given but the cell
            does not accept them.
    """
    if isinstance(inputs, list):
        inputs = inputs[0]

    if initial_state is None:
        if self.stateful:
            initial_state = self.states
        else:
            initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
        mask = mask[0]

    if len(initial_state) != len(self.states):
        raise ValueError('Layer has ' + str(len(self.states)) +
                         ' states but was passed ' +
                         str(len(initial_state)) +
                         ' initial states.')

    timesteps = K.int_shape(inputs)[1]

    # Only pass `training` through when the cell's call accepts it.
    cell_kwargs = (
        {'training': training} if has_arg(self.cell.call, 'training') else {}
    )

    if constants:
        if not has_arg(self.cell.call, 'constants'):
            raise ValueError('RNN cell does not support constants')

        def step(inputs, states):
            # The trailing `_num_constants` entries of `states` are the
            # constants; everything before them is the actual state.
            split = -self._num_constants
            return self.cell.call(inputs, states[:split],
                                  constants=states[split:], **cell_kwargs)
    else:
        def step(inputs, states):
            return self.cell.call(inputs, states, **cell_kwargs)

    last_output, outputs, states = K.rnn(step,
                                         inputs,
                                         initial_state,
                                         constants=constants,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         input_length=timesteps)

    if self.stateful:
        # Register an update pairing each stored state with its new value.
        updates = [(self.states[idx], new_state)
                   for idx, new_state in enumerate(states)]
        self.add_update(updates, inputs)

    output = outputs if self.return_sequences else last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
        output._uses_learning_phase = True

    if not self.return_state:
        return output
    return [output] + to_list(states, allow_tuple=True)
def reset_states(self, states=None):
    """Initialise, zero, or overwrite the layer's recurrent states.

    The expected shape of each state is derived from
    `self.compute_output_shape(self.input_spec[0].shape)` with the channel
    axis replaced by the corresponding entry of `self.cell.state_size`.

    * If `self.states[0]` is None, fresh zero-filled state variables are
      created with `K.zeros`.
    * Else if `states` is None, the existing states are set to zeros.
    * Otherwise each existing state is set to the matching value in
      `states`.

    Args:
        states: Optional value(s) to load into the states, one per layer
            state. Each value must expose a `.shape` equal to the expected
            state shape.

    Raises:
        AttributeError: If the layer is not stateful.
        ValueError: If the state shape is not fully defined, if the number
            of values in `states` differs from the number of layer states,
            or if a value has an incompatible shape.
        KeyError: If `self.cell.data_format` is neither 'channels_first'
            nor 'channels_last'.
    """
    if not self.stateful:
        raise AttributeError('Layer must be stateful.')
    input_shape = self.input_spec[0].shape
    state_shape = self.compute_output_shape(input_shape)
    if self.return_state:
        state_shape = state_shape[0]
    if self.return_sequences:
        # Drop the time dimension (index 1) from the output shape.
        state_shape = state_shape[:1] + state_shape[2:]
    if None in state_shape:
        raise ValueError('If a RNN is stateful, it needs to know '
                         'its batch size. Specify the batch size '
                         'of your input tensors: \n'
                         '- If using a Sequential model, '
                         'specify the batch size by passing '
                         'a `batch_input_shape` '
                         'argument to your first layer.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a '
                         '`batch_shape` argument to your Input layer.\n'
                         'The same thing goes for the number of rows '
                         'and columns.')

    # helper function
    def get_tuple_shape(nb_channels):
        """Return `state_shape` with its channel axis set to `nb_channels`."""
        result = list(state_shape)
        if self.cell.data_format == 'channels_first':
            result[1] = nb_channels
        elif self.cell.data_format == 'channels_last':
            # Channels are the last axis whatever the rank; a hard-coded
            # index 4 only works for 5-element shapes and raises IndexError
            # for 4-element ones.
            result[-1] = nb_channels
        else:
            raise KeyError(
                'Unsupported data_format: ' + str(self.cell.data_format))
        return tuple(result)

    # initialize state if None
    if self.states[0] is None:
        if hasattr(self.cell.state_size, '__len__'):
            self.states = [
                K.zeros(get_tuple_shape(dim))
                for dim in self.cell.state_size
            ]
        else:
            self.states = [K.zeros(get_tuple_shape(self.cell.state_size))]
    elif states is None:
        if hasattr(self.cell.state_size, '__len__'):
            for state, dim in zip(self.states, self.cell.state_size):
                K.set_value(state, np.zeros(get_tuple_shape(dim)))
        else:
            K.set_value(self.states[0],
                        np.zeros(get_tuple_shape(self.cell.state_size)))
    else:
        states = to_list(states, allow_tuple=True)
        if len(states) != len(self.states):
            raise ValueError('Layer ' + self.name + ' expects ' +
                             str(len(self.states)) + ' states, '
                             'but it received ' + str(len(states)) +
                             ' state values. Input received: ' +
                             str(states))
        for index, (value, state) in enumerate(zip(states, self.states)):
            if hasattr(self.cell.state_size, '__len__'):
                dim = self.cell.state_size[index]
            else:
                dim = self.cell.state_size
            expected_shape = get_tuple_shape(dim)
            if value.shape != expected_shape:
                raise ValueError('State ' + str(index) +
                                 ' is incompatible with layer ' +
                                 self.name + ': expected shape=' +
                                 str(expected_shape) +
                                 ', found shape=' + str(value.shape))
            # TODO: consider batch calls to `set_value`.
            K.set_value(state, value)