def _make_grads_fn(model, layers=None, params=None, mode='outputs', return_names=False): """Returns gradient computation function w.r.t. layer outputs or weights. NOTE: gradients will be clipped if `clipnorm` or `clipvalue` were set. `params` can be layer weights or outputs; cannot supply along `layers`. `mode` is irrelevant if passing `params`. `return_names`: whether to return parameter names along grads_fn. """ def _validate_args(layers, params, mode): got_both = (layers is not None and params is not None) got_neither = (layers is None and params is None) if got_both or got_neither: raise ValueError("one (and only one) of `layers` or `weights` " "must be set") if mode not in ('outputs', 'weights'): raise ValueError("`mode` must be one of: 'outputs', 'weights'") if layers is not None and not isinstance(layers, list): layers = [layers] if params is not None and not isinstance(params, list): params = [params] return layers, params def _get_params(layers, mode): if mode == 'outputs': return [l.output for l in layers] weights = [] _ = [weights.extend(l.trainable_weights) for l in layers] return weights layers, params = _validate_args(layers, params, mode) if params is None: params = _get_params(layers, mode) grads = model.optimizer.get_gradients(model.total_loss, params) if TF_KERAS: inputs = [model.inputs[0], model._feed_targets[0]] else: inputs = [ model.inputs[0], model.sample_weights[0], model._feed_targets[0], K.learning_phase() ] if not return_names: return K.function(inputs=inputs, outputs=grads) else: return (K.function(inputs=inputs, outputs=grads), [p.name for p in params])
def get_rnn_weights(model, _id, layer=None, as_tensors=False,
                    concat_gates=True):
    """Retrieves RNN layer weights.

    Arguments:
        model: keras.Model/tf.keras.Model.
        _id: layer identifier (index or name), validated together with
            `layer` by `_validate_args`. Only the first resolved name is
            used or, lacking a (truthy) name, the first resolved index.
        layer: keras.Layer/tf.keras.Layer. Layer whose weights to return.
            When given, no lookup through `_id` is performed.
        as_tensors: forwarded to `_get_cell_weights`. If True, returns
            weight tensors instead of array values.
        concat_gates: forwarded to `_get_cell_weights`. If True, kernel
            weights are returned as single concatenated matrices instead of
            individual per-gate weight lists.

    Returns:
        Result of `_get_cell_weights` for the layer's cell. For a layer
        exposing `backward_layer` (bidirectional wrapper), the forward
        result followed by the backward result, joined with `+`.
    """
    names, idxs, *_ = _validate_args(_id, layer)
    name = None if names is None else names[0]
    idx = None if idxs is None else idxs[0]

    if layer is None:
        layer = get_layer(model, name or idx)

    is_cudnn = 'CuDNN' in _validate_rnn_type(layer, return_value=True)

    def _cell_of(rnn):
        # CuDNN layers are used directly; other RNN layers expose `.cell`
        return rnn if is_cudnn else rnn.cell

    if not hasattr(layer, 'backward_layer'):
        return _get_cell_weights(_cell_of(layer), as_tensors, concat_gates)

    fwd_cell = _cell_of(layer.forward_layer)
    bwd_cell = _cell_of(layer.backward_layer)
    fwd_weights = _get_cell_weights(fwd_cell, as_tensors, concat_gates)
    bwd_weights = _get_cell_weights(bwd_cell, as_tensors, concat_gates)
    return fwd_weights + bwd_weights
def get_full_name(model, _id):
    """Given full or partial (substring) layer name, or layer index, or list
    containing either, return complete layer name(s).

    Arguments:
        model: keras.Model / tf.keras.Model.
        _id: str/int/(list of str/int).
            int -> idx; str -> name
              idx: int. Index of layer to fetch, via model.layers[idx].
              name: str. Name of layer (full or substring) to be fetched.
                         Returns earliest match if multiple found.
            list of str/int -> treat each str element as name, int as idx.
                      Ex: ['gru', 2] gets full names of first layer w/ name
                      substring 'gru', and of layer w/ idx 2.

    Returns:
        Full name of layer specified by `_id`, or a list of full names when
        more than one was requested. Index-based names come first, followed
        by name-based matches in `model.layers` order.

    Raises:
        Exception: if names were requested and nothing at all was found.
    """
    names, idxs, _, one_requested = _validate_args(_id)

    found = [] if idxs is None else [model.layers[i].name for i in idxs]

    if names is not None:
        for layer in model.layers:
            # first still-pending substring contained in this layer's name
            hit = next((n for n in names if n in layer.name), None)
            if hit is not None:
                found.append(layer.name)
                # get at most one per match; note this doesn't avoid
                # duplicates, since `names` doesn't
                names.remove(hit)
        if not found:
            raise Exception(f"layer w/ identifier '{_id}' not found")

    return found[0] if one_requested else found
def get_layer(model, _id):
    """Returns layer by index or name.
    If multiple matches are found, returns earliest.

    Arguments:
        model: keras.Model / tf.keras.Model.
        _id: str/int/(list of str/int). Ints are treated as indices into
            `model.layers`, strs as (sub)strings of layer names; parsing is
            done by `_validate_args`.

    Returns:
        A single layer or a list of layers. Index-based layers come first,
        followed by name-based matches in `model.layers` order.

    Raises:
        Exception: if names were requested and no layer at all was found.
    """
    names, idxs, _, one_requested = _validate_args(_id)

    layers = []
    if idxs is not None:
        layers = [model.layers[i] for i in idxs]
    if names is None:
        # index-only request: unwrapped based on count (not `one_requested`)
        return layers if len(layers) > 1 else layers[0]

    for layer in model.layers:
        for n in names:
            if n in layer.name:
                layers.append(layer)
                names.remove(n)  # get at most one per match
                break
    # note that above doesn't avoid duplicates, since `names` doesn't

    if not layers:
        # single formatted message (previously two positional args, which
        # rendered the exception text as a tuple)
        raise Exception("no layers found w/ names matching substring(s): "
                        + ', '.join(names))
    return layers[0] if one_requested else layers
def get_outputs(model, _id, input_data, layer=None, learning_phase=0,
                as_dict=False):
    """Retrieves layer outputs given input data and layer info.

    Arguments:
        model: keras.Model/tf.keras.Model.
        _id: str/int/(list of str/int).
            int -> idx; str -> name
              idx: int. Index of layer to fetch, via model.layers[idx].
              name: str. Name of layer (full or substring) to be fetched.
                         Returns earliest match if multiple found.
            list of str/int -> treat each str element as name, int as idx.
                      Ex: ['gru', 2] gets outputs of first layer with name
                      substring 'gru', then of layer w/ idx 2
            '*': wildcard -> get outputs of all layers (except input) with
                      'output' attribute. Overrides `layer`.
        input_data: np.ndarray & supported formats(1). Data fed to the
               model's input to compute the layer outputs.
        layer: keras.Layer/tf.keras.Layer. Layer whose outputs to return.
               Overrides `_id`.
        learning_phase: bool. 1: use model in train mode
                              0: use model in inference mode
        as_dict: bool. True:  return output fullname-value pairs in a dict
                       False: return output values as list in order fetched

    Returns:
        Layer output values or name-value pairs (see `as_dict`). Without
        `as_dict`, a lone value is returned unwrapped when a single layer
        was requested and a single output was produced.

    (1): tf.data.Dataset, generators, .tfrecords, & other supported TensorFlow
         input data formats
    """
    def _resolve_layers(model, names, idxs, layers):
        if layers is None:
            _id = [x for var in (names, idxs) if var for x in var] or None
            layers = get_layer(model, _id)
        if not isinstance(layers, (list, tuple)):
            layers = [layers]
        return list(layers)

    if _id != '*':
        names, idxs, layers, one_requested = _validate_args(_id, layer)
    else:
        # exclude input layer & non-output layers
        names = [
            l.name for l in model.layers[1:]
            if getattr(l, 'output', None) is not None
        ]
        idxs, layers = None, None
        one_requested = len(_id) == 1  # '*' is a single identifier

    layers = _resolve_layers(model, names, idxs, layers)
    layer_outs = [l.output for l in layers]

    if TF_KERAS:
        outs_fn = K.function([model.input], layer_outs)
    else:
        outs_fn = K.function([model.input, K.learning_phase()], layer_outs)

    # NOTE(review): under TF_KERAS the function is built with one input but
    # is still called with `learning_phase` appended -- confirm the backend
    # ignores the extra value.
    outs = outs_fn([input_data, learning_phase])

    if as_dict:
        # Key by the layers actually evaluated: `get_layer` returns index
        # matches first and name matches in model order, so pairing outputs
        # with the requested identifiers could mislabel or drop entries.
        return {l.name: out for l, out in zip(layers, outs)}
    return outs[0] if (one_requested and len(outs) == 1) else outs
def get_weights(model, _id, omit_names=None, as_tensors=False, as_dict=False):
    """Given full or partial (substring) weight name(s), return weight values
    (and corresponding names if as_dict=True).

    Arguments:
        model: keras.Model / tf.keras.Model
        _id: str/int/tuple of int/(list of str/int/tuple of int).
            int/tuple of int -> idx; str -> name
              idx: int/tuple of int. Index of layer weights to fetch.
                   int -> all weights of model.layer[idx]
                   tuple of int -> e.g. (idx, wi0, wi1) -> weights indexed
                   wi0, wi1, of model.layer[idx]. (idx, [wi0, wi1]) is
                   equivalent.
              name: str. Name of layer (full or substring) to be fetched.
                         Returns earliest match if multiple found.
                         Can specify a weight (full or substring) in format
                         {name/weight_name}.
            list of str/int/tuple of int -> treat each str element as name,
                      int/tuple of int as idx. Ex: ['gru', 2, (3, 1, 2)] gets
                      weights of first layer with name substring 'gru', then
                      all weights of layer w/ idx 2, then weights w/ idxs 1
                      and 2 of layer w/ idx 3.
            '*': wildcard -> get weights of all layers with 'weights'
                      attribute.
        omit_names: str/str list. List of names (can be substring) of weights
                      to omit from fetching.
        as_tensors: bool. True:  return weight tensors.
                          False: return weight values.
        as_dict: bool. True:  return weight fullname-value pairs in a dict
                       False: return weight values as list in order fetched

    Returns:
        Layer weight values or name-value pairs (see `as_dict`). Without
        `as_dict`, a lone weight is returned unwrapped only when a single
        identifier resolved to a single weight.

    Raises:
        Exception: if a name-based identifier matches no weight.
    """
    def _get_by_idx(model, idx):
        if isinstance(idx, tuple):
            layer_idx, *rest = idx
            if not rest:
                weight_idxs = None      # (idx,) -> all weights
            elif len(rest) == 1:
                weight_idxs = rest[0]   # (idx, wi) or (idx, [wi0, wi1])
            else:
                weight_idxs = rest      # (idx, wi0, wi1, ...)
        else:
            layer_idx, weight_idxs = idx, None

        layer = model.get_layer(index=layer_idx)
        if weight_idxs is None:
            return {w.name: w for w in layer.weights}  # get all
        if not isinstance(weight_idxs, (tuple, list)):
            weight_idxs = [weight_idxs]
        return {
            w.name: w for i, w in enumerate(layer.weights)
            if i in weight_idxs
        }

    def _get_by_name(model, name):
        # weight_name == weight part of the full weight name
        parts = name.split('/')
        if len(parts) == 2:
            layer_name, weight_name = parts
        else:
            layer_name, weight_name = parts[0], None

        layer = model.get_layer(name=get_full_name(model, layer_name))
        if weight_name is None:
            found = {w.name: w for w in layer.weights}
        else:
            found = {w.name: w for w in layer.weights
                     if weight_name in w.name}
        if not found:
            raise Exception(f"weight w/ name '{name}' not found")
        return found

    def _get_weights_tensors(model, single_id):
        if isinstance(single_id, str):
            found = _get_by_name(model, single_id)
        else:
            found = _get_by_idx(model, single_id)
        return {
            w_name: w for w_name, w in found.items()
            if not any(to_omit in w_name for to_omit in omit_names)
        }

    if _id != '*':
        names, idxs, *_ = _validate_args(_id)
        _ids = [x for var in (names, idxs) if var for x in var] or None
    else:
        # exclude input layer & non-weight layers
        _ids = [
            l.name for l in model.layers[1:]
            if getattr(l, 'weights', None) not in (None, [])
        ]

    # normalize before the helpers above (which close over it) are called
    if isinstance(omit_names, tuple):
        omit_names = list(omit_names)
    elif not isinstance(omit_names, list):
        omit_names = [omit_names] if omit_names else []

    weights = {}
    for single_id in _ids:
        weights.update(_get_weights_tensors(model, single_id))

    if not as_tensors:
        values = K.batch_get_value(list(weights.values()))
        weights = dict(zip(weights, values))

    if as_dict:
        return weights
    weights = list(weights.values())
    # unwrap only if one identifier yielded exactly one weight; otherwise a
    # single layer's remaining weights would be silently dropped
    return weights[0] if (len(_ids) == 1 and len(weights) == 1) else weights
def _validate_args_(_id, layer, mode): if mode not in ['outputs', 'weights']: raise Exception("`mode` must be one of: 'outputs', 'weights'") return _validate_args(_id, layer)