def _test_save_load(model, X, optimizer_name, optimizer):
    saved_model_preds = model.predict(X[0])
    saved_model_weights = K.batch_get_value(model.trainable_weights)
    saved_optim_weights = K.batch_get_value(model.optimizer.weights)

    test_name = 'test__{}.h5'.format(np.random.random())  # unique temp name
    modelpath = os.path.join(tempfile.gettempdir(), test_name)
    model.save(modelpath)
    del model

    if TF_2 and not TF_EAGER and not TF_KERAS:
        tf.compat.v1.experimental.output_all_intermediates(True)  # bug fix

    model = load_model(modelpath, custom_objects={optimizer_name: optimizer})
    loaded_model_preds = model.predict(X[0])
    loaded_model_weights = K.batch_get_value(model.trainable_weights)
    loaded_optim_weights = K.batch_get_value(model.optimizer.weights)

    assert np.allclose(saved_model_preds, loaded_model_preds,
                       rtol=0, atol=1e-8)
    for smw, lmw in zip(saved_model_weights, loaded_model_weights):
        assert np.allclose(smw, lmw, rtol=0, atol=1e-8)
    for sow, low in zip(saved_optim_weights, loaded_optim_weights):
        assert np.allclose(sow, low, rtol=0, atol=1e-8)
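# Hypothetical usage sketch (not part of the suite): exercising
# `_test_save_load` with AdamW on a trivial Dense model. The model builder
# and shapes below are illustrative assumptions, not fixtures of this file.
def _example_save_load_usage():
    ipt = Input(batch_shape=(16, 8))
    out = Dense(8)(ipt)
    model = Model(ipt, out)
    model.compile(AdamW(lr=1e-3), 'mse')
    X = [np.random.randn(16, 8)]
    model.train_on_batch(X[0], X[0])  # instantiate optimizer weights
    _test_save_load(model, X, 'AdamW', AdamW)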
def _test_outputs_gradients(model):
    x, y, _ = make_data(K.int_shape(model.input), model.layers[2].units)
    name = model.layers[1].name
    grads_all = get_gradients(model, name, x, y, mode='outputs')
    grads_last = get_gradients(model, 2, x, y, mode='outputs')

    kwargs1 = dict(n_rows=None, show_xy_ticks=[0, 0], show_borders=True,
                   max_timesteps=50, title_mode='grads')
    kwargs2 = dict(n_rows=2, show_xy_ticks=[1, 1], show_borders=False,
                   max_timesteps=None)

    features_1D(grads_all[0], **kwargs1)
    features_1D(grads_all[:1], **kwargs1)
    features_1D(grads_all, **kwargs2)
    features_2D(grads_all[0], norm=(-.01, .01), show_colorbar=True, **kwargs1)
    features_2D(grads_all, norm=None, reflect_half=True, **kwargs2)
    features_0D(grads_last, marker='o', color=None, title_mode='grads')
    features_0D(grads_last, marker='x', color='blue', ylims=(-.1, .1))
    features_hist(grads_all, bins=100, xlims=(-.01, .01), title="Outs hists")
    features_hist(grads_all, bins=100, n_rows=4)
    print('\n')  # improve separation
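# Note on shapes: with mode='outputs', `get_gradients` returns d(loss)/d(layer
# outputs), so for an RNN layer the arrays are (samples, timesteps, channels).
# A shape-only sketch of what the `features_*` calls above consume
# (dimensions are illustrative assumptions):
def _example_grads_shapes():
    grads_all = np.random.randn(8, 100, 12)     # (samples, timesteps, channels)
    assert grads_all[0].shape == (100, 12)      # one sample: 2D
    assert grads_all[:1].shape == (1, 100, 12)  # batch dim kept: 3D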
def _test_weights_gradients(model):
    x, y, _ = make_data(K.int_shape(model.input), model.layers[2].units)
    name = model.layers[1].name

    with tempdir() as dirpath:
        kws = dict(input_data=x, labels=y, mode='grads')
        if hasattr(model.layers[1], 'backward_layer'):  # Bidirectional wrapper
            kws['savepath'] = dirpath
        rnn_histogram(model, name, bins=100, **kws)
        rnn_heatmap(model, name, **kws)
def _validate_save_load(tg, C):
    def _get_load_path(tg, logdir):
        for postfix in ('weights', 'model', 'model_noopt'):
            postfix += '.h5'
            path = [str(p) for p in Path(logdir).iterdir()
                    if p.name.endswith(postfix)]
            if path:
                return path[0]
        raise Exception(f"no model save file found in {logdir}")

    # record behavior before saving, to verify nothing changes
    # from pre-save to post-load
    data = np.random.randn(*tg.model.input_shape)
    Wm_save = tg.model.get_weights()
    Wo_save = K.batch_get_value(tg.model.optimizer.weights)
    preds_save = tg.model.predict(data, batch_size=len(data))

    tg.checkpoint()
    logdir = tg.logdir
    tg.destroy(confirm=True)

    C['traingen']['logdir'] = logdir
    path = _get_load_path(tg, logdir)
    if path.endswith('weights.h5'):
        model = make_classifier(**C['model'])
        model.load_weights(path)
    else:
        model = load_model(path)
    tg = init_session(C, model=model)
    tg.load()

    Wm_load = tg.model.get_weights()
    Wo_load = K.batch_get_value(tg.model.optimizer.weights)
    preds_load = tg.model.predict(data, batch_size=len(data))

    for s, l in zip(Wm_save, Wm_load):
        assert np.allclose(s, l), "max absdiff: %s" % np.max(np.abs(s - l))
    for s, l in zip(Wo_save, Wo_load):
        assert np.allclose(s, l), "max absdiff: %s" % np.max(np.abs(s - l))
    assert np.allclose(preds_save, preds_load), (
        "max absdiff: %s" % np.max(np.abs(preds_save - preds_load)))
def test_updates():
    """Ensure weight updates are applied with the same effective learning
    rate (after applying eta_t) for every weight, and that an update with
    eta_t=0 does not change weights.
    """
    def _make_model(opt, batch_shape):
        ipt = Input(batch_shape=batch_shape)
        x = Dense(batch_shape[-1])(ipt)
        out = Dense(batch_shape[-1])(x)
        model = Model(ipt, out)
        model.compile(opt, 'mse')
        return model

    batch_shape = (16, 10, 8)
    x = y = np.random.randn(*batch_shape)

    for Opt in (AdamW, NadamW, SGDW):
        # rerun several times to stress-test
        # nondeterministic device order of operations
        for j in range(5):
            opt = Opt(lr=1e-2, use_cosine_annealing=True, total_iterations=25)
            model = _make_model(opt, batch_shape)
            K.set_value(opt.eta_t, 0)
            # TF cannot guarantee that weights are updated before eta_t is;
            # this ensures t_cur forces eta_t to 0 regardless of update order
            K.set_value(opt.t_cur, opt.total_iterations - 2)

            W_pre = model.get_weights()
            model.train_on_batch(x, y)
            W_post = model.get_weights()

            for i, (w_pre, w_post) in enumerate(zip(W_pre, W_post)):
                absdiff = np.sum(np.abs(w_post - w_pre))
                assert absdiff < 1e-8, (
                    "absdiff = {:.4e} for weight idx = {}, {} optimizer".format(
                        absdiff, i, Opt.__name__))
            print("Nondeterministic-op stress test iter %s passed" % (j + 1))
        cprint("\n<< %s UPDATE TEST PASSED >>\n" % Opt.__name__, 'green')
    cprint("\n<< ALL UPDATES TESTS PASSED >>\n", 'green')
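# The schedule behind `use_cosine_annealing` (per Loshchilov & Hutter, as
# documented by keras-adamw) is
#     eta_t = eta_min + 0.5 * (eta_max - eta_min) * (1 + cos(pi * t_frac)),
# which is why forcing `t_cur` near `total_iterations` drives eta_t to 0.
# A standalone numeric sketch of the endpoints (the exact t_frac bookkeeping
# is the optimizer's concern; this is an assumption-level illustration):
def _example_eta_t_endpoints():
    total_iterations = 25
    eta_min, eta_max = 0., 1.
    t_frac = np.arange(1, total_iterations + 1) / total_iterations
    eta_t = eta_min + .5 * (eta_max - eta_min) * (1 + np.cos(np.pi * t_frac))
    assert eta_t[0] > .99            # start of cycle: nearly full step size
    assert np.isclose(eta_t[-1], 0)  # end of cycle: update is a no-op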
def _weighted_loss(y_true, y_pred, sample_weight):
    # `name`, `TF_2`, `TF_KERAS`, and `keras_losses` come from the
    # enclosing test's scope
    def _standardize(losses, sample_weight):
        if isinstance(sample_weight, np.ndarray):
            if (sample_weight.shape[-1] != y_true.shape[-1]) and (
                    sample_weight.ndim < y_true.ndim):
                sample_weight = np.expand_dims(sample_weight, -1)
            if losses.ndim < sample_weight.ndim:
                losses = np.expand_dims(losses, -1)
        return losses, sample_weight

    yt, yp = _maybe_cast_to_tensor(y_true, y_pred)
    losses = K.eval(getattr(keras_losses, name)(yt, yp))
    # negative sign is standard in TF2, but was positive in TF1
    if name == 'cosine_similarity' and (not TF_2 and TF_KERAS):
        losses = -losses
    losses, sample_weight = _standardize(losses, sample_weight)
    return np.mean(losses * sample_weight)
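# Why `_standardize` pads dimensions: per-sample weights of shape
# (batch_size,) do not broadcast against per-timestep losses of shape
# (batch_size, timesteps) under numpy's trailing-dimension rules. A minimal
# standalone check (shapes are illustrative):
def _example_weight_broadcast():
    losses = np.ones((8, 10))                          # per-timestep losses
    sample_weight = np.arange(8.)                      # one weight per sample
    sample_weight = np.expand_dims(sample_weight, -1)  # (8,) -> (8, 1)
    assert (losses * sample_weight).shape == (8, 10)   # now broadcasts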
def _test_outputs(model):
    x, *_ = make_data(K.int_shape(model.input), model.layers[2].units)
    outs = get_outputs(model, 1, x)

    features_1D(outs[:1], show_y_zero=True)
    features_1D(outs[0])
    features_2D(outs)
def test_misc():  # test miscellaneous functionalities
    units = 6
    batch_shape = (8, 100, 2 * units)

    reset_seeds(reset_graph_with_backend=K)
    model = make_model(GRU, batch_shape, activation='relu',
                       recurrent_dropout=0.3, IMPORTS=IMPORTS)
    x, y, sw = make_data(batch_shape, units)
    model.train_on_batch(x, y, sw)

    weights_norm(model, 'gru', omit_names='bias', verbose=1)
    weights_norm(model, ['gru', 1, (1, 1)], norm_fn=np.abs)
    stats = weights_norm(model, 'gru')
    weights_norm(model, 'gru', _dict=stats)

    grads = get_gradients(model, 1, x, y)
    get_gradients(model, 1, x, y, as_dict=True)
    get_gradients(model, ['gru', 1], x, y)
    get_outputs(model, ['gru', 1], x)

    features_1D(grads, subplot_samples=True, tight=True, borderwidth=2,
                share_xy=False)
    with tempdir() as dirpath:
        features_0D(grads[0], savepath=os.path.join(dirpath, 'img.png'))
    with tempdir() as dirpath:
        features_1D(grads[0], subplot_samples=True, annotations=[1, 'pi'],
                    savepath=os.path.join(dirpath, 'img.png'))
    features_2D(grads.T, n_rows=1.5, tight=True, borderwidth=2)
    with tempdir() as dirpath:
        features_2D(grads.T[:, :, 0], norm='auto',
                    savepath=os.path.join(dirpath, 'img.png'))
    with tempdir() as dirpath:
        features_hist(grads, show_borders=False, borderwidth=1,
                      annotations=[0], show_xy_ticks=[0, 0], share_xy=(1, 1),
                      title="grads", savepath=os.path.join(dirpath, 'img.png'))
    with tempdir() as dirpath:
        features_hist_v2(list(grads[:, :4, :3]), colnames=list('abcd'),
                         show_borders=False, xlims=(-.01, .01), ylim=100,
                         borderwidth=1, show_xy_ticks=[0, 0], side_annot='row',
                         share_xy=True, title="Grads",
                         savepath=os.path.join(dirpath, 'img.png'))
    features_hist(grads, center_zero=True, xlims=(-1, 1), share_xy=0)
    features_hist_v2(list(grads[:, :4, :3]), center_zero=True, xlims=(-1, 1),
                     share_xy=(False, False))
    with tempdir() as dirpath:
        rnn_histogram(model, 1, show_xy_ticks=[0, 0], equate_axes=2,
                      savepath=os.path.join(dirpath, 'img.png'))
    rnn_histogram(model, 1, equate_axes=False,
                  configs={'tight': dict(left=0, right=1),
                           'plot': dict(color='red'),
                           'title': dict(fontsize=14)})
    rnn_heatmap(model, 1, cmap=None, normalize=True, show_borders=False)
    rnn_heatmap(model, 1, cmap=None, norm='auto', absolute_value=True)
    rnn_heatmap(model, 1, norm=None)
    with tempdir() as dirpath:
        rnn_heatmap(model, 1, norm=(-.004, .004),
                    savepath=os.path.join(dirpath, 'img.png'))

    hist_clipped(grads, peaks_to_clip=2)
    _, ax = plt.subplots(1, 1)
    hist_clipped(grads, peaks_to_clip=2, ax=ax, annot_kw=dict(fontsize=15))

    get_full_name(model, 'gru')
    get_full_name(model, 1)
    pass_on_error(get_full_name, model, 'croc')

    get_weights(model, 'gru', as_dict=False)
    get_weights(model, 'gru', as_dict=True)
    get_weights(model, 'gru/bias')
    get_weights(model, ['gru', 1, (1, 1)])
    pass_on_error(get_weights, model, 'gru/goo')

    get_weights(model, '*')
    get_gradients(model, '*', x, y)
    get_outputs(model, '*', x)

    from see_rnn.utils import _filter_duplicates_by_keys
    keys, data = _filter_duplicates_by_keys(list('abbc'), [1, 2, 3, 4])
    assert keys == ['a', 'b', 'c']
    assert data == [1, 2, 4]
    keys, data = _filter_duplicates_by_keys(
        list('abbc'), [1, 2, 3, 4], [5, 6, 7, 8])
    assert keys == ['a', 'b', 'c']
    assert data[0] == [1, 2, 4] and data[1] == [5, 6, 8]

    from see_rnn.inspect_gen import get_layer, detect_nans
    get_layer(model, 'gru')
    get_rnn_weights(model, 1, concat_gates=False, as_tensors=True)
    rnn_heatmap(model, 1, input_data=x, labels=y, mode='weights')
    _test_prefetched_data(model)

    # test NaN/Inf detection
    nan_txt = detect_nans(np.array([1] * 9999 + [np.nan])).replace('\n', ' ')
    print(nan_txt)  # case: print as quantity
    data = np.array([np.nan, np.inf, -np.inf, 0])
    print(detect_nans(data, include_inf=True))
    print(detect_nans(data, include_inf=False))
    data = np.array([np.inf, 0])
    print(detect_nans(data, include_inf=True))
    detect_nans(np.array([0]))

    K.set_value(model.optimizer.lr, 1e12)
    train_model(model, iterations=10)
    rnn_histogram(model, 1)
    rnn_heatmap(model, 1)

    del model
    reset_seeds(reset_graph_with_backend=K)

    # test SimpleRNN & other
    _model = make_model(SimpleRNN, batch_shape, units=128, use_bias=False,
                        IMPORTS=IMPORTS)
    train_model(_model, iterations=1)  # TF2-Keras-Graph bug workaround
    rnn_histogram(_model, 1)  # test _pretty_hist
    K.set_value(_model.optimizer.lr, 1e50)  # force NaNs
    train_model(_model, iterations=20)
    rnn_heatmap(_model, 1)
    data = get_rnn_weights(_model, 1)
    rnn_heatmap(_model, 1, input_data=x, labels=y, data=data)
    os.environ["TF_KERAS"] = '0'
    get_rnn_weights(_model, 1, concat_gates=False)
    del _model

    assert True
    cprint("\n<< MISC TESTS PASSED >>\n", 'green')
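# `pass_on_error` above is assumed to swallow exceptions so the intentionally
# invalid queries (e.g. 'croc', 'gru/goo') don't abort the test; one plausible
# implementation (a sketch, not necessarily this repo's actual definition):
def _example_pass_on_error(fn, *args, **kwargs):
    try:
        fn(*args, **kwargs)
    except BaseException as e:
        print("Errored:", e)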
def mLe(y_true, y_pred):
    """Mean |error|^L loss; L=2 recovers MSE, L=1 recovers MAE."""
    L = 1.5  # configurable exponent
    return K.mean(K.pow(K.abs(y_true - y_pred), L), axis=-1)
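# `mLe` drops in anywhere Keras accepts a loss callable; a minimal usage
# sketch (the model below is a placeholder, not a fixture of this file):
def _example_mLe_usage():
    ipt = Input(batch_shape=(16, 8))
    out = Dense(8)(ipt)
    model = Model(ipt, out)
    model.compile('adam', loss=mLe)
    model.train_on_batch(np.random.randn(16, 8), np.random.randn(16, 8))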
def _fn(y_true, y_pred):
    yt, yp = _maybe_cast_to_tensor(y_true, y_pred)
    # sample_weight makes no sense for keras `metrics`
    return K.eval(getattr(keras_metrics, name)(yt, yp))
def _maybe_cast_to_tensor(y_true, y_pred):
    # `name` comes from the enclosing test's scope; the hinge losses are fed
    # raw arrays, everything else gets backend variables
    if name not in ('hinge', 'squared_hinge'):
        return K.variable(y_true), K.variable(y_pred)
    return y_true, y_pred