def test_regex_filtering_for_default_collections(out_dir):
    """Verify include-regexes applied to the LAYERS and GRADIENTS collections.

    Both collections are restricted to "dense" tensors, a Keras fit is run
    eagerly, and every saved tensor name is checked against the expected
    pattern. Four tensors per collection are expected when ``is_tf_2_2()``
    is true, none otherwise.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS, CollectionKeys.GRADIENTS],
    )
    hook.get_collection(CollectionKeys.LAYERS).include("^dense")
    hook.get_collection(CollectionKeys.GRADIENTS).include("gradients/dense")

    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=10),
        steps=["train"],
        run_eagerly=True,
    )

    tr = create_trial_fast_refresh(out_dir)
    layer_tnames = tr.tensor_names(collection=CollectionKeys.LAYERS)
    gradient_tnames = tr.tensor_names(collection=CollectionKeys.GRADIENTS)
    assert len(layer_tnames) == (4 if is_tf_2_2() else 0)
    assert len(gradient_tnames) == (4 if is_tf_2_2() else 0)

    # Each collection is paired with the pattern its tensor names must match.
    checks = (
        (layer_tnames, r"^(dense)(_\d+)?\/(inputs|outputs)"),
        (gradient_tnames, r"gradients/dense"),
    )
    for names, expected_pattern in checks:
        for tname in names:
            assert tr.tensor(tname).value(0) is not None
            assert re.match(expected_pattern, tname) is not None
def test_include_regex(out_dir):
    """Check that a custom collection with the regex "dense" saves 12 tensors.

    Every tensor in the collection must have a value available at step 0.
    """
    interval_config = SaveConfig(save_interval=9)
    hook = KerasHook(
        out_dir,
        save_config=interval_config,
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")

    train_model(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
    )

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")
    assert len(tnames) == 12
    for name in tnames:
        step0_value = tr.tensor(name).value(0)
        assert step0_value is not None
def test_layer_names_gradient_tape(out_dir):
    """Check the naming of LAYERS tensors saved from a GradientTape run.

    Every tensor name in the LAYERS collection must start with a
    flatten/dense/dropout layer name (optionally numbered) followed by
    ``/inputs`` or ``/outputs``.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS],
    )
    helper_keras_gradtape(out_dir, hook=hook, save_config=SaveConfig(save_interval=9))

    tr = create_trial_fast_refresh(out_dir)
    layer_name_pattern = r"^(flatten|dense|dropout)(_\d+)?\/(inputs|outputs)"
    for tname in tr.tensor_names(collection=CollectionKeys.LAYERS):
        assert re.match(layer_name_pattern, tname) is not None
def test_keras_fit_pure_eager(out_dir, tf_eager_mode):
    """Exercise save_all with the default collections under fit() run eagerly.

    The total number of saved tensors and the per-collection counts depend
    on the TensorFlow version reported by ``is_tf_2_2()`` / ``is_tf_2_3()``.
    """
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_interval=3),
    )
    helper_keras_fit(trial_dir=out_dir, hook=hook, eager=tf_eager_mode, run_eagerly=True)

    trial = smd.create_trial(path=out_dir)

    # Expected overall tensor count per TF version.
    if is_tf_2_2():
        expected_total = 27
    elif is_tf_2_3():
        expected_total = 20
    else:
        expected_total = 21
    assert len(trial.tensor_names()) == expected_total

    assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
    assert len(trial.tensor_names(collection=CollectionKeys.INPUTS)) == (
        1 if is_tf_2_2() else 0
    )
    assert len(trial.tensor_names(collection=CollectionKeys.OUTPUTS)) == (
        2 if is_tf_2_2() else 0
    )
def test_save_all(out_dir, tf_eager_mode, workers):
    """Check tensor, step and worker counts when every tensor is saved.

    Trains with ``save_all=True`` and then verifies:

    * the total number of tensor names, which depends on eager mode and on
      the TensorFlow version,
    * that exactly three steps were recorded,
    * that each tensor was written by one worker when ``workers == "one"``
      and by ``strategy.num_replicas_in_sync`` workers otherwise,
    * the files on disk, via ``verify_files``.
    """
    save_config = SaveConfig(save_steps=[5])
    strategy, saved_scalars = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=save_config,
        steps=["train"],
        eager=tf_eager_mode,
        include_workers=workers,
    )
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())

    if tf_eager_mode:
        if is_tf_2_2():
            # weights, metrics, losses, optimizer variables, scalar, inputs,
            # outputs, gradients, layers.
            # The original re-tested is_tf_2_2() here; its else-arm could
            # never be selected inside this branch, so it was removed.
            assert len(tr.tensor_names()) == 6 + 2 + 1 + 5 + 1 + 1 + 2 + 8 + 8
        else:
            assert len(tr.tensor_names()) == (
                6 + 2 + 1 + 5 + 1 if is_tf_2_3() else 6 + 3 + 1 + 5 + 1
            )
    else:
        # weights, grads, optimizer_variables, metrics, losses, outputs
        assert len(tr.tensor_names()) == (
            6
            + 6
            + 5
            + 3
            + 1
            + 3 * strategy.num_replicas_in_sync
            + 2 * strategy.num_replicas_in_sync
        )

    assert len(tr.steps()) == 3

    for tname in tr.tensor_names():
        assert len(tr.tensor(tname).workers(0)) == (
            1 if workers == "one" else strategy.num_replicas_in_sync
        )

    verify_files(out_dir, save_config, saved_scalars)
def test_include_collections(out_dir, tf_eager_mode):
    """Check per-collection tensor counts when collections are listed explicitly.

    A KerasHook is configured with weights, biases, gradients, losses,
    outputs, metrics and optimizer variables, plus every allowed norm and
    reduction. After train/eval/predict the saved tensor names are counted.
    """
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    save_config = SaveConfig(save_interval=3)
    hook = smd.KerasHook(
        out_dir,
        save_config=save_config,
        include_collections=include_collections,
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    helper_keras_fit(out_dir, hook=hook, steps=["train", "eval", "predict"], eager=tf_eager_mode)

    trial = smd.create_trial(path=out_dir)
    # can't save gradients in TF 2.x
    if tf_eager_mode:
        # The parentheses matter: `assert a == 7 if cond else 8` parses as
        # `assert ((a == 7) if cond else 8)`, which asserts the truthy
        # constant 8 when cond is false and so checks nothing.
        assert len(trial.tensor_names()) == (7 if is_tf_2_2() else 8)
    else:
        assert len(trial.tensor_names()) == 18
        # NOTE(review): the gradient and optimizer-variable checks are placed
        # in the non-eager branch because the eager totals above (7 or 8)
        # leave no room for them; confirm against the original layout.
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5

    assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    # Same precedence fix as above: compare against the selected count
    # instead of asserting the bare constant 3.
    assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
        2 if is_tf_2_2() and tf_eager_mode else 3
    )
def test_save_all_workers(out_dir, zcc=False):
    """Check that tensors are written by every worker when all workers save.

    Returns without asserting anything when no GPUs are available. The
    ``zcc`` parameter is not used by this function's body.
    """
    # Skip if no GPUS
    if get_available_gpus() == 0:
        return

    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
        include_workers="all",
    )
    tr = create_trial_fast_refresh(out_dir)
    kernel_name = "conv2d/weights/conv2d/kernel:0"
    bias_name = "conv2d/weights/conv2d/bias:0"

    assert len(tr.workers()) == get_available_gpus()

    # Weights: collection is non-empty and each tensor has one entry per replica.
    assert len(tr.tensor_names(collection="weights"))
    first_weight = tr.tensor_names(collection="weights")[0]
    assert len(tr.tensor(first_weight).workers(5)) == strategy.num_replicas_in_sync
    assert kernel_name in tr.tensor_names(collection="weights")
    assert len(tr.tensor(kernel_name).workers(5)) == strategy.num_replicas_in_sync

    # Biases: same checks as for the weights.
    assert len(tr.tensor_names(collection="biases"))
    assert bias_name in tr.tensor_names(collection="biases")
    first_bias = tr.tensor_names(collection="biases")[0]
    assert len(tr.tensor(first_bias).workers(5)) == strategy.num_replicas_in_sync

    assert len(tr.tensor_names(collection="gradients"))
def test_collection_reductions(out_dir, tf_eager_mode):
    """Check that a collection saved only as a reduction has no raw value.

    The WEIGHTS collection is given an l1-norm reduction config. Reading the
    full value of a weight tensor at step 0 must then fail with
    ``TensorUnavailableForStep``, while its "l1" reduction should be readable.
    """
    tf.keras.backend.clear_session()
    saved_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
    ]
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=saved_collections,
    )
    weights_collection = hook.get_collection(CollectionKeys.WEIGHTS)
    weights_collection.reduction_config = ReductionConfig(norms=["l1"])

    train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    tr = create_trial_fast_refresh(out_dir)
    weight_name = tr.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    try:
        tr.tensor(weight_name).value(0)
    except TensorUnavailableForStep:
        try:
            assert tr.tensor(weight_name).reduction_value(0, "l1") is not None
        except ValueError:
            # some tensors reduction can't be computed
            pass
    except TensorUnavailable:
        # sometimes we might not have tensor saved if it was only being
        # saved as reduction and the reduction computation failed
        pass
    else:
        # The raw value was readable although only a reduction was requested.
        assert False
def test_get_custom_and_default_collections():
    """Check how a SessionHook splits collections into custom and default.

    One user-defined collection is requested alongside several built-in
    ones; ``_get_custom_and_default_collections`` must report exactly one
    custom collection and eleven default ones.
    """
    # Only the generated path name is used; the TemporaryDirectory object
    # itself is not kept.
    tmp_dir = TemporaryDirectory().name

    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.LOSSES,
        CollectionKeys.OPTIMIZER_VARIABLES,
        "custom_collection",
    ]
    hook = SessionHook(
        out_dir=tmp_dir,
        save_config=SaveConfig(save_interval=2),
        include_collections=include_collections,
    )
    hook.get_collection(name="custom_collection").include("random-regex")

    split = hook._get_custom_and_default_collections()
    custom_collections, default_collections = split

    assert len(custom_collections) == 1
    # Additional three collections are: all, default and sm_metrics
    assert len(default_collections) == 8 + 3
def test_include_regex(out_dir, tf_eager_mode, workers):
    """Check a regex-based custom collection under a distribution strategy.

    The "dense" regex is attached to ``custom_coll``. The expected tensor
    count depends on eager mode, TF version and replica count, and every
    tensor must have a step-0 value written by the expected number of workers.
    """
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
        include_workers=workers,
    )
    hook.get_collection("custom_coll").include("dense")

    strategy, _ = train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    if tf_eager_mode:
        expected_count = 12 if is_tf_2_2() else 4
    else:
        expected_count = 4 + 3 * strategy.num_replicas_in_sync
    assert len(tnames) == expected_count

    expected_workers = 1 if workers == "one" else strategy.num_replicas_in_sync
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None
        assert len(tr.tensor(tname).workers(0)) == expected_workers
def test_include_regex_opt_var(out_dir, tf_eager_mode, workers):
    """Check that the regex "Adam" captures optimizer variables in a custom collection.

    Five tensors are expected in eager mode; otherwise the count scales with
    ``strategy.num_replicas_in_sync``. Each tensor must have a step-0 value
    written by the expected number of workers.
    """
    collection_name = "custom_optimizer_variables"
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[collection_name],
        include_workers=workers,
    )
    hook.get_collection("custom_optimizer_variables").include("Adam")

    strategy, _ = train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_optimizer_variables")

    if tf_eager_mode:
        expected_count = 5
    else:
        expected_count = 4 + 3 * strategy.num_replicas_in_sync
    assert len(tnames) == expected_count

    expected_workers = 1 if workers == "one" else strategy.num_replicas_in_sync
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None
        assert len(tr.tensor(tname).workers(0)) == expected_workers
def test_gradtape_include_collections(out_dir):
    """Check a GradientTape training script with explicitly listed collections.

    The hook is configured with every collection named in
    ``include_collections`` plus all allowed norms and reductions; the
    resulting per-collection tensor counts are then asserted.
    """
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    reductions = ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS)
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=include_collections,
        reduction_config=reductions,
    )
    helper_keras_gradtape(out_dir, hook=hook)

    trial = smd.create_trial(path=out_dir)
    # NOTE(review): the original comment here read "can't save gradients in
    # TF 2.x", yet four gradient tensors are asserted below - confirm which
    # statement is current.
    assert len(trial.tensor_names()) == (16 if is_tf_2_2() else 15)

    # Expected number of tensors per collection, checked in this order.
    expected_counts = (
        (CollectionKeys.GRADIENTS, 4),
        (CollectionKeys.OPTIMIZER_VARIABLES, 5),
        (CollectionKeys.BIASES, 2),
        (CollectionKeys.WEIGHTS, 2),
        (CollectionKeys.LOSSES, 1),
        (CollectionKeys.METRICS, 1),
    )
    for collection_key, count in expected_counts:
        assert len(trial.tensor_names(collection=collection_key)) == count
def test_gradtape_include_regex(out_dir):
    """Check a regex-based custom collection with a GradientTape training loop.

    The "dense" regex should capture 12 tensors when ``is_tf_2_2()`` is true
    and 8 otherwise; each must have a value at step 0.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    helper_keras_gradtape(out_dir, hook=hook, save_config=SaveConfig(save_interval=9))

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    expected_count = 12 if is_tf_2_2() else 8
    assert len(tnames) == expected_count
    for name in tnames:
        assert tr.tensor(name).value(0) is not None
def test_simple_include(out_dir):
    """Run the shared simple-include helper with a SessionHook.

    The hook saves the "default" and "losses" collections every 2 steps.
    """
    pre_test_clean_up()
    every_other_step = SaveConfig(save_interval=2)
    hook = SessionHook(
        out_dir=out_dir,
        save_config=every_other_step,
        include_collections=["default", "losses"],
    )
    helper_test_simple_include(out_dir, hook)
def test_save_config_start_and_end(out_dir):
    """Run the shared start/end-step helper with a SessionHook.

    The save config uses interval 2, start_step 8 and end_step 14.
    """
    pre_test_clean_up()
    windowed_config = SaveConfig(save_interval=2, start_step=8, end_step=14)
    hook = SessionHook(
        out_dir=out_dir,
        save_all=False,
        save_config=windowed_config,
    )
    helper_save_config_start_and_end(out_dir, hook)
def test_include_regex(out_dir, tf_eager_mode):
    """Check a regex-based custom collection with Keras fit.

    The "dense" regex should capture 12 tensors when ``is_tf_2_2()`` is true
    and 4 otherwise; each must have a value at step 0.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")

    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        run_eagerly=tf_eager_mode,
    )

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    expected_count = 12 if is_tf_2_2() else 4
    assert len(tnames) == expected_count
    for name in tnames:
        assert tr.tensor(name).value(0) is not None
def test_simple_include_regex(out_dir):
    """Run the shared include-regex helper with a SessionHook.

    The hook has no included collections and a single include regex,
    "loss:0", saved every 2 steps.
    """
    pre_test_clean_up()
    every_other_step = SaveConfig(save_interval=2)
    hook = SessionHook(
        out_dir=out_dir,
        include_regex=["loss:0"],
        include_collections=[],
        save_config=every_other_step,
    )
    helper_test_simple_include_regex(out_dir, hook)
def test_keras_fit_shapes(out_dir):
    """Check saved tensor shapes at step 0 for a Keras fit run.

    The hook saves everything at step 0 with ``save_shape=True``; the
    tensor names for that step are printed before shapes are verified.
    """
    shape_reduction = ReductionConfig(save_shape=True)
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=shape_reduction,
    )
    helper_keras_fit(trial_dir=out_dir, hook=hook)

    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(step=0))
    verify_shapes(out_dir, 0)
def test_hook_write(out_dir):
    """Check that a save-all SessionHook writes at least one weight tensor."""
    pre_test_clean_up()

    # set up hook
    hook = SessionHook(
        out_dir,
        save_all=True,
        include_collections=None,
        save_config=SaveConfig(save_interval=999),
    )
    helper_hook_write(out_dir, hook)

    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names(collection="weights"))
    # The weights collection must not be empty.
    assert len(tr.tensor_names(collection="weights")) > 0
def test_keras_gradtape_shapes(out_dir):
    """Check saved tensor shapes for a GradientTape run.

    The hook saves everything with ``save_shape=True``; shapes are then
    verified for steps 0 and 500.
    """
    shape_reduction = ReductionConfig(save_shape=True)
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=shape_reduction,
    )
    helper_keras_gradtape(trial_dir=out_dir, hook=hook)

    for step in (0, 500):
        verify_shapes(out_dir, step)
def test_multi_collection_match(out_dir):
    """Run the shared multi-collection helper with a SessionHook.

    The regex "loss:0" is supplied both as a hook-level include regex and
    on the custom "trial" collection.
    """
    pre_test_clean_up()
    every_other_step = SaveConfig(save_interval=2)
    hook = SessionHook(
        out_dir=out_dir,
        include_regex=["loss:0"],
        include_collections=["default", "trial"],
        save_config=every_other_step,
    )
    trial_collection = hook.get_collection("trial")
    trial_collection.include("loss:0")
    helper_test_multi_collection_match(out_dir, hook)
def test_should_save_tensor_behavior_without_prepare_collections(out_dir):
    """Saving must be refused while the hook's collections are unprepared.

    An attempt to save a tensor before the collections are prepared should
    always be answered with a falsy result. This can happen if the function
    is called before callbacks are initialised.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        save_all=True,
    )
    for collection_key in (CollectionKeys.GRADIENTS, CollectionKeys.LAYERS):
        assert not hook.should_save_tensor_or_collection("dummy", collection_key)
def test_tf_keras_shapes(out_dir):
    """Check saved tensor shapes at step 0 when training through tf.keras.

    Training runs non-eagerly through train, eval, predict and train again,
    saving steps 0 and 10 with ``save_shape=True``.
    """
    phases = ["train", "eval", "predict", "train"]
    train_model(
        out_dir,
        save_all=True,
        reduction_config=ReductionConfig(save_shape=True),
        use_tf_keras=True,
        save_config=SaveConfig(save_steps=[0, 10]),
        eager=False,
        steps=phases,
    )
    verify_shapes(out_dir, 0)
def test_shapes(out_dir, tf_eager_mode):
    """Check saved tensor shapes at step 0 for a strategy-based training run.

    ``verify_shapes`` is told whether more than one replica was in sync.
    """
    strategy, _ = train_model(
        out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
        steps=["train"],
        eager=tf_eager_mode,
    )
    verify_shapes(out_dir, 0, multiworker=strategy.num_replicas_in_sync > 1)
def helper_create_hook(out_dir, collections, include_regex=None):
    """Build a KerasHook, register the model on it and begin training.

    Args:
        out_dir: Output directory passed to the hook.
        collections: Collection names the hook should include.
        include_regex: Optional regex added to every listed collection.

    Returns:
        The hook, after ``register_model`` and ``on_train_begin`` were called.

    ``model`` is not a parameter; it is resolved from a surrounding scope
    that is not visible in this block.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=collections,
    )

    if include_regex:
        for name in collections:
            hook.get_collection(name).include(include_regex)

    hook.register_model(model)
    hook.on_train_begin()
    return hook
def test_one_device(out_dir):
    """Check that a OneDeviceStrategy run leaves no "events" directory in out_dir."""
    saved_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.GRADIENTS,
    ]
    # The return value of train_model is not inspected by this test.
    _ = train_model(
        out_dir,
        include_collections=saved_collections,
        save_config=SaveConfig(save_interval=9),
        strategy=tf.distribute.OneDeviceStrategy(device="/cpu:0"),
        steps=["train"],
    )
    events_dir = os.path.join(out_dir, "events")
    assert not os.path.isdir(events_dir)
def test_save_all_full(out_dir, hook=None):
    """Check tensor counts after running the simple model with a save-all hook.

    Args:
        out_dir: Directory the trial is read back from.
        hook: Optional pre-built hook; when omitted, a save-all SessionHook
            writing to ``out_dir`` every 2 steps is created.
    """
    tf.reset_default_graph()
    if hook is None:
        hook = SessionHook(
            out_dir=out_dir,
            save_all=True,
            save_config=SaveConfig(save_interval=2),
        )
    simple_model(hook)

    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) > 50
    print(tr.tensor_names(collection="weights"))

    # Exactly one tensor is expected in each of these collections.
    for collection_name in ("weights", "gradients", "losses"):
        assert len(tr.tensor_names(collection=collection_name)) == 1
def test_collection_add(out_dir):
    """Check that a "relu" collection created during training saves two tensors.

    Both tensors must have a value available at step 0.
    """
    train_model(
        out_dir,
        include_collections=["relu"],
        save_config=SaveConfig(save_interval=9),
        create_relu_collection=True,
        steps=["train"],
    )

    tr = create_trial_fast_refresh(out_dir)
    relu_coll_tensor_names = tr.tensor_names(collection="relu")
    assert len(relu_coll_tensor_names) == 2
    for relu_name in relu_coll_tensor_names[:2]:
        assert tr.tensor(relu_name).value(0) is not None
def test_clash_with_custom_callback(out_dir):
    """Check the saved tensor count when a "fetch_tensor" callback is also added."""
    saved_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.GRADIENTS,
    ]
    strategy = train_model(
        out_dir,
        include_collections=saved_collections,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        add_callbacks=["fetch_tensor"],
    )

    tr = create_trial_fast_refresh(out_dir)
    expected_total = 6 + 6 + strategy.num_replicas_in_sync * 1 + 3
    assert len(tr.tensor_names()) == expected_total
def test_save_all(out_dir):
    """Check tensor and step counts when every tensor is saved.

    The expected tensor total scales with ``strategy.num_replicas_in_sync``;
    exactly three steps must have been recorded.
    """
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
    )

    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())

    # weights, grads, optimizer_variables, metrics, losses, outputs
    expected_total = (
        6
        + 6
        + 5
        + 3
        + 1
        + 3 * strategy.num_replicas_in_sync
        + 2 * strategy.num_replicas_in_sync
    )
    assert len(tr.tensor_names()) == expected_total
    assert len(tr.steps()) == 3