def test_collection_reductions(out_dir, tf_eager_mode):
    tf.keras.backend.clear_session()
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
        ],
    )
    hook.get_collection(CollectionKeys.WEIGHTS).reduction_config = ReductionConfig(norms=["l1"])
    train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    tr = create_trial_fast_refresh(out_dir)
    weight_name = tr.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    try:
        tr.tensor(weight_name).value(0)
        assert False
    except TensorUnavailableForStep:
        try:
            assert tr.tensor(weight_name).reduction_value(0, "l1") is not None
        except ValueError:
            # the reduction can't be computed for some tensors
            pass
    except TensorUnavailable:
        # sometimes the tensor might not have been saved at all, if it was only
        # being saved as a reduction and the reduction computation failed
        pass

def test_save_all_workers(out_dir, zcc=False):
    # Skip if no GPUs are available
    if get_available_gpus() == 0:
        return
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
        include_workers="all",
    )
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.workers()) == get_available_gpus()

    assert len(tr.tensor_names(collection="weights"))
    assert (
        len(tr.tensor(tr.tensor_names(collection="weights")[0]).workers(5))
        == strategy.num_replicas_in_sync
    )
    assert "conv2d/weights/conv2d/kernel:0" in tr.tensor_names(collection="weights")
    assert (
        len(tr.tensor("conv2d/weights/conv2d/kernel:0").workers(5))
        == strategy.num_replicas_in_sync
    )

    assert len(tr.tensor_names(collection="biases"))
    assert "conv2d/weights/conv2d/bias:0" in tr.tensor_names(collection="biases")
    assert (
        len(tr.tensor(tr.tensor_names(collection="biases")[0]).workers(5))
        == strategy.num_replicas_in_sync
    )

    assert len(tr.tensor_names(collection="gradients"))

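# `get_available_gpus` above comes from the shared test utilities. A minimal
# sketch of one way such a helper could be implemented with the public TF 2.x
# API (an assumption for illustration, not necessarily how the utilities
# actually define it):
def _example_count_gpus():  # hypothetical helper, not used by the tests
    import tensorflow as tf

    # Count the physical GPU devices visible to TensorFlow.
    return len(tf.config.list_physical_devices("GPU"))
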
def test_include_regex_opt_var(out_dir, tf_eager_mode, workers):
    include_collections = ["custom_optimizer_variables"]
    save_config = SaveConfig(save_interval=3)
    hook = KerasHook(
        out_dir=out_dir,
        save_config=save_config,
        include_collections=include_collections,
        include_workers=workers,
    )
    hook.get_collection("custom_optimizer_variables").include("Adam")
    strategy, _ = train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_optimizer_variables")

    if tf_eager_mode:
        assert len(tnames) == 5
    else:
        assert len(tnames) == 4 + 3 * strategy.num_replicas_in_sync
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None
        assert len(tr.tensor(tname).workers(0)) == (
            1 if workers == "one" else strategy.num_replicas_in_sync
        )

def test_include_regex(out_dir, tf_eager_mode, workers):
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
        include_workers=workers,
    )
    hook.get_collection("custom_coll").include("dense")
    strategy, _ = train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    if tf_eager_mode:
        assert len(tnames) == (12 if is_tf_2_2() else 4)
    else:
        assert len(tnames) == 4 + 3 * strategy.num_replicas_in_sync
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None
        assert len(tr.tensor(tname).workers(0)) == (
            1 if workers == "one" else strategy.num_replicas_in_sync
        )

def verify_shapes(out_dir, step_num, multiworker=False):
    trial = create_trial_fast_refresh(out_dir)
    for tname in trial.tensor_names(step=step_num):
        tensor = trial.tensor(tname)
        if multiworker is False:
            assert isinstance(tensor.shape(step_num), tuple), (
                tname,
                tensor.shape(step_num),
            )
            try:
                if not is_scalar(tensor.value(step_num)):
                    # The test saved no values except scalars (which don't use
                    # the reduction config), so non-scalar values should raise
                    # the exception below.
                    assert False
            except TensorUnavailableForStep:
                pass
        else:
            workers = tensor.workers(step_num)
            assert len(workers) > 1
            for w in workers:
                try:
                    if not is_scalar(tensor.value(step_num, worker=w)):
                        # The test saved no values, so this should raise the
                        # exception below.
                        assert False
                except TensorUnavailableForStep:
                    pass
                assert isinstance(tensor.shape(step_num, worker=w), tuple), (
                    tname,
                    w,
                    tensor.shape(step_num, worker=w),
                )

def test_basic(out_dir, zcc=False):
    strategy = helper_mirrored(
        out_dir,
        steps=["train", "eval", "predict", "train"],
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
        ],
        eval_distributed=False,
        zcc=zcc,
    )
    if skip_trial_check():
        return

    tr = create_trial_fast_refresh(out_dir)
    # weights, gradients, losses
    print(tr.tensor_names())
    assert len(tr.tensor_names()) == 8 + 8 + (1 * strategy.num_replicas_in_sync) + 1
    assert len(tr.steps()) == 7
    assert len(tr.steps(ModeKeys.TRAIN)) == 3
    assert len(tr.steps(ModeKeys.EVAL)) == 2
    assert len(tr.steps(ModeKeys.PREDICT)) == 2

    assert "dense_1/kernel:0" in tr.tensor_names(collection="weights")
    for tname in tr.tensor_names(collection="weights"):
        for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
            assert (
                len(tr.tensor(tname).workers(s, ModeKeys.TRAIN))
                == strategy.num_replicas_in_sync
            )
            for worker in tr.tensor(tname).workers(s, ModeKeys.TRAIN):
                assert tr.tensor(tname).value(s, worker=worker, mode=ModeKeys.TRAIN) is not None
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            assert (
                len(tr.tensor(tname).workers(s, ModeKeys.EVAL))
                == strategy.num_replicas_in_sync
            )
            assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None

    tensornames = tr.tensor_names(regex=r"Identity_\d+:0")
    for s in tr.tensor(tensornames[0]).steps(ModeKeys.TRAIN):
        for w in tr.tensor(tensornames[0]).workers(s, ModeKeys.TRAIN):
            assert tr.tensor(tensornames[0]).value(s, worker=w, mode=ModeKeys.TRAIN) is not None
        assert (
            len(tr.tensor(tensornames[0]).workers(s, ModeKeys.TRAIN))
            == strategy.num_replicas_in_sync
        )

    for tname in tr.tensor_names(collection="losses"):
        if tname != tensornames[0]:
            for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
                assert len(tr.tensor(tname).workers(s, ModeKeys.TRAIN)) == 1
                assert tr.tensor(tname).value(s, mode=ModeKeys.TRAIN) is not None

    tname = "sparse_softmax_cross_entropy_loss/value:0"
    for s in tr.tensor(tname).steps(ModeKeys.EVAL):
        assert len(tr.tensor(tname).workers(s, ModeKeys.EVAL)) == strategy.num_replicas_in_sync
        assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None

def test_save_all(out_dir, tf_eager_mode, workers):
    save_config = SaveConfig(save_steps=[5])
    strategy, saved_scalars = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=save_config,
        steps=["train"],
        eager=tf_eager_mode,
        include_workers=workers,
    )
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())
    if tf_eager_mode:
        if is_tf_2_2():
            # weights, metrics, losses, optimizer variables, scalar, inputs,
            # outputs, gradients, layers
            assert len(tr.tensor_names()) == 6 + 2 + 1 + 5 + 1 + 1 + 2 + 8 + 8
        else:
            assert len(tr.tensor_names()) == (
                6 + 2 + 1 + 5 + 1 if is_tf_2_3() else 6 + 3 + 1 + 5 + 1
            )
    else:
        # weights, grads, optimizer_variables, metrics, losses, outputs
        assert len(tr.tensor_names()) == (
            6 + 6 + 5 + 3 + 1
            + 3 * strategy.num_replicas_in_sync
            + 2 * strategy.num_replicas_in_sync
        )
    assert len(tr.steps()) == 3
    for tname in tr.tensor_names():
        assert len(tr.tensor(tname).workers(0)) == (
            1 if workers == "one" else strategy.num_replicas_in_sync
        )
    verify_files(out_dir, save_config, saved_scalars)

def test_regex_filtering_for_default_collections(out_dir):
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS, CollectionKeys.GRADIENTS],
    )
    hook.get_collection(CollectionKeys.LAYERS).include("^dense")
    hook.get_collection(CollectionKeys.GRADIENTS).include("gradients/dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=10),
        steps=["train"],
        run_eagerly=True,
    )

    tr = create_trial_fast_refresh(out_dir)
    layer_tnames = tr.tensor_names(collection=CollectionKeys.LAYERS)
    gradient_tnames = tr.tensor_names(collection=CollectionKeys.GRADIENTS)
    assert len(layer_tnames) == (4 if is_tf_2_2() else 0)
    assert len(gradient_tnames) == (4 if is_tf_2_2() else 0)

    layer_pattern = r"^(dense)(_\d+)?\/(inputs|outputs)"
    gradient_pattern = r"gradients/dense"
    for tname in layer_tnames:
        assert tr.tensor(tname).value(0) is not None
        assert re.match(pattern=layer_pattern, string=tname) is not None
    for tname in gradient_tnames:
        assert tr.tensor(tname).value(0) is not None
        assert re.match(pattern=gradient_pattern, string=tname) is not None

def test_base_reductions(out_dir, tf_eager_mode):
    helper_keras_fit(
        trial_dir=out_dir,
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.METRICS,
            CollectionKeys.LOSSES,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
        run_eagerly=tf_eager_mode,
    )
    tr = create_trial_fast_refresh(out_dir)
    weight_name = tr.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    try:
        tr.tensor(weight_name).value(0)
        assert False
    except TensorUnavailableForStep:
        assert tr.tensor(weight_name).reduction_value(0, "l1") is not None
        assert len(tr.tensor(weight_name).reduction_values(0)) == len(ALLOWED_REDUCTIONS) + len(
            ALLOWED_NORMS
        )

    loss_name = tr.tensor_names(collection=CollectionKeys.LOSSES)[0]
    assert tr.tensor(loss_name).value(0) is not None

    metric_name = tr.tensor_names(collection=CollectionKeys.METRICS)[0]
    assert tr.tensor(metric_name).value(0) is not None

def test_base_reductions(out_dir, tf_eager_mode):
    train_model(
        out_dir,
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.METRICS,
            CollectionKeys.LOSSES,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
        steps=["train"],
        eager=tf_eager_mode,
    )
    tr = create_trial_fast_refresh(out_dir)
    weight_name = tr.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    try:
        tr.tensor(weight_name).value(0)
        assert False
    except TensorUnavailableForStep:
        assert tr.tensor(weight_name).reduction_values(0)

    loss_name = tr.tensor_names(collection=CollectionKeys.LOSSES)[0]
    assert tr.tensor(loss_name).value(0) is not None

    metric_name = tr.tensor_names(collection=CollectionKeys.METRICS)[0]
    assert tr.tensor(metric_name).value(0) is not None

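# Both `test_base_reductions` variants above assert that, once a
# ReductionConfig is in effect, reading full tensor values raises
# TensorUnavailableForStep and only reductions can be read back. A minimal
# sketch of inspecting those saved reductions with the smdebug trials API
# (`trial_path` is a placeholder for an out_dir written by one of these tests):
def _example_print_reductions(trial_path):  # hypothetical illustration
    from smdebug.trials import create_trial

    trial = create_trial(trial_path)
    for tname in trial.tensor_names(collection="weights"):
        # reduction_values(step) returns the reductions saved for that step,
        # e.g. the norms and reductions configured via ReductionConfig.
        print(tname, trial.tensor(tname).reduction_values(0))
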
def exhaustive_check(out_dir, use_tf_keras):
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    train_model(
        out_dir,
        include_collections=include_collections,
        use_tf_keras=use_tf_keras,
        eager=False,
        steps=["train", "eval", "predict", "train"],
    )

    tr = create_trial_fast_refresh(out_dir)
    if use_tf_keras:
        assert len(tr.tensor_names()) == 18
    else:
        # optimizer variables can't be saved in this case
        assert len(tr.tensor_names()) == 13

    assert len(tr.modes()) == 3
    assert len(tr.steps(ModeKeys.TRAIN)) == 8  # 0, 3, 6, 9, 12, 15, 18, 19 (end of epoch)
    assert len(tr.steps(ModeKeys.EVAL)) == 4
    assert len(tr.steps(ModeKeys.PREDICT)) == 2  # ran 4 steps above

    assert len(tr.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
    gradient_name = tr.tensor_names(collection=CollectionKeys.GRADIENTS)[0]
    assert len(tr.tensor(gradient_name).steps(ModeKeys.TRAIN)) == 7
    assert len(tr.tensor(gradient_name).steps(ModeKeys.EVAL)) == 0

    assert len(tr.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
    assert len(tr.tensor_names(collection=CollectionKeys.BIASES)) == 2
    weight_name = tr.tensor_names(collection=CollectionKeys.WEIGHTS)[0]
    assert len(tr.tensor(weight_name).steps()) == 13
    assert len(tr.tensor(weight_name).steps(ModeKeys.TRAIN)) == 7
    assert len(tr.tensor(weight_name).steps(ModeKeys.EVAL)) == 4

    assert len(tr.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    loss_name = tr.tensor_names(collection=CollectionKeys.LOSSES)[0]
    assert len(tr.tensor(loss_name).steps()) == 12

    assert len(tr.tensor_names(collection=CollectionKeys.METRICS)) == 3

    if use_tf_keras:
        assert len(tr.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
        opt_var_name = tr.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)[0]
        assert tr.tensor(opt_var_name).value(0) is not None
        assert len(tr.tensor(opt_var_name).steps(ModeKeys.EVAL)) == 0

def test_save_all(out_dir):
    train_model(out_dir, include_collections=None, save_all=True, steps=["train"])
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) == 21
    assert len(tr.steps()) == 4

def test_keras_fit_shapes(out_dir):
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
    )
    helper_keras_fit(trial_dir=out_dir, hook=hook)
    print(create_trial_fast_refresh(out_dir).tensor_names(step=0))
    verify_shapes(out_dir, 0)

def test_hook_write(out_dir):
    pre_test_clean_up()
    # set up the hook
    hook = SessionHook(
        out_dir,
        save_all=True,
        include_collections=None,
        save_config=SaveConfig(save_interval=999),
    )
    helper_hook_write(out_dir, hook)
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names(collection="weights"))
    assert len(tr.tensor_names(collection="weights"))

def test_reductions(out_dir):
    strategy = helper_mirrored(
        out_dir,
        steps=["train", "eval"],
        reduction_config=smd.ReductionConfig(
            reductions=["sum", "max"], abs_reductions=["sum", "max"], norms=["l1"]
        ),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.LOSSES,
        ],
        eval_distributed=True,
    )
    if skip_trial_check():
        return

    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) == 8 + 1 * strategy.num_replicas_in_sync + 1
    assert len(tr.steps()) == 4
    assert len(tr.steps(ModeKeys.TRAIN)) == 2
    assert len(tr.steps(ModeKeys.EVAL)) == 2

    for tname in tr.tensor_names(collection="weights"):
        for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
            try:
                tr.tensor(tname).value(s, mode=ModeKeys.TRAIN)
                assert False
            except TensorUnavailableForStep:
                # for some tensors the l1 reduction can't be saved because their
                # dimensions are unsuitable for that reduction
                assert len(tr.tensor(tname).reduction_values(s, mode=ModeKeys.TRAIN)) >= 4
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            try:
                tr.tensor(tname).value(s, mode=ModeKeys.EVAL)
                assert False
            except TensorUnavailableForStep:
                # for some tensors the l1 reduction can't be saved because their
                # dimensions are unsuitable for that reduction
                assert len(tr.tensor(tname).reduction_values(s, mode=ModeKeys.EVAL)) >= 4

    for tname in tr.tensor_names(collection="losses"):
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            assert len(tr.tensor(tname).reduction_values(s, mode=ModeKeys.EVAL)) == 0
            assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None
        for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
            assert len(tr.tensor(tname).reduction_values(s, mode=ModeKeys.TRAIN)) == 0
            assert tr.tensor(tname).value(s, mode=ModeKeys.TRAIN) is not None

def test_save_all(out_dir):
    strategy = helper_mirrored(
        out_dir, steps=["train"], num_steps=1, save_all=True, eval_distributed=True
    )
    if skip_trial_check():
        return
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) > 100
    assert len(tr.steps())
    assert len(tr.tensor_names(collection="weights"))
    assert len(tr.tensor_names(collection="biases"))
    assert len(tr.tensor_names(collection="gradients"))

def test_include_regex(out_dir):
    hook = KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    train_model(out_dir, hook=hook, save_config=SaveConfig(save_interval=9), steps=["train"])

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    assert len(tnames) == 12
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None

def test_layer_names_gradient_tape(out_dir):
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS],
    )
    helper_keras_gradtape(out_dir, hook=hook, save_config=SaveConfig(save_interval=9))

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection=CollectionKeys.LAYERS)
    pattern = r"^(flatten|dense|dropout)(_\d+)?\/(inputs|outputs)"
    for tname in tnames:
        assert re.match(pattern=pattern, string=tname) is not None

def test_save_all_full(out_dir, hook=None):
    tf.reset_default_graph()
    if hook is None:
        hook = SessionHook(
            out_dir=out_dir, save_all=True, save_config=SaveConfig(save_interval=2)
        )

    simple_model(hook)
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) > 50
    print(tr.tensor_names(collection="weights"))
    assert len(tr.tensor_names(collection="weights")) == 1
    assert len(tr.tensor_names(collection="gradients")) == 1
    assert len(tr.tensor_names(collection="losses")) == 1

def test_collection_add(out_dir):
    train_model(
        out_dir,
        include_collections=["relu"],
        save_config=SaveConfig(save_interval=9),
        create_relu_collection=True,
        steps=["train"],
    )

    tr = create_trial_fast_refresh(out_dir)
    relu_coll_tensor_names = tr.tensor_names(collection="relu")

    assert len(relu_coll_tensor_names) == 2
    assert tr.tensor(relu_coll_tensor_names[0]).value(0) is not None
    assert tr.tensor(relu_coll_tensor_names[1]).value(0) is not None

def test_clash_with_custom_callback(out_dir):
    strategy = train_model(
        out_dir,
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.OUTPUTS,
            CollectionKeys.GRADIENTS,
        ],
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        add_callbacks=["fetch_tensor"],
    )
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) == 6 + 6 + strategy.num_replicas_in_sync * 1 + 3

def test_save_one_worker(out_dir):
    strategy = helper_mirrored(
        out_dir,
        steps=["train"],
        num_steps=1,
        save_all=True,
        eval_distributed=True,
        include_workers="one",
    )
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.workers()) == 1
    assert len(tr.steps())
    assert len(tr.tensor_names(collection="weights"))
    assert len(tr.tensor_names(collection="biases"))
    assert len(tr.tensor_names(collection="gradients"))

def test_save_all(out_dir):
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
    )
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())
    # weights, grads, optimizer_variables, metrics, losses, outputs
    assert len(tr.tensor_names()) == (
        6 + 6 + 5 + 3 + 1
        + 3 * strategy.num_replicas_in_sync
        + 2 * strategy.num_replicas_in_sync
    )
    assert len(tr.steps()) == 3

def test_eval_distributed(out_dir):
    strategy = helper_mirrored(
        out_dir,
        steps=["train", "eval"],
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.LOSSES,
        ],
        eval_distributed=True,
    )
    if skip_trial_check():
        return

    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) == 8 + 1 * strategy.num_replicas_in_sync + 1
    assert len(tr.steps()) == 4
    assert len(tr.steps(ModeKeys.TRAIN)) == 2
    assert len(tr.steps(ModeKeys.EVAL)) == 2

    for tname in tr.tensor_names(collection="weights"):
        for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
            assert (
                len(tr.tensor(tname).workers(s, ModeKeys.TRAIN))
                == strategy.num_replicas_in_sync
            )
            for worker in tr.tensor(tname).workers(s, ModeKeys.TRAIN):
                assert tr.tensor(tname).value(s, worker=worker, mode=ModeKeys.TRAIN) is not None
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            assert (
                len(tr.tensor(tname).workers(s, ModeKeys.EVAL))
                == strategy.num_replicas_in_sync
            )
            assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None

    tensornames = tr.tensor_names(regex=r"Identity_\d+:0")
    for s in tr.tensor(tensornames[0]).steps(ModeKeys.TRAIN):
        for w in tr.tensor(tensornames[0]).workers(s, ModeKeys.TRAIN):
            assert tr.tensor(tensornames[0]).value(s, worker=w, mode=ModeKeys.TRAIN) is not None
        assert (
            len(tr.tensor(tensornames[0]).workers(s, ModeKeys.TRAIN))
            == strategy.num_replicas_in_sync
        )

    for tname in tr.tensor_names(collection="losses"):
        for s in tr.tensor(tname).steps(ModeKeys.EVAL):
            assert len(tr.tensor(tname).workers(s, ModeKeys.EVAL)) == 1
            assert tr.tensor(tname).value(s, mode=ModeKeys.EVAL) is not None
        if tname != tensornames[0]:
            for s in tr.tensor(tname).steps(ModeKeys.TRAIN):
                assert len(tr.tensor(tname).workers(s, ModeKeys.TRAIN)) == 1
                assert tr.tensor(tname).value(s, mode=ModeKeys.TRAIN) is not None

def test_clash_with_tb_callback(out_dir):
    # this test cannot be run in non-eager mode
    helper_keras_fit(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.LOSSES,
            CollectionKeys.METRICS,
        ],
        add_callbacks=["tensorboard"],
    )
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) == (7 if (is_tf_2_2() or is_tf_2_3()) else 8)

def test_clash_with_tb_callback(out_dir):
    train_model(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.LOSSES,
            CollectionKeys.METRICS,
        ],
        add_callbacks=["tensorboard"],
    )
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.tensor_names()) == 8
    shutil.rmtree(out_dir)

def test_include_regex(out_dir):
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
        include_workers="all",
    )
    hook.get_collection("custom_coll").include("dense")
    strategy = train_model(out_dir, hook=hook, steps=["train"])

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    assert len(tnames) == 4 + 4 + 3 * strategy.num_replicas_in_sync
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None

def test_gradtape_include_regex(out_dir):
    """
    Test custom collection with regex
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    helper_keras_gradtape(out_dir, hook=hook, save_config=SaveConfig(save_interval=9))

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    assert len(tnames) == (12 if is_tf_2_2() else 8)
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None

def test_save_one_worker(out_dir):
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
        include_workers="one",
    )
    tr = create_trial_fast_refresh(out_dir)
    assert len(tr.workers()) == 1
    assert len(tr.steps())
    assert len(tr.tensor_names(collection="weights"))
    assert len(tr.tensor(tr.tensor_names(collection="weights")[0]).workers(5)) == 1
    assert len(tr.tensor_names(collection="biases"))
    assert len(tr.tensor(tr.tensor_names(collection="biases")[0]).workers(5)) == 1
    assert len(tr.tensor_names(collection="gradients"))

def test_include_regex(out_dir, tf_eager_mode):
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
    )
    hook.get_collection("custom_coll").include("dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        run_eagerly=tf_eager_mode,
    )

    tr = create_trial_fast_refresh(out_dir)
    tnames = tr.tensor_names(collection="custom_coll")

    assert len(tnames) == (12 if is_tf_2_2() else 4)
    for tname in tnames:
        assert tr.tensor(tname).value(0) is not None

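# The tests in this file assume pytest fixtures such as `out_dir`,
# `tf_eager_mode`, and `workers`, defined in a shared conftest. A minimal
# sketch of what such fixtures could look like (assumed names and shapes for
# illustration, not the harness's actual definitions):
import pytest


@pytest.fixture
def example_out_dir(tmp_path):  # hypothetical stand-in for `out_dir`
    # Give each test an isolated directory for the hook's output.
    return str(tmp_path / "trial")


@pytest.fixture(params=[True, False])
def example_tf_eager_mode(request):  # hypothetical stand-in for `tf_eager_mode`
    # Run each test once in eager mode and once in graph (non-eager) mode.
    return request.param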