def test_keras_fit_shapes(out_dir):
    """Run a Keras fit() with shape-only reductions and verify saved shapes at step 0."""
    shape_hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
    )
    helper_keras_fit(trial_dir=out_dir, hook=shape_hook)
    # Dump the tensor names seen at step 0 to aid debugging on failure.
    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(step=0))
    verify_shapes(out_dir, 0)
def test_keras_gradtape_shapes(out_dir):
    """Run a Keras GradientTape loop saving only shapes, then verify two steps.

    Step 500 is checked in addition to step 0; the gradtape helper evidently
    produces data at that step despite save_steps=[0] (mode-step numbering).
    """
    tape_hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
    )
    helper_keras_gradtape(trial_dir=out_dir, hook=tape_hook)
    for step in (0, 500):
        verify_shapes(out_dir, step)
def test_shapes(out_dir, tf_eager_mode):
    """Train with shape-only reductions and verify shapes, multiworker-aware."""
    strategy, _ = train_model(
        out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
        steps=["train"],
        eager=tf_eager_mode,
    )
    # More than one replica in sync means the files are laid out per worker.
    verify_shapes(out_dir, 0, multiworker=strategy.num_replicas_in_sync > 1)
def test_tf_keras_shapes(out_dir):
    """Graph-mode tf.keras run over several phases; verify shapes at step 0."""
    train_model(
        out_dir,
        save_all=True,
        reduction_config=ReductionConfig(save_shape=True),
        use_tf_keras=True,
        save_config=SaveConfig(save_steps=[0, 10]),
        eager=False,
        steps=["train", "eval", "predict", "train"],
    )
    verify_shapes(out_dir, 0)
def test_shapes(out_dir, save_raw_tensor=False):
    """Session-hook run saving shapes (optionally raw tensors too); verify step 0."""
    pre_test_clean_up()
    session_hook = smd.SessionHook(
        out_dir=out_dir,
        save_config=smd.SaveConfig(save_interval=1),
        reduction_config=smd.ReductionConfig(
            save_shape=True, save_raw_tensor=save_raw_tensor
        ),
        include_collections=["weights", "gradients", "losses"],
    )
    simple_model(session_hook)
    verify_shapes(out_dir, 0)
def test_save_shapes(out_dir):
    """Gluon MNIST run saving only shapes at steps 0 and 1; verify both.

    Cleanup is now done in a ``finally`` block: previously
    ``shutil.rmtree(out_dir)`` was skipped whenever ``verify_shapes`` raised,
    leaking the output directory between test runs.
    """
    hook = t_hook(
        out_dir=out_dir,
        save_config=SaveConfig(save_steps=[0, 1]),
        save_all=True,
        reduction_config=ReductionConfig(save_shape=True),
    )
    run_mnist_gluon_model(hook=hook, num_steps_train=5)
    try:
        verify_shapes(out_dir, 0)
        verify_shapes(out_dir, 1)
    finally:
        # ignore_errors so cleanup never masks the real assertion failure
        shutil.rmtree(out_dir, ignore_errors=True)
def test_mnist_shapes(out_dir, on_s3=False):
    """Estimator MNIST run saving only shapes; optionally writes to S3."""
    if on_s3:
        # Redirect output to a unique prefix in the testing bucket.
        run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = f"s3://smdebug-testing/outputs/hooks/estimator_modes/{run_id}"
    help_test_mnist(
        out_dir,
        save_all=True,
        save_config=smd.SaveConfig(save_steps=[0]),
        num_steps=1,
        steps=None,
        reduction_config=smd.ReductionConfig(save_shape=True),
    )
    verify_shapes(out_dir, 0)
def test_save_shapes(hook=None, out_dir=None):
    """Train a nested PyTorch module with a shape-only hook and verify shapes.

    If ``hook`` is None, a temporary hook writing to a fresh ``/tmp/trial_*``
    directory is created and removed afterwards. Cleanup now runs in a
    ``finally`` block: previously a failure in training or ``verify_shapes``
    leaked the temporary directory. When ``hook`` is supplied, the caller must
    also supply the matching ``out_dir``.
    """

    class ChildB(nn.Module):
        # Innermost child: a single conv layer.
        def __init__(self):
            super(ChildB, self).__init__()
            self.conv1 = nn.Conv2d(1, 20, 5, 1)

        def forward(self, x):
            return self.conv1(x)

    class ChildA(nn.Module):
        # Wraps ChildB with a ReLU so the hook sees two levels of nesting.
        def __init__(self):
            super(ChildA, self).__init__()
            self.child2 = ChildB()
            self.relu0 = nn.ReLU()

        def forward(self, x):
            return self.relu0(self.child2(x))

    class NestedNet(nn.Module):
        def __init__(self):
            super(NestedNet, self).__init__()
            self.child1 = ChildA()
            self.max_pool = nn.MaxPool2d(2, stride=2)
            self.conv2 = nn.Conv2d(20, 50, 5, 1)
            # NOTE(review): never assigned to self, so it is not a registered
            # submodule and is not used in forward(); presumably intentional
            # (hook should not see it) -- confirm before removing.
            relu_module = nn.ReLU()
            self.relu1 = nn.ReLU()
            self.max_pool2 = nn.MaxPool2d(2, stride=2)
            self.fc1 = nn.Linear(4 * 4 * 50, 500)
            self.relu2 = nn.ReLU()
            self.fc2 = nn.Linear(500, 10)

        def forward(self, x):
            x = self.child1(x)
            x = self.max_pool(x)
            x = self.relu1(self.conv2(x))
            x = self.max_pool2(x)
            x = x.view(-1, 4 * 4 * 50)
            x = self.relu2(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)

    hook_created = False
    if hook is None:
        run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = "/tmp/" + run_id
        hook = t_hook(
            out_dir=out_dir,
            save_config=SaveConfig(save_steps=[0]),
            save_all=True,
            reduction_config=ReductionConfig(save_shape=True),
        )
        hook_created = True

    try:
        model = NestedNet().to(torch.device("cpu"))
        hook.register_module(model)
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
        train(model, hook, torch.device("cpu"), optimizer, num_steps=10)
        # different versions seem to output different number of loss tensors
        verify_shapes(out_dir, 0)
    finally:
        if hook_created:
            # ignore_errors so cleanup never masks the real failure
            shutil.rmtree(out_dir, ignore_errors=True)