def _train_and_assert_success(estimator, input_dir, output_path):
    """Fit ``estimator`` on a local training channel and check its outputs.

    Args:
        estimator: Object whose ``fit`` method is called with a mapping of
            channel name to ``file://`` URI.
        input_dir: Directory whose ``training`` sub-path is used as the
            ``training`` channel.
        output_path: Location handed to ``assert_files_exist`` together with
            the expected file listing.
    """
    training_uri = 'file://{}'.format(os.path.join(input_dir, 'training'))
    estimator.fit({'training': training_uri})

    # Expected artifacts, keyed by the sub-directory they are looked up under.
    expected_files = {
        'model': ['model.pth'],
        'output': ['success'],
    }
    assert_files_exist(output_path, expected_files)
def test_cpu_nccl(docker_image, sagemaker_local_session, tmpdir):
    """Check that local training with the ``nccl`` backend fails.

    A two-instance local ``PyTorch`` estimator is configured with the
    ``backend`` hyperparameter set to ``'nccl'``. ``fit`` is expected to raise
    ``RuntimeError``, after which a ``failure`` entry must be present under
    ``output`` in ``tmpdir``.
    """
    output_uri = 'file://{}'.format(tmpdir)
    estimator_kwargs = dict(
        entry_point=mnist_script,
        role=ROLE,
        image_name=docker_image,
        train_instance_count=2,
        train_instance_type='local',
        sagemaker_session=sagemaker_local_session,
        hyperparameters={'backend': 'nccl'},
        output_path=output_uri,
    )
    estimator = PyTorch(**estimator_kwargs)

    with pytest.raises(RuntimeError):
        training_dir = os.path.join(data_dir, 'training')
        channels = {'training': 'file://{}'.format(training_dir)}
        estimator.fit(channels)

    # The failed job is expected to leave a failure marker in the output dir.
    expected_failure = {'output': ['failure']}
    assert_files_exist(str(tmpdir), expected_failure)
def _train_and_assert_success(estimator, output_path,
                              output_files=MODEL_SUCCESS_FILES):
    """Fit ``estimator`` on the module's training data and check its outputs.

    Args:
        estimator: Object whose ``fit`` method is called with a mapping of
            channel name to ``file://`` URI.
        output_path: Location handed to ``assert_files_exist``.
        output_files: Expected file listing passed to ``assert_files_exist``;
            defaults to ``MODEL_SUCCESS_FILES``.
    """
    training_dir = os.path.join(data_dir, 'training')
    channels = {'training': 'file://{}'.format(training_dir)}
    estimator.fit(channels)
    assert_files_exist(output_path, output_files)