def test_async_fit(sagemaker_session):
    """Start a training job without waiting, then re-attach to it and deploy.

    The job is launched through ``_run_mnist_training_job`` with
    ``wait=False``. After a 20 second pause the test re-attaches to the job
    by name via ``Chainer.attach``, deploys a single ``ml.c4.xlarge``
    instance and hands the resulting predictor to ``_predict_and_assert``.

    Args:
        sagemaker_session: Session object forwarded to every helper and SDK
            call made here (presumably a pytest fixture; confirm in conftest).
    """
    endpoint_name = 'test-chainer-attach-deploy-{}'.format(
        sagemaker_timestamp()
    )
    # The same instance type is used for training and for hosting.
    instance_type = "ml.c4.xlarge"

    # Only launching the job happens under the 5 minute guard.
    with timeout(minutes=5):
        launched_job = _run_mnist_training_job(
            sagemaker_session,
            instance_type,
            1,
            chainer_full_version=CHAINER_VERSION,
            wait=False,
        )
        print("Waiting to re-attach to the training job: %s" % launched_job)
        time.sleep(20)

    # The endpoint guard is given 35 minutes for attach + deploy + predict.
    with timeout_and_delete_endpoint_by_name(
        endpoint_name, sagemaker_session, minutes=35
    ):
        print("Re-attaching now to: %s" % launched_job)
        attached = Chainer.attach(
            training_job_name=launched_job,
            sagemaker_session=sagemaker_session,
        )
        endpoint_predictor = attached.deploy(
            1, instance_type, endpoint_name=endpoint_name
        )
        _predict_and_assert(endpoint_predictor)
def test_attach_deploy(chainer_training_job, sagemaker_session):
    """Attach to an existing Chainer training job and deploy it.

    Args:
        chainer_training_job: Value passed as the first argument to
            ``Chainer.attach`` (presumably a training job name supplied by a
            fixture; confirm against the fixture definition).
        sagemaker_session: Session object forwarded to the SDK calls.
    """
    endpoint_name = 'test-chainer-attach-deploy-{}'.format(
        sagemaker_timestamp()
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = Chainer.attach(
            chainer_training_job,
            sagemaker_session=sagemaker_session,
        )
        # One ml.m4.xlarge instance backs the endpoint under test.
        endpoint_predictor = attached.deploy(
            1, 'ml.m4.xlarge', endpoint_name=endpoint_name
        )
        _predict_and_assert(endpoint_predictor)
def test_attach_deploy(chainer_training_job, sagemaker_session):
    """Attach to an existing Chainer training job and deploy it.

    The endpoint name is produced by ``unique_name_from_base`` from the base
    ``test-chainer-attach-deploy``.

    Args:
        chainer_training_job: Value passed as the first argument to
            ``Chainer.attach`` (presumably a training job name supplied by a
            fixture; confirm against the fixture definition).
        sagemaker_session: Session object forwarded to the SDK calls.
    """
    endpoint_name = unique_name_from_base('test-chainer-attach-deploy')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = Chainer.attach(
            chainer_training_job,
            sagemaker_session=sagemaker_session,
        )
        # One ml.m4.xlarge instance backs the endpoint under test.
        endpoint_predictor = attached.deploy(
            1, 'ml.m4.xlarge', endpoint_name=endpoint_name
        )
        _predict_and_assert(endpoint_predictor)
def test_async_fit(sagemaker_session):
    """Start a training job without waiting, then re-attach to it and deploy.

    The job is launched through ``_run_mnist_training_job`` with
    ``wait=False``. After a 20 second pause the test re-attaches to the job
    by name via ``Chainer.attach``, deploys a single ``ml.c4.xlarge``
    instance and hands the resulting predictor to ``_predict_and_assert``.

    Args:
        sagemaker_session: Session object forwarded to every helper and SDK
            call made here (presumably a pytest fixture; confirm in conftest).
    """
    endpoint_name = 'test-chainer-attach-deploy-{}'.format(
        sagemaker_timestamp()
    )

    # Only launching the job happens under the 5 minute guard.
    with timeout(minutes=5):
        job_to_attach = _run_mnist_training_job(
            sagemaker_session,
            "ml.c4.xlarge",
            1,
            chainer_full_version=CHAINER_VERSION,
            wait=False,
        )
        print("Waiting to re-attach to the training job: %s" % job_to_attach)
        time.sleep(20)

    # No explicit limit is passed here, so the helper's own default applies.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        print("Re-attaching now to: %s" % job_to_attach)
        reattached = Chainer.attach(
            training_job_name=job_to_attach,
            sagemaker_session=sagemaker_session,
        )
        endpoint_predictor = reattached.deploy(
            1, "ml.c4.xlarge", endpoint_name=endpoint_name
        )
        _predict_and_assert(endpoint_predictor)
def test_attach_deploy(
    sagemaker_session,
    chainer_latest_version,
    chainer_latest_py_version,
    cpu_instance_type,
):
    """Train a Chainer estimator without waiting, then attach to the job and deploy.

    The test builds a ``Chainer`` estimator for ``chainer_mnist/mnist.py``,
    uploads the ``train`` and ``test`` directories, and calls ``fit`` with
    ``wait=False``. It then attaches to ``latest_training_job.name``, deploys
    one instance and passes the predictor to ``_predict_and_assert``.

    Args:
        sagemaker_session: Session used for uploads, training and hosting.
        chainer_latest_version: Passed as the estimator's ``framework_version``.
        chainer_latest_py_version: Passed as the estimator's ``py_version``.
        cpu_instance_type: Instance type used for both training and hosting.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "chainer_mnist")
        # The entry point lives in the same directory as the data folders.
        script_path = os.path.join(data_path, "mnist.py")

        estimator_kwargs = {
            "entry_point": script_path,
            "role": "SageMakerRole",
            "framework_version": chainer_latest_version,
            "py_version": chainer_latest_py_version,
            "instance_count": 1,
            "instance_type": cpu_instance_type,
            "sagemaker_session": sagemaker_session,
            "hyperparameters": {"epochs": 1},
        }
        mnist_estimator = Chainer(**estimator_kwargs)

        # Dict entries are evaluated in order: "train" is uploaded before "test".
        input_channels = {
            "train": sagemaker_session.upload_data(
                path=os.path.join(data_path, "train"),
                key_prefix="integ-test-data/chainer_mnist/train",
            ),
            "test": sagemaker_session.upload_data(
                path=os.path.join(data_path, "test"),
                key_prefix="integ-test-data/chainer_mnist/test",
            ),
        }

        job_name = unique_name_from_base("test-chainer-training")
        # wait=False: fit is asked not to wait for the training job.
        mnist_estimator.fit(input_channels, wait=False, job_name=job_name)

    endpoint_name = unique_name_from_base("test-chainer-attach-deploy")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = Chainer.attach(
            mnist_estimator.latest_training_job.name,
            sagemaker_session=sagemaker_session,
        )
        endpoint_predictor = attached.deploy(
            1, cpu_instance_type, endpoint_name=endpoint_name
        )
        _predict_and_assert(endpoint_predictor)