import os

import numpy as np

# Import paths below assume the sagemaker-python-sdk integration-test layout;
# adjust them if these helpers and constants live elsewhere in your tree.
from sagemaker import NTM, NTMModel
from sagemaker.amazon.common import read_records
from sagemaker.predictor import Predictor
from sagemaker.serverless import ServerlessInferenceConfig
from sagemaker.utils import unique_name_from_base

from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.record_set import prepare_record_set_from_local_files
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name

# ROLE, SINGLE_INSTANCE_COUNT, AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS,
# _build_airflow_workflow, and _assert_that_s3_url_contains_data are assumed to
# be provided by the shared airflow-config test module used by this suite.


def test_ntm_legacy_api(sagemaker_session):
    # Legacy (SDK v1) variant of test_ntm below, using the deprecated
    # train_instance_* arguments; named distinctly so it does not shadow it.
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "ntm")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # All records must share the same feature dimension.
        feature_num = int(all_records[0].features["values"].float32_tensor.shape[0])

        ntm = NTM(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            num_topics=10,
            sagemaker_session=sagemaker_session,
            base_job_name="test-ntm",
        )

        record_set = prepare_record_set_from_local_files(
            data_path, ntm.data_location, len(all_records), feature_num, sagemaker_session
        )
        ntm.fit(record_set, None)

    endpoint_name = unique_name_from_base("ntm")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = NTMModel(ntm.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)

        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)
        assert len(result) == 1
        for record in result:
            assert record.label["topic_weights"] is not None
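# Hedged sketch of the prepare_record_set_from_local_files helper used throughout
# this module, under the assumption that it uploads the local .pbr files to an S3
# prefix derived from the estimator's data_location and describes them as a
# RecordSet S3Prefix. The "_sketch" name is illustrative; the real shared helper
# may differ.
def _prepare_record_set_from_local_files_sketch(
    dir_path, destination, num_records, feature_dim, sagemaker_session
):
    from urllib.parse import urlparse

    from sagemaker.amazon.amazon_estimator import RecordSet

    # Derive an S3 key prefix from the estimator's data_location URL and upload
    # every file under dir_path there via the session's default bucket.
    key_prefix = urlparse(destination).path.lstrip("/")
    uploaded = sagemaker_session.upload_data(path=dir_path, key_prefix=key_prefix)
    return RecordSet(uploaded, num_records, feature_dim, s3_data_type="S3Prefix")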
def test_ntm_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        data_path = os.path.join(DATA_DIR, "ntm")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # All records must share the same feature dimension.
        feature_num = int(all_records[0].features["values"].float32_tensor.shape[0])

        ntm = NTM(
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        records = prepare_record_set_from_local_files(
            data_path, ntm.data_location, len(all_records), feature_num, sagemaker_session
        )

        training_config = _build_airflow_workflow(
            estimator=ntm, instance_type=cpu_instance_type, inputs=records
        )

        _assert_that_s3_url_contains_data(
            sagemaker_session,
            training_config["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"],
        )
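# Hedged sketch of the _assert_that_s3_url_contains_data helper referenced above,
# assuming it only needs to verify that at least one object exists under the
# training config's S3 prefix; the real shared helper may do more.
def _assert_that_s3_url_contains_data_sketch(sagemaker_session, s3_url):
    from urllib.parse import urlparse

    parsed_url = urlparse(s3_url)
    s3_client = sagemaker_session.boto_session.client("s3")
    response = s3_client.list_objects_v2(
        Bucket=parsed_url.netloc, Prefix=parsed_url.path.lstrip("/")
    )
    # list_objects_v2 reports the number of keys under the prefix; at least one
    # object must have been uploaded by the workflow above.
    assert response["KeyCount"] > 0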
def test_ntm_serverless_inference(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("ntm-serverless")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "ntm")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # All records must share the same feature dimension.
        feature_num = int(all_records[0].features["values"].float32_tensor.shape[0])

        ntm = NTM(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, ntm.data_location, len(all_records), feature_num, sagemaker_session
        )
        ntm.fit(records=record_set, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = NTMModel(ntm.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session)
        predictor = model.deploy(
            serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name
        )
        assert isinstance(predictor, Predictor)
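# ServerlessInferenceConfig() above relies on the SDK defaults. A hedged,
# hypothetical helper showing explicit configuration: memory_size_in_mb and
# max_concurrency are real ServerlessInferenceConfig parameters, but the values
# below are illustrative only.
def _deploy_serverless_explicit_sketch(model, endpoint_name):
    config = ServerlessInferenceConfig(memory_size_in_mb=2048, max_concurrency=5)
    return model.deploy(serverless_inference_config=config, endpoint_name=endpoint_name)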
def test_ntm(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("ntm")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "ntm")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # All records must share the same feature dimension.
        feature_num = int(all_records[0].features["values"].float32_tensor.shape[0])

        ntm = NTM(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, ntm.data_location, len(all_records), feature_num, sagemaker_session
        )
        ntm.fit(records=record_set, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = NTMModel(ntm.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)

        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)
        assert len(result) == 1
        for record in result:
            assert record.label["topic_weights"] is not None
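# Hedged sketch of pulling the actual weights out of a prediction record,
# assuming the response follows the Amazon Record protobuf layout asserted on
# above (label map -> float32 tensor); the "_sketch" helper name is illustrative.
def _topic_weights_from_record_sketch(record):
    # Each prediction record carries one weight per topic in a float32 tensor.
    return list(record.label["topic_weights"].float32_tensor.values)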