def test_object2vec(sagemaker_session, cpu_instance_type):
    """Train Object2Vec on the local sample data, deploy it, and query it once.

    The number of lines in ``train.jsonl`` is used as the record count when
    the record set is prepared. The training job and the endpoint share the
    same generated name. The test passes when the endpoint returns exactly
    one record and that record's ``label["scores"]`` is not ``None``.

    Args:
        sagemaker_session: Session passed to the estimator, the record-set
            helper, the model, and the endpoint clean-up context.
        cpu_instance_type: Instance type used for both training and hosting.
    """
    # NOTE(review): another function named ``test_object2vec`` appears in this
    # file as seen here; if both live in the same module the later definition
    # shadows the earlier one -- confirm which is intended.
    job_name = unique_name_from_base("object2vec")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "object2vec")
        training_file = os.path.join(data_path, "train.jsonl")

        # One JSON line per record, so the line count is the record count.
        with open(training_file, "r") as handle:
            num_records = sum(1 for _ in handle)

        hyperparameters = {
            "epochs": 3,
            "enc0_max_seq_len": 20,
            "enc0_vocab_size": 45000,
            "enc_dim": 16,
            "num_classes": 3,
            "negative_sampling_rate": 0,
            "comparator_list": "hadamard,concat,abs_diff",
            "tied_token_embedding_weight": False,
            "token_embedding_storage_type": "dense",
        }
        estimator = Object2Vec(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            **hyperparameters,
        )

        training_records = prepare_record_set_from_local_files(
            data_path,
            estimator.data_location,
            num_records,
            FEATURE_NUM,
            sagemaker_session,
        )

        estimator.fit(records=training_records, job_name=job_name)

    # The endpoint reuses the training job name; the context manager is
    # presumably responsible for deleting it afterwards (per its name).
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        hosted_model = Object2VecModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = hosted_model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        assert isinstance(predictor, Predictor)

        payload = {"instances": [{"in0": [354, 623], "in1": [16]}]}
        response = predictor.predict(payload)

        assert len(response) == 1
        assert all(item.label["scores"] is not None for item in response)
def test_object2vec(sagemaker_session):
    """Train Object2Vec on ``ml.c4.xlarge``, deploy it, and query it once.

    The number of lines in ``train.jsonl`` is used as the record count when
    the record set is prepared. After training, the model is deployed to a
    freshly named endpoint. The test passes when the endpoint returns exactly
    one record and that record's ``label["scores"]`` is not ``None``.

    Args:
        sagemaker_session: Session passed to the estimator, the record-set
            helper, the model, and the endpoint clean-up context.
    """
    # NOTE(review): another function named ``test_object2vec`` appears in this
    # file as seen here; if both live in the same module the later definition
    # shadows the earlier one -- confirm which is intended.
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "object2vec")
        training_file = os.path.join(data_path, "train.jsonl")

        # One JSON line per record, so the line count is the record count.
        with open(training_file, "r") as handle:
            num_records = sum(1 for _ in handle)

        estimator = Object2Vec(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            epochs=3,
            enc0_max_seq_len=20,
            enc0_vocab_size=45000,
            enc_dim=16,
            num_classes=3,
            sagemaker_session=sagemaker_session,
            base_job_name="test-object2vec",
        )

        training_records = prepare_record_set_from_local_files(
            data_path,
            estimator.data_location,
            num_records,
            FEATURE_NUM,
            sagemaker_session,
        )

        estimator.fit(training_records, None)

    endpoint_name = unique_name_from_base("object2vec")

    # The context manager is presumably responsible for deleting the endpoint
    # afterwards (per its name).
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        hosted_model = Object2VecModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        predictor = hosted_model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        assert isinstance(predictor, RealTimePredictor)

        payload = {"instances": [{"in0": [354, 623], "in1": [16]}]}
        response = predictor.predict(payload)

        assert len(response) == 1
        for item in response:
            assert item.label["scores"] is not None