def test_factorization_machines(sagemaker_session, cpu_instance_type,
                                training_set):
    """Train a FactorizationMachines regressor, deploy it, and predict.

    Fits on the first 200 entries of ``training_set``, hosts the resulting
    model on an endpoint that shares the training job's name, and checks
    that predicting on 10 records returns 10 results that each carry a
    non-None ``score`` label.
    """
    job_name = unique_name_from_base("fm")
    hyperparameters = {
        "num_factors": 10,
        "predictor_type": "regressor",
        "epochs": 2,
        "clip_gradient": 1e2,
        "eps": 0.001,
        "rescale_grad": 1.0 / 100,
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator = FactorizationMachines(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            **hyperparameters
        )

        samples = training_set[0][:200]
        # training labels must be 'float32'
        labels = training_set[1][:200].astype("float32")
        estimator.fit(estimator.record_set(samples, labels),
                      job_name=job_name)

    # The endpoint reuses the job name and is deleted when the block exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        hosted_model = FactorizationMachinesModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        endpoint = hosted_model.deploy(1, cpu_instance_type,
                                       endpoint_name=job_name)
        predictions = endpoint.predict(training_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
Esempio n. 2
0
def test_factorization_machines(sagemaker_session):
    """Train FactorizationMachines on the bundled MNIST sample and predict.

    Loads ``one_p_mnist/mnist.pkl.gz`` from ``DATA_DIR``, fits a regressor
    on its first 200 training entries, deploys the model to a freshly named
    endpoint, and checks that 10 predictions come back, each with a
    non-None ``score`` label.
    """
    with timeout(minutes=15):
        # The 'encoding' argument is only passed when not on Python 2.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        with gzip.open(data_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)

        estimator = FactorizationMachines(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            num_factors=10,
            predictor_type='regressor',
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0/100,
            sagemaker_session=sagemaker_session,
            base_job_name='test-fm',
        )

        samples = train_set[0][:200]
        # training labels must be 'float32'
        labels = train_set[1][:200].astype('float32')
        estimator.fit(estimator.record_set(samples, labels))

    endpoint_name = name_from_base('fm')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        hosted_model = FactorizationMachinesModel(
            estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        endpoint = hosted_model.deploy(1, 'ml.c4.xlarge',
                                       endpoint_name=endpoint_name)
        predictions = endpoint.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
def test_factorization_machines(sagemaker_session):
    """Fit, deploy and query a FactorizationMachines regressor.

    The training data is read from the gzipped pickle
    ``one_p_mnist/mnist.pkl.gz`` under ``DATA_DIR``; only the first 200
    training entries are used for fitting and the first 10 for prediction.
    """
    hyperparameters = {
        'num_factors': 10,
        'predictor_type': 'regressor',
        'epochs': 2,
        'clip_gradient': 1e2,
        'eps': 0.001,
        'rescale_grad': 1.0 / 100,
    }

    with timeout(minutes=15):
        mnist_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # Only pass an explicit encoding when not running on Python 2.
        load_kwargs = {'encoding': 'latin1'}
        if sys.version_info.major == 2:
            load_kwargs = {}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, 'rb') as handle:
            train_set, _, _ = pickle.load(handle, **load_kwargs)

        fm_estimator = FactorizationMachines(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-fm',
            **hyperparameters
        )

        # training labels must be 'float32'
        float_labels = train_set[1][:200].astype('float32')
        records = fm_estimator.record_set(train_set[0][:200], float_labels)
        fm_estimator.fit(records)

    endpoint_name = name_from_base('fm')
    # The endpoint is deleted by name when this block exits.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        fm_model = FactorizationMachinesModel(
            fm_estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        fm_predictor = fm_model.deploy(1, 'ml.c4.xlarge',
                                       endpoint_name=endpoint_name)
        scored = fm_predictor.predict(train_set[0][:10])

        assert len(scored) == 10
        for item in scored:
            assert item.label["score"] is not None
Esempio n. 4
0
def test_async_factorization_machines():
    """Start training without waiting, re-attach by job name, then predict.

    Builds its own SageMaker session for ``REGION``, launches a
    FactorizationMachines training job with ``wait=False``, sleeps 20
    seconds, attaches to the job by name, deploys the attached estimator's
    model, and checks that 10 predictions each carry a non-None ``score``.
    """
    training_job_name = ""
    endpoint_name = name_from_base('factorizationMachines')
    boto_session = boto3.Session(region_name=REGION)
    sagemaker_session = sagemaker.Session(boto_session=boto_session)

    with timeout(minutes=5):
        # The 'encoding' argument is only passed when not on Python 2.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        with gzip.open(data_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)

        hyperparameters = {
            'num_factors': 10,
            'predictor_type': 'regressor',
            'epochs': 2,
            'clip_gradient': 1e2,
            'eps': 0.001,
            'rescale_grad': 1.0 / 100,
        }
        trainer = FactorizationMachines(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-fm',
            **hyperparameters
        )

        # training labels must be 'float32'
        labels = train_set[1][:200].astype('float32')
        records = trainer.record_set(train_set[0][:200], labels)
        trainer.fit(records, wait=False)
        # Remember the job name so the job can be re-attached below.
        training_job_name = trainer.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(endpoint_name,
                                             sagemaker_session,
                                             minutes=35):
        attached = FactorizationMachines.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        hosted_model = FactorizationMachinesModel(
            attached.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        endpoint = hosted_model.deploy(1, 'ml.c4.xlarge',
                                       endpoint_name=endpoint_name)
        predictions = endpoint.predict(train_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
Esempio n. 5
0
def test_async_factorization_machines(sagemaker_session, cpu_instance_type):
    """Launch training asynchronously, re-attach to it, deploy and predict.

    The training job is started with ``wait=False`` under a unique job
    name; after a 20-second pause the test attaches to that job by name and
    serves its model from an endpoint that reuses the same name.
    """
    job_name = unique_name_from_base("fm")

    with timeout(minutes=5):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # Only pass an explicit encoding when not running on Python 2.
        if sys.version_info.major == 2:
            load_kwargs = {}
        else:
            load_kwargs = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, "rb") as handle:
            train_set, _, _ = pickle.load(handle, **load_kwargs)

        hyperparameters = {
            "num_factors": 10,
            "predictor_type": "regressor",
            "epochs": 2,
            "clip_gradient": 1e2,
            "eps": 0.001,
            "rescale_grad": 1.0 / 100,
        }
        trainer = FactorizationMachines(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            **hyperparameters
        )

        samples = train_set[0][:200]
        # training labels must be 'float32'
        labels = train_set[1][:200].astype("float32")
        trainer.fit(trainer.record_set(samples, labels),
                    job_name=job_name,
                    wait=False)

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = FactorizationMachines.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )
        hosted_model = FactorizationMachinesModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        endpoint = hosted_model.deploy(1, cpu_instance_type,
                                       endpoint_name=job_name)
        predictions = endpoint.predict(train_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None
def test_async_factorization_machines(sagemaker_session, cpu_instance_type,
                                      training_set):
    """Fit without waiting, re-attach to the job, then deploy and predict.

    Starts a FactorizationMachines training job on the first 200 entries
    of ``training_set`` with ``wait=False``, pauses 20 seconds, attaches to
    the job by name, and checks that the deployed model returns 10
    predictions that each carry a non-None ``score`` label.
    """
    job_name = unique_name_from_base("fm")
    hyperparameters = {
        "num_factors": 10,
        "predictor_type": "regressor",
        "epochs": 2,
        "clip_gradient": 1e2,
        "eps": 0.001,
        "rescale_grad": 1.0 / 100,
    }

    with timeout(minutes=5):
        trainer = FactorizationMachines(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            **hyperparameters
        )

        samples = training_set[0][:200]
        # training labels must be 'float32'
        labels = training_set[1][:200].astype("float32")
        trainer.fit(trainer.record_set(samples, labels),
                    job_name=job_name,
                    wait=False)

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    # The endpoint reuses the job name and is deleted when the block exits.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = FactorizationMachines.attach(
            training_job_name=job_name,
            sagemaker_session=sagemaker_session,
        )
        hosted_model = FactorizationMachinesModel(
            attached.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        endpoint = hosted_model.deploy(1, cpu_instance_type,
                                       endpoint_name=job_name)
        predictions = endpoint.predict(training_set[0][:10])

        assert len(predictions) == 10
        assert all(p.label["score"] is not None for p in predictions)
Esempio n. 7
0
def test_factorization_machines(sagemaker_session):
    """Train a FactorizationMachines regressor on MNIST, deploy, and predict.

    Reads ``one_p_mnist/mnist.pkl.gz`` from ``DATA_DIR``, fits on its first
    200 training entries under a unique job name, serves the model from an
    endpoint with that same name, and checks that 10 predictions come back,
    each with a non-None ``score`` label.
    """
    job_name = unique_name_from_base("fm")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # The "encoding" argument is only passed when not on Python 2.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        with gzip.open(mnist_path, "rb") as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)

        hyperparameters = {
            "num_factors": 10,
            "predictor_type": "regressor",
            "epochs": 2,
            "clip_gradient": 1e2,
            "eps": 0.001,
            "rescale_grad": 1.0 / 100,
        }
        estimator = FactorizationMachines(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
            **hyperparameters
        )

        samples = train_set[0][:200]
        # training labels must be 'float32'
        labels = train_set[1][:200].astype("float32")
        estimator.fit(estimator.record_set(samples, labels), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        hosted_model = FactorizationMachinesModel(
            estimator.model_data,
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        endpoint = hosted_model.deploy(1, "ml.c4.xlarge", endpoint_name=job_name)
        predictions = endpoint.predict(train_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None