Python mojo_predict_csv Exemples, h2o.mojo_predict_csv Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : pyunit_mojo_predict.py Projet : michalkurka/h2o-3

def mojo_predict_api_test(sandbox_dir):
    """Check how ``h2o.mojo_predict_csv`` handles default and custom file paths.

    A bernoulli GBM is trained on the prostate dataset and its MOJO is
    downloaded through the ``download_mojo`` helper (defined outside this
    function). The same one-row input CSV is then scored several times while
    varying which paths are passed explicitly:

    * no jar and no output path given;
    * jar passed explicitly, no output path given;
    * jar stored under a custom name in another directory (a call that omits
      ``genmodel_jar_path`` must then raise ``RuntimeError``);
    * custom ``output_csv_path``.

    :param sandbox_dir: directory that receives the input CSV, the MOJO zip,
        the ``h2o-genmodel.jar`` and the ``prediction.csv`` output.
    """
    data = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    input_csv = "%s/in.csv" % sandbox_dir
    output_csv = "%s/prediction.csv" % sandbox_dir
    # Export a single row (row index 1, columns from index 2 on) as scoring input.
    h2o.export_file(data[1, 2:], input_csv)

    # Column 1 is the response; convert it to a factor for the bernoulli model.
    data[1] = data[1].asfactor()
    model = H2OGradientBoostingEstimator(distribution="bernoulli")
    model.train(x=[2, 3, 4, 5, 6, 7, 8], y=1, training_frame=data)

    # Download the MOJO; both the zip and the genmodel jar are asserted to
    # exist in sandbox_dir afterwards.
    model_zip_path = os.path.join(sandbox_dir, 'model.zip')
    genmodel_path = os.path.join(sandbox_dir, 'h2o-genmodel.jar')
    download_mojo(model, model_zip_path)
    assert os.path.isfile(model_zip_path)
    assert os.path.isfile(genmodel_path)

    # Predict without and with an explicit jar path; no output path is given,
    # and the result is asserted to appear at <sandbox_dir>/prediction.csv.
    h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path, verbose=True)
    h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path, genmodel_jar_path=genmodel_path,
                               verbose=True)
    assert os.path.isfile(output_csv)
    # Clean up so the next section starts without a zip, jar or output file.
    os.remove(model_zip_path)
    os.remove(genmodel_path)
    os.remove(output_csv)

    # Predict with the genmodel jar stored under a custom name elsewhere.
    other_sandbox_dir = tempfile.mkdtemp()
    try:
        genmodel_path = os.path.join(other_sandbox_dir, 'h2o-genmodel-custom.jar')
        download_mojo(model, model_zip_path, genmodel_path)
        assert os.path.isfile(model_zip_path)
        assert os.path.isfile(genmodel_path)
        try:
            # Without genmodel_jar_path this call is expected to raise
            # RuntimeError. If it does not, the AssertionError from
            # `assert False` is not caught below and fails the test.
            h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path, verbose=True)
            assert False, "There should be no h2o-genmodel.jar at %s" % sandbox_dir
        except RuntimeError:
            pass
        # The failed call must not have produced an output file.
        assert not os.path.isfile(output_csv)
        h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path,
                                   genmodel_jar_path=genmodel_path, verbose=True)
        assert os.path.isfile(output_csv)
        os.remove(output_csv)

        output_csv = "%s/out.prediction" % other_sandbox_dir

        # Predict with an explicit, custom output path.
        h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path,
                                   genmodel_jar_path=genmodel_path, verbose=True, output_csv_path=output_csv)
        assert os.path.isfile(output_csv)
        os.remove(model_zip_path)
        os.remove(genmodel_path)
        os.remove(output_csv)
    finally:
        # Always drop the temporary directory, even when an assertion fails.
        shutil.rmtree(other_sandbox_dir)

Exemple #2

0

Afficher le fichier

def mojo_predict_api_test(sandbox_dir):
    """Check how ``h2o.mojo_predict_csv`` handles default and custom file paths.

    A bernoulli GBM is trained on the prostate dataset and its MOJO is
    downloaded through the ``download_mojo`` helper (defined outside this
    function). The same one-row input CSV is then scored several times while
    varying which paths are passed explicitly:

    * no jar and no output path given;
    * jar passed explicitly, no output path given;
    * jar stored under a custom name in another directory (a call that omits
      ``genmodel_jar_path`` must then raise ``RuntimeError``);
    * custom ``output_csv_path``.

    :param sandbox_dir: directory that receives the input CSV, the MOJO zip,
        the ``h2o-genmodel.jar`` and the ``prediction.csv`` output.
    """
    data = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    input_csv = "%s/in.csv" % sandbox_dir
    output_csv = "%s/prediction.csv" % sandbox_dir
    # Export a single row (row index 1, columns from index 2 on) as scoring input.
    h2o.export_file(data[1, 2:], input_csv)

    # Column 1 is the response; convert it to a factor for the bernoulli model.
    data[1] = data[1].asfactor()
    model = H2OGradientBoostingEstimator(distribution="bernoulli")
    model.train(x=[2, 3, 4, 5, 6, 7, 8], y=1, training_frame=data)

    # Download the MOJO; both the zip and the genmodel jar are asserted to
    # exist in sandbox_dir afterwards.
    model_zip_path = os.path.join(sandbox_dir, 'model.zip')
    genmodel_path = os.path.join(sandbox_dir, 'h2o-genmodel.jar')
    download_mojo(model, model_zip_path)
    assert os.path.isfile(model_zip_path)
    assert os.path.isfile(genmodel_path)

    # Predict without and with an explicit jar path; no output path is given,
    # and the result is asserted to appear at <sandbox_dir>/prediction.csv.
    h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path, verbose=True)
    h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path, genmodel_jar_path=genmodel_path,
                               verbose=True)
    assert os.path.isfile(output_csv)
    # Clean up so the next section starts without a zip, jar or output file.
    os.remove(model_zip_path)
    os.remove(genmodel_path)
    os.remove(output_csv)

    # Predict with the genmodel jar stored under a custom name elsewhere.
    other_sandbox_dir = tempfile.mkdtemp()
    try:
        genmodel_path = os.path.join(other_sandbox_dir, 'h2o-genmodel-custom.jar')
        download_mojo(model, model_zip_path, genmodel_path)
        assert os.path.isfile(model_zip_path)
        assert os.path.isfile(genmodel_path)
        try:
            # Without genmodel_jar_path this call is expected to raise
            # RuntimeError. If it does not, the AssertionError from
            # `assert False` is not caught below and fails the test.
            h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path, verbose=True)
            assert False, "There should be no h2o-genmodel.jar at %s" % sandbox_dir
        except RuntimeError:
            pass
        # The failed call must not have produced an output file.
        assert not os.path.isfile(output_csv)
        h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path,
                                   genmodel_jar_path=genmodel_path, verbose=True)
        assert os.path.isfile(output_csv)
        os.remove(output_csv)

        output_csv = "%s/out.prediction" % other_sandbox_dir

        # Predict with an explicit, custom output path.
        h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=model_zip_path,
                                   genmodel_jar_path=genmodel_path, verbose=True, output_csv_path=output_csv)
        assert os.path.isfile(output_csv)
        os.remove(model_zip_path)
        os.remove(genmodel_path)
        os.remove(output_csv)
    finally:
        # Always drop the temporary directory, even when an assertion fails.
        shutil.rmtree(other_sandbox_dir)

Exemple #3

0

Afficher le fichier

def mojo_predict_csv_test(target_dir):
    """Compare in-cluster Isolation Forest predictions with MOJO CSV scoring.

    An Isolation Forest is trained on the prostate dataset and scored twice on
    that same data: once through the model's ``predict`` method and once
    through ``h2o.mojo_predict_csv`` using the downloaded MOJO. The two result
    frames must be equal (dtypes are not compared).

    :param target_dir: directory that receives the MOJO zip and the
        ``prediction.csv`` written by the MOJO scorer.
    """
    zip_location = os.path.join(target_dir, "prostate_isofor_model.zip")

    csv_location = pyunit_utils.locate("smalldata/logreg/prostate.csv")
    frame = h2o.import_file(path=csv_location)

    # Train the Isolation Forest and score the training frame in-cluster.
    forest = H2OIsolationForestEstimator()
    forest.train(training_frame=frame)
    expected_df = forest.predict(frame).as_data_frame(use_pandas=True)

    # `download_mojo` is a helper defined outside this function.
    download_mojo(forest, zip_location)

    result_csv = "%s/prediction.csv" % target_dir
    print("\nPerforming Isolation Forest Prediction using MOJO @... " + target_dir)
    mojo_rows = h2o.mojo_predict_csv(
        input_csv_path=csv_location,
        mojo_zip_path=zip_location,
        output_csv_path=result_csv,
    )
    # Rebuild a float frame from the MOJO rows so it is comparable with the
    # pandas frame obtained from the cluster.
    actual_df = pd.DataFrame(
        mojo_rows,
        dtype=np.float64,
        columns=["predict", "mean_length"],
    )

    print("*** pred_h2o_df ***")
    print(expected_df)
    print("***pred_mojo_df ***")
    print(actual_df)
    assert_frame_equal(expected_df, actual_df, check_dtype=False)

Exemple #4

0

Afficher le fichier

Fichier : pyunit_mojo_concurrent_contribs_xgboost.py Projet : wwjiang007/h2o-3

def demo_xgboost_concurrent_contributions():
    """Check that concurrent MOJO contribution scoring matches the reference run.

    An XGBoost model is trained on the prostate dataset and its MOJO (plus the
    genmodel jar) is downloaded into ``<results>/xgb_concurrent``.
    ``h2o.mojo_predict_csv`` is then invoked with ``predict_contributions=True``
    and the extra ``--testConcurrent <concurrency>`` command-line argument.
    The reference result returned by that call is compared with the contents
    of ``predictions.csv.<i>`` for every ``i`` in ``range(concurrency)``.

    :raises AssertionError: if any per-thread output differs from the
        reference result.
    """
    prostate_path = pyunit_utils.locate("smalldata/logreg/prostate.csv")

    prostate = h2o.import_file(path=prostate_path)
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()

    xgb_model = H2OXGBoostEstimator()
    xgb_model.train(
        x=["AGE", "RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"],
        y="CAPSULE",
        training_frame=prostate)

    results_dir = os.path.join(pyunit_utils.locate("results"),
                               "xgb_concurrent")
    # os.mkdir raises if the directory already exists, so every run starts
    # from a directory without stale prediction files.
    os.mkdir(results_dir)
    mojo_path = xgb_model.download_mojo(results_dir, get_genmodel_jar=True)

    # how many parallel threads to run
    concurrency = 4

    reference_result = h2o.mojo_predict_csv(
        input_csv_path=prostate_path,
        mojo_zip_path=mojo_path,
        output_csv_path=os.path.join(results_dir, "predictions.csv"),
        predict_contributions=True,
        extra_cmd_args=["--testConcurrent",
                        str(concurrency)])
    print(reference_result)

    # Iterate over exactly as many per-thread outputs as were requested above
    # instead of a second hard-coded count that could drift out of sync.
    for test_id in range(concurrency):
        concurrent_path = os.path.join(results_dir,
                                       "predictions.csv." + str(test_id))
        with open(concurrent_path) as csv_file:
            concurrent_result = list(csv.DictReader(csv_file))
        assert reference_result == concurrent_result, \
            "concurrent output %s differs from the reference result" % concurrent_path

Exemple #5

0

Afficher le fichier

Fichier : pyunit_build_mojo_pipeline.py Projet : Kendralabs/h2o-4

def build_mojo_pipeline():
    """Build a PCA -> KMeans MOJO pipeline and compare it with in-cluster scoring.

    A 2-component PCA is trained on the iris training data and a 3-cluster
    KMeans model is trained on the PCA output. Both MOJOs are downloaded into
    the ``results`` directory, the Java tool
    ``hex.genmodel.tools.BuildPipeline`` is run to combine them into
    ``pipe.zip``, and the pipeline is scored on the raw iris CSV with
    ``h2o.mojo_predict_csv``. Every pipeline cluster assignment must equal the
    KMeans prediction computed in the cluster.

    :raises AssertionError: if any pipeline prediction differs.
    """
    out_dir = pyunit_utils.locate("results")
    iris_path = pyunit_utils.locate('smalldata/iris/iris_train.csv')
    iris_frame = h2o.import_file(iris_path)

    pca_model = H2OPrincipalComponentAnalysisEstimator(k=2)
    pca_model.train(training_frame=iris_frame)
    components = pca_model.predict(iris_frame)

    kmeans_model = H2OKMeansEstimator(k=3)
    kmeans_model.train(training_frame=components)

    pca_zip = pca_model.download_mojo(path=out_dir)
    kmeans_zip = kmeans_model.download_mojo(get_genmodel_jar=True, path=out_dir)

    pipeline_zip = os.path.join(out_dir, "pipe.zip")
    # The PCA MOJO is referenced by its file name up to the first dot.
    pca_alias = os.path.basename(pca_zip).partition('.')[0]
    # One "<component column>=<pca mojo name>:<index>" mapping per PCA output.
    column_mappings = [
        "{0}={1}:{2}".format(column, pca_alias, position)
        for position, column in enumerate(components.columns)
    ]

    command = (
        ["java", "-cp", os.path.join(out_dir, "h2o-genmodel.jar"),
         "hex.genmodel.tools.BuildPipeline", "--mapping"]
        + column_mappings
        + ["--output", pipeline_zip, "--input", kmeans_zip, pca_zip]
    )
    # Wait for the tool to finish; its captured output is discarded.
    subprocess.Popen(command, stdout=PIPE, stderr=STDOUT).communicate()

    expected = kmeans_model.predict(components)
    raw_rows = h2o.mojo_predict_csv(input_csv_path=iris_path,
                                    mojo_zip_path=pipeline_zip)
    scored = h2o.H2OFrame([row['cluster'] for row in raw_rows],
                          column_names=['predict'])

    # A mean of 1 over the element-wise comparison means every row matched.
    assert (scored == expected).mean()[0, "predict"] == 1

Exemple #6

0

Afficher le fichier

def test_setInvNumNA():
    """Check MOJO CSV scoring with ``setInvNumNA=True`` against in-cluster scoring.

    A gaussian GLM with mean imputation is built and saved as a MOJO through
    ``pyunit_utils.build_save_model_GLM``. The MOJO is scored on the
    ``pubdev_6617_setInvNumNA_test.csv`` file with ``setInvNumNA=True``, and
    the first five predictions are compared (absolute tolerance 1e-6) with the
    model's predictions on ``pubdev_6617_setInvNumNA_test_model.csv``.

    :raises AssertionError: if any of the first five predictions differ by
        1e-6 or more.
    """
    train = h2o.import_file(
        pyunit_utils.locate(
            "smalldata/glm_test/pubdev_6617_setInvNumNA_train.csv"))
    testdata = pyunit_utils.locate(
        "smalldata/glm_test/pubdev_6617_setInvNumNA_test.csv")
    testdataModel = h2o.import_file(
        pyunit_utils.locate(
            "smalldata/glm_test/pubdev_6617_setInvNumNA_test_model.csv"))

    response = "C2"
    x = ["C1"]
    params = {
        'missing_values_handling': "MeanImputation",
        'family': 'gaussian'
    }
    glmMultinomialModel = pyunit_utils.build_save_model_GLM(
        params, x, train, response)  # build and save mojo model

    MOJONAME = pyunit_utils.getMojoName(glmMultinomialModel._id)
    # NOTE(review): '__file__' is a string literal here, so the path resolves
    # relative to the current working directory, not this module — confirm
    # this is intended.
    TMPDIR = os.path.normpath(
        os.path.join(os.path.dirname(os.path.realpath('__file__')), "..",
                     "results", MOJONAME))
    mojoLoco = os.path.normpath(os.path.join(TMPDIR, MOJONAME + '.zip'))
    mojoOut = os.path.normpath(os.path.join(TMPDIR, "mojo_out.csv"))
    # Locate the directory that contains 'h2o-py' by walking the path
    # components; the genmodel jar is looked up relative to it.
    genJarDir = str.split(str(TMPDIR), '/')
    genJarDir = '/'.join(genJarDir[0:genJarDir.index('h2o-py')]
                         )  # locate directory of genmodel.jar
    jarpath = os.path.join(genJarDir,
                           "h2o-assemblies/genmodel/build/libs/genmodel.jar")
    mojoPredict = h2o.mojo_predict_csv(input_csv_path=testdata,
                                       mojo_zip_path=mojoLoco,
                                       output_csv_path=mojoOut,
                                       genmodel_jar_path=jarpath,
                                       verbose=True,
                                       setInvNumNA=True)
    modelPred = glmMultinomialModel.predict(testdataModel)
    for ind in range(5):
        mojo_value = float(mojoPredict[ind]['predict'])
        model_value = modelPred[ind, 0]
        # Report the values of the row that actually failed; the message
        # previously always showed the MOJO prediction of row 0.
        assert abs(mojo_value - model_value) < 1e-6, \
            "model predict {1} and mojo predict {0} differs too much".format(mojo_value, model_value)

Exemple #7

0

Afficher le fichier

def mojo_predict_csv_test(target_dir):
    """Compare in-cluster GBM scoring with MOJO CSV scoring for three problem types.

    For regression, binomial and multinomial GBM models in turn, the model is
    trained, a single row is scored in the cluster, the MOJO is downloaded
    through the ``download_mojo`` helper (defined outside this function) and
    the same row is scored with ``h2o.mojo_predict_csv``. Predictions are
    compared with exact float equality. For the regression and binomial
    models, contributions obtained with ``predict_contributions=True`` are
    also compared with ``predict_contributions`` from the cluster.

    The same ``mojo_zip_path`` and ``output_csv`` are reused by each section,
    so every step overwrites the files of the previous one.

    :param target_dir: directory that receives the input CSVs, the MOJO zip
        and the prediction output CSV.
    """
    mojo_file_name = "prostate_gbm_model.zip"
    mojo_zip_path = os.path.join(target_dir, mojo_file_name)

    prostate = h2o.import_file(
        path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    # Random split driven by a uniform column: r < 0.70 train, the rest test.
    r = prostate[0].runif()
    train = prostate[r < 0.70]
    test = prostate[r >= 0.70]

    # Take a single row (row index 1) of the test frame, columns from index 2 on
    pdf = test[1, 2:]
    input_csv = "%s/in.csv" % target_dir
    output_csv = "%s/output.csv" % target_dir
    h2o.export_file(pdf, input_csv)

    # =================================================================
    # Regression
    # =================================================================
    regression_gbm1 = H2OGradientBoostingEstimator(distribution="gaussian")
    regression_gbm1.train(x=[2, 3, 4, 5, 6, 7, 8], y=1, training_frame=train)
    pred_reg = regression_gbm1.predict(pdf)
    contribs_reg = regression_gbm1.predict_contributions(pdf)
    p1 = pred_reg[0, 0]
    print("Regression prediction: " + str(p1))

    download_mojo(regression_gbm1, mojo_zip_path)

    print("\nPerforming Regression Prediction using MOJO @... " + target_dir)
    prediction_result = h2o.mojo_predict_csv(input_csv_path=input_csv,
                                             mojo_zip_path=mojo_zip_path,
                                             output_csv_path=output_csv)
    print("Prediction result: " + str(prediction_result))
    assert p1 == float(
        prediction_result[0]['predict']
    ), "expected predictions to be the same for binary and MOJO model for regression"

    print("\nComparing Regression Contributions using MOJO @... " + target_dir)
    # This call overwrites output_csv, which is then read back with pandas.
    contributions_result = h2o.mojo_predict_csv(input_csv_path=input_csv,
                                                mojo_zip_path=mojo_zip_path,
                                                output_csv_path=output_csv,
                                                predict_contributions=True)
    assert contributions_result is not None
    contributions_pandas = pandas.read_csv(output_csv)
    assert_frame_equal(contribs_reg.as_data_frame(use_pandas=True),
                       contributions_pandas,
                       check_dtype=False)

    # =================================================================
    # Binomial
    # =================================================================
    # Convert the response column to a factor for the bernoulli model.
    train[1] = train[1].asfactor()
    bernoulli_gbm1 = H2OGradientBoostingEstimator(distribution="bernoulli")

    bernoulli_gbm1.train(x=[2, 3, 4, 5, 6, 7, 8], y=1, training_frame=train)
    pred_bin = bernoulli_gbm1.predict(pdf)
    contribs_bin = bernoulli_gbm1.predict_contributions(pdf)

    # Columns 1 and 2 of the prediction frame are compared below with the
    # MOJO columns named '0' and '1'.
    binary_prediction_0 = pred_bin[0, 1]
    binary_prediction_1 = pred_bin[0, 2]
    print("Binomial prediction: p0: " + str(binary_prediction_0))
    print("Binomial prediction: p1: " + str(binary_prediction_1))

    download_mojo(bernoulli_gbm1, mojo_zip_path)

    print("\nPerforming Binomial Prediction using MOJO @... " + target_dir)
    prediction_result = h2o.mojo_predict_csv(input_csv_path=input_csv,
                                             mojo_zip_path=mojo_zip_path,
                                             output_csv_path=output_csv)

    mojo_prediction_0 = float(prediction_result[0]['0'])
    mojo_prediction_1 = float(prediction_result[0]['1'])
    print("Binomial prediction: p0: " + str(mojo_prediction_0))
    print("Binomial prediction: p1: " + str(mojo_prediction_1))

    assert binary_prediction_0 == mojo_prediction_0, "expected predictions to be the same for binary and MOJO model for Binomial - p0"
    assert binary_prediction_1 == mojo_prediction_1, "expected predictions to be the same for binary and MOJO model for Binomial - p1"

    print("\nComparing Binary Classification Contributions using MOJO @... " +
          target_dir)
    # As above, the contributions run overwrites output_csv before it is read.
    contributions_bin_result = h2o.mojo_predict_csv(
        input_csv_path=input_csv,
        mojo_zip_path=mojo_zip_path,
        output_csv_path=output_csv,
        predict_contributions=True)
    assert contributions_bin_result is not None
    contributions_bin_pandas = pandas.read_csv(output_csv)
    print(contributions_bin_pandas)
    print(contribs_bin.as_data_frame(use_pandas=True))
    assert_frame_equal(contribs_bin.as_data_frame(use_pandas=True),
                       contributions_bin_pandas,
                       check_dtype=False)

    # =================================================================
    # Multinomial
    # =================================================================
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # NOTE(review): the train (r < 0.90) and test (r >= 0.10) selections
    # overlap; presumably harmless because only one row is scored — confirm.
    r = iris[0].runif()
    train = iris[r < 0.90]
    test = iris[r >= 0.10]

    # Take a single row (row index 1) of the test frame, columns 0 to 3
    pdf = test[1, 0:4]
    input_csv = "%s/in-multi.csv" % target_dir
    output_csv = "%s/output.csv" % target_dir
    h2o.export_file(pdf, input_csv)

    multi_gbm = H2OGradientBoostingEstimator()
    multi_gbm.train(x=['C1', 'C2', 'C3', 'C4'], y='C5', training_frame=train)

    pred_multi = multi_gbm.predict(pdf)
    # Columns 1-3 of the prediction frame are compared below with the MOJO
    # columns named after the three iris classes.
    multinomial_prediction_1 = pred_multi[0, 1]
    multinomial_prediction_2 = pred_multi[0, 2]
    multinomial_prediction_3 = pred_multi[0, 3]
    print("Multinomial prediction (Binary): p0: " +
          str(multinomial_prediction_1))
    print("Multinomial prediction (Binary): p1: " +
          str(multinomial_prediction_2))
    print("Multinomial prediction (Binary): p2: " +
          str(multinomial_prediction_3))

    download_mojo(multi_gbm, mojo_zip_path)

    print("\nPerforming Multinomial Prediction using MOJO @... " + target_dir)
    prediction_result = h2o.mojo_predict_csv(input_csv_path=input_csv,
                                             mojo_zip_path=mojo_zip_path,
                                             output_csv_path=output_csv)

    mojo_prediction_1 = float(prediction_result[0]['Iris-setosa'])
    mojo_prediction_2 = float(prediction_result[0]['Iris-versicolor'])
    mojo_prediction_3 = float(prediction_result[0]['Iris-virginica'])
    print("Multinomial prediction (MOJO): p0: " + str(mojo_prediction_1))
    print("Multinomial prediction (MOJO): p1: " + str(mojo_prediction_2))
    print("Multinomial prediction (MOJO): p2: " + str(mojo_prediction_3))

    assert multinomial_prediction_1 == mojo_prediction_1, "expected predictions to be the same for binary and MOJO model for Multinomial - p0"
    assert multinomial_prediction_2 == mojo_prediction_2, "expected predictions to be the same for binary and MOJO model for Multinomial - p1"
    assert multinomial_prediction_3 == mojo_prediction_3, "expected predictions to be the same for binary and MOJO model for Multinomial - p2"

Exemple #8

0

Afficher le fichier

Fichier : pyunit_mojo_predict.py Projet : michalkurka/h2o-3

def mojo_predict_csv_test(target_dir):
    """Compare in-cluster GBM scoring with MOJO CSV scoring for three problem types.

    For regression, binomial and multinomial GBM models in turn, the model is
    trained, a single row is scored in the cluster, the MOJO is downloaded
    through the ``download_mojo`` helper (defined outside this function) and
    the same row is scored with ``h2o.mojo_predict_csv``. Predictions are
    compared with exact float equality.

    The same ``mojo_zip_path`` and ``output_csv`` are reused by each section,
    so every step overwrites the files of the previous one.

    :param target_dir: directory that receives the input CSVs, the MOJO zip
        and the prediction output CSV.
    :raises AssertionError: if a MOJO prediction differs from the in-cluster
        prediction.
    """
    mojo_file_name = "prostate_gbm_model.zip"
    mojo_zip_path = os.path.join(target_dir, mojo_file_name)

    prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    # Random split driven by a uniform column: r < 0.70 train, the rest test.
    r = prostate[0].runif()
    train = prostate[r < 0.70]
    test = prostate[r >= 0.70]

    # Take a single row (row index 1) of the test frame, columns from index 2 on
    pdf = test[1, 2:]
    input_csv = "%s/in.csv" % target_dir
    output_csv = "%s/output.csv" % target_dir
    h2o.export_file(pdf, input_csv)

    # =================================================================
    # Regression
    # =================================================================
    regression_gbm1 = H2OGradientBoostingEstimator(distribution="gaussian")
    regression_gbm1.train(x=[2, 3, 4, 5, 6, 7, 8], y=1, training_frame=train)
    pred_reg = regression_gbm1.predict(pdf)
    p1 = pred_reg[0, 0]
    print("Regression prediction: " + str(p1))

    download_mojo(regression_gbm1, mojo_zip_path)

    print("\nPerforming Regression Prediction using MOJO @... " + target_dir)
    prediction_result = h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=mojo_zip_path,
                                             output_csv_path=output_csv)
    print("Prediction result: " + str(prediction_result))
    assert p1 == float(prediction_result[0]['predict']), "expected predictions to be the same for binary and MOJO model for regression"

    # =================================================================
    # Binomial
    # =================================================================
    # Convert the response column to a factor for the bernoulli model.
    train[1] = train[1].asfactor()
    bernoulli_gbm1 = H2OGradientBoostingEstimator(distribution="bernoulli")

    bernoulli_gbm1.train(x=[2, 3, 4, 5, 6, 7, 8], y=1, training_frame=train)
    pred_bin = bernoulli_gbm1.predict(pdf)

    # Columns 1 and 2 of the prediction frame are compared below with the
    # MOJO columns named '0' and '1'.
    binary_prediction_0 = pred_bin[0, 1]
    binary_prediction_1 = pred_bin[0, 2]
    print("Binomial prediction: p0: " + str(binary_prediction_0))
    print("Binomial prediction: p1: " + str(binary_prediction_1))

    download_mojo(bernoulli_gbm1, mojo_zip_path)

    print("\nPerforming Binomial Prediction using MOJO @... " + target_dir)
    prediction_result = h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=mojo_zip_path,
                                             output_csv_path=output_csv)

    mojo_prediction_0 = float(prediction_result[0]['0'])
    mojo_prediction_1 = float(prediction_result[0]['1'])
    print("Binomial prediction: p0: " + str(mojo_prediction_0))
    print("Binomial prediction: p1: " + str(mojo_prediction_1))

    assert binary_prediction_0 == mojo_prediction_0, "expected predictions to be the same for binary and MOJO model for Binomial - p0"
    assert binary_prediction_1 == mojo_prediction_1, "expected predictions to be the same for binary and MOJO model for Binomial - p1"

    # =================================================================
    # Multinomial
    # =================================================================
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

    # NOTE(review): the train (r < 0.90) and test (r >= 0.10) selections
    # overlap; presumably harmless because only one row is scored — confirm.
    r = iris[0].runif()
    train = iris[r < 0.90]
    test = iris[r >= 0.10]

    # Take a single row (row index 1) of the test frame, columns 0 to 3
    pdf = test[1, 0:4]
    input_csv = "%s/in-multi.csv" % target_dir
    output_csv = "%s/output.csv" % target_dir
    h2o.export_file(pdf, input_csv)

    multi_gbm = H2OGradientBoostingEstimator()
    multi_gbm.train(x=['C1', 'C2', 'C3', 'C4'], y='C5', training_frame=train)

    pred_multi = multi_gbm.predict(pdf)
    # Columns 1-3 of the prediction frame are compared below with the MOJO
    # columns named after the three iris classes.
    multinomial_prediction_1 = pred_multi[0, 1]
    multinomial_prediction_2 = pred_multi[0, 2]
    multinomial_prediction_3 = pred_multi[0, 3]
    print("Multinomial prediction (Binary): p0: " + str(multinomial_prediction_1))
    print("Multinomial prediction (Binary): p1: " + str(multinomial_prediction_2))
    print("Multinomial prediction (Binary): p2: " + str(multinomial_prediction_3))

    download_mojo(multi_gbm, mojo_zip_path)

    # This progress message previously said "Binomial" (copy-paste slip);
    # this section scores the multinomial model.
    print("\nPerforming Multinomial Prediction using MOJO @... " + target_dir)
    prediction_result = h2o.mojo_predict_csv(input_csv_path=input_csv, mojo_zip_path=mojo_zip_path,
                                             output_csv_path=output_csv)

    mojo_prediction_1 = float(prediction_result[0]['Iris-setosa'])
    mojo_prediction_2 = float(prediction_result[0]['Iris-versicolor'])
    mojo_prediction_3 = float(prediction_result[0]['Iris-virginica'])
    print("Multinomial prediction (MOJO): p0: " + str(mojo_prediction_1))
    print("Multinomial prediction (MOJO): p1: " + str(mojo_prediction_2))
    print("Multinomial prediction (MOJO): p2: " + str(mojo_prediction_3))

    assert multinomial_prediction_1 == mojo_prediction_1, "expected predictions to be the same for binary and MOJO model for Multinomial - p0"
    assert multinomial_prediction_2 == mojo_prediction_2, "expected predictions to be the same for binary and MOJO model for Multinomial - p1"
    assert multinomial_prediction_3 == mojo_prediction_3, "expected predictions to be the same for binary and MOJO model for Multinomial - p2"

Exemple #9

0

Afficher le fichier

Fichier : pyunit_coxph_mojo_predict.py Projet : zoudongyang/h2o-3

def mojo_predict_csv_test(sandbox_dir):
    """Check MOJO CSV scoring of a CoxPH model, including path handling.

    A Cox proportional hazards model (stratified by ``transplant``) is trained
    on the heart dataset and scored in the cluster. Its MOJO is downloaded
    through the ``download_mojo`` helper (defined outside this function) and
    scored with ``h2o.mojo_predict_csv`` while varying which paths are passed
    explicitly: no jar or output path, an explicit jar, a jar stored under a
    custom name in another directory (a call that omits ``genmodel_jar_path``
    must then raise ``RuntimeError``), and a custom output path. The ``lp``
    values of the last MOJO run are finally compared with the in-cluster
    prediction.

    :param sandbox_dir: directory that receives the input CSV, the MOJO zip,
        the ``h2o-genmodel.jar`` and the ``prediction.csv`` output.
    """
    data = h2o.import_file(
        path=pyunit_utils.locate("smalldata/coxph_test/heart.csv"))

    input_csv = "%s/in.csv" % sandbox_dir
    output_csv = "%s/prediction.csv" % sandbox_dir
    # The whole frame is exported as scoring input, before `transplant` is
    # converted to a factor below.
    h2o.export_file(data, input_csv)

    data['transplant'] = data['transplant'].asfactor()
    model = H2OCoxProportionalHazardsEstimator(stratify_by=["transplant"],
                                               start_column="start",
                                               stop_column="stop")
    model.train(x=["age", "surgery", "transplant"],
                y="event",
                training_frame=data)

    h2o_prediction = model.predict(data)

    # Download the MOJO; both the zip and the genmodel jar are asserted to
    # exist in sandbox_dir afterwards.
    model_zip_path = os.path.join(sandbox_dir, 'model.zip')
    genmodel_path = os.path.join(sandbox_dir, 'h2o-genmodel.jar')
    download_mojo(model, model_zip_path)
    assert os.path.isfile(model_zip_path)
    assert os.path.isfile(genmodel_path)

    # Predict without and with an explicit jar path; no output path is given,
    # and the result is asserted to appear at <sandbox_dir>/prediction.csv.
    h2o.mojo_predict_csv(input_csv_path=input_csv,
                         mojo_zip_path=model_zip_path,
                         verbose=True)
    h2o.mojo_predict_csv(input_csv_path=input_csv,
                         mojo_zip_path=model_zip_path,
                         genmodel_jar_path=genmodel_path,
                         verbose=True)
    assert os.path.isfile(output_csv)
    # Clean up so the next section starts without a zip, jar or output file.
    os.remove(model_zip_path)
    os.remove(genmodel_path)
    os.remove(output_csv)

    # Predict with the genmodel jar stored under a custom name elsewhere.
    other_sandbox_dir = tempfile.mkdtemp()
    try:
        genmodel_path = os.path.join(other_sandbox_dir,
                                     'h2o-genmodel-custom.jar')
        download_mojo(model, model_zip_path, genmodel_path)
        assert os.path.isfile(model_zip_path)
        assert os.path.isfile(genmodel_path)
        try:
            # Without genmodel_jar_path this call is expected to raise
            # RuntimeError. If it does not, the AssertionError from
            # `assert False` is not caught below and fails the test.
            h2o.mojo_predict_csv(input_csv_path=input_csv,
                                 mojo_zip_path=model_zip_path,
                                 verbose=True)
            assert False, "There should be no h2o-genmodel.jar at %s" % sandbox_dir
        except RuntimeError:
            pass
        # The failed call must not have produced an output file.
        assert not os.path.isfile(output_csv)
        h2o.mojo_predict_csv(input_csv_path=input_csv,
                             mojo_zip_path=model_zip_path,
                             genmodel_jar_path=genmodel_path,
                             verbose=True)
        assert os.path.isfile(output_csv)
        os.remove(output_csv)

        output_csv = "%s/out.prediction" % other_sandbox_dir

        # Predict with an explicit, custom output path and keep the result
        # for the comparison below.
        mojo_prediction = h2o.mojo_predict_csv(input_csv_path=input_csv,
                                               mojo_zip_path=model_zip_path,
                                               genmodel_jar_path=genmodel_path,
                                               verbose=True,
                                               output_csv_path=output_csv)
        assert os.path.isfile(output_csv)
        os.remove(model_zip_path)
        os.remove(genmodel_path)
        os.remove(output_csv)

        print(h2o_prediction)
        print(mojo_prediction)

        # One MOJO result row is expected per row of the in-cluster prediction.
        assert len(mojo_prediction) == h2o_prediction.nrows

        # Compare the in-cluster frame with a single-column "lp" frame built
        # from the MOJO rows; dtypes are not compared.
        assert_frame_equal(h2o_prediction.as_data_frame(use_pandas=True),
                           pandas.DataFrame(
                               [float(m['lp']) for m in mojo_prediction],
                               columns=["lp"]),
                           check_dtype=False)
    finally:
        # Always drop the temporary directory, even when an assertion fails.
        shutil.rmtree(other_sandbox_dir)