def runComparisonTests(autoEncoder, actFun, missingValuesHandling,
                       setAllFactor, train, test, x):
    """Build a deeplearning model and compare H2O, MOJO and POJO predictions.

    Args:
        autoEncoder: truthy when an autoencoder is being built; only then is
            an "unstable model" build failure tolerated.
        actFun: forwarded unchanged to ``set_params``.
        missingValuesHandling: forwarded unchanged to ``set_params``.
        setAllFactor: forwarded unchanged to ``set_params``.
        train: frame handed to ``build_save_model``.
        test: frame whose ``x`` columns are written to ``in.csv`` for scoring.
        x: predictor column selection used for training and for scoring.

    Returns:
        None. Returns early, without comparing anything, when an autoencoder
        build fails with the "unstable model" message.

    Raises:
        Exception: the autoencoder build failed for any other reason. Build
            failures of non-autoencoder models propagate unchanged.
    """
    params = set_params(actFun, missingValuesHandling, setAllFactor,
                        autoEncoder)

    try:
        model = build_save_model(params, x, train)  # build and save mojo
    except Exception as err:
        if not autoEncoder:
            # Only autoencoder builds get the lenient treatment.
            raise
        # err.args can be empty or hold a non-string value; guard the lookup
        # so the handler cannot itself die with IndexError/TypeError.
        reason = str(err.args[0]) if err.args else ""
        if "Trying to predict with an unstable model" not in reason:
            raise Exception(
                'Deeplearning autoencoder model failed to build.  Fix it.')
        return

    # H2O and MOJO scoring both read this same input file.
    h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(model, TMPDIR, MOJONAME)
    # Keep a copy of the model on disk for debugging.
    h2o.save_model(model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_h2o, pred_mojo, prob=1, tol=1e-10)
    # POJO output is checked against the MOJO output, which was itself just
    # checked against the H2O output.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_mojo, pred_pojo, prob=1, tol=1e-10)
def glm_fractional_binomial_mojo_pojo():
    """Compare H2O, MOJO and POJO predictions for a GLM on the fraction data.

    The same CSV is imported twice, once as the training frame and once as
    the test frame. ``log10conc`` is the only predictor and ``y`` is the
    response.
    """
    data_file = "smalldata/glm_test/fraction_binommialOrig.csv"
    predictors = ["log10conc"]
    response = "y"

    params = set_params()
    train = h2o.import_file(pyunit_utils.locate(data_file))
    test = h2o.import_file(pyunit_utils.locate(data_file))

    # Build the model and save its mojo.
    model = pyunit_utils.build_save_model_GLM(
        params, predictors, train, response)

    mojo_name = pyunit_utils.getMojoName(model._id)
    # NOTE(review): '__file__' is a string literal, not the module attribute,
    # so realpath resolves it against the current working directory.
    # Presumably the build helper relies on the same location - confirm
    # before changing.
    base_dir = os.path.dirname(os.path.realpath('__file__'))
    work_dir = os.path.normpath(
        os.path.join(base_dir, "..", "results", mojo_name))

    # H2O and MOJO scoring both read this same input file.
    input_csv = os.path.join(work_dir, 'in.csv')
    h2o.download_csv(test[predictors], input_csv)

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
        model, work_dir, mojo_name)
    h2o.download_csv(pred_h2o, os.path.join(work_dir, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(model, work_dir, mojo_name)

    # Presumably removes the extra column at index 3 so the H2O frame lines
    # up with the MOJO output - TODO confirm.
    pred_h2o = pred_h2o.drop(3)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
# Example #3
# 0
def glm_binomial_mojo_pojo():
    """Compare H2O, MOJO and POJO predictions for a binomial GLM.

    A random "binomial" dataset is generated; its first 200 rows become the
    test frame and the remaining rows the training frame. All artifacts are
    written to a fresh temporary directory, which is not removed afterwards.
    """
    h2o.remove_all()
    n_test_rows = 200  # number of test dataset rows
    problem = "binomial"

    params = set_params()
    df = pyunit_utils.random_dataset(problem)
    train = df[n_test_rows:, :]
    test = df[:n_test_rows, :]
    # Every column except the response is a predictor.
    predictor_names = set(df.names) - {"response"}
    x = list(predictor_names)

    work_dir = tempfile.mkdtemp()
    # Build the model and save its mojo into the temporary directory.
    model = pyunit_utils.build_save_model_generic(
        params, x, train, "response", "glm", work_dir)
    mojo_name = pyunit_utils.getMojoName(model._id)

    # H2O and MOJO scoring both read this same input file.
    input_csv = os.path.join(work_dir, 'in.csv')
    h2o.download_csv(test[x], input_csv)

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
        model, work_dir, mojo_name)
    h2o.download_csv(pred_h2o, os.path.join(work_dir, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(model, work_dir, mojo_name)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
def run_comparison_tests(auto_encoder, act_fun, missing_values_handling, set_all_factor, train, test, x):
    """Build a deeplearning model and compare H2O, MOJO and POJO predictions.

    Args:
        auto_encoder: truthy when an autoencoder is being built; only then is
            an "unstable model" build failure tolerated.
        act_fun: forwarded unchanged to ``set_params``.
        missing_values_handling: forwarded unchanged to ``set_params``.
        set_all_factor: forwarded unchanged to ``set_params``.
        train: frame handed to ``build_save_model``.
        test: frame whose ``x`` columns are written to ``in.csv`` for scoring.
        x: predictor column selection used for training and for scoring.

    Returns:
        None. Returns early, without comparing anything, when an autoencoder
        build fails with the "unstable model" message.

    Raises:
        Exception: the autoencoder build failed for any other reason. Build
            failures of non-autoencoder models propagate unchanged.
    """
    params = set_params(act_fun, missing_values_handling, set_all_factor, auto_encoder)

    try:
        # build and save mojo model
        deeplearning_model = build_save_model(params, x, train)
    except Exception as err:
        if not auto_encoder:
            # Only autoencoder builds get the lenient treatment.
            raise
        # err.args can be empty or hold a non-string value; guard the lookup
        # so the handler cannot itself die with IndexError/TypeError.
        reason = str(err.args[0]) if err.args else ""
        if "Trying to predict with an unstable model" not in reason:
            raise Exception('Deeplearning autoencoder model failed to build.  Fix it.')
        return

    # save test file, h2o predict/mojo use same file
    h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
    # load model and perform predict
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(deeplearning_model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(deeplearning_model, TMPDIR, MOJONAME)
    # save model for debugging
    h2o.save_model(deeplearning_model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(pred_h2o, pred_mojo, prob=1, tol=1e-10)
    # POJO output is checked against the MOJO output, which was itself just
    # checked against the H2O output.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(pred_mojo, pred_pojo, prob=1, tol=1e-10)
# Example #5
# 0
def glm_ordinal_mojo_pojo():
    """Compare H2O, MOJO and POJO predictions for a GLM on random data.

    The first ``NTESTROWS`` rows of the generated frame form the test set and
    the remaining rows the training set. Any exception raised while building
    or comparing is printed; only an ``AssertionError`` terminates the
    process with exit status 1, every other exception is deliberately
    ignored.
    """
    h2o.remove_all()
    params = set_params()  # model parameters
    df = random_dataset(PROBLEM)  # generate random dataset
    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    # Every column except the response is a predictor.
    x = list(set(df.names) - {"response"})

    try:
        # build and save mojo model
        glmOrdinalModel = build_save_model(params, x, train, "response")
        # save test file, h2o predict/mojo use same file
        h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
        # load model and perform predict
        pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
            glmOrdinalModel, TMPDIR, MOJONAME)
        h2o.download_csv(pred_h2o, os.path.join(TMPDIR, "h2oPred.csv"))
        pred_pojo = pyunit_utils.pojo_predict(glmOrdinalModel, TMPDIR,
                                              MOJONAME)
        print("Comparing mojo predict and h2o predict...")
        pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)
        print("Comparing pojo predict and h2o predict...")
        pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
    except Exception as ex:
        print("***************  ERROR and type is ")
        print(str(type(ex)))
        print(ex)
        # Only a failed comparison (AssertionError) fails the test; check the
        # type directly instead of matching on the printed class name.
        if isinstance(ex, AssertionError):
            sys.exit(1)
def glm_multinomial_mojo_pojo():
    """Compare H2O, MOJO and POJO predictions for a multinomial GLM.

    A random "multinomial" dataset is generated; its first 200 rows become
    the test frame and the remaining rows the training frame.
    """
    problem = "multinomial"
    n_test_rows = 200

    params = set_params()
    df = pyunit_utils.random_dataset(problem)
    train = df[n_test_rows:, :]
    test = df[:n_test_rows, :]
    # Every column except the response is a predictor.
    predictor_names = set(df.names) - {"response"}
    x = list(predictor_names)

    # Build the model and save its mojo.
    model = pyunit_utils.build_save_model_GLM(params, x, train, "response")

    mojo_name = pyunit_utils.getMojoName(model._id)
    # NOTE(review): '__file__' is a string literal, not the module attribute,
    # so realpath resolves it against the current working directory.
    # Presumably the build helper relies on the same location - confirm
    # before changing.
    base_dir = os.path.dirname(os.path.realpath('__file__'))
    work_dir = os.path.normpath(
        os.path.join(base_dir, "..", "results", mojo_name))

    # H2O and MOJO scoring both read this same input file.
    input_csv = os.path.join(work_dir, 'in.csv')
    h2o.download_csv(test[x], input_csv)

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
        model, work_dir, mojo_name)
    h2o.download_csv(pred_h2o, os.path.join(work_dir, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(model, work_dir, mojo_name)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
def deeplearning_mojo_pojo():
    """Compare H2O, MOJO and POJO predictions for a deeplearning model.

    The first ``NTESTROWS`` rows of the generated frame form the test set and
    the remaining rows the training set. Any exception raised while building
    or comparing is printed; only an ``AssertionError`` terminates the
    process with exit status 1, every other exception is deliberately
    ignored.
    """
    h2o.remove_all()

    params = set_params()  # set deeplearning model parameters
    df = random_dataset(PROBLEM)  # generate random dataset
    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    # Every column except the response is a predictor.
    x = list(set(df.names) - {"response"})

    try:
        # build and save mojo model
        deeplearningModel = build_save_model(params, x, train)
        # save test file, h2o predict/mojo use same file
        h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
        # load model and perform predict
        pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
            deeplearningModel, TMPDIR, MOJONAME)
        pred_pojo = pyunit_utils.pojo_predict(
            deeplearningModel, TMPDIR, MOJONAME)
        # save model for debugging
        h2o.save_model(deeplearningModel, path=TMPDIR, force=True)
        print("Comparing mojo predict and h2o predict...")
        pyunit_utils.compare_numeric_frames(
            pred_h2o, pred_mojo, 0.1, tol=1e-10)
        print("Comparing pojo predict and h2o predict...")
        pyunit_utils.compare_numeric_frames(
            pred_mojo, pred_pojo, 0.1, tol=1e-10)
    except Exception as ex:
        print("***************  ERROR and type is ")
        print(str(type(ex)))
        print(ex)
        # Only a failed comparison (AssertionError) fails the test; check the
        # type directly instead of matching on the printed class name.
        if isinstance(ex, AssertionError):
            sys.exit(1)
def runComparisonTests(autoEncoder, probleyType):
    """Build a deeplearning model on random data and compare predictions.

    Args:
        autoEncoder: forwarded unchanged to ``set_params``.
        probleyType: forwarded unchanged to ``random_dataset``.

    The first ``NTESTROWS`` rows of the generated frame form the test set and
    the remaining rows the training set. H2O, MOJO and POJO predictions are
    then compared pairwise.
    """
    params = set_params(autoEncoder)
    df = random_dataset(probleyType)
    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    # Every column except the response is a predictor.
    predictor_names = set(df.names) - {"response"}
    x = list(predictor_names)

    # Build the model and save its mojo.
    model = build_save_model(params, x, train)

    # H2O and MOJO scoring both read this same input file.
    input_csv = os.path.join(TMPDIR, 'in.csv')
    h2o.download_csv(test[x], input_csv)

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(model, TMPDIR, MOJONAME)
    # Keep a copy of the model on disk for debugging.
    h2o.save_model(model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_h2o, pred_mojo, prob=1, tol=1e-10)
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_mojo, pred_pojo, prob=1, tol=1e-10)
# Example #9
# 0
def compare_preds(train, test, x, y, booster, ntrees, max_depth, max_error):
    """Train an XGBoost model and compare H2O predictions with POJO ones.

    Args:
        train: training frame.
        test: frame whose ``x`` columns are scored.
        x: predictor column selection.
        y: response column.
        booster: passed to ``H2OXGBoostEstimator`` and echoed in the log line.
        ntrees: passed to ``H2OXGBoostEstimator``.
        max_depth: passed to ``H2OXGBoostEstimator``.
        max_error: tolerance handed to ``compare_frames_local`` as ``tol``.
    """
    estimator_settings = dict(booster=booster,
                              seed=1,
                              ntrees=ntrees,
                              max_depth=max_depth)
    model = H2OXGBoostEstimator(**estimator_settings)
    model.train(training_frame=train, x=x, y=y)

    mojo_name = pyunit_utils.getMojoName(model._id)
    # NOTE(review): '__file__' is a string literal, not the module attribute,
    # so realpath resolves it against the current working directory.
    base_dir = os.path.dirname(os.path.realpath('__file__'))
    tmp_dir = os.path.normpath(
        os.path.join(base_dir, "..", "results", mojo_name))
    # Raises if the directory already exists.
    os.makedirs(tmp_dir)
    model.download_mojo(path=tmp_dir)

    test_inputs_csv = os.path.join(tmp_dir, 'in.csv')
    h2o.download_csv(test[x], test_inputs_csv)
    pred_h2o = model.predict(test[x])
    h2o_output_csv = os.path.join(tmp_dir, "out_h2o.csv")
    h2o.download_csv(pred_h2o, h2o_output_csv)
    pred_pojo = pyunit_utils.pojo_predict(model, tmp_dir, mojo_name)

    banner = "%s: Comparing pojo %s predict and h2o predict..." % (
        model._id, booster)
    print(banner)
    pyunit_utils.compare_frames_local(pred_h2o, pred_pojo, 1, tol=max_error)
def glm_multinomial_mojo_pojo():
    """Check that H2O, MOJO and POJO scoring agree for a multinomial GLM.

    The test frame is the first 200 rows of a randomly generated
    "multinomial" dataset; the training frame is everything after them.
    """
    problem_type = "multinomial"
    test_row_count = 200

    glm_params = set_params()
    dataset = pyunit_utils.random_dataset(problem_type)
    train = dataset[test_row_count:, :]
    test = dataset[:test_row_count, :]
    # Predictors are all columns other than the response.
    x = list(set(dataset.names) - {"response"})

    # Build the model and save its mojo.
    glm_model = pyunit_utils.build_save_model_GLM(
        glm_params, x, train, "response")

    mojo_name = pyunit_utils.getMojoName(glm_model._id)
    # NOTE(review): '__file__' is a string literal, not the module attribute,
    # so realpath resolves it against the current working directory.
    # Presumably the build helper relies on the same location - confirm
    # before changing.
    results_root = os.path.join(
        os.path.dirname(os.path.realpath('__file__')), "..", "results")
    out_dir = os.path.normpath(os.path.join(results_root, mojo_name))

    # Save the test file; H2O and MOJO scoring use the same file.
    h2o.download_csv(test[x], os.path.join(out_dir, 'in.csv'))
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
        glm_model, out_dir, mojo_name)
    h2o.download_csv(pred_h2o, os.path.join(out_dir, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(glm_model, out_dir, mojo_name)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
def runComparisonTests(autoEncoder, probleyType):
    """Build a deeplearning model on random data and compare predictions.

    Args:
        autoEncoder: forwarded to ``set_params``; when truthy, an "unstable
            model" build failure is tolerated.
        probleyType: forwarded unchanged to ``random_dataset``.

    Returns:
        None. Returns early, without comparing anything, when an autoencoder
        build fails with the "unstable model" message.

    Raises:
        Exception: the autoencoder build failed for any other reason. Build
            failures of non-autoencoder models propagate unchanged.
    """
    params = set_params(autoEncoder)  # set deeplearning model parameters
    df = random_dataset(probleyType)  # generate random dataset
    # First NTESTROWS rows are the test set, the rest the training set.
    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    # Every column except the response is a predictor.
    x = list(set(df.names) - {"response"})

    try:
        model = build_save_model(params, x, train)  # build and save mojo
    except Exception as err:
        if not autoEncoder:
            # Only autoencoder builds get the lenient treatment.
            raise
        # err.args can be empty or hold a non-string value; guard the lookup
        # so the handler cannot itself die with IndexError/TypeError.
        reason = str(err.args[0]) if err.args else ""
        if "Trying to predict with an unstable model" not in reason:
            raise Exception(
                'Deeplearning autoencoder model failed to build.  Fix it.')
        return

    # H2O and MOJO scoring both read this same input file.
    h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(model, TMPDIR, MOJONAME)
    # Keep a copy of the model on disk for debugging.
    h2o.save_model(model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_h2o, pred_mojo, prob=1, tol=1e-10)
    # POJO output is checked against the MOJO output, which was itself just
    # checked against the H2O output.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_mojo, pred_pojo, prob=1, tol=1e-10)