Beispiel #1
0
def javapredict_cars():

    # optional parameters
    params = {
        'ntrees': 5000,
        'max_depth': 10,
        'min_rows': 1,
        'learn_rate': 0.1,
        'balance_classes': random.sample([True, False], 1)[0]
    }
    print "Parameter list:"
    for k, v in zip(params.keys(), params.values()):
        print "{0}, {1}".format(k, v)

    train = h2o.import_file(
        tests.locate("smalldata/junit/cars_nice_header.csv"))
    test = h2o.import_file(
        tests.locate("smalldata/junit/cars_nice_header.csv"))
    x = [
        "name", "economy", "displacement", "power", "weight", "acceleration",
        "year"
    ]
    y = "cylinders"

    tests.javapredict("gbm", "numeric", train, test, x, y, **params)
def javapredict_iris_drf():

    # optional parameters
    params = {'ntrees':100, 'max_depth':5, 'min_rows':10}
    print "Parameter list:"
    for k,v in zip(params.keys(), params.values()): print "{0}, {1}".format(k,v)

    train = h2o.import_file(tests.locate("smalldata/iris/iris_train.csv"))
    test = h2o.import_file(tests.locate("smalldata/iris/iris_train.csv"))
    x = ["sepal_len","sepal_wid","petal_len","petal_wid"]
    y = "species"

    tests.javapredict("random_forest", "class", train, test, x, y, **params)
def javapredict_smallcat():

    # optional parameters
    params = {'ntrees':100, 'max_depth':5, 'min_rows':10}
    print "Parameter list:"
    for k,v in zip(params.keys(), params.values()): print "{0}, {1}".format(k,v)

    train = h2o.upload_file(h2o.locate("smalldata/iris/setosa_versicolor.csv"))
    test = h2o.upload_file(h2o.locate("smalldata/iris/virginica.csv"))
    x = [0,1,2,4]
    y = 3

    tests.javapredict("random_forest", "numeric", train, test, x, y, **params)
def javapredict_smallcat():

    # optional parameters
    params = {'epochs':100}
    print "Parameter list:"
    for k,v in zip(params.keys(), params.values()): print "{0}, {1}".format(k,v)

    train = h2o.upload_file(tests.locate("smalldata/iris/setosa_versicolor.csv"))
    test = h2o.upload_file(tests.locate("smalldata/iris/virginica.csv"))
    x = [0,1,2,4]
    y = 3

    tests.javapredict("deeplearning", "numeric", train, test, x, y, **params)
def javapredict_cars():

    # optional parameters
    params = {'ntrees':5000, 'max_depth':10, 'min_rows':1, 'learn_rate':0.1, 'balance_classes':random.sample([True,False],1)[0]}
    print "Parameter list:"
    for k,v in zip(params.keys(), params.values()): print "{0}, {1}".format(k,v)

    train = h2o.import_file(h2o.locate("smalldata/junit/cars_nice_header.csv"))
    test = h2o.import_file(h2o.locate("smalldata/junit/cars_nice_header.csv"))
    x = ["name","economy", "displacement","power","weight","acceleration","year"]
    y = "cylinders"

    tests.javapredict("gbm", "numeric", train, test, x, y, **params)
Beispiel #6
0
def javapredict_2x100000():

    # optional parameters
    params = {"max_iterations":1, "solver":"L_BFGS"}
    print "Parameter list:"
    for k,v in zip(params.keys(), params.values()): print "{0}, {1}".format(k,v)

    train = h2o.import_file(tests.locate("smalldata/jira/2x100000_real.csv.gz"))
    test = train
    x = range(1,train.ncol)
    y = 0

    tests.javapredict("glm", "numeric", train, test, x, y, **params)
def javapredict_smallcat():

    # optional parameters
    params = {'ntrees': 100, 'max_depth': 5, 'min_rows': 10}
    print "Parameter list:"
    for k, v in zip(params.keys(), params.values()):
        print "{0}, {1}".format(k, v)

    train = h2o.upload_file(
        tests.locate("smalldata/iris/setosa_versicolor.csv"))
    test = h2o.upload_file(tests.locate("smalldata/iris/virginica.csv"))
    x = [0, 1, 2, 4]
    y = 3

    tests.javapredict("random_forest", "numeric", train, test, x, y, **params)
def javapredict_smallcat():

    # optional parameters
    params = {'epochs': 100}
    print "Parameter list:"
    for k, v in zip(params.keys(), params.values()):
        print "{0}, {1}".format(k, v)

    train = h2o.upload_file(
        tests.locate("smalldata/iris/setosa_versicolor.csv"))
    test = h2o.upload_file(tests.locate("smalldata/iris/virginica.csv"))
    x = [0, 1, 2, 4]
    y = 3

    tests.javapredict("deeplearning", "numeric", train, test, x, y, **params)
def javapredict_dl_xlarge():

    hdfs_name_node = tests.hadoop_namenode()
    hdfs_file_name = "/datasets/z_repro.csv"
    url = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_file_name)

    params = {'hidden':[3500, 3500], 'epochs':0.0001} # 436MB pojo
    print "Parameter list:"
    for k,v in zip(params.keys(), params.values()): print "{0}, {1}".format(k,v)

    train =  h2o.import_file(url)
    test = train[range(0,10),:]
    x = range(1,train.ncol)
    y = 0

    tests.javapredict("deeplearning", "numeric", train, test, x, y, **params)
def javapredict_drf_xlarge():

    hdfs_name_node = tests.hadoop_namenode()
    hdfs_file_name = "/datasets/z_repro.csv"
    url = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_file_name)

    params = {'ntrees':20, 'max_depth':35, 'min_rows':1} # 739MB pojo
    print "Parameter list:"
    for k,v in zip(params.keys(), params.values()): print "{0}, {1}".format(k,v)

    train =  h2o.import_file(url)
    test = train[range(0,10),:]
    x = range(1,train.ncol)
    y = 0

    tests.javapredict("random_forest", "numeric", train, test, x, y, **params)
def javapredict_gbm_xlarge():

    hdfs_name_node = tests.hadoop_namenode()
    hdfs_file_name = "/datasets/z_repro.csv.gz"
    url = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_file_name)

    params = {"ntrees": 22, "max_depth": 37, "min_rows": 1, "sample_rate": 0.1}  # 651MB pojo
    print "Parameter list:"
    for k, v in zip(params.keys(), params.values()):
        print "{0}, {1}".format(k, v)

    train = h2o.import_file(url)
    test = train[range(0, 10), :]
    x = range(1, train.ncol)
    y = 0

    tests.javapredict("gbm", "numeric", train, test, x, y, **params)