Example #1
0
def compare_accuracy(x, y, n_estimators=11, bootstrap=True, slop=0.98, n_repeat=10):
    """Assert that the local forest scores about as well as scikit-learn's.

    The first half of the rows of ``x``/``y`` is used for training and the
    remaining rows for testing.  The module-level ``RandomForestClassifier``
    (reported as "CudaTree" in the output) and
    ``sklearn.ensemble.RandomForestClassifier`` are each refit ``n_repeat``
    times; their test scores are printed per round and summed.

    Args:
        x: Feature array exposing ``shape`` and supporting row slicing.
        y: Labels, sliced with the same split point as ``x``.
        n_estimators: Forwarded to both classifiers.
        bootstrap: Forwarded to both classifiers.
        slop: Fraction of the summed sklearn score that the summed
            CudaTree score must reach.
        n_repeat: Number of fit/score rounds.

    Raises:
        AssertionError: If the summed CudaTree score is below
            ``slop`` times the summed sklearn score.
    """
    # Floor division keeps the split point an integer even when "/" means
    # true division (Python 3, or `from __future__ import division`).
    n = x.shape[0] // 2
    xtrain = x[:n]
    ytrain = y[:n]
    xtest = x[n:]
    ytest = y[n:]
    cudarf = RandomForestClassifier(n_estimators=n_estimators, bootstrap=bootstrap)
    # Imported lazily so sklearn is only required when this check runs.
    import sklearn.ensemble

    skrf = sklearn.ensemble.RandomForestClassifier(n_estimators=n_estimators, bootstrap=bootstrap)
    cuda_score_total = 0
    sk_score_total = 0
    for i in range(n_repeat):
        cudarf.fit(xtrain, ytrain)
        skrf.fit(xtrain, ytrain)
        sk_score = skrf.score(xtest, ytest)
        cuda_score = cudarf.score(xtest, ytest)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Iteration %s" % (i,))
        print("Sklearn score %s" % (sk_score,))
        print("CudaTree score %s" % (cuda_score,))
        sk_score_total += sk_score
        cuda_score_total += cuda_score

    # The message is only formatted on failure, so n_repeat == 0 never divides.
    assert cuda_score_total >= (sk_score_total * slop), (
        "Getting significantly worse test accuracy than sklearn: %s vs. %s"
        % (cuda_score_total / n_repeat, sk_score_total / n_repeat)
    )
Example #2
0
def compare_accuracy(x,
                     y,
                     n_estimators=11,
                     bootstrap=True,
                     slop=0.98,
                     n_repeat=10):
    """Assert that the local forest scores about as well as scikit-learn's.

    Rows before the midpoint of ``x``/``y`` form the training set and the
    rest form the test set.  The module-level ``RandomForestClassifier``
    (labelled "CudaTree" in the output) and
    ``sklearn.ensemble.RandomForestClassifier`` are both refit ``n_repeat``
    times, with each round's test scores printed and accumulated.

    Args:
        x: Feature array exposing ``shape`` and supporting row slicing.
        y: Labels, split at the same index as ``x``.
        n_estimators: Forwarded to both classifiers.
        bootstrap: Forwarded to both classifiers.
        slop: Fraction of the summed sklearn score that the summed
            CudaTree score must reach.
        n_repeat: Number of fit/score rounds.

    Raises:
        AssertionError: If the summed CudaTree score is below
            ``slop`` times the summed sklearn score.
    """
    # "//" guarantees an integer slice index; plain "/" produces a float
    # under true division (Python 3 or `from __future__ import division`).
    n = x.shape[0] // 2
    xtrain = x[:n]
    ytrain = y[:n]
    xtest = x[n:]
    ytest = y[n:]
    cudarf = RandomForestClassifier(n_estimators=n_estimators,
                                    bootstrap=bootstrap)
    # Imported lazily so sklearn is only required when this check runs.
    import sklearn.ensemble
    skrf = sklearn.ensemble.RandomForestClassifier(n_estimators=n_estimators,
                                                   bootstrap=bootstrap)
    cuda_score_total = 0
    sk_score_total = 0
    for i in range(n_repeat):
        cudarf.fit(xtrain, ytrain)
        skrf.fit(xtrain, ytrain)
        sk_score = skrf.score(xtest, ytest)
        cuda_score = cudarf.score(xtest, ytest)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Iteration %s" % (i,))
        print("Sklearn score %s" % (sk_score,))
        print("CudaTree score %s" % (cuda_score,))
        sk_score_total += sk_score
        cuda_score_total += cuda_score

    # The message is only formatted on failure, so n_repeat == 0 never divides.
    assert cuda_score_total >= (sk_score_total * slop), \
      "Getting significantly worse test accuracy than sklearn: %s vs. %s"\
      % (cuda_score_total / n_repeat, sk_score_total / n_repeat)
Example #3
0
def test_digits_memorize():
  """Check that a non-bootstrapped forest reproduces its training labels.

  Fits a forest with half of the module-level ``n_estimators`` on the
  module-level ``x``/``y``, predicts on that same ``x``, and asserts that
  ``util.test_diff`` reports zero differences.  Fit and predict are each
  wrapped in a ``timer`` context.

  Raises:
    AssertionError: If any prediction differs from ``y``.
  """
  with timer("Cuda treelearn"):
    # Floor division keeps the estimator count an integer even when "/"
    # means true division (Python 3 or `from __future__ import division`).
    forest = RandomForestClassifier(n_estimators = n_estimators//2, bootstrap = False)
    forest.fit(x, y)
  with timer("Predict"):
    # presumably (mismatch count, sample count) -- confirm against util.test_diff
    diff, total = util.test_diff(forest.predict(x), y)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%s (Wrong) / %s (Total). The error rate is %f." % (diff, total, diff/float(total)))
  assert diff == 0, "Didn't memorize, got %d wrong" % diff
Example #4
0
def test_covtype_memorize():
  """Check that a non-bootstrapped forest reproduces its training labels.

  Fits a forest on the module-level ``x``/``y`` with ``bfs_threshold`` set
  to 500000, predicts on that same ``x``, and asserts that
  ``util.test_diff`` reports zero differences.  Fit and predict are each
  wrapped in a ``timer`` context.

  Raises:
    AssertionError: If any prediction differs from ``y``.
  """
  with timer("Cuda treelearn"):
    forest = RandomForestClassifier(bootstrap = False)
    forest.fit(x, y, bfs_threshold = 500000)
  with timer("Predict"):
    # presumably (mismatch count, sample count) -- confirm against util.test_diff
    diff, total = util.test_diff(forest.predict(x), y)
    # Function-call form of print: valid on Python 3 and, with a single
    # argument, prints the same text on Python 2.
    print("%s(Wrong)/%s(Total). The error rate is %f." % (diff, total, diff/float(total)))
  assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff
Example #5
0
def test_covtype_memorize():
  """Train without bootstrap and require perfect recall of the training set.

  Uses the module-level ``x``/``y`` for both fitting and prediction; the
  fit is called with ``bfs_threshold`` of 500000.  Each phase runs inside
  its own ``timer`` context.

  Raises:
    AssertionError: If ``util.test_diff`` reports a nonzero difference.
  """
  with timer("Cuda treelearn"):
    forest = RandomForestClassifier(bootstrap = False)
    forest.fit(x, y, bfs_threshold = 500000)

  with timer("Predict"):
    predicted = forest.predict(x)
    # presumably (mismatch count, sample count) -- confirm against util.test_diff
    diff, total = util.test_diff(predicted, y)
    error_rate = diff/float(total)
    report = "%s(Wrong)/%s(Total). The error rate is %f." % (diff, total, error_rate)
    print(report)

  assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff
Example #6
0
def benchmark_cuda(dataset, bfs_threshold=None):
    """Time a forest fit on ``dataset`` inside a ``timer`` context.

    A one-tree forest is fitted first, outside the timed region, so that
    code compilation happens before the measurement.  The timed forest is
    configured from the module-level ``n_estimators``, ``bootstrap``,
    ``verbose`` and ``debug`` settings.

    Args:
        dataset: Identifier passed to ``load_data`` and shown in the
            timer label.
        bfs_threshold: Forwarded unchanged to both ``fit`` calls.
    """
    features, labels = load_data(dataset)

    # Throw-away forest: only there to get the code compiled.
    warmup = RandomForestClassifier(
        n_estimators=1,
        bootstrap=bootstrap,
        verbose=False,
        max_features=None,
        debug=debug,
    )
    warmup.fit(features, labels, bfs_threshold=bfs_threshold)

    label = "%s benchmark cuda (bfs_threshold = %s)" % (dataset, bfs_threshold)
    with timer(label):
        timed_forest = RandomForestClassifier(
            n_estimators=n_estimators,
            bootstrap=bootstrap,
            verbose=verbose,
            max_features=None,
            debug=debug,
        )
        timed_forest.fit(features, labels, bfs_threshold=bfs_threshold)
    # Drop the reference to the fitted forest.
    timed_forest = None
Example #7
0
def benchmark_cuda(dataset, bfs_threshold = None):
  """Benchmark fitting a forest on ``dataset``.

  The data comes from ``load_data(dataset)``.  A single-tree forest is
  fitted before timing starts so the code is already compiled; the forest
  built from the module-level ``n_estimators``, ``bootstrap``, ``verbose``
  and ``debug`` values is then fitted inside a ``timer`` context.

  Args:
    dataset: Identifier passed to ``load_data`` and shown in the timer label.
    bfs_threshold: Passed as a keyword argument to both ``fit`` calls.
  """
  train_x, train_y = load_data(dataset)
  fit_options = dict(bfs_threshold = bfs_threshold)

  # Just use this forest to compile the code.
  compile_only = RandomForestClassifier(
      n_estimators = 1,
      bootstrap = bootstrap,
      verbose = False,
      max_features = None,
      debug = debug)
  compile_only.fit(train_x, train_y, **fit_options)

  title = "%s benchmark cuda (bfs_threshold = %s)" % (dataset, bfs_threshold)
  with timer(title):
    measured = RandomForestClassifier(
        n_estimators = n_estimators,
        bootstrap = bootstrap,
        verbose = verbose,
        max_features = None,
        debug = debug)
    measured.fit(train_x, train_y, **fit_options)
  # Drop the reference to the fitted forest.
  measured = None