Example #1
    def run_pipeline_test(self, model, dtest_path, libname_fmt,
                          expected_prob_path, expected_margin_path, multiclass,
                          use_annotation, use_quantize):
        dpath = os.path.abspath(os.path.join(os.getcwd(), 'tests/examples/'))
        dtest_path = os.path.join(dpath, dtest_path)
        libpath = libname(libname_fmt)
        X_test, _ = load_svmlight_file(dtest_path, zero_based=True)

        expected_prob_path = os.path.join(dpath, expected_prob_path)
        expected_margin_path = os.path.join(dpath, expected_margin_path)
        expected_prob = load_txt(expected_prob_path)
        expected_margin = load_txt(expected_margin_path)
        if multiclass:
            nrow = X_test.shape[0]
            expected_prob = expected_prob.reshape((nrow, -1))
            expected_margin = expected_margin.reshape((nrow, -1))
        params = {}
        if use_annotation is not None:
            params['annotate_in'] = use_annotation
        if use_quantize:
            params['quantize'] = 1

        for toolchain in os_compatible_toolchains():
            model.export_lib(toolchain=toolchain,
                             libpath=libpath,
                             params=params,
                             verbose=True)
            predictor = treelite.runtime.Predictor(libpath=libpath,
                                                   verbose=True)
            for i in range(X_test.shape[0]):
                x = X_test[i, :]
                # Scipy CSR matrix
                out_prob = predictor.predict_instance(x)
                out_margin = predictor.predict_instance(x, pred_margin=True)
                assert_almost_equal(out_prob, expected_prob[i])
                assert_almost_equal(out_margin, expected_margin[i])
                # NumPy 1D array with 0 as missing value
                x = x.toarray().flatten()
                out_prob = predictor.predict_instance(x, missing=0.0)
                out_margin = predictor.predict_instance(x,
                                                        missing=0.0,
                                                        pred_margin=True)
                assert_almost_equal(out_prob, expected_prob[i])
                assert_almost_equal(out_margin, expected_margin[i])
                # NumPy 1D array with np.nan as missing value
                np.place(x, x == 0.0, [np.nan])
                out_prob = predictor.predict_instance(x, missing=np.nan)
                out_margin = predictor.predict_instance(x,
                                                        missing=np.nan,
                                                        pred_margin=True)
                assert_almost_equal(out_prob, expected_prob[i])
                assert_almost_equal(out_margin, expected_margin[i])
                # NumPy 1D array with np.nan as missing value
                # (default when `missing` parameter is unspecified)
                out_prob = predictor.predict_instance(x)
                out_margin = predictor.predict_instance(x, pred_margin=True)
                assert_almost_equal(out_prob, expected_prob[i])
                assert_almost_equal(out_margin, expected_margin[i])
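For reference, a hypothetical invocation of this test helper is sketched below. The model path, dataset names and library-name template are illustrative assumptions (loosely modeled on the mushroom data used in Example #3), not taken from the original test suite.

    # Hypothetical call to run_pipeline_test; all paths and names below are assumptions.
    model = treelite.Model.load(
        os.path.join('tests/examples', 'mushroom/mushroom.model'),
        model_format='xgboost')
    self.run_pipeline_test(model=model,
                           dtest_path='mushroom/agaricus.test',
                           libname_fmt='./agaricus{}',
                           expected_prob_path='mushroom/agaricus.test.prob',
                           expected_margin_path='mushroom/agaricus.test.margin',
                           multiclass=False,
                           use_annotation=None,
                           use_quantize=False)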
Example #2
def ex1_multi():
    data = load_txt(os.path.join(ex1path, 'ex1data2.txt'))
    _X = data[:, :2]
    y = data[:, 2]
    m = y.size

    print('First 10 examples from the dataset:')
    for i in range(10):
        print(' x = %s, y = %s' % (_X[i, :], y[i]))
    print()

    print('Normalizing Features ...')
    X_norm, mu, sigma = feature_normalize(_X)

    X = np.hstack((np.ones((m, 1)), X_norm))

    print('Running gradient descent ...')
    alpha = 1.
    num_iters = 400

    theta = np.zeros((3, 1))
    theta, J_history = gradient_descent(X, y, theta, alpha, num_iters)

    pl.figure()
    pl.plot(J_history, '-b')
    pl.xlabel('number of iterations')
    pl.ylabel('cost J')
    pl.show()

    print('Theta computed from gradient descent:', theta)

    a = (1650 - mu[0]) / sigma[0]
    b = (3 - mu[1]) / sigma[1]

    price = np.matrix([1, a, b]) * theta
    print('Predicted price of a 1650 sq-ft, 3 br house',
          '(using gradient descent):', price)

    # Normal Equations
    data = load_txt(os.path.join(ex1path, 'ex1data2.txt'))
    _X = data[:, :2]
    y = data[:, 2]
    
    X = np.hstack((np.ones((m, 1)), _X))

    theta = normal_eqn(X, y)

    print('Theta computed from the normal equations:', theta)

    price = np.matrix('1 1650 3') * theta
    print('Predicted price of a 1650 sq-ft, 3 br house',
          '(using normal equations):', price)
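The helpers feature_normalize, gradient_descent and normal_eqn are defined elsewhere in the exercise code and are not shown here. A minimal sketch of plausible implementations, assuming the standard batch update theta := theta - (alpha/m) * X^T (X theta - y) and the closed-form normal equation theta = pinv(X^T X) X^T y:

    # Assumed implementations of the helpers used above (not the originals).
    import numpy as np

    def feature_normalize(X):
        # Scale each column to zero mean and unit standard deviation
        mu = X.mean(axis=0)
        sigma = X.std(axis=0)
        return (X - mu) / sigma, mu, sigma

    def gradient_descent(X, y, theta, alpha, num_iters):
        # Batch gradient descent on J(theta) = (1/2m) * ||X theta - y||^2
        m = y.size
        y = y.reshape(-1, 1)
        J_history = []
        for _ in range(num_iters):
            err = X.dot(theta) - y
            theta = theta - (alpha / m) * X.T.dot(err)
            J_history.append((err ** 2).sum() / (2 * m))
        return theta, np.array(J_history)

    def normal_eqn(X, y):
        # Closed-form solution, returned as a column vector to match the code above
        return np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y.reshape(-1, 1))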
Example #3
  def test_srcpkg(self):
    """Test feature to export a source tarball"""
    model_path = os.path.join(dpath, 'mushroom/mushroom.model')
    dmat_path = os.path.join(dpath, 'mushroom/agaricus.test')
    libpath = libname('./mushroom/mushroom{}')
    model = treelite.Model.load(model_path, model_format='xgboost')

    toolchain = os_compatible_toolchains()[0]
    model.export_srcpkg(platform=os_platform(), toolchain=toolchain,
                        pkgpath='./srcpkg.zip', libname=libpath,
                        params={}, verbose=True)
    with ZipFile('./srcpkg.zip', 'r') as zip_ref:
      zip_ref.extractall('.')
    subprocess.call(['make', '-C', 'mushroom'])

    predictor = treelite.runtime.Predictor(libpath='./mushroom', verbose=True)

    X, _ = load_svmlight_file(dmat_path, zero_based=True)
    dmat = treelite.DMatrix(X)
    batch = treelite.runtime.Batch.from_csr(dmat)

    expected_prob_path = os.path.join(dpath, 'mushroom/agaricus.test.prob')
    expected_prob = load_txt(expected_prob_path)
    out_prob = predictor.predict(batch)
    assert_almost_equal(out_prob, expected_prob)
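The utilities libname, os_compatible_toolchains and os_platform (also used in Example #1) belong to the surrounding test harness and are not shown. A rough sketch of what they likely do, inferred only from how they are called above; the exact return values are assumptions:

    # Assumed sketch of the test-harness helpers (not taken from the original suite).
    import sys

    def libname(fmt):
        # Fill the platform-specific shared-library suffix into a name template,
        # e.g. './mushroom/mushroom{}' -> './mushroom/mushroom.so' on Linux
        suffix = {'win32': '.dll', 'darwin': '.dylib'}.get(sys.platform, '.so')
        return fmt.format(suffix)

    def os_compatible_toolchains():
        # C toolchains expected to be available on the current platform
        return ['msvc'] if sys.platform == 'win32' else ['gcc', 'clang']

    def os_platform():
        # Platform string passed to Model.export_srcpkg
        return {'win32': 'windows', 'darwin': 'osx'}.get(sys.platform, 'unix')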
Example #4
def ex1():
    data = load_txt(os.path.join(ex1path, 'ex1data1.txt'))
    _X = data[:,0]
    y = data[:,1]
    m = _X.size

    # Plotting
    plot_data(_X, y)

    # Gradient descent (reshape the single feature into a column before stacking)
    X = np.hstack((np.ones((m, 1)), _X.reshape(m, 1)))
    theta = np.zeros((2, 1))

    iterations = 1500
    alpha = 0.01

    print(compute_cost(X, y, theta))

    theta, J_history = gradient_descent(X, y, theta, alpha, iterations)

    print('Theta found by gradient descent: ', theta)
    pl.plot(X[:, 1], X.dot(theta), '-')

    pl.figure()
    pl.plot(J_history)
    pl.show()
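compute_cost, like the other helpers, lives in a separate module; a plausible implementation of the least-squares cost it returns, consistent with the gradient-descent sketch after Example #2 (assumed, not the original):

    # Assumed implementation of compute_cost: J(theta) = (1/2m) * sum((X theta - y)^2)
    import numpy as np

    def compute_cost(X, y, theta):
        m = y.size
        residual = X.dot(theta).ravel() - y.ravel()
        return residual.dot(residual) / (2 * m)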
Example #5
def ex2():
    data = load_txt(os.path.join(ex2path, 'ex2data1.txt'))
    _X = data[:, :2]
    y = data[:, 2]

    plot_data(_X, y)
    pl.xlabel('Exam 1 score')
    pl.ylabel('Exam 2 score')
    pl.legend(('Admitted', 'Not admitted'))

    # compute cost and gradient
    m, n = _X.shape
    X = np.hstack((np.ones((m, 1)), _X))

    initial_theta = np.zeros(n + 1)

    cost, grad = cost_function(initial_theta, X, y)
    print('cost at initial theta (zeros):', cost)
    print('gradient at initial theta (zeros):', grad)

    #optimizing using gradient descent
    X_norm, mu, sigma = feature_normalize(_X)
    X = np.hstack((np.ones((m, 1)), X_norm))
    theta, Jhist = gd(cost_function, X, y, initial_theta, alpha=5, maxiter=200)
    print('gd: theta:', theta, 'cost:', cost_function(theta, X, y)[0])

    pl.plot(Jhist)
    plot_decision_boundary(theta, X, y)

    #optimizing using scipy.optimize
    X = np.hstack((np.ones((m, 1)), _X))

    # Cache keyed on id(theta) so the cost/gradient pair computed by cost_function
    # is reused when the optimizer requests both at the same theta object.
    cache = {}

    def costf(theta):
        k = id(theta)
        if k not in cache:
            cache[k] = cost_function(theta, X, y)
        return cache[k][0]

    def difff(theta):
        k = id(theta)
        if k not in cache:
            cache[k] = cost_function(theta, X, y)
        return cache[k][1]

    # Gradient check: difference between numerical and analytic gradients should be ~0
    print(opt.check_grad(costf, difff, initial_theta))

    # TODO: fmin_cg doesn't work here, why? what's the difference?
    theta, allvec = opt.fmin_ncg(costf, initial_theta, difff, retall=1)
    print('fmin_ncg: theta:', theta, 'cost:', cost_function(theta, X, y)[0])

    Jhist = [costf(t) for t in allvec]
    pl.plot(Jhist)
    plot_decision_boundary(theta, X, y)
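cost_function returns the logistic-regression cost together with its gradient. A minimal sketch, assuming the usual cross-entropy formulation with an optional L2 penalty (so the same signature also covers the regularized call in Example #6); the split helpers logistic_cost_function and logistic_grad_function used there would simply return the two values separately:

    # Assumed implementation of cost_function for logistic regression.
    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def cost_function(theta, X, y, lambda_=0.0):
        # Cross-entropy cost and gradient for h = sigmoid(X theta);
        # the intercept term theta[0] is not regularized.
        m = y.size
        h = sigmoid(X.dot(theta))
        reg = np.r_[0.0, theta[1:]]
        cost = (-(y.dot(np.log(h)) + (1 - y).dot(np.log(1 - h)))
                + lambda_ * reg.dot(reg) / 2.0) / m
        grad = (X.T.dot(h - y) + lambda_ * reg) / m
        return cost, grad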
Example #6
def ex2_reg():
    data = load_txt(os.path.join(ex2path, 'ex2data2.txt'))
    _X = data[:, :2]
    y = data[:, 2]

    #    plot_data(_X, y)
    #    pl.xlabel('Microchip Test 1')
    #    pl.ylabel('Microchip Test 2')
    #    pl.legend(['y=1', 'y=0'])

    X = map_feature(_X[:, 0], _X[:, 1])

    initial_theta = np.zeros(X.shape[1])

    lambda_ = 0
    print('cost at initial theta (zeros):',
          cost_function(initial_theta, X, y, lambda_)[0])

    # Regularization
    #X_norm, mu, sigma = feature_normalize(X[:,1:])
    #X = np.hstack((np.ones((m, 1)), X_norm))

    def costf(theta):
        return logistic_cost_function(theta, X, y, lambda_)

    def difff(theta):
        return logistic_grad_function(theta, X, y, lambda_)

    maxiter = 50
    theta, allvec = opt.fmin_ncg(costf,
                                 initial_theta,
                                 difff,
                                 retall=1,
                                 maxiter=maxiter,
                                 callback=step())
    #    theta, allvec = opt.fmin_bfgs(costf, initial_theta, difff, retall=1, maxiter=maxiter, callback=step())
    print('optimal cost:', costf(theta))

    Jhist = [costf(t) for t in allvec]
    pl.figure()
    pl.plot(Jhist)
    plot_decision_boundary(theta, X, y)

    # Compute accuracy on our training set
    h = np.dot(X, theta)
    print('Train Accuracy:', ((h > 0) == y).mean() * 100)
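map_feature expands the two microchip-test scores into polynomial terms so that the linear decision boundary learned in the expanded space becomes nonlinear in the original features. A sketch, assuming the degree-6 expansion used in the exercise this code follows:

    # Assumed implementation of map_feature: all monomials x1^i * x2^j with i + j <= degree.
    import numpy as np

    def map_feature(x1, x2, degree=6):
        x1 = np.asarray(x1).ravel()
        x2 = np.asarray(x2).ravel()
        cols = [np.ones(x1.size)]        # bias column of ones
        for total in range(1, degree + 1):
            for j in range(total + 1):
                cols.append((x1 ** (total - j)) * (x2 ** j))
        return np.column_stack(cols)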