Example 1: cross-validated feature selection with a normal-equation linear regressor
import sys

import numpy as np

# Project-local modules used below; the exact import paths are
# project-specific.
import reader
import scaling
import util
from regression import NormalEquationLinearRegressor
from crossvalidation import CrossValidator


def main():
    if len(sys.argv) < 2:
        print("Usage:\n\t{} [housing-data]".format(sys.argv[0]))
        sys.exit(1)

    dataset = reader.read(sys.argv[1], delim=' ')

    # Split off the labels, scale each feature to unit range, expand
    # with nonlinear basis functions (x^2 and x^3), append a bias
    # column, and rejoin features and labels into a single dataset.
    features, labels = util.fldivide(dataset)
    features, scale = scaling.unit_scale(features)
    features = util.basis_expand(features, lambda x: x ** 2, lambda x: x ** 3)
    features = np.hstack([features, np.ones((len(features), 1))])
    dataset = util.fljoin(features, labels)
    
    reg = NormalEquationLinearRegressor(regularization=1e-8)
    cv  = CrossValidator(reg)

    feat_indices, feat_errors = cv.best_3features_topN(dataset, n=5)
    for indices, err in zip(feat_indices, feat_errors):
        # Retrain on just the selected feature columns and report the
        # learned weights alongside the cross-validation error.
        bestfeats = np.column_stack([features[:, i] for i in indices])
        data = util.fljoin(bestfeats, labels)
        reg.train(data)
        print(reg.w)
        print("indices = {}, err = {}".format(indices, err))
Example 2: polynomial-order sweep with a gradient-descent linear regressor
import sys

import numpy as np
import matplotlib.pyplot as plt

# Project-local modules used below; the exact import paths are
# project-specific.
import util
from reader import read
from regression import GradientDescentLinearRegressor
from plotting import plot_scatter_curve


def main():
    if len(sys.argv) < 3:
        print("Usage:\n\t{} [trainfile] [testfile]".format(sys.argv[0]))
        sys.exit(1)

    train_file, test_file = sys.argv[1:3]
    train_data, train_labels = util.fldivide(read(train_file))
    test_data, test_labels = util.fldivide(read(test_file))
    
    # Fit polynomial models of increasing order and plot each fit.
    for i in range(5):
        nth_train_data = util.make_nth_order(train_data, i)
        nth_train = np.hstack((nth_train_data,
                               train_labels.reshape((len(train_labels), 1))))
        nth_test_data = util.make_nth_order(test_data, i)

        model = GradientDescentLinearRegressor(learn_rate=0.4, regularization=1e1)
        model.train(nth_train)
        predicted = model.predict(nth_test_data)

        mse = model.error(predicted, test_labels)

        plot_scatter_curve(
            test_data, test_labels, model.w, fignum=i,
            title="Gradient Descent, order {}, alpha={}, lambda={}, "
                  "mse={}".format(i, model.learn_rate, model.l, mse))

    plt.show()
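
GradientDescentLinearRegressor and util.make_nth_order are again project code. A minimal sketch under the same assumptions as before (last-column labels; batch gradient descent on the regularized least-squares objective; the iterations parameter and the helper below are hypothetical):

import numpy as np

def make_nth_order(data, n):
    # Expand a column of inputs x into [1, x, x^2, ..., x^n].
    x = np.asarray(data).reshape(-1)
    return np.column_stack([x ** k for k in range(n + 1)])

class GradientDescentLinearRegressor:
    def __init__(self, learn_rate=0.1, regularization=0.0, iterations=1000):
        self.learn_rate = learn_rate
        self.l = regularization
        self.iterations = iterations
        self.w = None

    def train(self, dataset):
        X, y = dataset[:, :-1], dataset[:, -1]
        m, n = X.shape
        self.w = np.zeros(n)
        for _ in range(self.iterations):
            # Gradient of (1/2m)||Xw - y||^2 + (lambda/2)||w||^2.
            grad = X.T @ (X @ self.w - y) / m + self.l * self.w
            self.w -= self.learn_rate * grad

    def predict(self, X):
        return X @ self.w

    def error(self, predicted, actual):
        # Mean squared error.
        return np.mean((predicted - actual) ** 2)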