def main():
    """Cross-validate 3-feature subsets of basis-expanded housing data.

    Reads a space-delimited housing dataset from the path given as the
    single command-line argument, unit-scales the features, augments them
    with square/cube basis functions plus a bias column, then reports the
    top-5 three-feature subsets found by cross-validation along with the
    weights of a ridge-style normal-equation fit on each subset.
    """
    if len(sys.argv) < 2:
        print("Usage:\n\t{} [housing-data]".format(sys.argv[0]))
        sys.exit(1)

    raw = reader.read(sys.argv[1], delim=' ')

    # Expand the features with nonlinear basis functions, then rejoin them
    # with the labels so the cross-validator sees the augmented dataset.
    features, labels = util.fldivide(raw)
    features, scale = scaling.unit_scale(features)
    features = util.basis_expand(features, lambda x: x ** 2, lambda x: x ** 3)
    features = np.hstack([features, np.ones((len(features), 1))])
    augmented = util.fljoin(features, labels)

    reg = NormalEquationLinearRegressor(regularization=1e-8)
    cv = CrossValidator(reg)
    feat_indices, feat_errors = cv.best_3features_topN(augmented, n=5)

    # Refit on each winning feature triple and show its weights and error.
    for indices, err in zip(feat_indices, feat_errors):
        chosen = np.dstack([features[:, i] for i in indices]).squeeze()
        subset = util.fljoin(chosen, labels)
        reg.train(subset)
        print(reg.w)
        print("indices = {}, err = {}".format(indices, err))
def main():
    """Fit polynomial gradient-descent regressors of orders 0-4 and plot them.

    Expects exactly two command-line arguments: a training-data file and a
    test-data file. For each polynomial order it trains a regressor, computes
    the test MSE, and draws a scatter-plus-curve figure; all figures are shown
    at the end.
    """
    # Bug fix: the usage string demands two files, but the original guard
    # checked ``len(sys.argv) < 2`` — a single argument slipped past the
    # guard and then crashed unpacking ``sys.argv[1:]`` into two names
    # (and >2 arguments crashed with "too many values to unpack").
    if len(sys.argv) < 3:
        print("Usage:\n\t{} [trainfile] [testfile]".format(sys.argv[0]))
        sys.exit(1)
    train_file, test_file = sys.argv[1:3]

    train_data, train_labels = util.fldivide(read(train_file))
    test_data, test_labels = util.fldivide(read(test_file))

    for i in range(5):
        # Build the order-i polynomial design matrix for both splits; the
        # training matrix carries the labels as its final column.
        nth_train_data = util.make_nth_order(train_data, i)
        nth_train = np.hstack((nth_train_data,
                               train_labels.reshape((len(train_labels), 1))))
        nth_test_data = util.make_nth_order(test_data, i)

        model = GradientDescentLinearRegressor(learn_rate=0.4,
                                               regularization=1e1)
        model.train(nth_train)
        predicted = model.predict(nth_test_data)
        mse = model.error(predicted, test_labels)

        plot_scatter_curve(
            test_data, test_labels, model.w, fignum=i,
            title="Gradient Descent, order {}, alpha={}, lambda={}, "
                  "mse={}".format(i, model.learn_rate, model.l, mse))
    plt.show()