def best_degree(x, y, phi, dmax, folds):
    """Lazily yield ``(risk, degree)`` for every degree from 0 to ``dmax``.

    For each degree, ``phi(degree, x)`` is stacked as a new row under the
    rows of all lower degrees, and the resulting matrix is handed to
    ``cross_validation`` together with ``y`` and ``folds``.

    Args:
        x: Input samples, forwarded untouched to ``phi``.
        y: Target values, forwarded untouched to ``cross_validation``.
        phi: Callable ``phi(degree, x)`` producing the row for one degree.
        dmax: Highest degree to evaluate (inclusive). Nothing is yielded
            when it is negative.
        folds: Forwarded untouched to ``cross_validation``.

    Yields:
        Tuples ``(cross_validation(X, y, folds), degree)`` in increasing
        order of degree.
    """
    design = None
    for degree in range(dmax + 1):
        features = phi(degree, x)
        if design is None:
            # First iteration (degree 0): start the matrix with a single row.
            design = numpy.matrix(features)
        else:
            # Later degrees reuse the rows already built and append one more.
            design = numpy.vstack((design, features))
        yield cross_validation(design, y, folds), degree
def main(datadir, dmax, folds, plot_precision):
    """Pick a polynomial degree by cross validation, then fit and plot it.

    Steps, in order:
      1. Read ``x, y`` with ``read_data(datadir)``.
      2. Evaluate the risk of every degree from 0 to ``dmax`` through
         ``best_degree`` (using the module-level ``phi``) and show the
         risk-versus-degree curve on a logarithmic y axis.
      3. Fit the degree with the lowest risk using ``least_square`` and
         print the result via ``f_str``.
      4. Plot the data points together with ``f(theta)``.

    Progress messages are printed to standard output, and each
    ``pyplot.show()`` call displays one figure.

    Args:
        datadir: Folder passed to ``read_data``.
        dmax: Highest polynomial degree to evaluate (inclusive).
        folds: Number of folds forwarded to the cross validation.
        plot_precision: Forwarded to ``plot.plot_function``.
            NOTE(review): presumably a sampling resolution -- confirm
            against ``plot.plot_function``.

    Raises:
        ValueError: If no degree is evaluated (``dmax`` negative), because
            ``min`` is then called on an empty list.
    """
    # "axes.color_cycle" was superseded by "axes.prop_cycle" in
    # matplotlib 1.5 and later removed, so reading it raises KeyError on
    # current releases. Prefer the new key; keep the old one as a fallback
    # for installations that predate prop_cycle.
    try:
        colors = pyplot.rcParams["axes.prop_cycle"].by_key()["color"]
    except KeyError:
        colors = pyplot.rcParams["axes.color_cycle"]

    # Input
    print("-> Reading data in folder \"{}\"".format(datadir))
    x, y = read_data(datadir)

    # Risk
    print("-> Computing the average risk for polynomials of degrees between 0 and {},".format(dmax))
    print(" using cross validation with {} folds :".format(folds))
    risks = list(best_degree(x, y, phi, dmax, folds))
    # Entries are (risk, degree) tuples, so min() selects the lowest risk
    # and breaks ties in favour of the lowest degree.
    best_risk, best_d = min(risks)
    risks, degrees = zip(*risks)

    pyplot.figure()
    pyplot.yscale('log')
    pyplot.plot(degrees, risks, zorder=2)
    pyplot.plot([best_d], [best_risk], 'o', zorder=1)
    pyplot.xlabel('degree')
    pyplot.ylabel('average risk')
    pyplot.show()
    print(" -> The lowest risk ({:.2f}) is obtained with degree {}.".format(best_risk, best_d))

    # Fit on the whole data set with the selected degree.
    print("-> Computing the best function of degree {} predicting the value of y.".format(best_d))
    X = numpy.matrix([phi(d, x) for d in range(0, best_d + 1)])
    theta = least_square(X, y)
    print(" -> The best function is (approximately) : {}".format(f_str(theta)))

    # Data points are drawn above the fitted curve (higher zorder).
    pyplot.plot(x, y, 'o', color=colors[1], zorder=2)
    plot.plot_function(f(theta), plot_precision, color=colors[0], zorder=1)
    pyplot.show()