Esempio n. 1
0
    return np.sum(np.square(x0 - x1))

# Generate the training and validation sets.
# Both sets sample f at evenly spaced points on [a, b]; Gaussian noise with
# standard deviation `std` is added to the inputs and again to the targets.
a = 0
b = 1
N = 20  # number of training samples
# Floor division keeps k an int: np.linspace and np.random.normal require an
# integer sample count, and N/4 evaluates to a float under Python 3.
k = N // 4  # number of validation samples
std = 0.05
x_train = np.linspace(a, b, N) + np.random.normal(0, std, N)
y_train = f(x_train) + np.random.normal(0, std, N)
x_valid = np.linspace(a, b, k) + np.random.normal(0, std, k)
y_valid = f(x_valid) + np.random.normal(0, std, k)


max_degree = 8
# Fit one weight vector per polynomial order n = 1 .. max_degree - 1
# (range() stops before max_degree itself).
ws = []
for n in range(1, max_degree):
    ws.append(polyfit(x_train, y_train, n))

# Evaluation grid extending one `std` beyond each end of [a, b]
x = np.linspace(a - std, b + std, 100)


def _make_polynomial(w):
    """Return a callable that evaluates sum(w[i] * t**i) for the weights w."""
    def evaluate(t, w=w):
        return sum(coef * t**power for power, coef in enumerate(w))
    return evaluate


# One evaluator per fitted weight vector, in the same order as ws
y_fs = [_make_polynomial(w) for w in ws]

# Score every fit with L2 against the training and the validation targets
training_errors = [L2(fit(x_train), y_train) for fit in y_fs]
valid_errors = [L2(fit(x_valid), y_valid) for fit in y_fs]


# Plot the polynomial fit to the training data
fig, ax = plot.subplots(frameon=True)
# White axes background. set_facecolor is the replacement for
# set_axis_bgcolor, which was deprecated in Matplotlib 2.0 and later removed.
ax.set_facecolor((1, 1, 1))
# Gray grid lines on both major and minor ticks
ax.grid(which='both', color='gray')
for i in ax.spines:
Esempio n. 2
0
# Remove rows whose average is NaN
df = df[pd.notnull(df['average'])]

# Compute the period of the oscillations in the data
# Note that some data has been removed because it was missing
a = df.average
# Compare neighbours by position on the raw NumPy values. The slices a[1:]
# and a[:-1] of a pandas Series carry different index labels, and comparing
# differently-labelled Series raises ValueError in current pandas.
values = a.values
# http://stackoverflow.com/a/4625132
# A point is a local minimum when it is strictly smaller than both of its
# neighbours; the first and last points only need to beat their one neighbour.
minima = np.r_[True, values[1:] < values[:-1]] & np.r_[values[:-1] < values[1:], True]
# Row positions (not dates) of the local minima
minima_index = [pos for pos, is_min in enumerate(minima) if is_min]
# Mean spacing, in rows, between consecutive local minima
period = np.mean([nxt - cur for cur, nxt in zip(minima_index, minima_index[1:])])

# Compute the polynomial coefficients for orders 1, 2 and 3
N = len(df.index)
x = df['decimal_date']
y = df['average']
ws = []
for n in (1, 2, 3):
    ws.append(polyfit(x, y, n))


def _make_polynomial(w):
    """Return a callable that evaluates sum(w[i] * t**i) for the weights w."""
    def evaluate(t, w=w):
        return sum(coef * t**power for power, coef in enumerate(w))
    return evaluate


# One evaluator per fitted weight vector, in the same order as ws
y_fs = [_make_polynomial(w) for w in ws]

# Create the figure and style the axes before any data is drawn
fig, ax = plot.subplots(frameon=True)
# White axes background. set_facecolor is the replacement for
# set_axis_bgcolor, which was deprecated in Matplotlib 2.0 and later removed.
ax.set_facecolor((1, 1, 1))
# Gray grid lines on both major and minor ticks
ax.grid(which='both', color='gray')
# Draw every axes border (spine) in black
for spine in ax.spines.values():
    spine.set_color('k')
ax.set_title('Polynomial Fits to Average Monthly\nMeasurements of CO2 Concentration', fontsize=18)
#ax.set_title('Mean Monthly\nMeasurements of CO2 Concentration', fontsize=18)
ax.set_xlabel('Date')
ax.set_ylabel('Average Measurement of CO2 (ppm)')

# Draw the raw measurements first, then overlay each fitted polynomial
dates = df['decimal_date']
plot.plot(dates, df['average'], label='Original Data')
# Counting from 1 makes the legend number match each entry's position in y_fs.
# NOTE: this loop rebinds the module-level names `i` and `y`.
for i, y in enumerate(y_fs, 1):
    fitted = y(dates)
    plot.plot(dates, fitted, label='Degree=%d' % i)