Example #1
0
def problem_1b_gauss_newton_ill():
    n_observations, n_variables = problem1_variables()

    X = generate_ill_conditioned(n_observations, n_variables)
    beta = generate_sample_beta(X.shape[1])
    g, grad = generate_least_square(X, beta)

    x0 = np.zeros([beta.shape[0], 1])

    estimated_beta, history = gauss_newton(g, grad, x0, beta, method = 'diminishing')
    np.savetxt('problem_1b_gauss_newton_ill', history)
Example #2
0
def problem_1c_gauss_newton():
    n_observations, n_variables = problem1_variables()

    X = generate_ill_conditioned(n_observations, n_variables)
    beta = generate_sample_beta(X.shape[1])

    for penalty in [0.1, 1.0, 10.0, 100.0]:
        g, grad = generate_least_square_ridge(X, beta, penalty)

        x0 = np.zeros([beta.shape[0], 1])

        estimated_beta, history = gauss_newton(g, grad, x0, beta, method = 'diminishing')
        np.savetxt('problem_1c_gauss_newton' + '_%.1f' % penalty, history)
def plot_zipf_transformed_param_contours(empirical_freqs):
    n_theta = 200
    n_radii = 100
    theta_plot = np.linspace(0, 2*np.pi, n_theta)
    r_plot = np.linspace(0, 3, n_radii)
    R_mesh, Theta_mesh = np.meshgrid(r_plot, theta_plot)

    ranks = np.arange(1, empirical_freqs.size + 1)
    design_mat = np.stack([np.ones_like(ranks), np.log(ranks)], axis=1)
    A = design_mat.T @ design_mat
    eig_vals, eig_vecs = np.linalg.eigh(A)
    major_axis = eig_vecs[:,0]
    angle = np.arctan2(major_axis[1], major_axis[0])
    rot_mat = np.asarray([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
    a, b = 1/np.sqrt(eig_vals[0]), 1/np.sqrt(eig_vals[1])

    y = np.log(empirical_freqs)
    r = lambda x: design_mat @ x - y
    center = least_squares(r, x0=np.zeros(2)).x.reshape((-1,1))
    scaled_x = (a * R_mesh * np.cos(Theta_mesh)).reshape((-1, 1))
    scaled_y = (b * R_mesh * np.sin(Theta_mesh)).reshape((-1, 1))
    plot_grid = rot_mat @ np.hstack([scaled_x, scaled_y]).T + center

    Z = np.empty(R_mesh.size)
    for i in range(n_theta * n_radii):
        residual = r(plot_grid[:,i])
        mse = np.sqrt(np.mean(np.square(residual)))
        Z[i] = mse

    X = plot_grid[0].reshape(R_mesh.shape)
    Y = plot_grid[1].reshape(R_mesh.shape)
    Z = Z.reshape(R_mesh.shape)

    fig = plt.figure(figsize=(10, 10))
    plt.contour(X, Y, Z, cmap="hot", levels=20)
    plt.title(r"$\sum_{i=1}^N (\log\,f_i - \log\, K - \alpha\, \log\, r_i)^2$")
    plt.xlabel(r"$\log\,K$", fontsize=14)
    plt.ylabel(r"$\alpha$", fontsize=14)
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(pjoin(("..", "..", "images", "non-linear-least-squares", "shakespeare-zipf-transformed-param-contours.png"))

def plot_transformed_scatter_points(freqs):
    ranks = np.arange(1, 1 + freqs.size)
    xs = np.log(ranks)
    ys = np.log(freqs)

    plt.figure(figsize=(10,10))
    plt.scatter(xs, ys)
    plt.title(r"$\log(freq)$ vs. $\log(rank)$", fontsize=20)
    plt.xlabel(r"$\log(rank)$", fontsize=18)
    plt.ylabel(r"$\log(freq)$", fontsize=18)
    plt.grid(b=True, which="major", linestyle='-')
    plt.minorticks_on()
    plt.grid(b=True, which="minor", linestyle='--')    
    plt.tight_layout()
    plt.savefig(pjoin(("..", "..", "images", "non-linear-least-squares", "shakespeare-zipf-transformed-param-scatter.png"))

def plot_zipf_fit(empirical_freqs):
    K_ols, alpha_ols, _ = fit_zipf_ols(empirical_freqs)
    K_nlls, alpha_nlls, _ = fit_zipf_nlls(empirical_freqs)
    xs = np.asarray([i for i in range(1, empirical_freqs.size + 1)], dtype=np.float)

    plt.figure(figsize=(10,10))
    plt.title("Word Frequency vs. Rank", fontsize=14)
    plt.xlabel("Rank", fontsize=14)
    plt.ylabel("Word Frequency", fontsize=14)
    plt.grid(True)
    
    plt.scatter(xs, empirical_freqs, alpha=0.9)
    plt.plot(xs, K_ols * xs ** alpha_ols, c="tab:orange", linewidth=2, label=rf"$f_{{ols}}(x) = {K_ols:.2}x^{{{alpha_ols:.2}}}$")
    plt.plot(xs, K_nlls * xs ** alpha_nlls, c="tab:green", linewidth=2, label=rf"$f_{{nlls}}(x) = {K_nlls:.2}x^{{{alpha_nlls:.2}}}$")

    plt.ylim([0, 0.04])
    plt.legend()
    plt.grid(b=True, which="major", linestyle='-')
    plt.minorticks_on()
    plt.grid(b=True, which="minor", linestyle='--')
    plt.tight_layout()
    plt.savefig(pjoin("..", "..", "images", "non-linear-least-squares", "shakespeare-zipf-fit.png"))

    plt.figure(figsize=(10,10))
    plt.title(r"$log(freq)$ vs. $log(rank)$", fontsize=14)
    plt.xlabel(r"$log(freq)$ ", fontsize=14)
    plt.ylabel(r"$log(rank)$", fontsize=14)
    plt.grid(True)

    plt.scatter(np.log(xs), np.log(empirical_freqs), alpha=0.9)
    plt.plot(np.log(xs), np.log(K_ols * xs ** alpha_ols), c="tab:orange", linewidth=2, label=rf"$\log(f_{{ols}}(x))$")
    plt.plot(np.log(xs), np.log(K_nlls * xs ** alpha_nlls), c="tab:green", linewidth=2, label=rf"$\log(f_{{nlls}}(x))$")

    plt.legend()
    plt.grid(b=True, which="major", linestyle='-')
    plt.minorticks_on()
    plt.grid(b=True, which="minor", linestyle='--')
    plt.tight_layout()
    plt.savefig(pjoin("..", "..", "images", "non-linear-least-squares", "shakespeare-zipf-fit-loglog.png"))

def plot_direction_arrows(x_coords, y_coords, min_segment_length=0.05, c="b"):
    for i in range(1, x_coords.size):
        dx = x_coords[i] - x_coords[i-1]
        dy = y_coords[i] - y_coords[i-1]
        if np.sqrt(dx ** 2 + dy**2) > min_segment_length:
            x_start = x_coords[i-1] + dx / 2
            y_start = y_coords[i-1] + dy / 2
            plt.arrow(x_start, y_start, 0.1 * dx, 0.1 * dy, shape="full", lw=0, length_includes_head=True, head_width=.03, color=c)

def plot_gauss_newton_convergence(empirical_freqs):
    N_iter = 100
    fig = plt.figure(figsize=(10, 10))
    colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    for x0, c in zip([[1,1], [-1,1], [-1,-1],[1,-1]], colors):
        x0 = np.asfarray(x0) / 1.5
        ranks = np.arange(1, empirical_freqs.size + 1)
        zipf = lambda K, alpha: K * ranks ** alpha
        iterates, costs = gauss_newton(
            f = lambda x: empirical_freqs - zipf(x[0], x[1]),
            x0=x0,
            J = lambda x: np.stack(
                [
                    -ranks ** x[1],
                    -np.log(ranks) * zipf(x[0], x[1]),
                ], axis=1),
            max_iter=N_iter,
        )
        x_coords = np.asarray([elem[0] for elem in iterates])
        y_coords = np.asarray([elem[1] for elem in iterates])
        plt.plot(x_coords, y_coords, 'o-', alpha=0.7)
        plot_direction_arrows(x_coords, y_coords, c=c, min_segment_length=0.05)

    k_lower = -0.75
    k_upper = -k_lower
    alpha_lower = -1.0
    alpha_upper = -alpha_lower
    K_plot = np.linspace(k_lower, k_upper, 200)
    alpha_plot = np.linspace(alpha_lower, alpha_upper, 200)

    K_mesh, Alpha_mesh = np.meshgrid(K_plot, alpha_plot)
    ranks = np.arange(1, empirical_freqs.size + 1)
    Z = np.empty_like(K_mesh)
    for i, alpha in enumerate(alpha_plot):
        for j, c in enumerate(K_plot):
            yhat = c * ranks ** alpha
            residual = empirical_freqs - yhat
            mse = np.sqrt(np.mean(np.square(residual)))
            Z[i, j] = mse
    lvls = np.asarray([0.00 + 0.03 * i for i in range(1,20)] + [-.3 + 1.3 ** i for i in range(20)])
    plt.contour(K_mesh, Alpha_mesh, Z, cmap="hot", levels=lvls)
    plt.title(r"$\sum_{i=1}^N (f_i - K r_i^{\alpha_i})^2$")
    plt.xlabel(r"$K$", fontsize=14)
    plt.ylabel(r"$\alpha$", fontsize=14)
    plt.grid(True)
    plt.xlim([k_lower, k_upper])
    plt.ylim([alpha_lower, alpha_upper])
    plt.tight_layout()
    plt.savefig(pjoin("..", "..", "images", "non-linear-least-squares", "shakespeare-gauss-newton-fit.png"))

def plot_levenberg_marquardt_convergence(empirical_freqs):
    N_iter = 100
    fig = plt.figure(figsize=(10, 10))
    colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    for x0, c in zip([[1,1], [-1,1], [-1,-1],[1,-1]], colors):
        x0 = np.asfarray(x0) / 1.5
        ranks = np.arange(1, empirical_freqs.size + 1)
        zipf = lambda K, alpha: K * ranks ** alpha
        iterates, _ = levenberg_marquardt(
            f = lambda x: empirical_freqs - zipf(x[0], x[1]),
            x0=x0,
            J = lambda x: np.stack(
                [
                    -ranks ** x[1],
                    -np.log(ranks) * zipf(x[0], x[1]),
                ], axis=1),
                max_iter=N_iter,
        )
        x_coords = np.asarray([elem[0] for elem in iterates])
        y_coords = np.asarray([elem[1] for elem in iterates])
        plt.plot(x_coords, y_coords, 'o-', alpha=0.7)
        print(iterates[-1])
        plot_direction_arrows(x_coords, y_coords, c=c, min_segment_length=0.05)

    k_lower = -1.0
    k_upper = -k_lower
    alpha_lower = -1.0
    alpha_upper = -alpha_lower
    K_plot = np.linspace(k_lower, k_upper, 200)
    alpha_plot = np.linspace(alpha_lower, alpha_upper, 200)

    K_mesh, Alpha_mesh = np.meshgrid(K_plot, alpha_plot)
    ranks = np.arange(1, empirical_freqs.size + 1)
    Z = np.empty_like(K_mesh)
    for i, alpha in enumerate(alpha_plot):
        for j, c in enumerate(K_plot):
            yhat = c * ranks ** alpha
            residual = empirical_freqs - yhat
            mse = np.sqrt(np.mean(np.square(residual)))
            Z[i, j] = mse
    lvls = np.asarray([0.00 + 0.03 * i for i in range(1,20)] + [-.3 + 1.3 ** i for i in range(20)])
    plt.contour(K_mesh, Alpha_mesh, Z, cmap="hot", levels=lvls)
    plt.title(r"$\sum_{i=1}^N (f_i - K r_i^{\alpha_i})^2$")
    plt.xlabel(r"$K$", fontsize=14)
    plt.ylabel(r"$\alpha$", fontsize=14)
    plt.grid(True)
    plt.xlim([k_lower, k_upper])
    plt.ylim([alpha_lower, alpha_upper])
    plt.tight_layout()
    plt.savefig(pjoin("..", "..", "images", "non-linear-least-squares", "shakespeare-levenberg-marquardt-fit.png"))


if __name__ == "__main__":
    N = 100
    most_freq_words, freqs = word_freqs(N)
    # plot_scatter_points(freqs)
    # plot_zipf_param_surface(freqs)
    # plot_zipf_param_contours(freqs)
    # plot_transformed_scatter_points(freqs)
    # plot_zipf_transformed_param_contours(freqs)
    # plot_zipf_transformed_param_surface(freqs)
    plot_zipf_fit(freqs)
    # plot_gauss_newton_convergence(freqs)
    # plot_levenberg_marquardt_convergence(freqs)