Beispiel #1
0
def _test_mpca(display=False):
    """Run a visual smoke test of `mpca` on a fixed, seeded 2D sample.

    The test draws 100 points from [-1, 1)^2, evaluates a family of test
    functions on them and runs `mpca` on the response of one of them
    (``funcs[choice]``). It then builds three figures:

      1. the sampled points and the surface of the chosen function,
      2. a random sample of normalized between-point slope vectors,
         overlaid with the axis-aligned components weighted by the
         values returned from `normalize_error`,
      3. the input points overlaid with the components from `pca`.

    Arguments:
      display -- When True, print progress / numeric results and show or
                 append the generated plots. When False the figures are
                 still built, but all printing and the `p.plot` /
                 `multiplot` output calls guarded by `display` are skipped.

    Returns None.
    """
    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'mpca'")

    # Hard-coded switches controlling which figures get produced.
    GENERATE_APPROXIMATIONS = True
    BIG_PLOTS = False
    SHOW_2D_POINTS = False

    import random
    # Generate some points for testing. All random sources are seeded so
    # that repeated runs produce the same data; the order in which `rgen`
    # is consumed below therefore matters.
    np.random.seed(4)  # 4
    random.seed(0)  # 0
    rgen = np.random.RandomState(1)  # 10
    n = 100
    points = (rgen.rand(n, 2) - .5) * 2
    # points *= np.array([.5, 1.])

    # Create some testing functions (for learning different behaviors)
    funcs = [
        lambda x: x[1],  # Linear on y
        lambda x: abs(x[0] + x[1]),  # "V" function on 1:1 diagonal
        lambda x: abs(2 * x[0] + x[1]),  # "V" function on 2:1 diagonal
        lambda x: x[0]**2,  # Quadratic on x
        lambda x: (x[0] + x[1])**2,  # Quadratic on 1:1 diagonal
        lambda x: (2 * x[0] + x[1])**3,  # Cubic on 2:1 diagonal
        lambda x: (x[0]**3),  # Cubic on x
        lambda x: rgen.rand(),  # Random function
    ]
    # Calculate the response values associated with each function
    # (one column per function, one row per point).
    responses = np.vstack(tuple(tuple(map(f, points)) for f in funcs)).T

    # Reduce to just one of the functions.
    choice = 3
    func = funcs[choice]
    response = responses[:, choice]

    # Run the principal response analysis function and normalize the
    # returned values so that they sum to one.
    components, values = mpca(points, response)
    values /= np.sum(values)
    conditioner = np.matmul(components, np.diag(values))

    if display:
        print()
        print("Components")
        print(components)
        print()
        print("Values")
        print(values)
        print()
        print("Conditioner")
        print(conditioner)
        print()

    # Replace the computed components with the identity basis and weight
    # each axis with `normalize_error`. From here on, `components` and
    # `values` refer to these "true" quantities, not the `mpca` output.
    components = np.array([[1.0, 0.], [0., 1.]])
    values = normalize_error(np.matmul(points, components.T), response,
                             abs_diff)
    values /= np.sum(values)
    if display:
        print()
        print()
        print("True Components")
        print(components)
        print()
        print("True Values")
        print(values)
        print()

    # Generate a plot of the response surfaces.
    from util.plot import Plot, multiplot
    if display: print("Generating plots of source function..")

    # Add function 1
    p1 = Plot()
    p1.add("Points", *(points.T), response, opacity=.8)
    p1.add_func("Surface", func, [-1, 1], [-1, 1], plot_points=100)

    if GENERATE_APPROXIMATIONS:
        from util.approximate import NearestNeighbor, Delaunay, condition
        p = Plot()
        # Add the source points and the true function.
        p.add("Points", *(points.T), response, opacity=.8)
        p.add_func("Truth", func, [-1, 1], [-1, 1])
        # Add an unconditioned nearest neighbor fit.
        model = NearestNeighbor()
        model.fit(points, response)
        p.add_func("Unconditioned Approximation",
                   model, [-1, 1], [-1, 1],
                   mode="markers",
                   opacity=.8)
        # Generate a conditioned approximation
        model = condition(NearestNeighbor, method="MPCA")()
        model.fit(points, response)
        p.add_func("Best Approximation",
                   model, [-1, 1], [-1, 1],
                   mode="markers",
                   opacity=.8)

        if display: p.plot(show=False, height=400, width=650)

    if display: print("Generating metric principal components..")

    # Return the between vectors and the differences between those points.
    # With `unique` True each unordered pair is visited once; otherwise
    # every ordered pair (excluding a point with itself) is produced.
    def between(x, y, unique=True):
        vecs = []
        diffs = []
        for i1 in range(x.shape[0]):
            start = i1 + 1 if unique else 0
            for i2 in range(start, x.shape[0]):
                if (i1 == i2): continue
                vecs.append(x[i2] - x[i1])
                diffs.append(y[i2] - y[i1])
        return np.array(vecs), np.array(diffs)

    # Draw each component (a row of `vectors`) scaled by its magnitude as
    # a line from the origin, annotated with the magnitude.
    def add_components(plot, vectors, magnitudes):
        for i, (vec, m) in enumerate(zip(vectors, magnitudes)):
            vec = vec * m
            plot.add(f"PC {i+1}", [0, vec[0]], [0, vec[1]], mode="lines")
            plot.add_annotation(f"{m:.2f}", vec[0], vec[1])

    # Plot the between slopes to verify they are working.
    # Calculate the between slopes
    vecs, diffs = between(points, response)
    vec_lengths = np.sqrt(np.sum(vecs**2, axis=1))
    between_slopes = diffs / vec_lengths
    # Unit direction of each between vector, scaled by its slope.
    bs = ((vecs.T / vec_lengths) * between_slopes).T
    # Extract a random subset for display
    size = 100
    random_subset = np.arange(len(bs))
    rgen.shuffle(random_subset)
    bs = bs[random_subset[:size], :]
    # Normalize the between slopes so they fit on the plot
    max_bs_len = np.max(np.sqrt(np.sum(bs**2, axis=1)))
    bs /= max_bs_len
    # Plot the random subset of the between slopes.
    p2 = Plot("", "Metric PCA on Z", "")
    p2.add("Between Slopes", *(bs.T), color=p2.color(4, alpha=.4))

    if SHOW_2D_POINTS:
        # Add the points and transformed points for demonstration.
        # NOTE(review): `conditioner` is (2, 2) and `points` is (n, 2), so
        # `np.matmul(conditioner, points)` looks shape-incompatible, and
        # `components` is the identity at this point. This branch is
        # disabled above; confirm the intended transform before enabling.
        new_pts = np.matmul(np.matmul(conditioner, points),
                            np.linalg.inv(components))
        p2.add("Original Points", *(points.T))
        p2.add("Transformed Points", *(new_pts.T), color=p2.color(6, alpha=.7))

    # Add the principal response components
    add_components(p2, components, values)

    p3 = Plot("", "PCA on X", "")
    p3.add("Points", *(points.T), color=p3.color(4, alpha=.4))

    # Add the normal principal components
    components, values = pca(points)
    values /= np.sum(values)
    add_components(p3, components, values)

    if BIG_PLOTS:
        if display: p1.plot(file_name="source_func.html", show=False)
        if display: p2.plot(append=True, x_range=[-8, 8], y_range=[-5, 5])
    else:
        # Make the plots (with manual ranges)
        p1 = p1.plot(html=False, show_legend=False)
        p2 = p2.plot(html=False,
                     x_range=[-1, 1],
                     y_range=[-1, 1],
                     show_legend=False)
        p3 = p3.plot(html=False,
                     x_range=[-1, 1],
                     y_range=[-1, 1],
                     show_legend=False)
        # Generate the multiplot of the three figures
        if display: multiplot([p1, p2, p3], height=126, width=650, append=True)

    if display: print("-" * 70)
Beispiel #2
0
                        # Fixed value range handed to the histogram below
                        # through its `start_end` argument.
                        start_end = (-.5, 1.5)
                        # Add a histogram of the "Errors" entry at index 0
                        # of `d` (presumably the first row -- confirm against
                        # the data structure), drawn with a thick black
                        # outline around each bar and no padding.
                        p.add_histogram(plot_name,
                                        d[0, "Errors"],
                                        show_in_legend=show,
                                        num_bins=num_bins,
                                        barmode="",
                                        color=color,
                                        marker_line_width=10,
                                        marker_line_color="rgb(0,0,0)",
                                        start_end=start_end,
                                        padding=0.0)
            # p.show(append=True)

# Legend layout shared by every plot: horizontal orientation, anchored by
# its top-center at (x=0.5, y=1.25), with a thin light-gray border.
legend_settings = {
    "xanchor": "center",
    "yanchor": "top",
    "x": .5,
    "y": 1.25,
    "bordercolor": "#DDD",
    "borderwidth": 1,
    "font": {"size": 15},
    "orientation": "h",
}

# Render each collected plot with the shared legend (without showing it),
# then combine the results with `multiplot` into one output file.
plots = [
    figure.plot(legend=legend_settings, show=False) for figure in plots
]
multiplot(
    plots, width=700, height=150, gap=.06, file_name="errors_histogram.html"
)
Beispiel #3
0
    p2.add_annotation(f"{m:.2f}", vec[0], vec[1], font_family="times")


# Plot the raw input points together with the components that `pca`
# computes from the points alone.
p3 = Plot("", "PCA on X", "", font_family="times")
p3.add("Points", *(points.T), color=p3.color(4, alpha=.4))

# Add the normal principal components: each one is drawn as a line from
# the origin, scaled by its value and annotated with that value.
components, values = pca(points)
for i,(vec,m) in enumerate(zip(components, values)):
    vec = vec * m
    p3.add(f"PC {i+1}", [0,vec[0]], [0,vec[1]], mode="lines")
    p3.add_annotation(f"{m:.2f}", vec[0], vec[1], font_family="times")


if not BIG_PLOTS:
    # Use the "times" font on all three plots before rendering them.
    p1.font_family = p2.font_family = p3.font_family = "times"
    # Render each plot with `html=False` and without a legend; the two
    # component plots use a fixed [-1, 1] range on both axes.
    p1 = p1.plot(html=False, show_legend=False)
    p2 = p2.plot(
        html=False, x_range=[-1,1], y_range=[-1,1], show_legend=False
    )
    p3 = p3.plot(
        html=False, x_range=[-1,1], y_range=[-1,1], show_legend=False
    )
    # Combine the three rendered figures into a single multiplot file.
    multiplot(
        [p1,p2,p3], file_name="mpca_demo.html", height=126, width=600
    )
else:
    # Large stand-alone versions: the source function goes to its own file
    # and the metric PCA plot is appended with wider axis ranges.
    p1.plot(file_name="source_func.html", show=False)
    p2.plot(append=True, x_range=[-8,8], y_range=[-5,5])
Beispiel #4
0
                          group=algorithm,
                          show_in_legend=(len(plots) == 0))
                # Save the relevant data to a file.
                # Work on a copy and keep only the first two columns after
                # reordering (presumably "Train" and "Abs Errors" -- confirm
                # that `reorder` moves the named columns to the front).
                d_alg = d_alg.copy()
                d_alg.reorder(["Train", "Abs Errors"])
                d_alg = d_alg[:, :2].copy()
                # Sort every row's list of absolute errors in place.
                for er in d_alg["Abs Errors"]:
                    er.sort()
                # Spread the sorted errors over one column per position:
                # column "abs error k" holds the k-th smallest error of each
                # row. The number of columns is taken from the entry at
                # index 0, so all rows are assumed to have that many errors.
                for i in range(len(d_alg[0, "Abs Errors"])):
                    d_alg[f"abs error {i+1}"] = (er[i]
                                                 for er in d_alg["Abs Errors"])
                # Drop the list-valued column before writing the CSV file.
                d_alg.pop("Abs Errors")
                d_alg.save(f"{algorithm}-{S}-{F}-{D}.csv")
            # Give the y-axis a log range (like the x axis)
            extra = dict(y_range="", y_axis_settings=dict(type="log"))
            # Render with grouped boxes and collect the result for the
            # combined multiplot below.
            p_fig = p.plot(html=False, layout=dict(boxmode="group"), **extra)
            plots.append(p_fig)
        # Append all collected figures as a single multiplot.
        multiplot(plots, append=True)

# Make box plots
if BOX_PLOTS:
    # Load the previously stored results.
    d = Data.load(output_data_name)
    # Generate interesting extra columns.
    # "Abs Errors" holds, per row, the absolute value of every entry in
    # that row's "Errors" list; the next three columns are the mean,
    # minimum and maximum of those absolute errors.
    d["Abs Errors"] = ([float(abs(v)) for v in l] for l in d["Errors"])
    d["Mean Abs Error"] = (sum(l) / len(l) for l in d["Abs Errors"])
    d["Min Abs Error"] = (min(l) for l in d["Abs Errors"])
    d["Max Abs Error"] = (max(l) for l in d["Abs Errors"])
    # Restrict to the requested algorithms when any are given (presumably
    # comparing a column against a list selects the matching rows --
    # confirm against the `Data` class).
    if (len(algorithms) > 0): d = d[d["Algorithm"] == algorithms]
    print(d)
    # Collect the distinct (Function, SNR, Train) combinations in the data.
    config_cols = ["Function", "SNR", "Train"]
    configs = d[:, config_cols].unique()