Example #1
 def _test_mesh(show=True):
     from weakly_admissable_meshes import polar_wam, box_wam
     from util.plot import Plot
     p = Plot("Weakly admissable mesh")
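     # Plot the degree-8 polar and box weakly admissible meshes together for visual comparison.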
     p.add("Polar", *(polar_wam(8).T), color=p.color(0))
     p.add("Box", *(box_wam(8).T), color=p.color(1))
     if show: p.show(width=1000*.7, height=900*.7, append=True)
Example #2
 def __init__(self, dev, odpch='1', sign=True):
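     # Save the channel and sign options, load the data for this device, and initialize the base Plot with it.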
     self.odpch = odpch
     self.start = False
     self.sign = sign
     data = self.get_data(dev)
     data.dev = dev
     Plot.__init__(self, 'fx', data=data)
     self.root.title(self.odpcmd)
Example #3
def _test_Distribution(display=False):
    # Verify that the distribution works under a weighted sum.
    import numpy as np
    d = []
    count = 3
    scale = 2**(30)
    for i in range(count):
        pts = np.random.random(20) * np.random.random() * scale
        d.append(cdf_fit(pts, fit="cubic"))
    wts = np.random.random((count, ))
    wts /= sum(wts)
    min_max = (-scale / 3, scale + scale / 3)
    if display:
        out = sum(dist * w for (dist, w) in zip(d, wts))
        print(out)
        from util.plot import Plot
        p = Plot("Weighted cubic fit")
        p.add_func("Weighted sum", out, min_max)
        for i, (dist, w) in enumerate(zip(d, wts)):
            p.add_func(f"Dist {i+1} -- {round(w,3)}",
                       dist,
                       min_max,
                       opacity=.3)
        p.show()
Example #4
def test_support(
        model,
        low=0,
        upp=1,
        plot_points=3000,
        p=None,
        fun=lambda x: 3 * x[0] + .5 * np.cos(8 * x[0]) + np.sin(5 * x[-1]),
        N=20,
        D=2,
        random=True,
        seed=0):
    # Force D to be 2
    D = 2
    np.random.seed(seed)
    # Generate x points
    if random:
        x = np.random.random(size=(N, D))
    else:
        N = int(round(N**(1 / D)))
        x = np.array(
            [r.flatten() for r in np.meshgrid(*[np.linspace(0, 1, N)] * D)]).T
    # Calculate response values
    y = np.array([fun(v) for v in x])
    # Fit the model to the points
    model.fit(x, y)
    # Generate the plot
    from util.plotly import Plot
    if p is None: p = Plot()
    p.add("Training Points", *x.T, color=p.color(len(x)))
    for i in range(len(x)):
        name = f"{i+1}"
        p.add(name, [x[i][0]], [x[i][1]], group=name)

        def supported(pt):
            pts, wts = model.points_and_weights(pt)
            return (i in pts)

        p.add_region(name + " region",
                     supported,
                     *([(low - .1, upp + .1)] * D),
                     color=p.color(p.color_num),
                     group=name,
                     plot_points=plot_points,
                     show_in_legend=False)
    # p.add_func(str(model), model, *([(low-.1,upp+.1)]*D),
    #            plot_points=plot_points, vectorized=True)
    return p, x, y
Example #5
def test_latin(display=False):
    from util.random import latin
    print("Testing latin..", end=" ")
    if display: print()

    if display:
        from util.plot import Plot
        D = 2
        N = 400

        p = Plot("Latin hyper cube design")
        # Add grid lines for the cube.
        for i in range(N + 1):
            p.add(f"gly {i}", [0, 1], [i / N] * 2,
                  mode="lines",
                  group="grid",
                  color="rgba(.6,.1,.1,.2)",
                  show_in_legend=(i == 0))
            p.add(f"glx {i}", [i / N] * 2, [0, 1],
                  mode="lines",
                  group="grid",
                  color="rgba(.6,.1,.1,.2)",
                  show_in_legend=False)
        # Add the random points.
        p.add("Points", *(latin(N, D).T))
        p.show(file_name="/Users/thomaslux/Desktop/lhc.html")

    from numpy import sort
    N = 10000
    D = 1000
    pts = latin(N, D)
    for i in range(D):
        values = sort(pts[:, i])
        max_adj_diff = max(abs(values[:-1] - values[1:]))
        if max_adj_diff >= (2 / N):
            print("BAD!", "   ", i, max_adj_diff, 1 / N, 2 / N)
        assert (max_adj_diff < (2 / N))

    print("passed.")
Example #6
    def test(self, start, end):
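        # Request data for this stock over [start, end] through the message queue, then plot the reply.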
        stock_info = {'stock': self.__stock_code, 'start': start, 'end': end}
        self.__mq.send(json.dumps(stock_info))
        time.sleep(1)
        msg = self.__mq.recv()
        datas = json.loads(msg)
        _logger.info(datas)

        if datas and len(datas):
            plot = Plot(self.__stock_code, datas)
            current_time = datetime.now().strftime('%Y%m%d%H%M%S')
            filename = self.__stock_code + "_" + current_time
            plot.plot(filename)
Example #7
def rank_by_slope(components,
                  points,
                  values,
                  metric,
                  max_pairs=10000,
                  display=True):
    # Compute the magnitudes using average metric slope.
    if display:
        print(" computing average metric slope per component.. ",
              end="",
              flush=True)
    avg_slope = np.zeros(len(components))
    update = end = ""
    for i in range(len(components)):
        update = "\b" * len(end)
        end = f"{i+1} of {len(components)}"
        if display: print(update, end=end, flush=True)
        x = np.matmul(points, components[i])
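        # Sum metric slope estimates for this component over randomly sampled pairs of points.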
        for (p1, p2) in gen_random_pairs(len(x), count=max_pairs):
            avg_slope[i] += metric(values[p1], values[p2]) / abs(x[p1] - x[p2])
    print("Results so far:")
    from util.plot import Plot
    p = Plot()
    for (comp, slope) in zip(components, avg_slope):
        print(comp, slope)
        x, y = np.matmul(points, comp), values
        p.add(f"Points: {slope:.2f}", x, y)
    print()
    p.show()
    exit()
    # Invert the total average metric slope.
    if (min(avg_slope) <= 0.0):
        avg_slope = np.where(avg_slope == 0, 1., 0.)
    else:
        avg_slope = 1 / avg_slope
    avg_slope /= np.sum(avg_slope)
    # Re-order the components according to inverse average metric slope.
    order = np.argsort(avg_slope)[::-1]
    # If they are not already ordered correctly, re-order the returns.
    if not all(order[i] < order[i + 1] for i in range(len(order) - 1)):
        if display:
            print(" reordering components by average metric slope..",
                  end="\r",
                  flush=True)
        components, avg_slope = components[order], avg_slope[order]
    if display:
        print("                                                ",
              end="\r",
              flush=True)
    return components, avg_slope
Example #8
def test_plot(
        model,
        low=0,
        upp=1,
        plot_points=3000,
        p=None,
        fun=lambda x: 3 * x[0] + .5 * np.cos(8 * x[0]) + np.sin(5 * x[-1]),
        N=20,
        D=2,
        noise=0.,
        random=True,
        seed=0,
        x=None,
        y=None,
        classifier=False):
    np.random.seed(seed)
    provided_points = (x is not None) and (y is not None)
    if (x is None):
        # Generate x points
        if random:
            x = np.random.random(size=(N, D))
        else:
            N = int(round(N**(1 / D)))
            x = np.array([
                r.flatten() for r in np.meshgrid(*[np.linspace(0, 1, N)] * D)
            ]).T
    if (y is None):
        # Calculate response values
        y = np.array([
            round(fun(v) +
                  np.random.random() * noise) if classifier else fun(v) +
            np.random.random() * noise for v in x
        ])
    # Fit the model to the points
    model.fit(x, y, classifier=classifier)
    # Generate the plot
    from util.plot import Plot
    if p is None: p = Plot()
    if not provided_points: p.add("Training Points", *x.T, y)
    p.add_func(str(model),
               model,
               *([(low - .1, upp + .1)] * D),
               plot_points=plot_points,
               vectorized=True)
    # p.add_func("truth", fun, *([(low-.1,upp+.1)]*D),
    #            plot_points=plot_points)
    return p, x, y
Example #9
    def _test_fekete(max_n=2000, max_d=1000, start_n=1000, start_d=10, steps=2, show=True):
        if show:
            # Generate some points in 3 dimensions for showing.
            pts = fekete_points(47, 3)
            from util.plot import Plot
            p = Plot("Fekete")
            p.add("fekete points", *(pts.T))
            p.show(append=True)

        from util.system import Timer
        t = Timer()
        steps -= 1
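        # Time fekete_points over a coarse grid of (n, d) sizes.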
        for n in range(start_n,max_n+1, (max_n-start_n-1) // steps):
            for d in range(start_d, max_d+1, (max_d-start_d-1) // steps):
                t.start()
                print()
                print('-'*70)
                print(f"  {n}, {d}", flush=True)
                pts = fekete_points(n, d)
                print(f"     {pts.shape}")
                print(f"     {t()} seconds", flush=True)
                t.stop()
        print()
        print()
Example #10
else:
    # Two normals
    size = 200
    points = np.random.normal(size=(size, 2))
    points[:, 0] *= 1 / 3
    # points += np.array([1.,-2.])
    points = np.concatenate((points, np.random.normal(0, 2, size=(size, 2))))
    points[-size:, 1] *= 1 / 3

dim = 2
vecs = np.random.random(size=(dim, points.shape[1]))
# Normalize the vectors.
for i in range(vecs.shape[0]):
    vecs[i] /= np.linalg.norm(vecs[i], ord=2)

p = Plot()

points = points - np.mean(points, axis=0)


# Given a vector, return all points flipped onto the same side of the vector.
def flipped_points(vec):
    # Get the signs
    signs = np.matmul(points, vec)
    signs[signs == 0] = 1.
    signs /= abs(signs)
    return (points.T * signs).T


# Update each of the vectors to be the average of flipped points.
def update_vecs():
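The snippet ends at the empty update_vecs stub; its original body is not shown. A minimal sketch of what the comment above describes, using only names defined in this snippet (vecs, flipped_points, np):

def update_vecs_sketch():
    # Hypothetical body, inferred only from the comment above update_vecs.
    # Flip all points onto each vector's side, replace the vector with the
    # mean of the flipped points, and renormalize it to unit length.
    for i in range(vecs.shape[0]):
        vecs[i] = flipped_points(vecs[i]).mean(axis=0)
        vecs[i] /= np.linalg.norm(vecs[i], ord=2)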
Example #11
def _test_cdf_fit():
    import numpy as np
    from util.math import SMALL
    from util.random import cdf
    from util.plot import Plot

    n = 10000
    f = cdf(nodes=5, fit="linear")

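    # Draw n samples from f by inverse transform sampling, then fit an empirical CDF to them.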
    sample = f.inverse(np.random.random((n, )))
    g = cdf_fit(sample, fit=None)
    print("Fit first CDF point: ", g.nodes[0])
    print("Fit second CDF point:", g.nodes[1])
    print("Fit last CDF point:  ", g.nodes[-1])

    print("Expected max error:", samples(n, confidence=.99))
    print("Actual max error:  ", f - g)

    min_max = (f.min - SMALL, f.max + SMALL)

    p = Plot()
    p.add_func("Truth", f, min_max)
    p.add_func("EDF", g, min_max)
    p.show(show=False, height=700, width=800)

    p = Plot()
    p.add_func("Truth", f.inverse, min_max)
    p.add_func("EDF", g.inverse, min_max)
    p.show(append=True, height=700, width=800)
Example #12
def _test_mpca(display=False):
    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'mpca'")

    GENERATE_APPROXIMATIONS = True
    BIG_PLOTS = False
    SHOW_2D_POINTS = False

    import random
    # Generate some points for testing.
    np.random.seed(4)  # 4
    random.seed(0)  # 0
    rgen = np.random.RandomState(1)  # 10
    n = 100
    points = (rgen.rand(n, 2) - .5) * 2
    # points *= np.array([.5, 1.])

    # Create some testing functions (for learning different behaviors)
    funcs = [
        lambda x: x[1],  # Linear on y
        lambda x: abs(x[0] + x[1]),  # "V" function on 1:1 diagonal
        lambda x: abs(2 * x[0] + x[1]),  # "V" function on 2:1 diagonal
        lambda x: x[0]**2,  # Quadratic on x
        lambda x: (x[0] + x[1])**2,  # Quadratic on 1:1 diagonal
        lambda x: (2 * x[0] + x[1])**3,  # Cubic on 2:1 diagonal
        lambda x: (x[0]**3),  # Cubic on x
        lambda x: rgen.rand(),  # Random function
    ]
    # Calculate the response values associated with each function.
    responses = np.vstack(tuple(tuple(map(f, points)) for f in funcs)).T

    # Reduce to just the first function
    choice = 3
    func = funcs[choice]
    response = responses[:, choice]

    # Run the principle response analysis function.
    components, values = mpca(points, response)
    values /= np.sum(values)
    conditioner = np.matmul(components, np.diag(values))

    if display:
        print()
        print("Components")
        print(components)
        print()
        print("Values")
        print(values)
        print()
        print("Conditioner")
        print(conditioner)
        print()

    components = np.array([[1.0, 0.], [0., 1.]])
    values = normalize_error(np.matmul(points, components.T), response,
                             abs_diff)
    values /= np.sum(values)
    if display:
        print()
        print()
        print("True Components")
        print(components)
        print()
        print("True Values")
        print(values)
        print()

    # Generate a plot of the response surfaces.
    from util.plot import Plot, multiplot
    if display: print("Generating plots of source function..")

    # Add function 1
    p1 = Plot()
    p1.add("Points", *(points.T), response, opacity=.8)
    p1.add_func("Surface", func, [-1, 1], [-1, 1], plot_points=100)

    if GENERATE_APPROXIMATIONS:
        from util.approximate import NearestNeighbor, Delaunay, condition
        p = Plot()
        # Add the source points and a Delaunay fit.
        p.add("Points", *(points.T), response, opacity=.8)
        p.add_func("Truth", func, [-1, 1], [-1, 1])
        # Add an unconditioned nearest neighbor fit.
        model = NearestNeighbor()
        model.fit(points, response)
        p.add_func("Unconditioned Approximation",
                   model, [-1, 1], [-1, 1],
                   mode="markers",
                   opacity=.8)
        # Generate a conditioned approximation
        model = condition(NearestNeighbor, method="MPCA")()
        model.fit(points, response)
        p.add_func("Best Approximation",
                   model, [-1, 1], [-1, 1],
                   mode="markers",
                   opacity=.8)

        if display: p.plot(show=False, height=400, width=650)

    if display: print("Generating metric principle components..")

    # Return the between vectors and the differences between those points.
    def between(x, y, unique=True):
        vecs = []
        diffs = []
        for i1 in range(x.shape[0]):
            start = i1 + 1 if unique else 0
            for i2 in range(start, x.shape[0]):
                if (i1 == i2): continue
                vecs.append(x[i2] - x[i1])
                diffs.append(y[i2] - y[i1])
        return np.array(vecs), np.array(diffs)

    # Plot the between slopes to verify they are working.
    # Calculate the between slopes
    vecs, diffs = between(points, response)
    vec_lengths = np.sqrt(np.sum(vecs**2, axis=1))
    between_slopes = diffs / vec_lengths
    bs = ((vecs.T / vec_lengths) * between_slopes).T
    # Extract a random subset for display
    size = 100
    random_subset = np.arange(len(bs))
    rgen.shuffle(random_subset)
    bs = bs[random_subset[:size], :]
    # Normalize the between slopes so they fit on the plot
    max_bs_len = np.max(np.sqrt(np.sum(bs**2, axis=1)))
    bs /= max_bs_len
    # Get a random subset of the between slopes and plot them.
    p2 = Plot("", "Metric PCA on Z", "")
    p2.add("Between Slopes", *(bs.T), color=p2.color(4, alpha=.4))

    if SHOW_2D_POINTS:
        # Add the points and transformed points for demonstration.
        new_pts = np.matmul(np.matmul(conditioner, points),
                            np.linalg.inv(components))
        p2.add("Original Points", *(points.T))
        p2.add("Transformed Points", *(new_pts.T), color=p2.color(6, alpha=.7))

    # Add the principle response components
    for i, (vec, m) in enumerate(zip(components, values)):
        vec = vec * m
        p2.add(f"PC {i+1}", [0, vec[0]], [0, vec[1]], mode="lines")
        ax, ay = (vec / sum(vec**2)**.5) * 3
        p2.add_annotation(f"{m:.2f}", vec[0], vec[1])

    p3 = Plot("", "PCA on X", "")
    p3.add("Points", *(points.T), color=p3.color(4, alpha=.4))

    # Add the normal principle components
    components, values = pca(points)
    values /= np.sum(values)
    for i, (vec, m) in enumerate(zip(components, values)):
        vec = vec * m
        p3.add(f"PC {i+1}", [0, vec[0]], [0, vec[1]], mode="lines")
        ax, ay = (vec / sum(vec**2)**.5) * 3
        p3.add_annotation(f"{m:.2f}", vec[0], vec[1])

    if BIG_PLOTS:
        if display: p1.plot(file_name="source_func.html", show=False)
        if display: p2.plot(append=True, x_range=[-8, 8], y_range=[-5, 5])
    else:
        # Make the plots (with manual ranges)
        p1 = p1.plot(html=False, show_legend=False)
        p2 = p2.plot(html=False,
                     x_range=[-1, 1],
                     y_range=[-1, 1],
                     show_legend=False)
        p3 = p3.plot(html=False,
                     x_range=[-1, 1],
                     y_range=[-1, 1],
                     show_legend=False)
        # Generate the multiplot of the two side-by-side figures
        if display: multiplot([p1, p2, p3], height=126, width=650, append=True)

    if display: print("-" * 70)
Example #13
def _test_samples(display=True, test_correctness=False):
    from util.math import Fraction
    from util.plot import Plot

    if display:
        for size in tuple(range(2, 42)) + (128, 129, 256, 257):
            p = Plot(f"Error at x with {size} samples")
            for confidence in (Fraction(9,
                                        10), Fraction(185,
                                                      200), Fraction(95, 100),
                               Fraction(97, 100), Fraction(99, 100)):
                f = lambda x: samples(
                    size=size, confidence=confidence, at=x[0])
                p.add_func(f"{confidence} confidence", f, [0, 1])
            p.show(append=True, show=(size == 2))
        exit()

    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'samples'")
        print()
    key_values = [
        (11, Fraction(3, 10), Fraction(95, 100)),
        (25, Fraction(2, 10), Fraction(95, 100)),
        (97, Fraction(1, 10), Fraction(95, 100)),
        (385, Fraction(5, 100), Fraction(95, 100)),
        (9604, Fraction(1, 100), Fraction(95, 100)),
        (19, Fraction(3, 10), Fraction(99, 100)),
        (42, Fraction(2, 10), Fraction(99, 100)),
        (166, Fraction(1, 10), Fraction(99, 100)),
        (664, Fraction(5, 100), Fraction(99, 100)),
        (16588, Fraction(1, 100), Fraction(99, 100)),
        (33733, Fraction(1, 10), Fraction(999, 1000)),
        (134930, Fraction(5, 100), Fraction(999, 1000)),
        (3373242, Fraction(1, 100), Fraction(999, 1000)),
    ]
    if display: print("samples (max error, confidence)")
    for (s, e, c) in key_values[:-3]:
        needed = samples(error=e, confidence=c)
        if display:
            print("needed: ", needed)
            print("%6d  (%2.0f%%, %2.0f%%)" % (needed, 100 * e, 100 * c))
        # if (s != None): assert(needed == s)

    if display:
        print()
        for n in (10, 25, 90, 350, 9000):
            print(
                f"With {n:4d} samples we are 95% confident in max CDF error <=",
                round(samples(n, confidence=.95), 1 if n < 350 else 2))
        print()
        for n in (20, 40, 160, 660, 16000):
            print(
                f"With {n:5d} samples we are 99% confident in max CDF error <=",
                round(samples(n, confidence=.99), 1 if n < 600 else 2))
        print("-" * 70)

    if test_correctness:
        # Generate a random CDF
        from util import random
        from util.plot import Plot

        TESTS = 10000
        DIFFS = 100
        N = 100

        fit = "linear"
        truth = random.cdf(nodes=5, fit=fit)
        max_error = samples(N, confidence=.99)
        if display: print("Largest expected error:", max_error)
        mid_error = []
        errors = {(1 / 100): [], (1 / 4): [], (1 / 3): [], (1 / 2): []}
        max_errors = []
        mean_failed = []
        for i in range(TESTS):
            sample = truth.inverse(np.random.random((N, )))
            guess = cdf_fit(sample, fit=fit)
            diff = truth - guess
            diff_func = lambda x: abs(truth(x) - guess(x))
            diffs = diff_func(np.linspace(0, 1, DIFFS))
            mean_failed += [sum(diffs > max_error)]
            if display:
                print(
                    f"Failed: {mean_failed[-1]:4d}   {sum(mean_failed)/len(mean_failed):.0f}"
                )
            max_errors.append(diff)
            for v in errors:
                errors[v].append(truth(v) - guess(v))
            # if (diff > max_error):
            #     print(i, N, diff, max_error)
            #     p = Plot()
            #     p.add_func("Truth", truth, truth())
            #     p.add_func("Guess", guess, guess())
            #     p.add_func("Error", diff_func, truth())
            #     p.show(show=False)
            #     p = Plot()
            #     p.add_func("Truth Inverse", truth.inverse, (0.,1.))
            #     p.add_func("Guess Inverse", guess.inverse, (0.,1.))
            #     p.show(show=False, append=True)
            #     break

        total_failed = sum(e > max_error for e in max_errors)
        if display or (total_failed > 0):
            print(
                f"Failed {total_failed} out of {TESTS}, or {100*total_failed/TESTS:.1f}%."
            )

            p = Plot()
            # Add the distribution of maximum errors.
            f = cdf_fit(max_errors)
            p.add_func(f"{len(max_errors)} max errors", f, f())

            # Add the distribution of errors at different values.
            for v in sorted(errors)[::-1]:
                mean = np.mean(errors[v])
                std = np.std(errors[v])
                f = cdf_fit(errors[v])
                p.add_func(
                    f"{v:.1f} errors ({mean:.1e}, {std:.1e}) {samples(N,confidence=.99):.2f}",
                    f, f())

            p.show(append=True)

            p = Plot()
            p.add_func("Truth", truth, truth())
            p.show(append=True)
Example #14
        # Get the matrices.
        train_mat = train.to_matrix()
        test_mat = test.to_matrix()
        train_x = train_mat[:, in_idxs]
        test_x = test_mat[:, in_idxs]
        m = Voronoi()
        m.fit(train_x)
        lengths += [len(ids) for ids, wts in m(test_x)]
        save(lengths)

print("len(lengths): ", len(lengths))

from util.plot import Plot
from util.stats import cdf_fit

p = Plot("Distribution of Number of Influencers",
         "Number of records used to make prediction", "CDF value")
cdf = cdf_fit(lengths)
p.add_func("lengths", cdf, cdf(), show_in_legend=False, color=p.color(1))
p.show()

exit()

from util.approximate import Voronoi, NearestNeighbor, NeuralNetwork, DecisionTree
from util.approximate import ShepMod, BoxMesh, LSHEP, Delaunay
from util.approximate.testing import test_plot

# model = NearestNeighbor()
# model = Voronoi()
# model = DecisionTree()
# model = NeuralNetwork()
Example #15
def _test_epdf_diff(display=False):
    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'epdf_diff'")

    # ----------------------------------------------------------------
    def demo(seq):
        if display:
            print('~' * 70)
            print(len(seq), seq)
            print()
        total = 0
        for vals in edf_pair_gen(seq):
            total += vals[-1]
            if display: print("[% 4s, % 3s] (%.2f) --" % vals, round(total, 3))
        if display:
            print('~' * 70)
            print()

    demo([0] + list(range(9)))
    demo(sorted(np.random.random(size=(10, ))))
    demo(list(range(9)) + [8])
    # ----------------------------------------------------------------
    # a = [1, 1, 3, 3, 5, 6]
    # b = [0, 1, 2, 3, 4]
    #
    n = 100
    if display:
        print(f"a = [0,100] ({n} samples)")
        print(f"b = [v + d for v in a]")
        print()
    for d in (.0, .01, .1, .5, .55, .9, 1., 1.5):
        a = [v / n for v in list(range(n + 1))]
        b = [v + d for v in a]
        if display:
            print(
                f"d = {d:.2f}   a~b = {epdf_diff(a,b):.2f}   b~a = {epdf_diff(b,a):.2f}   a~a = {epdf_diff(a,a):.2f}   b~b = {epdf_diff(b,b):.2f}"
            )
    if display: print()

    for d in (.0, .01, .1, .5, .55, .9, 1., 1.5):
        # Generate a random sequence.
        a = sorted((np.random.random(size=(10, ))))
        b = sorted((np.random.random(size=(1000, )) + d))
        diff = epdf_diff(a, b)
        if display:
            print(f"d = {d:.2f}", "", "[%.2f, %.2f]" % (min(a), max(a)),
                  "[%.2f, %.2f]" % (min(b), max(b)), "", diff)
    if display: print()
    # ----------------------------------------------------------------
    from util.plot import Plot

    # Generate a random sequence.
    a = sorted((np.random.random(size=(2000, ))))
    b = sorted((np.random.random(size=(2000, ))))

    p = Plot("Empirical PDF Diff Test")
    p1 = pdf_fit(a, smooth=0.00001)
    p2 = pdf_fit(b, smooth=0.00001)
    p.add_func("a", p1, p1())  #(-.5,2))
    p.add_func("b", p2, p2())  #(-.5,2))
    if display: p.show(show=False, y_range=[-.5, 1.5])

    p = Plot("Empirical CDF Diff Test")
    p1 = cdf_fit(a)
    p2 = cdf_fit(b)
    p.add_func("a", p1, p1())  #(-.5,2))
    p.add_func("b", p2, p2())  #(-.5,2))
    if display: p.show(append=True)
    # ----------------------------------------------------------------
    if display: print("-" * 70)
Example #16
def _test_fit_funcs(display=False):
    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'fit_funcs'")

    from util.plot import Plot

    # ==============================================
    #      Test the fit functions and smoothing
    # ==============================================
    # Make data
    smooth = .1
    data = np.random.normal(size=(1000, ))
    # data[:len(data)//2] += 2
    min_max = (min(data) - .1, max(data) + .1)
    if display:
        print()
        print("(min, max) : (%.2f, %.2f)" % (min_max))
        print("Normal confidence: %.2f%%" % (100 * normal_confidence(data)))
        print()
    # Make PDF fits
    pfit = pdf_fit(data)
    smooth_pfit = pdf_fit(data, smooth=smooth)
    # Make CDF fits
    cfit = cdf_fit(data)
    stdev = .05 * (cfit.max - cfit.min)
    smooth_cfit = gauss_smooth(cfit, stdev)
    stdev = smooth * (cfit.max - cfit.min)
    smoother_cfit = gauss_smooth(cfit, stdev)

    # Make PDF plots
    p = Plot()
    p.add_func("PDF", pfit, min_max)
    # Make smooth PDF
    p.add_func("Smooth PDF", smooth_pfit, min_max)
    if display: p.show(show=False)
    # Make CDF plots
    p = Plot()
    p.add_func("CDF", cfit, min_max)
    # Make CDF whose derivative is the default PDF.
    p.add_func("CDF for default PDF", smooth_cfit, min_max)
    # Make smoother cdf.
    p.add_func("Smooth CDF", smoother_cfit, min_max)
    if display: p.show(append=True)
    # Make an animation transitioning between two normal distributions.
    np.random.seed(0)
    d1 = np.random.normal(0, .5, size=(500, ))
    d2 = np.random.normal(3, 1, size=(500, ))
    f1 = cdf_fit(d1, smooth=.1)
    f2 = cdf_fit(d2, smooth=.1)
    p = Plot()
    for w in np.linspace(0, 1, 21):
        w = round(w, 2)
        f3 = w * f1 + (1 - w) * f2
        p.add_func("0, 1/2", f1, f1(), frame=w)
        p.add_func("3, 1", f2, f2(), frame=w)
        p.add_func("weighted sum", f3, f3(), frame=w)
    if display: p.show(bounce=True, append=True)
    if display: print()
    if display: print("-" * 70)
Example #17
# PCA versus MPCA-10
colors = {}
plots = []
for data_name in ("yelp", "mnist", "cifar"):
    for sample_ratio in (1, ):
        for a in algs:
            # Skip uninteresting sets.
            if (data_name == "yelp") and (a != "KNN10"): continue
            if (data_name == "mnist") and (a != "KNN1"): continue
            if (data_name == "cifar") and (a != "KNN10"): continue
            # Break up data by dimension
            y_axis = "Count" if data_name == "yelp" else ""
            p = Plot("",
                     data_name + " errors",
                     y_axis,
                     font_size=20,
                     font_family="times")
            plots.append(p)
            for method in ("MPCA", "PCA"):
                for dim in dims:
                    # Color by method
                    if method not in colors:
                        colors[method] = p.color(len(colors))
                    color = colors[method]
                    # Reduce data
                    d = all_data[all_data["Data"] == data_name]
                    d = d[d["Method"] == method]
                    d = d[d["Sample Ratio"] == sample_ratio]
                    d = d[d["Algorithm"] == a]
                    d = d[d["Dimension"] == dim]
Example #18
print("Components")
print(components)
print()
print("Values")
print(values)
print()
print("Conditioner")
print(conditioner)
print()

# Generate a plot of the response surfaces.
from util.plot import Plot, multiplot
print("Generating plots of source function..")

# Add function 1
p1 = Plot(font_family="times")
p1.add("Points", *(points.T), response, opacity=.8)
p1.add_func("Surface", func, [-1,1], [-1,1], plot_points=1000, color=p1.color(1))

if GENERATE_APPROXIMATIONS:
    from util.algorithms import NearestNeighbor
    model = NearestNeighbor()
    model.fit(points, response)
    p1.add_func("Unconditioned Approximation", model, [-1,1], [-1,1],
                mode="markers", opacity=.8)
    # Generate a conditioned approximation
    model = NearestNeighbor()
    model.fit(np.matmul(points, conditioner), response)
    approx = lambda x: model(np.matmul(x, conditioner))
    p1.add_func("Best Approximation", approx, [-1,1], [-1,1],
                mode="markers", opacity=.8)
Example #19
 if (len(algorithms) > 0): d = d[d["Algorithm"] == algorithms]
 print(d)
 d.save("all-data.pkl")
 print()
 config_cols = ["Function", "Dimension"]
 configs = d[:, config_cols].unique()
 configs.sort()
 configs.max_display = float('inf')
 print(configs)
 for conf in configs:
     plots = []
     for S in sorted(set(d["SNR"])):
         d_conf = d[d[:, config_cols + ["SNR"]] == (list(conf) + [S])]
         F, D = conf
         p = Plot(f"{D}D '{F}' function",
                  f"SNR = {S}",
                  "|<i>error</i>|",
                  font_family="times")  #b"log(|<i>error</i>|)")
         seen = {}
         for algorithm in sorted(set(d["Algorithm"])):
             d_alg = d_conf[d_conf["Algorithm"] == algorithm]
             print(d_alg)
             values = []
             locations = []
             # Sort the algorithm data by the training set size.
             d_alg.sort(key=lambda i: i["Train"])
             for row in d_alg:
                 if (row["Train"] < row["Dimension"]): continue
                 locations.append("N = " + str(row["Train"]))
                 values.append([v for v in row["Abs Errors"]])
             p.add_box(algorithm,
                       values,
Example #20
if __name__ == "__main__":
    from util.plot import Plot
    from util.approximate.testing import test_plot
    model = BoxMesh()

    p,x,y = test_plot(model)
    p.plot(show=False)
    x = model.points.T
    print(x)


    # ==============================================
    #      Display the boxes that were computed     
    # ==============================================
    p = Plot()
    # Get the extreme points.
    min_x = np.min(x[:,0]) - .1
    max_x = np.max(x[:,0]) + .1
    min_y = np.min(x[:,1]) - .1
    max_y = np.max(x[:,1]) + .1
    # Get the box edges (about the centers).
    boxes = model.box_sizes.T
    boxes[:,0] = np.where(boxes[:,0] != -1, boxes[:,0], min_x)
    boxes[:,1] = np.where(boxes[:,1] != -1, boxes[:,1], min_y)
    boxes[:,2] = np.where(boxes[:,2] != -1, boxes[:,2], max_x)
    boxes[:,3] = np.where(boxes[:,3] != -1, boxes[:,3], max_y)
    # Add boxes to plot.
    for i,(pt,bounds) in enumerate(zip(x, boxes)):
        shifts = np.array([[- boxes[i,0], - boxes[i, 1]],
                           [  boxes[i,2], - boxes[i, 1]],
Example #21
def test_random_cdf(display=True):
    if not display: return

    from util.plot import Plot
    p = Plot("")
    for nodes in range(1, 100):
        f = cdf()
        p.add_func(f"Random PDF {nodes}",
                   f.derivative,
                   f(),
                   color=p.color(nodes - 1),
                   group=nodes)
        # p.add(f"Points {nodes}", *list(zip(*f.nodes)),
        #       color=p.color(nodes-1,alpha=.3), group=nodes)
    p.show(show=False)

    print()

    p = Plot("")
    for nodes in range(1, 30):
        f = cdf(nodes=nodes)
        p.add_func(f"Random {nodes} node CDF",
                   f,
                   f(),
                   color=p.color(nodes - 1),
                   group=nodes)
        p.add(f"Points {nodes}",
              *list(zip(*f.nodes)),
              color=p.color(nodes - 1, alpha=.3),
              group=nodes)
    p.show(append=True)
Example #22
result = ag.aggregate_dataset(dataset, granularity)
print('done.')
print('imputing nan and normalizing set...', end=' ')
result_nan = ag.impute_nan(result)
result_norm = ag.normalize_data(result_nan)
print('done.')
print('constructing training and test set...', end=' ')
#input_attributes, input_training, input_test, output_attributes, output_training, output_test = ag.identify_rnn_dataset(result_norm, 'AS14.01', 0.5, ['mood'], ['circumplex.arousal', 'circumplex.valence'])
input_attributes, input_training, input_test, output_attributes, output_training, output_test = ag.identify_regression_dataset(result_norm, 'AS14.01', 0.5, ['mood'], ['circumplex.arousal', 'circumplex.valence'])
print(' done.')

print('regression...', end=' ')
regr = Regression()
regr.train_regression(input_training, input_attributes, output_training, output_attributes)
Y_regression = regr.test_regression(input_test, output_test)
print('done')

print('echo state network')
esn = EchoStateNetwork()
esn.initializeNetwork(len(input_attributes), len(output_attributes), 10, True)
print('training the network...', end=' ')
esn.trainNetwork(input_training, output_training)
print('done.')
print('testing the network...', end=' ')
Y_esn = esn.testNetwork(input_test, output_test)
print('done.')

p1 = Plot()
p1.plot_results(output_test, Y_regression, output_attributes, 'regression')
p2 = Plot()
p2.plot_results(output_test, Y_esn, output_attributes, 'ESN')
Example #23
def modes(data, confidence=.99, tol=1/1000):
    from util.optimize import zero
    num_samples = len(data)
    error = 2*samples(num_samples, confidence=confidence)
    cdf = cdf_fit(data, fit="cubic")
    print("error: ",error)
    # Find all of the zeros of the derivative (mode centers / dividers)
    checks = np.linspace(cdf.min, cdf.max, int(np.ceil(1/tol)))
    second_deriv = cdf.derivative.derivative
    deriv_evals = second_deriv(checks)
    modes = [i for i in range(1, len(deriv_evals)) if
             (deriv_evals[i-1] * deriv_evals[i] <= 0) and
             (deriv_evals[i-1] >= deriv_evals[i])]
    antimodes = [i for i in range(1, len(deriv_evals)) if
                 (deriv_evals[i-1] * deriv_evals[i] <= 0) and
                 (deriv_evals[i-1] < deriv_evals[i])]
    # Compute exact modes and antimodes using a zero-finding function.
    modes = [zero(second_deriv, checks[i-1], checks[i]) for i in modes]
    antimodes = [zero(second_deriv, checks[i-1], checks[i]) for i in antimodes]
    original_antimodes = antimodes[:]
    # Fix the bounds of the antimodes to match the distribution.
    if modes[0] < antimodes[0]: antimodes    = [cdf.min] + antimodes
    else:                       antimodes[0] =  cdf.min
    if modes[-1] > antimodes[-1]: antimodes    += [cdf.max]
    else:                         antimodes[-1] =  cdf.max
    # Make sure that there is an antimode between each mode.
    for i in range(len(modes)):
        if antimodes[i] > modes[i]:
            # Update the next antimode with this one (as long as it's not the max).
            if (i < len(modes)-1):
                antimodes[i+1] = (antimodes[i] + antimodes[i+1]) / 2
            # Always update this antimode to properly be LESS than the mode.
            antimodes[i] = (modes[i] + modes[i-1]) / 2
    print("len(modes):     ",len(modes))
    print("len(antimodes): ",len(antimodes))
    # Define a function that counts the number of modes that are too small.
    def count_too_small():
        return sum( (cdf(upp) - cdf(low)) < error for (low,upp) in
                    zip(antimodes[:-1],antimodes[1:]) )
    # Show PDF
    from util.plot import Plot
    p = Plot()
    pdf = pdf_fit(cdf.inverse(np.random.random((1000,))))

    # Loop until all modes are big enough to be accepted given error tolerance.
    step = 1
    while count_too_small() > 0:
        print()
        print("step: ",step, (len(modes), len(antimodes)))
        f = len(modes)
        p.add_func("PDF", pdf, cdf(), color=p.color(1), frame=f, show_in_legend=(step==1))
        # Generate the mode lines.
        mode_lines = [[],[]]
        for z in modes:
            mode_lines[0] += [z,z,None]
            mode_lines[1] += [0,.2,None]
        p.add("modes", *mode_lines, color=p.color(0), mode="lines",
              group="modes", show_in_legend=(z==modes[0] and step==1), frame=f)
        # Generate the antimode lines.
        anti_lines = [[],[]]
        for z in antimodes:
            anti_lines[0] += [z,z,None]
            anti_lines[1] += [0,.2,None]
        p.add("seperator", *anti_lines, color=p.color(3,alpha=.3), mode="lines",
              group="seperator", show_in_legend=(z==antimodes[0] and (step==1)), frame=f)
        step += 1


        # Compute the densities and the sizes of each mode.
        sizes = [cdf(antimodes[i+1]) - cdf(antimodes[i])
                 for i in range(len(modes))]
        densities = [(cdf(antimodes[i+1]) - cdf(antimodes[i])) /
                     (antimodes[i+1] - antimodes[i])
                     for i in range(len(modes))]
        # Compute those modes that have neighbors that are too small.
        to_grow = [i for i in range(len(modes))
                   if (i > 0 and sizes[i-1] < error)
                   or (i < len(sizes)-1 and sizes[i+1] < error)]
        if len(to_grow) == 0: break
        print("modes:     ",modes)
        print("antimodes: ",antimodes)
        print("sizes:     ",sizes)
        print("densities: ",densities)
        print("to_grow:   ",to_grow)
        # Sort the modes to be grown by their size, largest first.
        to_grow = sorted(to_grow, key=lambda i: -densities[i])
        # Keep track of the modes that have already been absorbed.
        preference = {}
        taken = set()
        conflicts = set()
        modes_to_remove = []
        anti_to_remove  = []
        while len(to_grow) > 0:
            i = to_grow.pop(0)
            # Pick which of the adjacent nodes to absorb.
            to_absorb = None
            if (i < len(modes)-1) and (sizes[i+1] < error):
                direction = 1
                to_absorb = i + 1
            if (i > 0) and (sizes[i-1] < error):
                # If there wasn't a right mode, take the left by default.
                if (to_absorb is None):
                    direction = -1
                    to_absorb = i - 1
                # Otherwise pick whichever neighboring mode is closer.
                elif (abs(modes[i-1]-modes[i]) < abs(modes[i+1]-modes[i])):
                    # The left mode is closer, so absorb on that side.
                    direction = -1
                    to_absorb = i - 1
            # If there is no good option to absorb, then skip.
            if (to_absorb in preference): continue
            # Record the preferred pick of this mode.
            preference[i] = (direction, to_absorb)
            # If this mode is already absorbed, then add it to conflict list.
            if to_absorb in taken: conflicts.add( to_absorb )
            # Remove the ability to 'absorb' from modes getting absorbed.
            if to_absorb in to_grow: to_grow.remove(to_absorb)
            # Add the absorbed value to the set of "taken" modes.
            taken.add(to_absorb)

        # Resolve conflicts by giving absorbed modes to closer modes.
        for i in sorted(conflicts, key=lambda i: -densities[i]):
            if (abs(modes[i-1] - modes[i]) < abs(modes[i+1] - modes[i])):
                preference.pop(i+1)
            else:
                preference.pop(i-1)

        # Update the boundaries
        for i in sorted(preference, key=lambda i: -densities[i]):
            direction, to_absorb = preference[i]
            # Update the boundary of this mode.
            antimodes[i+(direction>0)] = antimodes[to_absorb + (direction>0)]
            # Update the "to_remove" lists.
            anti_to_remove.append( antimodes[to_absorb + (direction>0)] )
            modes_to_remove.append( modes[to_absorb] )
        # Remove the modes and antimodes that were merged.
        for m in modes_to_remove: modes.remove(m)
        for a in anti_to_remove:  antimodes.remove(a)
        # Update the remaining antimodes to be nearest to the middle
        # of the remaining modes (making them representative dividers).
        for i in range(len(modes)-1):
            middle = (modes[i] + modes[i+1]) / 2
            closest = np.argmin([abs(oam - middle) for oam in original_antimodes])
            antimodes[i+1] = original_antimodes[closest]



    f = len(modes)
    p.add_func("PDF", pdf, cdf(), color=p.color(1), frame=f, show_in_legend=(step==1))
    # Generate the mode lines.
    mode_lines = [[],[]]
    for z in modes:
        mode_lines[0] += [z,z,None]
        mode_lines[1] += [0,.2,None]
    p.add("modes", *mode_lines, color=p.color(0), mode="lines",
          group="modes", show_in_legend=(z==modes[0] and step==1), frame=f)
    # Generate the antimode lines.
    anti_lines = [[],[]]
    for z in antimodes:
        anti_lines[0] += [z,z,None]
        anti_lines[1] += [0,.2,None]
    p.add("seperator", *anti_lines, color=p.color(3,alpha=.3), mode="lines",
          group="seperator", show_in_legend=(z==antimodes[0] and (step==1)), frame=f)

    p.show(append=True, y_range=[0,.15])

    p = Plot()
    p.add_func("CDF", cdf, cdf(), color=p.color(1))
    for z in modes:
        p.add("modes", [z,z], [0,1], color=p.color(0), mode="lines",
              group="modes", show_in_legend=(z==modes[0]))
    for z in antimodes:
        p.add("seperator", [z,z], [0,1], color=p.color(3), mode="lines",
              group="sep", show_in_legend=(z==antimodes[0]))
    p.show(append=True)
Example #24
 def init_layout(self):
     Plot.init_layout(self)
     self.add_wdgts()
     self.fig.canvas.mpl_connect('button_press_event', self.double_button_cb)
Example #25
 def plot_cb3(self):
     Plot.plot_cb3(self, io_start_after_idle=self.start)
     #self.root.after_idle(self.io.start)
     return True
Example #26
def main():
    plt = Plot()
    analyze_hidden_units(plt)
    analyze_learning_rate(plt)
    analyze_momentum(plt)
    analyze_batch_size(plt)
Example #27
def modes(data, confidence=.99, tol=1/1000):
    from util.optimize import zero
    num_samples = len(data)
    error = 2*samples(num_samples, confidence=confidence)
    print()
    print("Smallest allowed mode: ",error)
    print()
    # Get the CDF points (known to be true based on data).
    x, y = cdf_points(data)
    cdf = cdf_fit((x,y), fit="linear")
    x, y = x[1:], y[1:]
    # Generate the candidate break points based on linear interpolation.
    slopes = [(y[i+1] - y[i]) / (x[i+1] - x[i]) for i in range(len(x)-1)]
    candidates = [i for i in range(1,len(slopes)-1)
                  if (slopes[i] < slopes[i-1]) and (slopes[i] < slopes[i+1])]
    # Sort candidates by their 'width', the widest being the obvious divisors.
    candidates = sorted(candidates, key=lambda i: -(x[i+1] - x[i]))
    print("candidates: ",candidates)
    print("slopes:     ",[slopes[c] for c in candidates])
    # Break the data at candidates as much as can be done with confidence.
    breaks = [min(x), max(x)]
    sizes = [1.]
    chosen = []
    print()
    print("breaks: ",breaks)
    print("sizes:  ",sizes)
    print("chosen: ",chosen)
    print()
    # Loop until there are no candidate break points left.
    while len(candidates) > 0:
        new_break_idx = candidates.pop(0)
        new_break = (x[new_break_idx + 1] + x[new_break_idx]) / 2
        b_idx = np.searchsorted(breaks, new_break, side="right")
        # Compute the CDF values at the upper, break, and lower positions.
        upp = cdf(breaks[b_idx]) if (b_idx < len(breaks)) else cdf.max
        mid = cdf(new_break)
        low = cdf(breaks[b_idx-1])
        print()
        print("new_break: ", new_break)
        print("b_idx: ",b_idx)
        print("  upp: ",upp, upp - mid)
        print("  mid: ",mid)
        print("  low: ",low, mid - low)
        # Compute the size of the smallest mode resulting from the break.
        smallest_result = min(upp - mid, mid - low)
        # Skip the break if it makes a mode smaller than error.
        if smallest_result < error: continue

        print()
        print("Num_modes: ", len(sizes))
        print("breaks:    ", breaks)
        print("sizes:     ", sizes)
        print()

        # Update the "breaks" and "sizes" lists.
        breaks.insert(b_idx, new_break)
        sizes.insert(b_idx, upp - mid)
        sizes[b_idx-1] = mid - low
        chosen.append(new_break_idx)
        
    # From the "breaks" and "sizes", construct a list of "modes".
    # Consider the most dense point between breaks the "mode".
    modes = []
    for i in range(len(chosen)):
        low = 0 if (i == 0) else chosen[i-1]
        upp = len(slopes)-1 if (i == len(chosen)-1) else chosen[i+1]
        mode_idx = low + np.argmax(slopes[low:upp])
        modes.append( (x[mode_idx+1] + x[mode_idx]) / 2 )


    from util.plot import Plot
    p = Plot()
    pdf = pdf_fit(data)
    p.add_func("PDF", pdf, pdf(), color=p.color(1))
    # Generate the mode lines.
    mode_lines = [[],[]]
    for z in modes:
        mode_lines[0] += [z,z,None]
        mode_lines[1] += [0,.2,None]
    p.add("modes", *mode_lines, color=p.color(0), mode="lines", group="modes")
    # Generate the antimode lines.
    break_lines = [[],[]]
    for z in breaks:
        break_lines[0] += [z,z,None]
        break_lines[1] += [0,.2,None]
    p.add("seperator", *break_lines, color=p.color(3,alpha=.3), mode="lines", group="seperator")
    p.show()
    # Show CDF
    p = Plot()
    pdf = pdf_fit(data)
    p.add_func("CDF", cdf, cdf(), color=p.color(1))
    # Generate the mode lines.
    mode_lines = [[],[]]
    for z in modes:
        mode_lines[0] += [z,z,None]
        mode_lines[1] += [0,1,None]
    p.add("modes", *mode_lines, color=p.color(0), mode="lines", group="modes")
    # Generate the antimode lines.
    break_lines = [[],[]]
    for z in breaks:
        break_lines[0] += [z,z,None]
        break_lines[1] += [0,1,None]
    p.add("seperator", *break_lines, color=p.color(3,alpha=.3), mode="lines", group="seperator")
    p.show(append=True)
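A hypothetical usage sketch for the modes function above, assuming its definition and the util.stats helpers it calls (cdf_points, cdf_fit, pdf_fit, samples) are in scope:

import numpy as np

np.random.seed(0)
# Hypothetical call; two well-separated normals, so the chosen breaks should
# isolate one mode near 0 and one near 5.
data = np.concatenate((np.random.normal(0, .5, size=500),
                       np.random.normal(5, .5, size=500)))
modes(data, confidence=.99)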