def _test_mesh(show=True):
    from weakly_admissable_meshes import polar_wam, box_wam
    from util.plot import Plot
    p = Plot("Weakly admissible mesh")
    p.add("Polar", *(polar_wam(8).T), color=p.color(0))
    p.add("Box", *(box_wam(8).T), color=p.color(1))
    if show:
        p.show(width=1000 * .7, height=900 * .7, append=True)
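# The construction behind `polar_wam` is not shown in this file. For
# reference, one standard weakly admissible mesh for the unit disk crosses
# Chebyshev-Lobatto radii with equally spaced angles. The sketch below is an
# assumption for illustration only, not the actual `weakly_admissable_meshes`
# source.
def _polar_wam_sketch(n):
    import numpy as np
    r = np.cos(np.pi * np.arange(n + 1) / n)   # Chebyshev-Lobatto radii in [-1, 1].
    t = np.pi * np.arange(n + 1) / (n + 1)     # Equally spaced angles in [0, pi).
    R, T = np.meshgrid(r, t)
    # Convert the (radius, angle) grid to points in the unit disk.
    return np.column_stack((R.flatten() * np.cos(T.flatten()),
                            R.flatten() * np.sin(T.flatten())))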
def __init__(self, dev, odpch='1', sign=True):
    self.odpch = odpch
    self.start = False
    self.sign = sign
    data = self.get_data(dev)
    data.dev = dev
    Plot.__init__(self, 'fx', data=data)
    self.root.title(self.odpcmd)
def _test_Distribution(display=False):
    # Verify that the distribution works under a weighted sum.
    import numpy as np
    d = []
    count = 3
    scale = 2**(30)
    for i in range(count):
        pts = np.random.random(20) * np.random.random() * scale
        d.append(cdf_fit(pts, fit="cubic"))
    wts = np.random.random((count,))
    wts /= sum(wts)
    min_max = (-scale / 3, scale + scale / 3)
    if display:
        print(sum(dist * w for (dist, w) in zip(d, wts)))
        from util.plot import Plot
        p = Plot("Weighted cubic fit")
        out = sum(dist * w for (dist, w) in zip(d, wts))
        p.add_func("Weighted sum", out, min_max)
        for i, (dist, w) in enumerate(zip(d, wts)):
            p.add_func(f"Dist {i+1} -- {round(w,3)}", dist, min_max, opacity=.3)
        p.show()
def test_support(model, low=0, upp=1, plot_points=3000, p=None,
                 fun=lambda x: 3 * x[0] + .5 * np.cos(8 * x[0]) + np.sin(5 * x[-1]),
                 N=20, D=2, random=True, seed=0):
    # Force D to be 2.
    D = 2
    np.random.seed(seed)
    # Generate the x points.
    if random:
        x = np.random.random(size=(N, D))
    else:
        N = int(round(N**(1 / D)))
        x = np.array([r.flatten() for r in
                      np.meshgrid(*[np.linspace(0, 1, N)] * D)]).T
    # Calculate the response values.
    y = np.array([fun(v) for v in x])
    # Fit the model to the points.
    model.fit(x, y)
    # Generate the plot.
    from util.plotly import Plot
    if (p is None):
        p = Plot()
    p.add("Training Points", *x.T, color=p.color(len(x)))
    for i in range(len(x)):
        name = f"{i+1}"
        p.add(name, [x[i][0]], [x[i][1]], group=name)
        # Check whether point `i` is in the support of the prediction at `pt`.
        # (Bind `i` as a default so the closure is safe if evaluation is deferred.)
        def supported(pt, i=i):
            pts, wts = model.points_and_weights(pt)
            return (i in pts)
        p.add_region(name + " region", supported, *([(low - .1, upp + .1)] * D),
                     color=p.color(p.color_num), group=name,
                     plot_points=plot_points, show_in_legend=False)
    # p.add_func(str(model), model, *([(low-.1,upp+.1)]*D),
    #            plot_points=plot_points, vectorized=True)
    return p, x, y
def test_latin(display=False):
    from util.random import latin
    print("Testing latin..", end=" ")
    if display:
        print()
    if display:
        from util.plot import Plot
        D = 2
        N = 400
        p = Plot("Latin hyper cube design")
        # Add grid lines for the cube.
        for i in range(N + 1):
            p.add(f"gly {i}", [0, 1], [i / N] * 2, mode="lines", group="grid",
                  color="rgba(.6,.1,.1,.2)", show_in_legend=(i == 0))
            p.add(f"glx {i}", [i / N] * 2, [0, 1], mode="lines", group="grid",
                  color="rgba(.6,.1,.1,.2)", show_in_legend=False)
        # Add the random points.
        p.add("Points", *(latin(N, D).T))
        p.show(file_name="/Users/thomaslux/Desktop/lhc.html")
    from numpy import sort
    N = 10000
    D = 1000
    pts = latin(N, D)
    for i in range(D):
        values = sort(pts[:, i])
        max_adj_diff = max(abs(values[:-1] - values[1:]))
        if max_adj_diff >= (2 / N):
            print("BAD!", " ", i, max_adj_diff, 1 / N, 2 / N)
        assert (max_adj_diff < (2 / N))
    print("passed.")
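# For reference, a minimal numpy-only Latin hypercube sampler (an assumed
# stand-in for `util.random.latin`, whose implementation may differ). It
# makes the property tested above explicit: every column places exactly one
# point in each of the N equal-width bins, so adjacent sorted values within
# a column can never differ by 2/N or more.
def _latin_sketch(N, D, seed=None):
    import numpy as np
    rng = np.random.default_rng(seed)
    # One point per bin: the bin index plus a uniform jitter inside the bin.
    pts = (np.arange(N)[:, None] + rng.random((N, D))) / N
    # Shuffle each column independently to decorrelate the dimensions.
    for j in range(D):
        rng.shuffle(pts[:, j])
    return pts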
def test(self, start, end):
    stock_info = {'stock': self.__stock_code, 'start': start, 'end': end}
    self.__mq.send(json.dumps(stock_info))
    time.sleep(1)
    msg = self.__mq.recv()
    datas = json.loads(msg)
    _logger.info(datas)
    if datas:
        plot = Plot(self.__stock_code, datas)
        current_time = datetime.now().strftime('%Y%m%d%H%M%S')
        filename = self.__stock_code + "_" + current_time
        plot.plot(filename)
def rank_by_slope(components, points, values, metric,
                  max_pairs=10000, display=True):
    # Compute the magnitudes using average metric slope.
    if display:
        print(" computing average metric slope per component.. ",
              end="", flush=True)
    avg_slope = np.zeros(len(components))
    update = end = ""
    for i in range(len(components)):
        update = "\b" * len(end)
        end = f"{i+1} of {len(components)}"
        if display:
            print(update, end=end, flush=True)
        x = np.matmul(points, components[i])
        for (p1, p2) in gen_random_pairs(len(x), count=max_pairs):
            avg_slope[i] += metric(values[p1], values[p2]) / abs(x[p1] - x[p2])
    # NOTE: Leftover debugging block; it plots the slopes and halts here.
    print("Results so far:")
    from util.plot import Plot
    p = Plot()
    for (comp, slope) in zip(components, avg_slope):
        print(comp, slope)
        x, y = np.matmul(points, comp), values
        p.add(f"Points: {slope:.2f}", x, y)
    print()
    p.show()
    exit()
    # Invert the total average metric slope.
    if (min(avg_slope) <= 0.0):
        avg_slope = np.where(avg_slope == 0, 1., 0.)
    else:
        avg_slope = 1 / avg_slope
    avg_slope /= np.sum(avg_slope)
    # Re-order the components according to inverse average metric slope.
    order = np.argsort(avg_slope)[::-1]
    # If they are not already ordered correctly, re-order the returns.
    if not all(order[i] < order[i + 1] for i in range(len(order) - 1)):
        if display:
            print(" reordering components by average metric slope..",
                  end="\r", flush=True)
        components, avg_slope = components[order], avg_slope[order]
    if display:
        print(" ", end="\r", flush=True)
    return components, avg_slope
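# `gen_random_pairs` is used above but not defined in this file. A plausible
# sketch (hypothetical; the real helper may differ): yield up to `count`
# random index pairs (p1, p2) with p1 != p2, or every unique pair when the
# total number of pairs is small enough to enumerate.
def gen_random_pairs_sketch(length, count=None):
    import numpy as np
    total = length * (length - 1) // 2
    if (count is None) or (count >= total):
        # Few enough pairs to enumerate them all.
        for p1 in range(length):
            for p2 in range(p1 + 1, length):
                yield (p1, p2)
    else:
        # Sample index pairs at random (a pair may repeat).
        for _ in range(count):
            p1, p2 = np.random.choice(length, size=2, replace=False)
            yield (int(p1), int(p2))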
def test_plot(model, low=0, upp=1, plot_points=3000, p=None,
              fun=lambda x: 3 * x[0] + .5 * np.cos(8 * x[0]) + np.sin(5 * x[-1]),
              N=20, D=2, noise=0., random=True, seed=0,
              x=None, y=None, classifier=False):
    np.random.seed(seed)
    provided_points = (x is not None) and (y is not None)
    if (x is None):
        # Generate the x points.
        if random:
            x = np.random.random(size=(N, D))
        else:
            N = int(round(N**(1 / D)))
            x = np.array([r.flatten() for r in
                          np.meshgrid(*[np.linspace(0, 1, N)] * D)]).T
    if (y is None):
        # Calculate the response values.
        y = np.array([round(fun(v) + np.random.random() * noise) if classifier
                      else fun(v) + np.random.random() * noise
                      for v in x])
    # Fit the model to the points.
    model.fit(x, y, classifier=classifier)
    # Generate the plot.
    from util.plot import Plot
    if (p is None):
        p = Plot()
    if not provided_points:
        p.add("Training Points", *x.T, y)
    p.add_func(str(model), model, *([(low - .1, upp + .1)] * D),
               plot_points=plot_points, vectorized=True)
    # p.add_func("truth", fun, *([(low-.1,upp+.1)]*D),
    #            plot_points=plot_points)
    return p, x, y
def _test_fekete(max_n=2000, max_d=1000, start_n=1000, start_d=10,
                 steps=2, show=True):
    if show:
        # Generate some points in 3 dimensions for showing.
        pts = fekete_points(47, 3)
        from util.plot import Plot
        p = Plot("Fekete")
        p.add("fekete points", *(pts.T))
        p.show(append=True)
    from util.system import Timer
    t = Timer()
    steps -= 1
    for n in range(start_n, max_n + 1, (max_n - start_n - 1) // steps):
        for d in range(start_d, max_d + 1, (max_d - start_d - 1) // steps):
            t.start()
            print()
            print('-' * 70)
            print(f" {n}, {d}", flush=True)
            pts = fekete_points(n, d)
            print(f" {pts.shape}")
            print(f" {t()} seconds", flush=True)
            t.stop()
            print()
        print()
else:
    # Two normals.
    size = 200
    points = np.random.normal(size=(size, 2))
    points[:, 0] *= 1 / 3
    # points += np.array([1.,-2.])
    points = np.concatenate((points, np.random.normal(0, 2, size=(size, 2))))
    points[-size:, 1] *= 1 / 3

dim = 2
vecs = np.random.random(size=(dim, points.shape[1]))
# Normalize the vectors.
for i in range(vecs.shape[0]):
    vecs[i] /= np.linalg.norm(vecs[i], ord=2)
p = Plot()
points = points - np.mean(points, axis=0)

# Given a vector, return all points flipped onto the same side of the vector.
def flipped_points(vec):
    # Get the signs of the points relative to the vector.
    signs = np.matmul(points, vec)
    signs[signs == 0] = 1.
    signs /= abs(signs)
    return (points.T * signs).T

# Update each of the vectors to be the average of flipped points.
def update_vecs():
def _test_cdf_fit():
    import numpy as np
    from util.math import SMALL
    from util.random import cdf
    from util.plot import Plot
    n = 10000
    f = cdf(nodes=5, fit="linear")
    sample = f.inverse(np.random.random((n,)))
    g = cdf_fit(sample, fit=None)
    print("Fit first CDF point: ", g.nodes[0])
    print("Fit second CDF point:", g.nodes[1])
    print("Fit last CDF point:  ", g.nodes[-1])
    print("Expected max error:", samples(n, confidence=.99))
    print("Actual max error:  ", f - g)
    min_max = (f.min - SMALL, f.max + SMALL)
    p = Plot()
    p.add_func("Truth", f, min_max)
    p.add_func("EDF", g, min_max)
    p.show(show=False, height=700, width=800)
    p = Plot()
    p.add_func("Truth", f.inverse, min_max)
    p.add_func("EDF", g.inverse, min_max)
    p.show(append=True, height=700, width=800)
def _test_mpca(display=False):
    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'mpca'")
    GENERATE_APPROXIMATIONS = True
    BIG_PLOTS = False
    SHOW_2D_POINTS = False

    import random
    # Generate some points for testing.
    np.random.seed(4)  # 4
    random.seed(0)  # 0
    rgen = np.random.RandomState(1)  # 10
    n = 100
    points = (rgen.rand(n, 2) - .5) * 2
    # points *= np.array([.5, 1.])

    # Create some testing functions (for learning different behaviors).
    funcs = [
        lambda x: x[1],                  # Linear on y
        lambda x: abs(x[0] + x[1]),      # "V" function on 1:1 diagonal
        lambda x: abs(2 * x[0] + x[1]),  # "V" function on 2:1 diagonal
        lambda x: x[0]**2,               # Quadratic on x
        lambda x: (x[0] + x[1])**2,      # Quadratic on 1:1 diagonal
        lambda x: (2 * x[0] + x[1])**3,  # Cubic on 2:1 diagonal
        lambda x: (x[0]**3),             # Cubic on x
        lambda x: rgen.rand(),           # Random function
    ]
    # Calculate the response values associated with each function.
    responses = np.vstack(tuple(tuple(map(f, points)) for f in funcs)).T
    # Reduce to just the first function.
    choice = 3
    func = funcs[choice]
    response = responses[:, choice]

    # Run the principle response analysis function.
    components, values = mpca(points, response)
    values /= np.sum(values)
    conditioner = np.matmul(components, np.diag(values))
    if display:
        print()
        print("Components")
        print(components)
        print()
        print("Values")
        print(values)
        print()
        print("Conditioner")
        print(conditioner)
        print()

    components = np.array([[1.0, 0.], [0., 1.]])
    values = normalize_error(np.matmul(points, components.T), response, abs_diff)
    values /= np.sum(values)
    if display:
        print()
        print()
        print("True Components")
        print(components)
        print()
        print("True Values")
        print(values)
        print()

    # Generate a plot of the response surfaces.
    from util.plot import Plot, multiplot
    if display:
        print("Generating plots of source function..")

    # Add function 1.
    p1 = Plot()
    p1.add("Points", *(points.T), response, opacity=.8)
    p1.add_func("Surface", func, [-1, 1], [-1, 1], plot_points=100)

    if GENERATE_APPROXIMATIONS:
        from util.approximate import NearestNeighbor, Delaunay, condition
        p = Plot()
        # Add the source points and a Delaunay fit.
        p.add("Points", *(points.T), response, opacity=.8)
        p.add_func("Truth", func, [-1, 1], [-1, 1])
        # Add an unconditioned nearest neighbor fit.
        model = NearestNeighbor()
        model.fit(points, response)
        p.add_func("Unconditioned Approximation", model, [-1, 1], [-1, 1],
                   mode="markers", opacity=.8)
        # Generate a conditioned approximation.
        model = condition(NearestNeighbor, method="MPCA")()
        model.fit(points, response)
        p.add_func("Best Approximation", model, [-1, 1], [-1, 1],
                   mode="markers", opacity=.8)
        if display:
            p.plot(show=False, height=400, width=650)

    if display:
        print("Generating metric principle components..")

    # Return the between vectors and the differences between those points.
    def between(x, y, unique=True):
        vecs = []
        diffs = []
        for i1 in range(x.shape[0]):
            start = i1 + 1 if unique else 0
            for i2 in range(start, x.shape[0]):
                if (i1 == i2):
                    continue
                vecs.append(x[i2] - x[i1])
                diffs.append(y[i2] - y[i1])
        return np.array(vecs), np.array(diffs)

    # Plot the between slopes to verify they are working.
    # Calculate the between slopes.
    vecs, diffs = between(points, response)
    vec_lengths = np.sqrt(np.sum(vecs**2, axis=1))
    between_slopes = diffs / vec_lengths
    bs = ((vecs.T / vec_lengths) * between_slopes).T
    # Extract a random subset for display.
    size = 100
    random_subset = np.arange(len(bs))
    rgen.shuffle(random_subset)
    bs = bs[random_subset[:size], :]
    # Normalize the between slopes so they fit on the plot.
    max_bs_len = np.max(np.sqrt(np.sum(bs**2, axis=1)))
    bs /= max_bs_len
    # Get a random subset of the between slopes and plot them.
    p2 = Plot("", "Metric PCA on Z", "")
    p2.add("Between Slopes", *(bs.T), color=p2.color(4, alpha=.4))

    if SHOW_2D_POINTS:
        # Add the points and transformed points for demonstration.
        new_pts = np.matmul(np.matmul(conditioner, points),
                            np.linalg.inv(components))
        p2.add("Original Points", *(points.T))
        p2.add("Transformed Points", *(new_pts.T), color=p2.color(6, alpha=.7))

    # Add the principle response components.
    for i, (vec, m) in enumerate(zip(components, values)):
        vec = vec * m
        p2.add(f"PC {i+1}", [0, vec[0]], [0, vec[1]], mode="lines")
        ax, ay = (vec / sum(vec**2)**.5) * 3
        p2.add_annotation(f"{m:.2f}", vec[0], vec[1])

    p3 = Plot("", "PCA on X", "")
    p3.add("Points", *(points.T), color=p3.color(4, alpha=.4))
    # Add the normal principle components.
    components, values = pca(points)
    values /= np.sum(values)
    for i, (vec, m) in enumerate(zip(components, values)):
        vec = vec * m
        p3.add(f"PC {i+1}", [0, vec[0]], [0, vec[1]], mode="lines")
        ax, ay = (vec / sum(vec**2)**.5) * 3
        p3.add_annotation(f"{m:.2f}", vec[0], vec[1])

    if BIG_PLOTS:
        if display:
            p1.plot(file_name="source_func.html", show=False)
        if display:
            p2.plot(append=True, x_range=[-8, 8], y_range=[-5, 5])
    else:
        # Make the plots (with manual ranges).
        p1 = p1.plot(html=False, show_legend=False)
        p2 = p2.plot(html=False, x_range=[-1, 1], y_range=[-1, 1],
                     show_legend=False)
        p3 = p3.plot(html=False, x_range=[-1, 1], y_range=[-1, 1],
                     show_legend=False)
        # Generate the multiplot of the side-by-side figures.
        if display:
            multiplot([p1, p2, p3], height=126, width=650, append=True)
    if display:
        print("-" * 70)
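# The `between` helper above loops over all O(n^2) index pairs in Python. A
# numpy-vectorized equivalent for the unique=True case uses the upper
# triangular index pairs directly and yields the same output order; this is
# an illustrative alternative, not part of the original module.
def between_vectorized(x, y):
    import numpy as np
    i1, i2 = np.triu_indices(x.shape[0], k=1)
    return x[i2] - x[i1], y[i2] - y[i1]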
def _test_samples(display=True, test_correctness=False):
    from util.math import Fraction
    from util.plot import Plot
    if display:
        for size in tuple(range(2, 42)) + (128, 129, 256, 257):
            p = Plot(f"Error at x with {size} samples")
            for confidence in (Fraction(9, 10), Fraction(185, 200),
                               Fraction(95, 100), Fraction(97, 100),
                               Fraction(99, 100)):
                f = lambda x: samples(size=size, confidence=confidence, at=x[0])
                p.add_func(f"{confidence} confidence", f, [0, 1])
            p.show(append=True, show=(size == 2))
        # NOTE: Halts here after displaying; remove to run the checks below.
        exit()

    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'samples'")
        print()

    key_values = [
        (11,      Fraction(3, 10),   Fraction(95, 100)),
        (25,      Fraction(2, 10),   Fraction(95, 100)),
        (97,      Fraction(1, 10),   Fraction(95, 100)),
        (385,     Fraction(5, 100),  Fraction(95, 100)),
        (9604,    Fraction(1, 100),  Fraction(95, 100)),
        (19,      Fraction(3, 10),   Fraction(99, 100)),
        (42,      Fraction(2, 10),   Fraction(99, 100)),
        (166,     Fraction(1, 10),   Fraction(99, 100)),
        (664,     Fraction(5, 100),  Fraction(99, 100)),
        (16588,   Fraction(1, 100),  Fraction(99, 100)),
        (33733,   Fraction(1, 10),   Fraction(999, 1000)),
        (134930,  Fraction(5, 100),  Fraction(999, 1000)),
        (3373242, Fraction(1, 100),  Fraction(999, 1000)),
    ]
    if display:
        print("samples (max error, confidence)")
    for (s, e, c) in key_values[:-3]:
        needed = samples(error=e, confidence=c)
        if display:
            print("needed: ", needed)
            print("%6d (%2.0f%%, %2.0f%%)" % (needed, 100 * e, 100 * c))
        # if (s != None): assert(needed == s)

    if display:
        print()
        for n in (10, 25, 90, 350, 9000):
            print(f"With {n:4d} samples we are 95% confident in max CDF error <=",
                  round(samples(n, confidence=.95), 1 if n < 350 else 2))
        print()
        for n in (20, 40, 160, 660, 16000):
            print(f"With {n:5d} samples we are 99% confident in max CDF error <=",
                  round(samples(n, confidence=.99), 1 if n < 600 else 2))
        print("-" * 70)

    if test_correctness:
        # Generate a random CDF.
        from util import random
        from util.plot import Plot
        TESTS = 10000
        DIFFS = 100
        N = 100
        fit = "linear"
        truth = random.cdf(nodes=5, fit=fit)
        max_error = samples(N, confidence=.99)
        if display:
            print("Largest expected error:", max_error)
        mid_error = []
        errors = {(1 / 100): [], (1 / 4): [], (1 / 3): [], (1 / 2): []}
        max_errors = []
        mean_failed = []
        for i in range(TESTS):
            sample = truth.inverse(np.random.random((N,)))
            guess = cdf_fit(sample, fit=fit)
            diff = truth - guess
            diff_func = lambda x: abs(truth(x) - guess(x))
            diffs = diff_func(np.linspace(0, 1, DIFFS))
            mean_failed += [sum(diffs > max_error)]
            if display:
                print(f"Failed: {mean_failed[-1]:4d} "
                      f"{sum(mean_failed)/len(mean_failed):.0f}")
            max_errors.append(diff)
            for v in errors:
                errors[v].append(truth(v) - guess(v))
            # if (diff > max_error):
            #     print(i, N, diff, max_error)
            #     p = Plot()
            #     p.add_func("Truth", truth, truth())
            #     p.add_func("Guess", guess, guess())
            #     p.add_func("Error", diff_func, truth())
            #     p.show(show=False)
            #     p = Plot()
            #     p.add_func("Truth Inverse", truth.inverse, (0.,1.))
            #     p.add_func("Guess Inverse", guess.inverse, (0.,1.))
            #     p.show(show=False, append=True)
            #     break
        total_failed = sum(e > max_error for e in max_errors)
        if display or (total_failed > 0):
            print(f"Failed {total_failed} out of {TESTS}, "
                  f"or {100*total_failed/TESTS:.1f}%.")
        p = Plot()
        # Add the distribution of maximum errors.
        f = cdf_fit(max_errors)
        p.add_func(f"{len(max_errors)} max errors", f, f())
        # Add the distribution of errors at different values.
        for v in sorted(errors)[::-1]:
            mean = np.mean(errors[v])
            std = np.std(errors[v])
            f = cdf_fit(errors[v])
            p.add_func(f"{v:.1f} errors ({mean:.1e}, {std:.1e}) "
                       f"{samples(N, confidence=.99):.2f}", f, f())
        p.show(append=True)
        p = Plot()
        p.add_func("Truth", truth, truth())
        p.show(append=True)
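# The `key_values` targets for 95% and 99% confidence above match the
# conservative normal-approximation sample size for estimating a proportion
# at p = 1/2: n = ceil(z**2 / (4 * error**2)), where z is the two-sided
# normal quantile for the requested confidence. A sketch of that formula
# (an assumption about how `samples` is computed, not its actual source):
def _samples_sketch(error, confidence):
    from math import ceil
    from statistics import NormalDist
    z = NormalDist().inv_cdf((1 + confidence) / 2)
    return ceil(z**2 / (4 * error**2))

# Spot checks against the table above. (The 999/1000 rows are skipped by the
# loop over `key_values[:-3]` and do not follow this formula.)
assert _samples_sketch(.05, .95) == 385
assert _samples_sketch(.01, .95) == 9604
assert _samples_sketch(.05, .99) == 664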
# Get the matrices.
train_mat = train.to_matrix()
test_mat = test.to_matrix()
train_x = train_mat[:, in_idxs]
test_x = test_mat[:, out_idxs]
m = Voronoi()
m.fit(train_x)
lengths += [len(ids) for ids, wts in m(test_x)]
save(lengths)
print("len(lengths): ", len(lengths))

from util.plot import Plot
from util.stats import cdf_fit
p = Plot("Distribution of Number of Influencers",
         "Number of records used to make prediction", "CDF value")
cdf = cdf_fit(lengths)
p.add_func("lengths", cdf, cdf(), show_in_legend=False, color=p.color(1))
p.show()
exit()

from util.approximate import Voronoi, NearestNeighbor, NeuralNetwork, DecisionTree
from util.approximate import ShepMod, BoxMesh, LSHEP, Delaunay
from util.approximate.testing import test_plot
# model = NearestNeighbor()
# model = Voronoi()
# model = DecisionTree()
# model = NeuralNetwork()
def _test_epdf_diff(display=False):
    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'epdf_diff'")

    # ----------------------------------------------------------------
    def demo(seq):
        if display:
            print('~' * 70)
            print(len(seq), seq)
            print()
        total = 0
        for vals in edf_pair_gen(seq):
            total += vals[-1]
            if display:
                print("[% 4s, % 3s] (%.2f) --" % vals, round(total, 3))
        if display:
            print('~' * 70)
            print()
    demo([0] + list(range(9)))
    demo(sorted(np.random.random(size=(10,))))
    demo(list(range(9)) + [8])

    # ----------------------------------------------------------------
    # a = [1, 1, 3, 3, 5, 6]
    # b = [0, 1, 2, 3, 4]
    n = 100
    if display:
        print(f"a = [0,100] ({n} samples)")
        print(f"b = [v + d for v in a]")
        print()
    for d in (.0, .01, .1, .5, .55, .9, 1., 1.5):
        a = [v / n for v in list(range(n + 1))]
        b = [v + d for v in a]
        if display:
            print(f"d = {d:.2f}   "
                  f"a~b = {epdf_diff(a,b):.2f}  b~a = {epdf_diff(b,a):.2f}  "
                  f"a~a = {epdf_diff(a,a):.2f}  b~b = {epdf_diff(b,b):.2f}")
    if display:
        print()
    for d in (.0, .01, .1, .5, .55, .9, 1., 1.5):
        # Generate a random sequence.
        a = sorted((np.random.random(size=(10,))))
        b = sorted((np.random.random(size=(1000,)) + d))
        diff = epdf_diff(a, b)
        if display:
            print(f"d = {d:.2f}", "",
                  "[%.2f, %.2f]" % (min(a), max(a)),
                  "[%.2f, %.2f]" % (min(b), max(b)), "", diff)
    if display:
        print()

    # ----------------------------------------------------------------
    from util.plot import Plot
    # Generate a random sequence.
    a = sorted((np.random.random(size=(2000,))))
    b = sorted((np.random.random(size=(2000,))))
    p = Plot("Empirical PDF Diff Test")
    p1 = pdf_fit(a, smooth=0.00001)
    p2 = pdf_fit(b, smooth=0.00001)
    p.add_func("a", p1, p1())  # (-.5,2)
    p.add_func("b", p2, p2())  # (-.5,2)
    if display:
        p.show(show=False, y_range=[-.5, 1.5])
    p = Plot("Empirical CDF Diff Test")
    p1 = cdf_fit(a)
    p2 = cdf_fit(b)
    p.add_func("a", p1, p1())  # (-.5,2)
    p.add_func("b", p2, p2())  # (-.5,2)
    if display:
        p.show(append=True)
    # ----------------------------------------------------------------
    if display:
        print("-" * 70)
def _test_fit_funcs(display=False):
    if display:
        print()
        print("-" * 70)
        print("Begin tests for 'fit_funcs'")
    from util.plot import Plot

    # ==============================================
    #      Test the fit functions and smoothing
    # ==============================================
    # Make data.
    smooth = .1
    data = np.random.normal(size=(1000,))
    # data[:len(data)//2] += 2
    min_max = (min(data) - .1, max(data) + .1)
    if display:
        print()
        print("(min, max) : (%.2f, %.2f)" % (min_max))
        print("Normal confidence: %.2f%%" % (100 * normal_confidence(data)))
        print()
    # Make PDF fits.
    pfit = pdf_fit(data)
    smooth_pfit = pdf_fit(data, smooth=smooth)
    # Make CDF fits.
    cfit = cdf_fit(data)
    stdev = .05 * (cfit.max - cfit.min)
    smooth_cfit = gauss_smooth(cfit, stdev)
    stdev = smooth * (cfit.max - cfit.min)
    smoother_cfit = gauss_smooth(cfit, stdev)
    # Make the PDF plots.
    p = Plot()
    p.add_func("PDF", pfit, min_max)
    # Add the smooth PDF.
    p.add_func("Smooth PDF", smooth_pfit, min_max)
    if display:
        p.show(show=False)
    # Make the CDF plots.
    p = Plot()
    p.add_func("CDF", cfit, min_max)
    # Add a CDF whose derivative is the default PDF.
    p.add_func("CDF for default PDF", smooth_cfit, min_max)
    # Add the smoother CDF.
    p.add_func("Smooth CDF", smoother_cfit, min_max)
    if display:
        p.show(append=True)
    # Make an animation transitioning between two normal distributions.
    np.random.seed(0)
    d1 = np.random.normal(0, .5, size=(500,))
    d2 = np.random.normal(3, 1, size=(500,))
    f1 = cdf_fit(d1, smooth=.1)
    f2 = cdf_fit(d2, smooth=.1)
    p = Plot()
    for w in np.linspace(0, 1, 21):
        w = round(w, 2)
        f3 = w * f1 + (1 - w) * f2
        p.add_func("0, 1/2", f1, f1(), frame=w)
        p.add_func("3, 1", f2, f2(), frame=w)
        p.add_func("weighted sum", f3, f3(), frame=w)
    if display:
        p.show(bounce=True, append=True)
    if display:
        print()
        print("-" * 70)
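# `gauss_smooth` above smooths a CDF with a normal kernel. A minimal sketch
# of that idea (an assumption about the util.stats implementation, which may
# differ): the smoothed CDF at x is the kernel-weighted average of the
# original CDF evaluated in a window around x.
def _gauss_smooth_sketch(cdf, stdev, quad_points=101):
    import numpy as np
    # Sample the normal kernel out to three standard deviations.
    offsets = np.linspace(-3 * stdev, 3 * stdev, quad_points)
    weights = np.exp(-offsets**2 / (2 * stdev**2))
    weights /= weights.sum()
    # Return a new callable approximation of the smoothed CDF.
    return lambda x: float(np.dot(weights, [cdf(x + o) for o in offsets]))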
# PCA versus MPCA-10
colors = {}
plots = []
for data_name in ("yelp", "mnist", "cifar"):
    for sample_ratio in (1,):
        for a in algs:
            # Skip uninteresting sets.
            if (data_name == "yelp") and (a != "KNN10"): continue
            if (data_name == "mnist") and (a != "KNN1"): continue
            if (data_name == "cifar") and (a != "KNN10"): continue
            # Break up data by dimension.
            y_axis = "Count" if data_name == "yelp" else ""
            p = Plot("", data_name + " errors", y_axis,
                     font_size=20, font_family="times")
            plots.append(p)
            for method in ("MPCA", "PCA"):
                for dim in dims:
                    # Color by method.
                    if method not in colors:
                        colors[method] = p.color(len(colors))
                    color = colors[method]
                    # Reduce the data.
                    d = all_data[all_data["Data"] == data_name]
                    d = d[d["Method"] == method]
                    d = d[d["Sample Ratio"] == sample_ratio]
                    d = d[d["Algorithm"] == a]
                    d = d[d["Dimension"] == dim]
print("Components") print(components) print() print("Values") print(values) print() print("Conditioner") print(conditioner) print() # Generate a plot of the response surfaces. from util.plot import Plot, multiplot print("Generating plots of source function..") # Add function 1 p1 = Plot(font_family="times") p1.add("Points", *(points.T), response, opacity=.8) p1.add_func("Surface", func, [-1,1], [-1,1], plot_points=1000, color=p1.color(1)) if GENERATE_APPROXIMATIONS: from util.algorithms import NearestNeighbor model = NearestNeighbor() model.fit(points, response) p1.add_func("Unconditioned Approximation", model, [-1,1], [-1,1], mode="markers", opacity=.8) # Generate a conditioned approximation model = NearestNeighbor() model.fit(np.matmul(points, conditioner), response) approx = lambda x: model(np.matmul(x, conditioner)) p1.add_func("Best Approximation", approx, [-1,1], [-1,1], mode="markers", opacity=.8)
if (len(algorithms) > 0):
    d = d[d["Algorithm"] == algorithms]
print(d)
d.save("all-data.pkl")
print()
config_cols = ["Function", "Dimension"]
configs = d[:, config_cols].unique()
configs.sort()
configs.max_display = float('inf')
print(configs)
for conf in configs:
    plots = []
    for S in sorted(set(d["SNR"])):
        d_conf = d[d[:, config_cols + ["SNR"]] == (list(conf) + [S])]
        F, D = conf
        p = Plot(f"{D}D '{F}' function", f"SNR = {S}", "|<i>error</i>|",
                 font_family="times")  # "log(|<i>error</i>|)"
        seen = {}
        for algorithm in sorted(set(d["Algorithm"])):
            d_alg = d_conf[d_conf["Algorithm"] == algorithm]
            print(d_alg)
            values = []
            locations = []
            # Sort the algorithm data by the training set size.
            d_alg.sort(key=lambda i: i["Train"])
            for row in d_alg:
                if (row["Train"] < row["Dimension"]): continue
                locations.append("N = " + str(row["Train"]))
                values.append([v for v in row["Abs Errors"]])
            p.add_box(algorithm, values,
if __name__ == "__main__":
    from util.plot import Plot
    from util.approximate.testing import test_plot
    model = BoxMesh()
    p, x, y = test_plot(model)
    p.plot(show=False)
    x = model.points.T
    print(x)
    # ==============================================
    #      Display the boxes that were computed
    # ==============================================
    p = Plot()
    # Get the extreme points.
    min_x = np.min(x[:, 0]) - .1
    max_x = np.max(x[:, 0]) + .1
    min_y = np.min(x[:, 1]) - .1
    max_y = np.max(x[:, 1]) + .1
    # Get the box edges (about the centers).
    boxes = model.box_sizes.T
    boxes[:, 0] = np.where(boxes[:, 0] != -1, boxes[:, 0], min_x)
    boxes[:, 1] = np.where(boxes[:, 1] != -1, boxes[:, 1], min_y)
    boxes[:, 2] = np.where(boxes[:, 2] != -1, boxes[:, 2], max_x)
    boxes[:, 3] = np.where(boxes[:, 3] != -1, boxes[:, 3], max_y)
    # Add the boxes to the plot.
    for i, (pt, bounds) in enumerate(zip(x, boxes)):
        shifts = np.array([[-boxes[i, 0], -boxes[i, 1]],
                           [ boxes[i, 2], -boxes[i, 1]],
def test_random_cdf(display=True):
    if not display:
        return
    from util.plot import Plot
    p = Plot("")
    for nodes in range(1, 100):
        f = cdf()
        p.add_func(f"Random PDF {nodes}", f.derivative, f(),
                   color=p.color(nodes - 1), group=nodes)
        # p.add(f"Points {nodes}", *list(zip(*f.nodes)),
        #       color=p.color(nodes-1, alpha=.3), group=nodes)
    p.show(show=False)
    print()
    p = Plot("")
    for nodes in range(1, 30):
        f = cdf(nodes=nodes)
        p.add_func(f"Random {nodes} node CDF", f, f(),
                   color=p.color(nodes - 1), group=nodes)
        p.add(f"Points {nodes}", *list(zip(*f.nodes)),
              color=p.color(nodes - 1, alpha=.3), group=nodes)
    p.show(append=True)
result = ag.aggregate_dataset(dataset, granularity)
print('done.')

print('imputing nan and normalizing set...', end=' ')
result_nan = ag.impute_nan(result)
result_norm = ag.normalize_data(result_nan)
print('done.')

print('constructing training and test set...', end=' ')
# input_attributes, input_training, input_test, output_attributes, output_training, output_test = \
#     ag.identify_rnn_dataset(result_norm, 'AS14.01', 0.5, ['mood'],
#                             ['circumplex.arousal', 'circumplex.valence'])
input_attributes, input_training, input_test, output_attributes, output_training, output_test = \
    ag.identify_regression_dataset(result_norm, 'AS14.01', 0.5, ['mood'],
                                   ['circumplex.arousal', 'circumplex.valence'])
print(' done.')

print('regression...', end=' ')
regr = Regression()
regr.train_regression(input_training, input_attributes, output_training, output_attributes)
Y_regression = regr.test_regression(input_test, output_test)
print('done')

print('echo state network')
esn = EchoStateNetwork()
esn.initializeNetwork(len(input_attributes), len(output_attributes), 10, True)
print('training the network...', end=' ')
esn.trainNetwork(input_training, output_training)
print('done.')
print('testing the network...', end=' ')
Y_esn = esn.testNetwork(input_test, output_test)
print('done.')

p1 = Plot()
p1.plot_results(output_test, Y_regression, output_attributes, 'regression')
p2 = Plot()
p2.plot_results(output_test, Y_esn, output_attributes, 'ESN')
def modes(data, confidence=.99, tol=1/1000):
    from util.optimize import zero
    num_samples = len(data)
    error = 2 * samples(num_samples, confidence=confidence)
    cdf = cdf_fit(data, fit="cubic")
    print("error: ", error)
    # Find all of the zeros of the derivative (mode centers / dividers).
    checks = np.linspace(cdf.min, cdf.max, int(np.ceil(1 / tol)))
    second_deriv = cdf.derivative.derivative
    deriv_evals = second_deriv(checks)
    modes = [i for i in range(1, len(deriv_evals))
             if (deriv_evals[i-1] * deriv_evals[i] <= 0)
             and (deriv_evals[i-1] >= deriv_evals[i])]
    antimodes = [i for i in range(1, len(deriv_evals))
                 if (deriv_evals[i-1] * deriv_evals[i] <= 0)
                 and (deriv_evals[i-1] < deriv_evals[i])]
    # Compute exact modes and antimodes using a zero-finding function.
    modes = [zero(second_deriv, checks[i-1], checks[i]) for i in modes]
    antimodes = [zero(second_deriv, checks[i-1], checks[i]) for i in antimodes]
    original_antimodes = antimodes[:]
    # Fix the bounds of the antimodes to match the distribution.
    if modes[0] < antimodes[0]:
        antimodes = [cdf.min] + antimodes
    else:
        antimodes[0] = cdf.min
    if modes[-1] > antimodes[-1]:
        antimodes += [cdf.max]
    else:
        antimodes[-1] = cdf.max
    # Make sure that there is an antimode between each mode.
    for i in range(len(modes)):
        if antimodes[i] > modes[i]:
            # Update the next antimode with this one (as long as it's not the max).
            if (i < len(modes) - 1):
                antimodes[i+1] = (antimodes[i] + antimodes[i+1]) / 2
            # Always update this antimode to properly be LESS than the mode.
            antimodes[i] = (modes[i] + modes[i-1]) / 2
    print("len(modes):     ", len(modes))
    print("len(antimodes): ", len(antimodes))

    # Define a function that counts the number of modes that are too small.
    def count_too_small():
        return sum((cdf(upp) - cdf(low)) < error
                   for (low, upp) in zip(antimodes[:-1], antimodes[1:]))

    # Show the PDF.
    from util.plot import Plot
    p = Plot()
    pdf = pdf_fit(cdf.inverse(np.random.random((1000,))))
    # Loop until all modes are big enough to be accepted given error tolerance.
    step = 1
    while count_too_small() > 0:
        print()
        print("step: ", step, (len(modes), len(antimodes)))
        f = len(modes)
        p.add_func("PDF", pdf, cdf(), color=p.color(1), frame=f,
                   show_in_legend=(step == 1))
        # Generate the mode lines.
        mode_lines = [[], []]
        for z in modes:
            mode_lines[0] += [z, z, None]
            mode_lines[1] += [0, .2, None]
        p.add("modes", *mode_lines, color=p.color(0), mode="lines",
              group="modes", show_in_legend=(z == modes[0] and step == 1),
              frame=f)
        # Generate the antimode lines.
        anti_lines = [[], []]
        for z in antimodes:
            anti_lines[0] += [z, z, None]
            anti_lines[1] += [0, .2, None]
        p.add("separator", *anti_lines, color=p.color(3, alpha=.3),
              mode="lines", group="separator",
              show_in_legend=(z == antimodes[0] and (step == 1)), frame=f)
        step += 1
        # Compute the densities and the sizes of each mode.
        sizes = [cdf(antimodes[i+1]) - cdf(antimodes[i])
                 for i in range(len(modes))]
        densities = [(cdf(antimodes[i+1]) - cdf(antimodes[i])) /
                     (antimodes[i+1] - antimodes[i])
                     for i in range(len(modes))]
        # Compute those modes that have neighbors that are too small.
        to_grow = [i for i in range(len(modes))
                   if (i > 0 and sizes[i-1] < error)
                   or (i < len(sizes) - 1 and sizes[i+1] < error)]
        if len(to_grow) == 0:
            break
        print("modes:     ", modes)
        print("antimodes: ", antimodes)
        print("sizes:     ", sizes)
        print("densities: ", densities)
        print("to_grow:   ", to_grow)
        # Sort the modes to be grown by their density, largest first.
        to_grow = sorted(to_grow, key=lambda i: -densities[i])
        # Keep track of the modes that have already been absorbed.
        preference = {}
        taken = set()
        conflicts = set()
        modes_to_remove = []
        anti_to_remove = []
        while len(to_grow) > 0:
            i = to_grow.pop(0)
            # Pick which of the adjacent modes to absorb.
            to_absorb = None
            if (i < len(modes) - 1) and (sizes[i+1] < error):
                direction = 1
                to_absorb = i + 1
            if (i > 0) and (sizes[i-1] < error):
                # If there wasn't a right mode, take the left by default.
                if (to_absorb == None):
                    direction = -1
                    to_absorb = i - 1
                # Otherwise we have to pick based on the density similarity.
                elif (abs(modes[i-1] - modes[i]) < abs(modes[i+1] - modes[i])):
                    # Take the other one if its density is more similar.
                    direction = -1
                    to_absorb = i - 1
            # If there is no good option to absorb, then skip.
            if (to_absorb in preference):
                continue
            # Record the preferred pick of this mode.
            preference[i] = (direction, to_absorb)
            # If this mode is already absorbed, then add it to the conflict list.
            if to_absorb in taken:
                conflicts.add(to_absorb)
            # Remove the ability to 'absorb' from modes getting absorbed.
            if to_absorb in to_grow:
                to_grow.remove(to_absorb)
            # Add the absorbed value to the set of "taken" modes.
            taken.add(to_absorb)
        # Resolve conflicts by giving absorbed modes to closer modes.
        for i in sorted(conflicts, key=lambda i: -densities[i]):
            if (abs(modes[i-1] - modes[i]) < abs(modes[i+1] - modes[i])):
                preference.pop(i+1)
            else:
                preference.pop(i-1)
        # Update the boundaries.
        for i in sorted(preference, key=lambda i: -densities[i]):
            direction, to_absorb = preference[i]
            # Update the boundary of this mode.
            antimodes[i + (direction > 0)] = antimodes[to_absorb + (direction > 0)]
            # Update the "to_remove" lists.
            anti_to_remove.append(antimodes[to_absorb + (direction > 0)])
            modes_to_remove.append(modes[to_absorb])
        # Remove the modes and antimodes that were merged.
        for m in modes_to_remove:
            modes.remove(m)
        for a in anti_to_remove:
            antimodes.remove(a)
        # Update the remaining antimodes to be nearest to the middle
        # of the remaining modes (making them representative dividers).
        for i in range(len(modes) - 1):
            middle = (modes[i] + modes[i+1]) / 2
            closest = np.argmin([abs(oam - middle) for oam in original_antimodes])
            antimodes[i+1] = original_antimodes[closest]

    f = len(modes)
    p.add_func("PDF", pdf, cdf(), color=p.color(1), frame=f,
               show_in_legend=(step == 1))
    # Generate the mode lines.
    mode_lines = [[], []]
    for z in modes:
        mode_lines[0] += [z, z, None]
        mode_lines[1] += [0, .2, None]
    p.add("modes", *mode_lines, color=p.color(0), mode="lines",
          group="modes", show_in_legend=(z == modes[0] and step == 1),
          frame=f)
    # Generate the antimode lines.
    anti_lines = [[], []]
    for z in antimodes:
        anti_lines[0] += [z, z, None]
        anti_lines[1] += [0, .2, None]
    p.add("separator", *anti_lines, color=p.color(3, alpha=.3),
          mode="lines", group="separator",
          show_in_legend=(z == antimodes[0] and (step == 1)), frame=f)
    p.show(append=True, y_range=[0, .15])

    p = Plot()
    p.add_func("CDF", cdf, cdf(), color=p.color(1))
    for z in modes:
        p.add("modes", [z, z], [0, 1], color=p.color(0), mode="lines",
              group="modes", show_in_legend=(z == modes[0]))
    for z in antimodes:
        p.add("separator", [z, z], [0, 1], color=p.color(3), mode="lines",
              group="sep", show_in_legend=(z == antimodes[0]))
    p.show(append=True)
def init_layout(self):
    Plot.init_layout(self)
    self.add_wdgts()
    self.fig.canvas.mpl_connect('button_press_event', self.double_button_cb)
def plot_cb3(self):
    Plot.plot_cb3(self, io_start_after_idle=self.start)
    # self.root.after_idle(self.io.start)
    return True
def main():
    plt = Plot()
    analyze_hidden_units(plt)
    analyze_learning_rate(plt)
    analyze_momentum(plt)
    analyze_batch_size(plt)
def modes(data, confidence=.99, tol=1/1000):
    from util.optimize import zero
    num_samples = len(data)
    error = 2 * samples(num_samples, confidence=confidence)
    print()
    print("Smallest allowed mode: ", error)
    print()
    # Get the CDF points (known to be true based on data).
    x, y = cdf_points(data)
    cdf = cdf_fit((x, y), fit="linear")
    x, y = x[1:], y[1:]
    # Generate the candidate break points based on linear interpolation.
    slopes = [(y[i+1] - y[i]) / (x[i+1] - x[i]) for i in range(len(x) - 1)]
    candidates = [i for i in range(1, len(slopes) - 1)
                  if (slopes[i] < slopes[i-1]) and (slopes[i] < slopes[i+1])]
    # Sort candidates by their 'width', the widest being the obvious divisors.
    candidates = sorted(candidates, key=lambda i: -(x[i+1] - x[i]))
    print("candidates: ", candidates)
    print("slopes:     ", [slopes[c] for c in candidates])
    # Break the data at candidates as much as can be done with confidence.
    breaks = [min(x), max(x)]
    sizes = [1.]
    chosen = []
    print()
    print("breaks: ", breaks)
    print("sizes:  ", sizes)
    print("chosen: ", chosen)
    print()
    # Loop until there are no candidate break points left.
    while len(candidates) > 0:
        new_break_idx = candidates.pop(0)
        new_break = (x[new_break_idx + 1] + x[new_break_idx]) / 2
        b_idx = np.searchsorted(breaks, new_break, side="right")
        # Compute the CDF values at the upper, break, and lower positions.
        upp = cdf(breaks[b_idx+1]) if (b_idx < len(breaks) - 1) else cdf.max
        mid = cdf(new_break)
        low = cdf(breaks[b_idx-1])
        print()
        print("new_break: ", new_break)
        print("b_idx: ", b_idx)
        print("  upp: ", upp, upp - mid)
        print("  mid: ", mid)
        print("  low: ", low, mid - low)
        # Compute the size of the smallest mode resulting from the break.
        smallest_result = min(upp - mid, mid - low)
        # Skip the break if it makes a mode smaller than the error.
        if smallest_result < error:
            continue
        print()
        print("Num modes: ", len(sizes))
        print("breaks: ", breaks)
        print("sizes:  ", sizes)
        print()
        # Update the "breaks" and "sizes" lists.
        breaks.insert(b_idx, new_break)
        sizes.insert(b_idx, upp - mid)
        sizes[b_idx-1] = mid - low
        chosen.append(new_break_idx)
    # From the "breaks" and "sizes", construct a list of "modes".
    # Consider the most dense point between breaks the "mode".
    modes = []
    for i in range(len(chosen)):
        low = 0 if (i == 0) else chosen[i-1]
        upp = len(slopes) - 1 if (i == len(chosen) - 1) else chosen[i+1]
        mode_idx = low + np.argmax(slopes[low:upp])
        modes.append((x[mode_idx+1] + x[mode_idx]) / 2)

    # Show the PDF.
    from util.plot import Plot
    p = Plot()
    pdf = pdf_fit(data)
    p.add_func("PDF", pdf, pdf(), color=p.color(1))
    # Generate the mode lines.
    mode_lines = [[], []]
    for z in modes:
        mode_lines[0] += [z, z, None]
        mode_lines[1] += [0, .2, None]
    p.add("modes", *mode_lines, color=p.color(0), mode="lines", group="modes")
    # Generate the break lines.
    break_lines = [[], []]
    for z in breaks:
        break_lines[0] += [z, z, None]
        break_lines[1] += [0, .2, None]
    p.add("separator", *break_lines, color=p.color(3, alpha=.3),
          mode="lines", group="separator")
    p.show()
    # Show the CDF.
    p = Plot()
    p.add_func("CDF", cdf, cdf(), color=p.color(1))
    # Generate the mode lines.
    mode_lines = [[], []]
    for z in modes:
        mode_lines[0] += [z, z, None]
        mode_lines[1] += [0, 1, None]
    p.add("modes", *mode_lines, color=p.color(0), mode="lines", group="modes")
    # Generate the break lines.
    break_lines = [[], []]
    for z in breaks:
        break_lines[0] += [z, z, None]
        break_lines[1] += [0, 1, None]
    p.add("separator", *break_lines, color=p.color(3, alpha=.3),
          mode="lines", group="separator")
    p.show(append=True)
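# Hypothetical usage of `modes` on a clearly bimodal sample (assumes the
# module-level numpy import and the `samples`, `cdf_points`, `cdf_fit`, and
# `pdf_fit` helpers used above are available):
if __name__ == "__main__":
    data = np.concatenate((np.random.normal(0, 1, size=2000),
                           np.random.normal(6, 1, size=2000)))
    modes(data, confidence=.99)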