def example_approx(N=30, dim=2, batch_number=5):
    """Embed a disk sample with the approximate-gradient MDS optimizer.

    Points are drawn with ``misc.disk(N, dim)``, their first two coordinates
    are scatter-plotted, and the pairwise distances are embedded through
    ``MDS.approximate`` (algorithm ``'gd'``, 200 iterations, ``lr=0.1``).
    ``batch_number`` is forwarded to ``approximate``. Ends with ``plt.show()``.
    """
    print('\n***mds.example_disk_batch()***\n')

    # Reference coordinates and the labels used to colour them.
    points = misc.disk(N, dim)
    point_labels = misc.labels(points)

    # Show the original data before any optimization happens.
    plt.figure()
    plt.scatter(points[:, 0], points[:, 1], c=point_labels)
    plt.title('Original data')
    plt.draw()
    plt.pause(0.1)

    dissimilarities = distances.compute(points)
    solver = MDS(
        dissimilarities,
        dim=dim,
        verbose=1,
        title='basic disk example using approximate gradient',
        labels=point_labels,
    )
    solver.initialize()
    solver.approximate(
        verbose=2,
        max_iters=200,
        lr=0.1,
        batch_number=batch_number,
        algorithm='gd',
    )

    # Final embedding and optimization history.
    solver.figureX(title='Final embedding')
    solver.figureH()
    plt.show()
def mds_comparison():
    """Compare optimizer settings on one 30-point, 2D disk MDS problem.

    Runs ``MDS.optimize`` three times from scratch on the same distance
    matrix: plain gradient descent with learning rates 0.01 and 0.1, then
    ``'agd'``. Each gd run is labelled with the learning rate it actually
    uses (the second run was previously labelled ``lr=0.05`` while running
    with ``learning_rate=0.1``).
    """
    import misc
    import distances
    import mds

    max_iters = 100
    save_frequency = 10

    print('\n*** gd.mds_comparison() ***\n')
    Y = misc.disk(30, 2)
    plt.figure()
    plt.plot(Y[:, 0], Y[:, 1], 'o')
    plt.title('Original data')
    plt.draw()
    plt.pause(0.1)

    D = distances.compute(Y)
    vis = mds.MDS(D, dim=2, verbose=1)
    vis.initialize_Y()

    # Options common to every run so the comparison differs only in the
    # algorithm / learning rate.
    shared = dict(
        max_iters=max_iters,
        save_cost=True,
        from_scratch=True,
        save_frequency=save_frequency,
        verbose=2,
    )
    for lr in (0.01, 0.1):
        # Build the label from the rate so the two can never disagree.
        vis.optimize(algorithm='gd', learning_rate=lr,
                     label=f'mds, lr={lr}', **shared)
    vis.optimize(algorithm='agd', **shared)
    plt.show()
def agd_multiview_mds_standard(N=100,trials=3,runs=5):
    """\
    Check how adaptive gradient descent ('agd') behaves on multiview MDS with
    the 'standard' perspectives.

    For each of `trials` freshly sampled 3D point sets, the same multiview
    problem is solved `runs` times, re-initializing X before each run, and the
    normalized cost of every run is printed.
    """
    print('*** test.agd_multiview_mds_standard() ***')
    print(f' N : {N}')
    print()
    for trial in range(trials):
        print(f' Trial # {trial}')
        print(' Normalized cost :')

        # One problem per trial: points -> perspective images -> distances.
        points = misc.disk(N, dim=3)
        persp = perspective.Persp()
        persp.fix_Q(special='standard')
        images = persp.compute_Y(points)
        D = distances.compute(images)

        mv = multiview.Multiview(D, persp=persp)
        mv.setup_visualization(visualization='mds')

        # Costs are collected per trial; only the per-run print reports them.
        stress = []
        for _ in range(runs):
            mv.initialize_X()
            mv.optimize_X(algorithm='agd')
            final_cost = mv.ncost
            stress.append(final_cost)
            print(f' {final_cost:0.2e}')
        print()
def generate_physical(N, dim=3):
    """\
    Build a dissimilarity graph from sampled coordinates.

    Coordinates come from ``misc.disk(N, dim=dim)``; the value returned is
    whatever ``from_coordinates`` produces for them.
    """
    coordinates = misc.disk(N, dim=dim)
    return from_coordinates(coordinates)
def varying_view_number(N=100,runs=1):
    """\
    Multiview-MDS experiment in which the number of views grows from 1 to 10.

    A single 3D point set is reused; for every view count a new set of random
    orthogonal perspectives is fixed, the problem is solved with 'agd' using
    `runs` initializations, and the normalized cost is printed.
    """
    print('*** test.varying_view_number() ***')
    print(f' N : {N}')
    print()

    points = misc.disk(N, dim=3)
    persp = perspective.Persp(dimX=3, dimY=2, family='linear',
                              restriction='orthogonal')

    cost = []
    for n_views in range(1, 11):
        # Redraw the perspectives for this view count, then rebuild the data.
        persp.fix_Q(number=n_views, random='orthogonal')
        images = persp.compute_Y(points)
        D = distances.compute(images)

        mv = multiview.Multiview(D, persp=persp)
        mv.setup_visualization(visualization='mds')
        mv.initialize_X(number=runs)
        mv.optimize_X(algorithm='agd')

        result = mv.ncost
        cost.append(result)
        print(f' {n_views:>2} : {result:0.2e}')
    print()
def disk(N=128, weights=None, **kwargs):
    """Basic disk example.

    N is the number of points. ``weights`` is passed straight to ``MDS``
    (use None or 'reciprocal' or an array, etc). Extra keyword arguments are
    forwarded to ``MDS.gd``.

    Draws three panels: the initial embedding, the computation history and
    the final embedding.
    """
    print('\n***disk example***\n')

    points = misc.disk(N, 2)
    point_colors = misc.labels(points)
    pairwise = scipy.spatial.distance.pdist(points)

    solver = MDS(pairwise, weights=weights, dim=2, verbose=2,
                 title='basic disk example', sample_colors=point_colors)

    fig, panels = plt.subplots(1, 3, figsize=(9, 3))
    fig.suptitle('MDS - disk data')
    fig.subplots_adjust(top=0.80)
    before_ax, history_ax, after_ax = panels

    solver.plot_embedding(title='initial embedding', ax=before_ax)
    solver.gd(min_cost=1e-6, **kwargs)
    solver.plot_computations(ax=history_ax)
    solver.plot_embedding(title='final embedding', ax=after_ax)

    plt.draw()
    plt.pause(1.0)
def time():
    """Plot average MPSE computation time against the number of points.

    For each problem size in ``[10, 31, 100, 316]`` the experiment is repeated
    three times on fresh 3D samples with three 'standard' projections. A run
    counts as a success when its final cost is below 1e-3; the time recorded
    in ``vis.H['time']`` is averaged over successful runs only and plotted on
    log-log axes.
    """
    print('\n***mpse_test.time()***')
    sizes = [int(10**a) for a in [1, 1.5, 2, 2.5]]
    repeats = 3
    successes = np.zeros(len(sizes))
    # Named `elapsed` rather than `time` so the array does not shadow this
    # function's own name.
    elapsed = np.zeros(len(sizes))

    for i, n_points in enumerate(sizes):
        for _ in range(repeats):
            X = misc.disk(n_points, dim=3)
            proj = projections.PROJ()
            Q = proj.generate(number=3, method='standard')
            D = multigraph.multigraph_from_projections(proj, Q, X)
            vis = mpse.MPSE(D, verbose=1)
            vis.gd(min_step=1e-4, verbose=1)
            if vis.cost < 1e-3:
                # Only successful runs contribute, matching the division by
                # the success count below.
                successes[i] += 1
                elapsed[i] += vis.H['time']
        if successes[i] != 0:
            elapsed[i] /= successes[i]

    # plt.plot() with no arguments returns an empty list, not a figure;
    # open the figure explicitly instead.
    plt.figure()
    plt.loglog(sizes, elapsed)
    plt.xlabel('number of points')
    plt.ylabel('time')
    plt.title('computation time')
    plt.show()
def stress_vs_estimate(N=128,dim=2):
    """Compare the exact MDS stress with its sampled estimates.

    For several perturbation levels of a disk sample, prints the exact stress
    (``estimate=False``) and, for each value of ``average_neighbors``, the
    mean / standard deviation / min / max of 10 estimated stresses together
    with their error relative to the exact value (in brackets).
    """
    noise2signal = 10**np.arange(-2.5,0.5,0.5)
    average_neighbors = [2,8,32,128,512]
    its = 10

    X = misc.disk(N,dim=dim)
    D = distance_matrix(X,X)
    signal_std = np.std(X)
    # NOTE(review): `noise` is computed but never used; the perturbation
    # below scales by noise2signal directly. Confirm which was intended.
    noise = np.sqrt(noise2signal)*signal_std
    vis = mds.MDS(D,dim=dim)

    for level in noise2signal:
        X_noisy = X + np.random.randn(N,dim)*level
        true_stress = mds.stress_function(X_noisy,D,estimate=False)
        print(f'exact stress : {true_stress:0.2e}')

        for neighbors in average_neighbors:
            print(f' average neighbors : {neighbors}')
            stresses = [
                mds.stress_function(X_noisy,D,estimate=neighbors)
                for _ in range(its)
            ]

            mean = np.average(stresses)
            spread = np.std(stresses)
            lowest = min(stresses)
            highest = max(stresses)

            print(f' average stress : {mean:0.2e} '
                  f'[{abs(mean-true_stress)/true_stress:0.2e}]')
            print(f' standard deviation : {spread:0.2e} '
                  f'[{spread/true_stress:0.2e}]')
            print(f' minimum stress : {lowest:0.2e} '
                  f'[{abs(lowest-true_stress)/true_stress:0.2e}]')
            print(f' maximum stress : {highest:0.2e} '
                  f'[{abs(highest-true_stress)/true_stress:0.2e}]')
def disk_compare(N=100,dim=2):
    """Compare three optimization strategies on the same disk MDS problem.

    The data is always sampled in 2D (``misc.disk(N, 2)``); ``dim`` is the
    embedding dimension given to ``MDS``. Strategies, in order: full gradient
    with 'agd', approximate gradient with 'gd', and approximate 'gd' followed
    by 10 iterations of ``optimize``. ``forget()`` is called between them.
    """
    print('\n***mds.disk_compare()***')

    points = misc.disk(N,2)
    labels = misc.labels(points)

    plt.figure()
    plt.scatter(points[:,0],points[:,1],c=labels)
    plt.title('original data')
    plt.draw()
    plt.pause(0.1)

    D = distances.compute(points)
    solver = MDS(D,dim=dim,verbose=1,title='disk experiments',labels=labels)
    solver.initialize()
    solver.figureX(title='initial embedding')

    def show_result(title):
        # Embedding and history figures for the strategy just run.
        solver.figureX(title=title)
        solver.figureH(title=title)

    # Strategy 1: full gradient, adaptive gradient descent.
    name = 'full gradient & agd'
    solver.optimize(algorithm='agd',verbose=2,label=name)
    show_result(name)
    solver.forget()

    # Strategy 2: approximate gradient, plain gradient descent.
    name = 'approx gradient & gd'
    solver.approximate(algorithm='gd',verbose=2,label=name)
    show_result(name)
    solver.forget()

    # Strategy 3: approximate phase followed by a short full optimization.
    name = 'combine'
    solver.approximate(algorithm='gd',verbose=2,label=name)
    solver.optimize(verbose=2,label=name,max_iters=10)
    show_result(name)

    plt.show()
def example_fewer_edges(N=100, dim=2):
    """Show MDS embeddings of a disk graph as edges are removed.

    A full graph is built from ``misc.disk(N, dim)``; for each proportion in
    ``[.99, .8, .6, .4, .2]`` the graph returned by
    ``multigraph.remove_edges`` is embedded from the same starting point
    (stochastic phase followed by an adaptive phase) and drawn in its own
    figure.
    """
    print('\n***mds.example_fewer_edges()***\n')
    # The two concatenated literals previously had no space between them
    # ("edgesare") and read "far way"; emit one corrected sentence.
    print('Here we explore the MDS embedding for a full graph as far away '
          'edges are removed')
    title = 'MDS embedding for multiple proportion of edges'

    X = misc.disk(N, dim)
    colors = misc.labels(X)
    D = multigraph.from_coordinates(X, colors=colors)
    # Shared initial embedding so the runs differ only in the edge set.
    X0 = misc.disk(N, dim) * .5

    for prop in [.99, .8, .6, .4, .2]:
        DD = multigraph.remove_edges(D, proportion=prop)
        mds = MDS(DD, dim=dim, verbose=1, title=title)
        mds.initialize(X0=X0)
        mds.stochastic(verbose=1, max_iters=300, approx=.99, lr=.5)
        mds.adaptive(verbose=1, min_step=1e-6, max_iters=300)
        mds.figure(title=f'proportion = {prop:0.1f}')
    plt.show()
def compare_multiview_same(N=100,runs=1):
    """Plot normalized stress against noise level for four multiview setups.

    Distances come from three 'identity' views of a 2D disk sample. For each
    noise level the noisy distances are fitted with: fixed 'standard' 3D->2D
    perspectives, fixed 'identity' 2D->2D perspectives (using `runs`
    initializations), fixed 'identity' 3D->3D perspectives, and the 'standard'
    perspectives with Q optimized as well (``optimize_all``).
    """
    noise_levels = [0.0001,0.001,0.01,0.1,0.5]

    # Clean data: identical 2D views of the same points.
    X = misc.disk(N,dim=2)
    persp = perspective.Persp(dimX=2,dimY=2)
    persp.fix_Q(special='identity',number=3)
    Y = persp.compute_Y(X)
    D = distances.compute(Y)

    # Perspective models used to fit the (noisy) data.
    persp1 = perspective.Persp(dimX=3,dimY=2)
    persp1.fix_Q(special='standard',number=3)
    persp2 = perspective.Persp(dimX=2,dimY=2)
    persp2.fix_Q(special='identity',number=3)
    persp3 = perspective.Persp(dimX=3,dimY=3)
    persp3.fix_Q(special='identity',number=3)

    def fit_fixed_views(D_noisy, model, inits):
        # Optimize X only, keeping the perspectives of `model` fixed.
        mv = Multiview(D_noisy,persp=model,verbose=1)
        mv.setup_visualization(visualization='mds')
        mv.initialize_X(number=inits)
        mv.optimize_X(algorithm='agd',max_iters=200)
        return mv.ncost

    cost, cost2, cost3, costm = [], [], [], []
    for noise in noise_levels:
        D_noisy = distances.add_noise(D,noise)

        cost.append(fit_fixed_views(D_noisy, persp1, 1))
        cost2.append(fit_fixed_views(D_noisy, persp2, runs))
        cost3.append(fit_fixed_views(D_noisy, persp3, 1))

        # Same model as the first fit, but Q is optimized together with X.
        mv = Multiview(D_noisy,persp=persp1,verbose=1)
        mv.setup_visualization(visualization='mds')
        mv.initialize_Q()
        mv.initialize_X(number=1)
        mv.optimize_all(algorithm='agd',max_iters=[30,20],rounds=40)
        costm.append(mv.ncost)

    plt.figure()
    curves = (
        (cost, 'multi-perspective'),
        (cost2, 'combine 2'),
        (cost3, 'combine 3'),
        (costm, 'multi-all'),
    )
    for values, name in curves:
        plt.loglog(noise_levels,values,linestyle='--',marker='o',label=name)
    plt.legend()
    plt.xlabel('noise level')
    plt.ylabel('normalized stress')
    plt.show()
def test1(N=100, trials=3, repeats=5, **kwargs):
    """Print the final MDS cost for repeated runs on random disk graphs.

    `repeats` graphs are generated; each one is solved `trials` times with
    ``MDS.gd(min_step=1e-3)``. The same ``**kwargs`` are forwarded to
    ``multigraph.graph_from_coordinates``, ``MDS`` and ``MDS.gd``. The
    ``(repeats, trials)`` cost array is printed at the end.
    """
    print('\n***mds.test1()***')
    cost = np.empty((repeats, trials))
    for graph_index in range(repeats):
        points = misc.disk(N, 2)
        graph = multigraph.graph_from_coordinates(points, **kwargs)
        for trial in range(trials):
            solver = MDS(graph, dim=2, **kwargs)
            solver.gd(min_step=1e-3, **kwargs)
            cost[graph_index, trial] = solver.cost
    print(cost)
def example_disk_Q(N=100):
    """Optimize only the perspectives Q of an MPSE problem on disk data.

    The data is generated with three 'standard' perspectives of a 3D sample.
    X is initialized at the true points, Q at random orthogonal values, and
    ``optimize_Q`` is run before ``figureHY`` is shown.
    """
    points = misc.disk(N, dim=3)

    # Perspectives that generate the data.
    persp = perspective.Persp()
    persp.fix_Q(number=3, special='standard')
    graph = multigraph.from_perspectives(points, persp)

    solver = MPSE(graph, persp=persp, verbose=1)
    solver.setup_visualization(visualization='mds')

    # Start from random Q but the exact X, so only Q has to be found.
    solver.initialize_Q(random='orthogonal')
    solver.initialize_X(X0=points)
    solver.optimize_Q(verbose=2)

    solver.figureHY()
    plt.show()
def stress_vs_miss(N=128,dim=2):
    """Show how the MDS stress grows as more nodes are misplaced.

    For each count in ``[1, 2, 4, ..., 128]`` the last ``count`` rows of the
    true coordinates are replaced by fresh ``misc.disk`` samples; the exact
    stress of that configuration is computed and the configuration is drawn
    in one panel of a 2x4 grid, titled ``stress / count``.
    """
    misses = [1,2,4,8,16,32,64,128]

    fig, grid = plt.subplots(2,4,figsize=(12,6))
    panels = grid.flatten()
    fig.suptitle('normalized-stress / number-of-misplaced-nodes')
    fig.tight_layout(pad=2.5)
    fig.subplots_adjust(top=0.88)

    X = misc.disk(N,dim=dim)
    D = distance_matrix(X,X)
    vis = mds.MDS(D,dim=dim)

    stress = np.empty(len(misses))
    for idx, (count, panel) in enumerate(zip(misses, panels)):
        # Overwrite the trailing `count` points with new random positions.
        X_misplaced = X.copy()
        X_misplaced[-count:] = misc.disk(count,dim=dim)

        stress[idx] = mds.stress_function(X_misplaced,D,estimate=False)
        vis.initialize(X0=X_misplaced)
        vis.figureX(title=f'{stress[idx]:0.2e} / {count}',ax=panel)
    plt.show()
def example_weights(N=100, dim=2):
    """Show MDS embeddings of one disk graph under different edge weights.

    The same graph and the same initial embedding are used four times: once
    with the weights as produced by ``multigraph.from_coordinates`` and then
    after ``multigraph.set_weights(D, scaling=s)`` for ``s`` in 0.5, 1 and 2.
    Each run does a stochastic phase followed by an adaptive phase and draws
    one figure.
    """
    print('\n***mds.example_weights()***\n')
    # The two concatenated literals previously had no space between them
    # ("differentweights"); emit one correctly spaced sentence.
    print('Here we explore the MDS embedding for a full graph for different '
          'weights')
    title = 'MDS embedding for multiple weights'

    X = misc.disk(N, dim)
    colors = misc.labels(X)
    X0 = misc.disk(N, dim)
    D = multigraph.from_coordinates(X, colors=colors)

    # (scaling passed to set_weights, figure title); None leaves the weights
    # exactly as from_coordinates created them.
    configurations = [
        (None, 'absolute weights'),
        (.5, '1/sqrt(Dij) weights'),
        (1, '1/Dij weights'),
        (2, 'relative weights'),
    ]
    for scaling, figure_title in configurations:
        if scaling is not None:
            multigraph.set_weights(D, scaling=scaling)
        mds = MDS(D, dim=dim, verbose=1, title=title)
        mds.initialize(X0=X0)
        mds.stochastic(verbose=1, max_iters=50, approx=.6, lr=50)
        mds.adaptive(verbose=1, min_step=1e-6, max_iters=300)
        mds.figure(title=figure_title)
    plt.show()
def example_disk_Q(N=100):
    """Optimize only the perspectives Q of a Multiview problem on disk data.

    Distances are computed from three 'standard' perspective images of a 3D
    sample. X is initialized at the true points and Q at random orthogonal
    values; ``optimize_Q`` runs first with ``batch_size=10`` and then without
    a batch size, after which the history figure is shown.
    """
    points = misc.disk(N,dim=3)

    # Ground-truth perspectives and the images they produce.
    persp = perspective.Persp()
    true_Q = persp.generate_Q(number=3,special='standard')
    true_images = persp.compute_Y(points,Q=true_Q)
    D = distances.compute(true_images)

    solver = Multiview(D,persp=persp,verbose=1)
    solver.setup_visualization(visualization='mds')
    solver.initialize_Q(random='orthogonal')
    solver.initialize_X(X0=points)

    # Batched pass first, then a pass with the default settings.
    solver.optimize_Q(verbose=2,batch_size=10)
    solver.optimize_Q(verbose=2)

    solver.figureH()
    plt.show()
def example_disk(N=100):
    """Run Multiview MDS on a 3D disk sample with fixed 'standard' views.

    Only X is optimized (``batch_size=10``, 50 iterations). The embedding
    figure is produced with ``save='hola'``, followed by the image and
    history figures.
    """
    points = misc.disk(N,dim=3)
    point_labels = misc.labels(points)

    # Fixed perspectives -> images -> per-view distances.
    persp = perspective.Persp()
    persp.fix_Q(number=3,special='standard')
    images = persp.compute_Y(points)
    D = distances.compute(images)

    solver = Multiview(D,persp=persp,verbose=1,labels=point_labels)
    solver.setup_visualization(visualization='mds')
    solver.initialize_X(verbose=1)
    solver.optimize_X(batch_size=10,max_iters=50,verbose=1)

    solver.figureX(save='hola')
    solver.figureY()
    solver.figureH()
    plt.show()
def test_gd_lr(N=100, dim=2):
    """Run ``MDS.gd`` on one disk graph with several learning rates.

    For each learning rate in ``[100, 10, 1, .1]`` the solver runs ``gd``,
    draws a figure titled with the rate, and calls ``forget()`` before the
    next rate.
    """
    print('\n***mds.gd_lr()***')

    points = misc.disk(N, dim)
    point_colors = misc.labels(points)
    graph = multigraph.from_coordinates(points, colors=point_colors)

    solver = MDS(
        graph,
        dim=dim,
        verbose=1,
        title='recovering random coordinates for different learning rates',
    )
    solver.initialize()

    learning_rates = (100, 10, 1, .1)
    for rate in learning_rates:
        solver.gd(lr=rate)
        solver.figure(title=f'lr = {rate}')
        solver.forget()
    plt.show()
def example_disk(N=100):
    """Run MPSE on a 3D disk sample with fixed 'standard' perspectives.

    Only X is optimized (``batch_size=10``, 50 iterations). The embedding
    figure is produced with ``save='hola'``, followed by the ``figureY``,
    ``figureH`` and ``figureHY`` figures.
    """
    points = misc.disk(N, dim=3)
    # Labels are computed but not handed to MPSE in this example.
    labels = misc.labels(points)

    persp = perspective.Persp()
    persp.fix_Q(number=3, special='standard')
    graph = multigraph.from_perspectives(points, persp)

    solver = MPSE(graph, persp=persp, verbose=1)
    solver.setup_visualization(visualization='mds')
    solver.initialize_X(verbose=1)
    solver.optimize_X(batch_size=10, max_iters=50, verbose=1)

    solver.figureX(save='hola')
    solver.figureY()
    solver.figureH()
    solver.figureHY()
    plt.show()
def example_disk_all(N=100):
    """Optimize both Q and X of a Multiview problem on 3D disk data.

    Distances come from three 'standard' perspective images of the sample.
    Q and X are initialized with their defaults and optimized together via
    ``optimize_all(agd=True, batch_size=10)``.
    """
    points = misc.disk(N,dim=3)
    point_labels = misc.labels(points)

    # Ground-truth perspectives used only to generate the data.
    persp = perspective.Persp()
    true_Q = persp.generate_Q(number=3,special='standard')
    true_images = persp.compute_Y(points,Q=true_Q)
    D = distances.compute(true_images)

    solver = Multiview(D,persp=persp,verbose=1,labels=point_labels)
    solver.setup_visualization(visualization='mds')
    solver.initialize_Q()
    solver.initialize_X()
    solver.optimize_all(agd=True,batch_size=10)

    solver.figureX(plot=True)
    solver.figureY(plot=True)
    solver.figureH()
    plt.show()
def comparison():
    """Time MPSE with fixed versus free projections over a parameter grid.

    For every sample size, number of perspectives and trial, a random problem
    is generated and solved twice from the same initial embedding: once with
    the true projections fixed and once without. Each solver's ``time`` is
    stored in ``timef`` / ``timev``, and its cost and time are printed and
    its computation history plotted.
    """
    n_samples = np.array(10**np.arange(1.5, 4.01, .5), dtype=int)
    n_perspectives = [2, 3, 4, 5]
    trials = 2

    grid_shape = (len(n_samples), len(n_perspectives), trials)
    timef = np.empty(grid_shape)
    timev = np.empty(grid_shape)
    proj = projections.PROJ()

    gd_options = dict(batch_size=20, max_iter=500, min_cost=1e-4)
    for i, size in enumerate(n_samples):
        for j, n_views in enumerate(n_perspectives):
            for k in range(trials):
                # Random problem plus an independent starting embedding.
                X = misc.disk(size, dim=3)
                Q = proj.generate(number=n_views, method='random')
                data = proj.project(Q, X)
                X0 = misc.disk(size, dim=3)

                # Projections known and held fixed.
                fixed = mpse.MPSE(data, fixed_projections=Q,
                                  initial_embedding=X0)
                fixed.gd(**gd_options)
                timef[i, j, k] = fixed.time
                print(i, j, k, fixed.cost, fixed.time)
                fixed.plot_computations()
                plt.show()

                # Projections left free.
                free = mpse.MPSE(data, initial_embedding=X0)
                free.gd(**gd_options)
                timev[i, j, k] = free.time
                print(free.cost, free.time)
                free.plot_computations()
                plt.show()
def example_disk_all(N=100):
    """Optimize both Q and X of an MPSE problem on 3D disk data.

    The graph is built from three 'standard' perspectives of the sample. Q
    and X are initialized with their defaults and optimized together via
    ``optimize_all(agd=True)``.
    """
    points = misc.disk(N, dim=3)
    # Labels are computed but not handed to MPSE in this example.
    labels = misc.labels(points)

    persp = perspective.Persp()
    persp.fix_Q(number=3, special='standard')
    graph = multigraph.from_perspectives(points, persp)

    solver = MPSE(graph, persp=persp, verbose=1)
    solver.setup_visualization(visualization='mds')
    solver.initialize_Q()
    solver.initialize_X()
    solver.optimize_all(agd=True)

    solver.figureX(plot=True)
    solver.figureY(plot=True)
    solver.figureH()
    plt.show()
def stress_vs_noise(N=128, dim=2):
    """Show the exact MDS stress of increasingly perturbed true coordinates.

    For each scale in ``10**np.arange(-2, 3, 0.5)`` Gaussian noise of that
    scale is added to the true points, the exact stress against the clean
    distances is computed, and the configuration is drawn with the noise and
    stress in its title.
    """
    X = misc.disk(N, dim=dim)
    D = distance_matrix(X, X)
    vis = mds.MDS(D, dim=dim)

    noise = 10**np.arange(-2, 3, 0.5)
    stress = np.empty(len(noise))
    for idx, scale in enumerate(noise):
        perturbed = X + np.random.randn(N, dim) * scale
        stress[idx] = mds.stress_function(perturbed, D, estimate=False)
        vis.initialize(X0=perturbed)
        vis.figureX(
            title=f'noise: {scale:0.2e}, stress: {stress[idx]:0.2e}')
    plt.show()
    return
def example_disk_dimensions(N=100):
    """Plot the normalized MDS stress reached in dimensions 1 through 10.

    For each dimension a new sample is drawn with ``misc.disk(N, dim)`` and
    embedded in that same dimension with 'agd' (300 iterations); the final
    ``ncost`` values are plotted on a semilog-y axis.
    """
    print('\n***mds.example_disk_dimensions()***\n')
    dims = range(1,11)

    def final_stress(dim):
        # Solve one problem in `dim` dimensions and report its ncost.
        sample = misc.disk(N,dim)
        D = distances.compute(sample)
        solver = MDS(D,dim,verbose=1,label=f'dimension : {dim}')
        solver.initialize_Y()
        solver.optimize(algorithm='agd',max_iters=300)
        return solver.ncost

    stress = [final_stress(dim) for dim in dims]

    plt.figure()
    plt.semilogy(dims,stress)
    plt.xlabel('dimension')
    plt.ylabel('stress')
    plt.title('Normalized MDS stress for various dimensions')
    plt.show()
def example_disk_noisy(N=100,dim=2):
    """Plot the normalized MDS stress reached for increasing noise levels.

    One clean distance matrix is computed from ``misc.disk(N, dim)``; for each
    noise level ``distances.add_noise`` produces a noisy copy that is embedded
    with 'agd' (300 iterations). Final ``ncost`` values are plotted on
    log-log axes.
    """
    print('\n***mds.example_disk_noisy()***\n')
    noise_levels = [0.001,0.005,0.01,0.03,0.07,0.1,0.15,0.2,0.7,1.0]

    sample = misc.disk(N,dim)
    clean = distances.compute(sample)

    stress = []
    for level in noise_levels:
        noisy = distances.add_noise(clean,level)
        solver = MDS(noisy,dim,verbose=1,title=f'noise : {level:0.2f}')
        solver.initialize()
        solver.optimize(algorithm='agd',max_iters=300,verbose=1)
        stress.append(solver.ncost)

    plt.figure()
    plt.loglog(noise_levels,stress,'.-')
    plt.xlabel('noise level')
    plt.ylabel('stress')
    plt.title('Normalized MDS stress for various noise levels')
    plt.show()
def reliability0(N=100, trials=3, repeats=5, **kwargs):
    """\
    Count how often MDS gradient descent reaches a cost below 1e-3.

    `repeats` random disk graphs are generated and each is solved `trials`
    times. The same ``**kwargs`` are forwarded to
    ``multigraph.graph_from_coordinates``, ``mds.MDS`` and ``gd``. Prints the
    ``(repeats, trials)`` cost array and the per-graph success counts.
    """
    print('\n***mds.reliability()***')
    cost = np.empty((repeats, trials))
    success = np.zeros(repeats)

    for graph_index in range(repeats):
        points = misc.disk(N, 2)
        graph = multigraph.graph_from_coordinates(points, **kwargs)
        for trial in range(trials):
            solver = mds.MDS(graph, dim=2, **kwargs)
            solver.gd(min_step=1e-3, **kwargs)
            cost[graph_index, trial] = solver.cost
            reached_optimum = solver.cost < 1e-3
            if reached_optimum:
                success[graph_index] += 1

    print(cost)
    print(success)
def noise_all(N=100):
    """Plot the multiview MDS cost against the noise added to the distances.

    Distances are computed from three 'identity' projections of a disk
    sample; for each noise level a noisy copy is fitted with plain gradient
    descent (``learning_rate=1``, 300 iterations) and the final ``cost`` is
    plotted on a semilog-x axis.
    """
    noise_levels = [0.001,0.01,0.07,0.15,0.4]

    # NOTE(review): the sample is drawn with dim=3 while the projection is
    # declared with dimX=2 -- confirm this combination is intended.
    points = misc.disk(N,dim=3)
    proj = perspective.Proj(dimX=2,dimY=2)
    proj.set_params_list(special='identity',number=3)
    images = proj.project(points)
    clean = distances.compute(images)

    stress = []
    for level in noise_levels:
        noisy = distances.add_noise(clean,level)
        solver = Multiview(noisy,persp=proj)
        solver.setup_visualization(visualization='mds')
        solver.initialize_X(verbose=1)
        solver.optimize_X(algorithm='gd',learning_rate=1,max_iters=300,
                          verbose=1)
        stress.append(solver.cost)

    plt.figure()
    plt.semilogx(noise_levels,stress)
    plt.show()
def time(n_samples, n_perspectives, fixed_projections=False, batch_size=20,
         method='random', trials=50, attempts=3, best=40, verbose=0,
         max_iter=500):
    """Benchmark MPSE solve time on random 3D disk problems.

    For each of `trials` random problems, up to `attempts` solves are run and
    the fastest one whose cost is below 1.5e-3 is kept. Finally prints the
    number of problems that had such a solve and the average of the `best`
    smallest kept times.

    Parameters
    ----------
    n_samples : number of points passed to ``misc.disk``.
    n_perspectives : number of projections generated per problem.
    fixed_projections : if true, the generating projections are passed to
        ``MPSE`` as ``fixed_projections``; otherwise ``None`` is passed.
    batch_size, max_iter : forwarded to ``MPSE.gd``.
    method : forwarded to ``proj.generate``.
    trials, attempts, best : experiment sizes described above.
    verbose : values above 1 print the cost and time of every attempt.
    """
    proj = projections.PROJ()
    times = []
    for k in range(trials):
        X = misc.disk(n_samples, dim=3)
        Q = proj.generate(number=n_perspectives, method=method)
        data = setup.setup_distances_from_multiple_perspectives(
            proj.project(Q, X))
        Q0 = Q if fixed_projections else None

        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        best_time = np.inf
        best_cost = np.inf
        for i in range(attempts):
            # Keyword was misspelled `fixed_projectiosn`, so the fixed
            # projections never reached MPSE under the intended name.
            mv = mpse.MPSE(data, fixed_projections=Q0)
            mv.gd(batch_size=batch_size, max_iter=max_iter, min_cost=1e-3,
                  min_grad=1e-8)
            if verbose > 1:
                print(k, i, mv.cost, mv.time)
            if mv.cost < 1.5e-3 and mv.time < best_time:
                best_time = mv.time
                best_cost = mv.cost
        # Record the problem only if some attempt met the cost threshold.
        if best_cost < 1.5e-3:
            times.append(best_time)
    print(len(times), np.average(np.sort(times)[0:best]))
def embeddability_noise(ax=None):
    """Plot normalized MDS cost against multiplicative distance noise.

    Distances of 50 points from ``misc.disk(N, 4)`` are multiplied entrywise
    by ``1 + noise * randn`` for each noise level, embedded in 4 dimensions
    with the default ``optimize()``, and the resulting ``ncost`` values are
    drawn with ``semilogx``. If ``ax`` is None a new figure is created and
    shown; otherwise the curve is drawn on ``ax`` without calling
    ``plt.show()``.
    """
    print('\n**mds.embeddability_noise()')
    N = 50

    # NOTE(review): the original expression was `[0] + 10**np.arange(...)`.
    # With a NumPy array on the right this is elementwise addition of 0, not
    # list concatenation, so no zero-noise entry is included. The values
    # below are exactly what that expression evaluates to.
    noise_list = 10**np.arange(-4,0,0.5)

    points = misc.disk(N,4)
    clean = distances.compute(points)

    ncost = []
    for level in noise_list:
        perturbed = clean*(1+np.random.randn(N,N)*level)
        solver = MDS(perturbed,dim=4,verbose=1)
        solver.initialize()
        solver.optimize()
        ncost.append(solver.ncost)

    # Only show the figure when this function created it.
    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(1)
    ax.semilogx(noise_list,ncost)
    if owns_figure:
        plt.show()
def example_disk(N=100,dim=2,**kwargs):
    """Basic MDS example on a disk sample.

    Scatter-plots the first two coordinates of ``misc.disk(N, dim)``, embeds
    the pairwise distances with ``MDS.optimize(**kwargs)``, and draws the
    initial and final embeddings.
    """
    print('\n***mds.example_disk()***')

    points = misc.disk(N,dim)
    point_labels = misc.labels(points)

    # Original data.
    plt.figure()
    plt.scatter(points[:,0],points[:,1],c=point_labels)
    plt.title('original data')
    plt.draw()
    plt.pause(0.1)

    D = distances.compute(points)
    solver = MDS(D,dim=dim,verbose=1,title='basic disk example',
                 labels=point_labels)
    solver.initialize()
    solver.figureX(title='initial embedding')

    solver.optimize(**kwargs)

    final_title = 'final embedding'
    solver.figureX(title=final_title,labels=point_labels,edges=.2)
    solver.figure(title=final_title,labels=point_labels)
    plt.show()