def experiment(data_file, data_type, dim, distance, npoints, n_neighbors,
               noise_std, target_dim, max_turns):
    """Run Isomap on the configured data and plot the input next to the output.

    When ``data_file`` is not ``None`` the points are read with
    ``multidimensional.common.load_embeddings`` and handed to the data
    builder; otherwise ``None`` is passed to the builder as the points.

    The original points and their projection are drawn in two side-by-side
    subplots of a single figure.  Previously both plots were created with
    ``plt.axes(projection='3d')`` on the same figure at the default position,
    so the second plot covered (or was drawn into) the first one.

    Nothing is returned; the figure is shown with ``plt.show()``.
    """
    xs = None
    if data_file is not None:
        xs = multidimensional.common.load_embeddings(data_file)

    # Only the points and colours are used here; the dissimilarities that the
    # builder also returns are not needed by Isomap.
    xs, _d_goal, color = (datagen.DataBuilder()
                          .with_dim(dim)
                          .with_distance(distance)
                          .with_noise(noise_std)
                          .with_npoints(npoints)
                          .with_neighbors(n_neighbors)
                          .with_points(xs)
                          .with_type(data_type)
                          .build())

    m = manifold.Isomap(n_components=target_dim,
                        n_neighbors=n_neighbors,
                        max_iter=max_turns)
    x = isomap(m, xs)

    fig = plt.figure()

    # Left: the input data (its first three columns are plotted).
    ax = fig.add_subplot(1, 2, 1, projection='3d')
    ax.scatter(xs[:, 0], xs[:, 1], xs[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.set_title("Original data")

    # Right: the first two columns of the projection, on its own axes so it
    # no longer overlaps the original-data plot.
    ax = fig.add_subplot(1, 2, 2, projection='3d')
    ax.scatter(x[:, 0], x[:, 1], c=color, cmap=plt.cm.Spectral)
    ax.set_title('Projected data')

    plt.show()
def experiment(data_file, data_type, dim, distance, npoints, n_neighbors,
               noise_std, target_dim, point_filter, radius_update,
               radius_barrier, explore_dim_percent, starting_radius, max_turns,
               _run):
    """Fit the project's MDS on the configured data and log its history.

    The error and radius histories are logged on ``_run`` as scalars, and
    the first/last point files (plus images and animation when present) are
    attached as artifacts.

    Returns:
        The last entry of the observer's ``'error'`` history.
    """
    loaded_points = None
    if data_file is not None:
        loaded_points = multidimensional.common.load_embeddings(data_file)

    # Same fluent chain as before, spelled out one step at a time.
    builder = datagen.DataBuilder()
    builder = builder.with_dim(dim)
    builder = builder.with_distance(distance)
    builder = builder.with_noise(noise_std)
    builder = builder.with_npoints(npoints)
    builder = builder.with_neighbors(n_neighbors)
    builder = builder.with_points(loaded_points)
    builder = builder.with_type(data_type)
    _points, target_distances, point_colors = builder.build()

    mds_options = dict(
        starting_radius=starting_radius,
        radius_barrier=radius_barrier,
        max_turns=max_turns,
        explore_dim_percent=explore_dim_percent,
        keep_history=KEEP_HISTORY,
        history_color=point_colors,
        history_path=EXPERIMENT_NAME,
        dissimilarities='precomputed',
    )
    m = multidimensional.mds.MDS(target_dim, point_filter, radius_update,
                                 **mds_options)
    m.fit(target_distances)
    m.plot_history()

    history = m.history_observer.history

    # Scalars are logged with 1-based step numbers.
    for step, value in enumerate(history['error'], start=1):
        _run.log_scalar('mds.mse.error', value, step)
    for step, value in enumerate(history['radius'], start=1):
        _run.log_scalar('mds.step', value, step)

    # First and last saved point sets.
    _run.add_artifact(history['xs_files'][0], name='points_start')
    _run.add_artifact(history['xs_files'][-1], name='points_end')

    # Images are optional: only attach them when at least one was produced.
    if len(history['xs_images']) >= 1:
        _run.add_artifact(history['xs_images'][0], name='points_image_start')
        _run.add_artifact(history['xs_images'][-1], name='points_image_end')

    if history['animation'] is not None:
        _run.add_artifact(history['animation'], name='animation')

    return m.history_observer.history['error'][-1]
def experiment(
        data_file,
        data_type,
        dim,
        distance,
        npoints,
        n_neighbors,
        noise_std,
        target_dim,
        max_turns,
        _run):
    """Fit the SMACOF MDS variant on the configured data and log its history.

    The error and radius histories are logged on ``_run`` as scalars, and
    the first/last point files (plus images and animation when present) are
    attached as artifacts.

    Returns:
        The last entry of the observer's ``'error'`` history.
    """
    if data_file is None:
        loaded_points = None
    else:
        loaded_points = multidimensional.common.load_embeddings(data_file)

    builder = datagen.DataBuilder()
    builder = builder.with_dim(dim)
    builder = builder.with_distance(distance)
    builder = builder.with_noise(noise_std)
    builder = builder.with_npoints(npoints)
    builder = builder.with_neighbors(n_neighbors)
    builder = builder.with_points(loaded_points)
    builder = builder.with_type(data_type)
    # Only the dissimilarities are used below.
    _points, target_distances, _colors = builder.build()

    m = smacof.MDS(
        n_components=target_dim,
        n_init=1,
        max_iter=max_turns,
        verbose=2,
        dissimilarity='precomputed',
        history_path=EXPERIMENT_NAME,
    )
    smacof_fit(m, target_distances)
    m.history_observer.plot(m.n_iter_, target_dim)

    history = m.history_observer.history

    # (history key, metric name) pairs, logged in this order with 1-based
    # step numbers.
    scalar_series = (
        ('error', 'mds.mse.error'),
        ('radius', 'mds.step'),
    )
    for key, metric in scalar_series:
        for step, value in enumerate(history[key], start=1):
            _run.log_scalar(metric, value, step)

    point_files = history['xs_files']
    _run.add_artifact(point_files[0], name='points_start')
    point_files = history['xs_files']
    _run.add_artifact(point_files[-1], name='points_end')

    # Images are optional: only attach them when at least one was produced.
    if len(history['xs_images']) >= 1:
        _run.add_artifact(history['xs_images'][0], name='points_image_start')
        _run.add_artifact(history['xs_images'][-1], name='points_image_end')

    animation = history['animation']
    if animation is not None:
        _run.add_artifact(history['animation'], name='animation')

    return m.history_observer.history['error'][-1]
def experiment(data_type, dim, distance, npoints, n_neighbors, noise_std,
               target_dim, point_filter, radius_update, radius_barrier,
               explore_dim_percent, starting_radius, max_turns, _run):
    """Compare several dimensionality-reduction methods on generated data.

    Each method in ``methods`` is fitted in turn and its first two output
    columns are drawn into one cell of a 3x3 figure (cell 1 holds the
    original data).  A method that raises is reported on stdout and gets an
    empty, labelled cell.  The figure is saved to ``RESULT_IMAGE`` and shown.
    Finally the error and radius histories of the proposed MDS and of the
    SMACOF MDS are logged on ``_run``.

    Returns:
        The last entry of the proposed MDS's ``'error'`` history.
    """
    xs, d_goal, color = (datagen.DataBuilder()
                         .with_dim(dim)
                         .with_distance(distance)
                         .with_noise(noise_std)
                         .with_npoints(npoints)
                         .with_neighbors(n_neighbors)
                         .with_points(None)
                         .with_type(data_type)
                         .build())

    # name: subplot label, method: estimator, data: what fit_transform gets
    # (precomputed dissimilarities for the MDS variants, raw points otherwise).
    dim_reduction = namedtuple('dim_reduction', 'name method data')

    MDS_proposed = dim_reduction(
        'MDS (proposed)',
        multidimensional.mds.MDS(target_dim,
                                 point_filter,
                                 radius_update,
                                 starting_radius=starting_radius,
                                 radius_barrier=radius_barrier,
                                 max_turns=max_turns,
                                 explore_dim_percent=explore_dim_percent,
                                 keep_history=KEEP_HISTORY,
                                 history_color=color,
                                 history_path=EXPERIMENT_NAME + '_mds_proposed',
                                 dissimilarities='precomputed'),
        d_goal)
    LLE = dim_reduction(
        'LLE',
        manifold.LocallyLinearEmbedding(n_neighbors, target_dim,
                                        eigen_solver='auto',
                                        method='standard'),
        xs)
    LTSA = dim_reduction(
        'LTSA',
        manifold.LocallyLinearEmbedding(n_neighbors, target_dim,
                                        eigen_solver='auto',
                                        method='ltsa'),
        xs)
    PCA = dim_reduction('Truncated SVD',
                        decomposition.TruncatedSVD(n_components=target_dim),
                        xs)
    HessianLLE = dim_reduction(
        'HessianLLE',
        manifold.LocallyLinearEmbedding(n_neighbors, target_dim,
                                        eigen_solver='auto',
                                        method='hessian'),
        xs)
    ModifiedLLE = dim_reduction(
        'ModifiedLLE',
        manifold.LocallyLinearEmbedding(n_neighbors, target_dim,
                                        eigen_solver='auto',
                                        method='modified'),
        xs)
    Isomap = dim_reduction('Isomap',
                           manifold.Isomap(n_neighbors, target_dim),
                           xs)
    mds = dim_reduction(
        'MDS SMACOF',
        multidimensional.smacof.MDS(n_components=target_dim,
                                    n_init=1,
                                    max_iter=max_turns,
                                    verbose=2,
                                    dissimilarity='precomputed',
                                    history_path=EXPERIMENT_NAME + '_mds_smacof'),
        d_goal)
    # The two entries below are built but currently not part of `methods`.
    SpectralEmbedding = dim_reduction(
        'SpectralEmbedding',
        manifold.SpectralEmbedding(n_components=target_dim,
                                   n_neighbors=n_neighbors),
        xs)
    tSNE = dim_reduction(
        'tSNE',
        manifold.TSNE(n_components=target_dim, init='pca', random_state=0),
        xs)

    methods = [
        MDS_proposed, mds, PCA, Isomap, LLE, HessianLLE, ModifiedLLE, LTSA
    ]

    fig = plt.figure(figsize=(20, 20))
    # NOTE(review): aspect=1 on a 3D axes may be rejected by newer Matplotlib
    # releases -- confirm against the installed version.
    ax = fig.add_subplot(331, projection='3d', aspect=1)
    ax.scatter(xs[:, 0], xs[:, 1], xs[:, 2], c=color, cmap=plt.cm.Spectral)
    plt.title("Original Manifold", fontsize=32)

    for i, method in enumerate(methods):
        print("Running {}".format(method.name))
        # Cell 1 is the original manifold, so method i goes into cell i + 2.
        # The (rows, cols, index) integer form is used because a string spec
        # such as "332" is not accepted by recent Matplotlib releases.
        cell = i + 2
        try:
            t0 = time.time()
            x = method.method.fit_transform(method.data)
            t1 = time.time()
            # Debug output, printed after t1 so it is not counted as run time.
            if method.name == 'LTSA':
                print(x)
            ax = fig.add_subplot(3, 3, cell, aspect=1)
            # Plot the 2 dimensions.
            ax.scatter(x[:, 0], x[:, 1], c=color, cmap=plt.cm.Spectral)
            plt.title(method.name + "(%.2g sec)" % (t1 - t0), fontsize=32)
            plt.axis('tight')
        # With high noise level, some of the models fail; keep going and
        # leave a labelled placeholder instead of aborting the comparison.
        except Exception as e:
            print(e)
            ax = fig.add_subplot(3, 3, cell, aspect=1)
            plt.title(method.name + " did not run", fontsize=32)
            plt.axis('tight')

    plt.tight_layout()
    plt.savefig(RESULT_IMAGE)
    plt.show()

    # Log the optimisation history of both MDS variants (1-based steps).
    history = MDS_proposed.method.history_observer.history
    for i, error in enumerate(history['error']):
        _run.log_scalar('mds.mse.error', error, i + 1)
    for i, radius in enumerate(history['radius']):
        _run.log_scalar('mds.step', radius, i + 1)

    history = mds.method.history_observer.history
    for i, error in enumerate(history['error']):
        _run.log_scalar('smacof.mse.error', error, i + 1)
    for i, radius in enumerate(history['radius']):
        _run.log_scalar('smacof.step', radius, i + 1)

    return MDS_proposed.method.history_observer.history['error'][-1]
def experiment(ground_truth_men, ground_truth_simlex, data_file,
               data_file_small, data_type, dim, distance, npoints, n_neighbors,
               noise_std, target_dim, point_filter, radius_update,
               radius_barrier, explore_dim_percent, starting_radius, max_turns,
               dataset, _run):
    """Score several dimensionality reductions with ``semantic_similarity``.

    Every method in ``methods`` is fitted; its output is scored against the
    MEN ground truth when ``dataset == 'men'`` and against SimLex otherwise.
    The collected scores are written to
    ``semantic_similarity_simlex_only_bench.json`` and the histories of the
    three MDS variants are logged on ``_run``.

    Returns:
        The last entry of the proposed MDS's ``'error'`` history.
    """
    words = None
    embeddings = None
    embeddings_small = None

    if data_file is not None:
        # Pickled input goes through load_pkl, everything else through the
        # shared embeddings loader.
        loader = (load_pkl if data_file.endswith('pkl')
                  else multidimensional.common.load_embeddings)
        words, embeddings = loader(data_file)
    if data_file_small is not None:
        _, embeddings_small = multidimensional.common.load_embeddings(
            data_file_small)

    men = load_ground_truth(ground_truth_men)
    simlex = load_ground_truth(ground_truth_simlex)
    print(embeddings.shape)

    def _build(points, with_noise):
        # Shared builder chain; noise is only applied to the main data set.
        builder = datagen.DataBuilder().with_dim(dim).with_distance(distance)
        if with_noise:
            builder = builder.with_noise(noise_std)
        builder = builder.with_npoints(npoints).with_neighbors(n_neighbors)
        return builder.with_points(points).with_type(data_type).build()

    embeddings, target_distances, colors = _build(embeddings, True)
    # The colours of the second build replace those of the first one.
    embeddings_small, _small_distances, colors = _build(embeddings_small,
                                                        False)

    dim_reduction = namedtuple('dim_reduction', 'name method data')

    def _radius_mds(mds_class, history_suffix):
        # Both radius-based MDS flavours take the same arguments.
        return mds_class(target_dim,
                         point_filter,
                         radius_update,
                         starting_radius=starting_radius,
                         radius_barrier=radius_barrier,
                         max_turns=max_turns,
                         explore_dim_percent=explore_dim_percent,
                         keep_history=KEEP_HISTORY,
                         history_color=colors,
                         history_path=EXPERIMENT_NAME + history_suffix,
                         dissimilarities='precomputed')

    def _lle(variant):
        return manifold.LocallyLinearEmbedding(n_neighbors,
                                               target_dim,
                                               eigen_solver='auto',
                                               method=variant)

    # Entries are constructed in the same order as before.
    proposed = dim_reduction(
        'MDS proposed',
        _radius_mds(multidimensional.mds.MDS, 'mds_proposed'),
        target_distances)
    monotonic = dim_reduction(
        'MDS (monotonic)',
        _radius_mds(multidimensional.monotonic.MDS, '_mds_monotonic'),
        target_distances)
    lle = dim_reduction('LLE', _lle('standard'), embeddings)
    ltsa = dim_reduction('LTSA', _lle('ltsa'), embeddings)
    hessian_lle = dim_reduction('HessianLLE', _lle('hessian'), embeddings)
    modified_lle = dim_reduction('ModifiedLLE', _lle('modified'), embeddings)
    isomap_entry = dim_reduction('Isomap',
                                 manifold.Isomap(n_neighbors, target_dim),
                                 embeddings)
    smacof_entry = dim_reduction(
        'MDS SMACOF',
        multidimensional.smacof.MDS(
            n_components=target_dim,
            n_init=1,
            max_iter=max_turns,
            verbose=2,
            dissimilarity='precomputed',
            history_path=EXPERIMENT_NAME + '_mds_smacof'),
        target_distances)
    truncated_svd = dim_reduction(
        'Truncated SVD',
        decomposition.TruncatedSVD(n_components=target_dim),
        embeddings)
    original = dim_reduction('Original Embeddings 300d', Identity(),
                             embeddings)
    # The three entries below are built but not part of `methods`.
    _original_small = dim_reduction('Original Embeddings 50d', Identity(),
                                    embeddings_small)
    _spectral = dim_reduction(
        'SpectralEmbedding',
        manifold.SpectralEmbedding(n_components=target_dim,
                                   n_neighbors=n_neighbors),
        embeddings)
    _tsne = dim_reduction(
        'tSNE',
        manifold.TSNE(n_components=target_dim, init='pca', random_state=0),
        embeddings)

    methods = [
        proposed, monotonic, smacof_entry, original, isomap_entry,
        truncated_svd, lle, modified_lle, ltsa, hessian_lle
    ]

    res = {}
    for entry in methods:
        try:
            projected = entry.method.fit_transform(entry.data)
            scores = {}
            res[entry.name] = scores
            if dataset == 'men':
                key, truth = 'men', men
            else:
                key, truth = 'simlex', simlex
            scores[key] = semantic_similarity(entry.name, truth, words,
                                              projected)
            print("Model: {}\t result:{}".format(entry.name, res))
        except Exception as e:
            # A failing method is reported and skipped.
            print(e)

    with open('semantic_similarity_simlex_only_bench.json', 'w') as fd:
        json.dump(res, fd, indent=4, sort_keys=True)

    # (entry, ((history key, metric name), ...)) in the original log order.
    logged_series = (
        (proposed, (('error', 'mds.mse.error'),
                    ('radius', 'mds.step'),
                    ('epoch_time', 'mds.epoch_time'))),
        (smacof_entry, (('error', 'smacof.mse.error'),
                        ('radius', 'smacof.step'),
                        ('epoch_time', 'smacof.epoch_time'))),
        (monotonic, (('error', 'mds_monotonic.mse.error'),
                     ('radius', 'mds_monotonic.step'),
                     ('epoch_time', 'mds_monotonic.epoch_time'))),
    )
    for entry, series in logged_series:
        history = entry.method.history_observer.history
        for key, metric in series:
            for step, value in enumerate(history[key], start=1):
                _run.log_scalar(metric, value, step)

    return proposed.method.history_observer.history['error'][-1]
def experiment(data_type, dim, distance, npoints, n_neighbors, noise_std,
               target_dim, point_filter, radius_update, radius_barrier,
               explore_dim_percent, starting_radius, max_turns, _run):
    """Compare the project's MDS for three different starting radii.

    Three MDS instances are fitted on the same dissimilarities: one with the
    configured ``starting_radius``, one with radius 1 and one with radius
    65536.  The original data and the three results are drawn in a 1x4
    figure, which is saved to ``RESULT_IMAGE`` and shown.  A run that raises
    is reported on stdout and gets an empty, labelled cell.  The error and
    radius histories of all three runs are logged on ``_run``.

    Returns:
        The last entry of the ``'error'`` history of the run that used the
        configured ``starting_radius``.
    """
    xs, d_goal, color = (datagen.DataBuilder()
                         .with_dim(dim)
                         .with_distance(distance)
                         .with_noise(noise_std)
                         .with_npoints(npoints)
                         .with_neighbors(n_neighbors)
                         .with_points(None)
                         .with_type(data_type)
                         .build())

    dim_reduction = namedtuple('dim_reduction', 'name method data')

    def build_mds(radius):
        # The three runs differ only in their starting radius.
        # NOTE(review): all runs share the same history_path, so any history
        # files they write may overwrite each other -- confirm this is intended.
        return multidimensional.mds.MDS(
            target_dim,
            point_filter,
            radius_update,
            starting_radius=radius,
            radius_barrier=radius_barrier,
            max_turns=max_turns,
            explore_dim_percent=explore_dim_percent,
            keep_history=KEEP_HISTORY,
            history_color=color,
            history_path=EXPERIMENT_NAME + '_mds_proposed',
            dissimilarities='precomputed')

    # NOTE(review): the 'r0=16' label is fixed text; it only matches the run
    # when the configured starting_radius is 16.
    mds = dim_reduction('r0=16', build_mds(starting_radius), d_goal)
    mds_pess = dim_reduction('r0=1', build_mds(1), d_goal)
    mds_opt = dim_reduction('r0=65536', build_mds(65536), d_goal)

    methods = [mds, mds_pess, mds_opt]

    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(141, projection='3d', aspect=1)
    ax.scatter(xs[:, 0], xs[:, 1], xs[:, 2], c=color, cmap=plt.cm.Spectral)
    plt.title("Original Manifold", fontsize=32)

    for i, method in enumerate(methods):
        print("Running {}".format(method.name))
        # Cell 1 is the original manifold, so run i goes into cell i + 2 of
        # the 1x4 grid.  The (rows, cols, index) integer form is used because
        # a string spec such as "142" is not accepted by recent Matplotlib.
        cell = i + 2
        try:
            x = method.method.fit_transform(method.data)
            ax = fig.add_subplot(1, 4, cell, aspect=1)
            # Plot the 2 dimensions.
            ax.scatter(x[:, 0], x[:, 1], c=color, cmap=plt.cm.Spectral)
            plt.title(method.name)
            plt.axis('tight')
        # With high noise level, some of the models fail; keep going and
        # leave a labelled placeholder instead of aborting the comparison.
        except Exception as e:
            print(e)
            # Same 1x4 grid as the success path (this used to be a 3x3 grid,
            # which put the placeholder in the wrong place).
            ax = fig.add_subplot(1, 4, cell, aspect=1)
            plt.title(method.name + " did not run", fontsize=32)
            plt.axis('tight')

    plt.tight_layout()
    plt.savefig(RESULT_IMAGE)
    plt.show()

    # Log the optimisation history of every run (1-based steps).
    history = mds.method.history_observer.history
    for i, error in enumerate(history['error']):
        _run.log_scalar('mds.mse.error', error, i + 1)
    for i, radius in enumerate(history['radius']):
        _run.log_scalar('mds.step', radius, i + 1)

    history = mds_pess.method.history_observer.history
    for i, error in enumerate(history['error']):
        _run.log_scalar('pessimistic.mse.error', error, i + 1)
    for i, radius in enumerate(history['radius']):
        _run.log_scalar('pessimistic.step', radius, i + 1)

    history = mds_opt.method.history_observer.history
    for i, error in enumerate(history['error']):
        _run.log_scalar('optimistic.mse.error', error, i + 1)
    for i, radius in enumerate(history['radius']):
        _run.log_scalar('optimistic.step', radius, i + 1)

    return mds.method.history_observer.history['error'][-1]