import numpy as np
from scipy.spatial.distance import cdist


def sammon(x, n=2, display=2, inputdist='raw', maxhalves=20, maxiter=500,
           tolfun=1e-9, init='default'):
    """Perform Sammon mapping on dataset x

    y = sammon(x) applies the Sammon nonlinear mapping procedure on
    multivariate data x, where each row represents a pattern and each column
    represents a feature. On completion, y contains the corresponding
    co-ordinates of each point on the map. By default, a two-dimensional
    map is created. Note if x contains any duplicated rows, SAMMON will
    fail (ungracefully).

    [y, E] = sammon(x) also returns the value of the cost function in E
    (i.e. the stress of the mapping).

    An N-dimensional output map is generated by y = sammon(x, n).

    A set of optimisation options can be specified using optional
    arguments, y = sammon(x, n, [OPTS]):

        maxiter   - maximum number of iterations
        tolfun    - relative tolerance on objective function
        maxhalves - maximum number of step halvings
        inputdist - {'raw', 'distance'}; if set to 'distance', x is
                    interpreted as a matrix of pairwise distances
        display   - 0 to 2. 0 least verbose, 2 max verbose.
        init      - {'pca', 'cmdscale', 'random', 'default'}; 'default'
                    resolves to 'pca' if inputdist is 'raw' and to
                    'cmdscale' if inputdist is 'distance'

    The default options are used when sammon(x) is called with no optional
    arguments.

    File        : sammon.py
    Date        : 18 April 2014
    Authors     : Tom J. Pollard ([email protected])
                  Ported from MATLAB implementation by Gavin C. Cawley
                  and Nicola L. C. Talbot

    Description : Simple python implementation of Sammon's non-linear
                  mapping algorithm [1].

    References  : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data
                  Structure Analysis", IEEE Transactions on Computers,
                  vol. C-18, no. 5, pp 401-409, May 1969.

    Copyright   : (c) Dr Gavin C. Cawley, November 2007.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
    """

    # Create distance matrix unless it was given directly as input
    if inputdist == 'distance':
        D = x
        if init == 'default':
            init = 'cmdscale'
    else:
        D = cdist(x, x)
        if init == 'default':
            init = 'pca'

    if inputdist == 'distance' and init == 'pca':
        raise ValueError("Cannot use init == 'pca' when inputdist == 'distance'")

    if np.count_nonzero(np.diagonal(D)) > 0:
        raise ValueError("The diagonal of the dissimilarity matrix must be zero")

    # Remaining initialisation
    N = x.shape[0]
    scale = 0.5 / D.sum()
    D = D + np.eye(N)

    if np.count_nonzero(D <= 0) > 0:
        raise ValueError("Off-diagonal dissimilarities must be strictly positive")

    Dinv = 1 / D

    if init == 'pca':
        [UU, DD, _] = np.linalg.svd(x)
        y = UU[:, :n] * DD[:n]
    elif init == 'cmdscale':
        from cmdscale import cmdscale  # local module providing classical MDS
        y, e = cmdscale(D)
        y = y[:, :n]
    else:
        y = np.random.normal(0.0, 1.0, [N, n])

    one = np.ones([N, n])
    d = cdist(y, y) + np.eye(N)
    dinv = 1. / d
    delta = D - d
    E = ((delta**2) * Dinv).sum()

    # Get on with it
    for i in range(maxiter):

        # Compute gradient, Hessian and search direction (note it is actually
        # 1/4 of the gradient and Hessian, but the step size is just the ratio
        # of the gradient and the diagonal of the Hessian so it doesn't
        # matter).
        delta = dinv - Dinv
        deltaone = np.dot(delta, one)
        g = np.dot(delta, y) - (y * deltaone)
        dinv3 = dinv**3
        y2 = y**2
        H = np.dot(dinv3, y2) - deltaone - 2 * y * np.dot(dinv3, y) \
            + y2 * np.dot(dinv3, one)
        s = -g.flatten(order='F') / np.abs(H.flatten(order='F'))
        y_old = y

        # Use step-halving procedure to ensure progress is made
        for j in range(maxhalves):
            s_reshape = np.reshape(s, (-1, n), order='F')
            y = y_old + s_reshape
            d = cdist(y, y) + np.eye(N)
            dinv = 1 / d
            delta = D - d
            E_new = ((delta**2) * Dinv).sum()
            if E_new < E:
                break
            else:
                s = 0.5 * s

        # Bomb out if too many halving steps are required
        if j == maxhalves - 1:
            print('Warning: maxhalves exceeded. Sammon mapping may not converge...')

        # Evaluate termination criterion
        if abs((E - E_new) / E) < tolfun:
            if display:
                print('TolFun exceeded: Optimisation terminated')
            break

        # Report progress
        E = E_new
        if display > 1:
            print('epoch = %d : E = %12.10f' % (i + 1, E * scale))

    if i == maxiter - 1:
        print('Warning: maxiter exceeded. Sammon mapping may not have converged...')

    # Fiddle stress to match the original Sammon paper
    E = E * scale

    return [y, E]
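

# ---------------------------------------------------------------------------
# Usage sketch (an illustrative addition, not part of the original file):
# maps a small random dataset to 2-D with the default PCA initialisation and
# prints the resulting stress. The demo data and seed are assumptions made
# purely for demonstration; note the docstring's warning that duplicated
# rows make sammon() fail.
if __name__ == '__main__':
    np.random.seed(0)
    x_demo = np.random.rand(30, 5)              # 30 patterns, 5 features
    y_map, stress = sammon(x_demo, 2, display=0)
    print('Output map shape :', y_map.shape)    # (30, 2)
    print('Sammon stress    : %f' % stress)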
                         int(row[7]), int(row[8])] for row in reader]

random.shuffle(data)
data_train, data_test = train_test_split(data, test_size=0.4)
data_train = np.array(data_train)
data_test = np.array(data_test)
cl = [0, 1]

# Calculate Euclidean distances for all possible pairs and convert the
# condensed result to a square-form matrix
print(list(itertools.combinations(range(len(data_train)), 2)))
D = distance.pdist(data_train[:, 0:8], 'euclidean')
print('D', D)
z = squareform(D)

# Apply classical multidimensional scaling
[Y, e] = cmdscale(z)

# Divide Y class-wise
Y_0_cl1, Y_0_cl2 = segregate(Y[:, 0], data_train[:, -1], cl)
Y_1_cl1, Y_1_cl2 = segregate(Y[:, 1], data_train[:, -1], cl)

# Plot scattered graph
plt.plot(Y_0_cl1, Y_1_cl1, '.', color='blue')
plt.plot(Y_0_cl2, Y_1_cl2, 'v', color='red')
plt.show()

# Train SVM model using the sklearn library
SVMModel = svm.SVC(kernel='sigmoid', C=1.0)
SVMModel.fit(data_train[:, 0:8], data_train[:, -1])

# Cross-validated classification
CVSVMModel = cross_val_score(SVMModel, data_train[:, 0:8],
                             data_train[:, -1], cv=5)
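
# Illustrative follow-up (an assumption, not in the original script): report
# the cross-validation accuracy alongside accuracy on the held-out test
# split, using only the fitted SVMModel and CVSVMModel defined above.
print('CV accuracy  : %0.3f (+/- %0.3f)' % (CVSVMModel.mean(), CVSVMModel.std()))
print('Test accuracy: %0.3f' % SVMModel.score(data_test[:, 0:8], data_test[:, -1]))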
def main():
    # Loop through HITs, concatenating the response vector from each
    norm_cat_resp = np.array([normResp(n) for n in range(1, len(FACE_PAIR))])
    rank_cat_resp = np.array([rankResp(n) for n in range(1, len(FACE_PAIR))])

    # Plot responses
    plot_resps(norm_cat_resp, MAIN_DIR, 'normalized responses', 'norm_resps')
    plot_resps(rank_cat_resp, MAIN_DIR, 'ranked responses', 'rank_resps')

    # Calculate mean responses
    norm_mean_resp = norm_cat_resp.mean(axis=0)
    rank_mean_resp = rank_cat_resp.mean(axis=0)

    # Reshape mean responses into similarity matrices
    norm_resp_mat = reshape_dsm(norm_mean_resp, FNAMES, FPAIR1)
    rank_resp_mat = reshape_dsm(rank_mean_resp, FNAMES, FPAIR1)

    # Plot similarity matrices
    plot_dsm(norm_resp_mat, FNAMES, FACE_URL, MAIN_DIR, 'normalized', 'norm_dsm')
    plot_dsm(rank_resp_mat, FNAMES, FACE_URL, MAIN_DIR, 'ranked', 'rank_dsm')

    # Calculate dissimilarity matrices
    norm_dsm = 1 - norm_resp_mat
    rank_dsm = 1 - rank_resp_mat

    # Classical multidimensional scaling
    norm_config_vals, norm_eigvals = cmdscale(norm_dsm)
    rank_config_vals, rank_eigvals = cmdscale(rank_dsm)

    # Plot MDS results
    plot_mds(norm_config_vals, norm_eigvals, FNAMES, MAIN_DIR, 'normalized', 'norm_mds')
    plot_mds(rank_config_vals, rank_eigvals, FNAMES, MAIN_DIR, 'ranked', 'rank_mds')

    # Create list of image filenames
    img_list = [MAIN_DIR + 'results/' + x + '.png'
                for x in ['norm_resps', 'norm_dsm', 'norm_mds']]

    # Load images into a list (not a map object, which would be exhausted
    # after the first pass) and collect their sizes
    images = [Image.open(f) for f in img_list]
    widths, heights = zip(*(i.size for i in images))

    # Create new (scaled) sizes, preserving aspect ratio at a common height
    new_height = 1000.0
    new_widths = widths * np.array([new_height / x for x in heights])

    # Create a new image for the others to be pasted into
    new_im = Image.new('RGB', (int(sum(new_widths)), int(new_height)))
    x_offset = 0
    for j, im in enumerate(images):
        # Scale each image and paste it at the running horizontal offset
        scaled_im = im.resize((int(new_widths[j]), int(new_height)))
        new_im.paste(scaled_im, (x_offset, 0))
        x_offset += scaled_im.size[0]

    new_im.save(MAIN_DIR + 'results/summary.png')
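

# Entry-point guard (an assumption; the original fragment does not show how
# main() is invoked, but this is the conventional way to run it as a script).
if __name__ == '__main__':
    main()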
temp_lims = (round(min(mean_resp) * 1000) / 1000,
             round(max(mean_resp) * 1000) / 1000)
ax1.set_xticklabels(temp_lims, color='white')
# plt.show()
fig.savefig(main_dir + 'results/face_dsm.png', dpi=200,
            facecolor=fig.get_facecolor(), edgecolor='none')
plt.close(fig)

# Calculate the dissimilarity matrix
dsm = 1 - resp_mat

# Classical multidimensional scaling
Y, e = cmdscale(dsm)

# Keep only the first two dimensions (those with the largest eigenvalues)
scaled_coords = Y[:, 0:2]

# Plot faces based on the first two dimensions of the configuration matrix
fig = plt.figure(figsize=(7, 10))
ax = plt.subplot(211)
plt.scatter(Y[:, 0], Y[:, 1])
for i, txt in enumerate(FNAMES):
    plt.annotate(txt, (Y[i, 0], Y[i, 1]))

# Draw axis lines through the origin
temp_xlim = ax.get_xlim()
temp_ylim = ax.get_ylim()
ax.plot(temp_xlim, [0, 0], color='black')
ax.plot([0, 0], temp_ylim, color='black')
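
# Illustrative diagnostic (an assumption, not in the original fragment): the
# share of positive eigenvalue mass captured by the first two MDS dimensions
# indicates how faithfully the 2-D scatter represents the dissimilarities.
explained = e[:2].sum() / e[e > 0].sum()
print('First two MDS dimensions explain %.1f%% of the variance' % (100 * explained))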
import numpy as np
from scipy.spatial.distance import cdist
from cmdscale import cmdscale


def sammon(x, n, display=2, inputdist="raw", maxhalves=20, maxiter=500,
           tolfun=1e-9, init="default"):
    """
    Sammon mapping on a dataset x.

    Applies the nonlinear mapping procedure to multivariate data with rows
    of patterns and columns of features.

        maxiter   - maximum number of iterations
        tolfun    - relative tolerance on the objective function
        maxhalves - maximum number of step halvings
        inputdist - set to "distance" to interpret x as pairwise distances
        display   - 0 to 2, from least to most verbose
        init      - "pca", "cmdscale", "random" or "default" ("pca" for raw
                    input, "cmdscale" for distance input)

    Reference: Sammon, John W. Jr., "A Nonlinear Mapping for Data Structure
    Analysis", IEEE Transactions on Computers, vol. C-18, no. 5,
    pp 401-409, May 1969.
    """
    # Create the distance matrix unless one was supplied directly
    if inputdist == "distance":
        D = x
        if init == "default":
            init = "cmdscale"
    else:
        D = cdist(x, x)
        if init == "default":
            init = "pca"

    N = x.shape[0]
    scale = 0.5 / D.sum()
    D = D + np.eye(N)
    Dinv = 1 / D

    # Initialise the output configuration
    if init == "pca":
        [UU, DD, _] = np.linalg.svd(x)
        y = UU[:, :n] * DD[:n]
    elif init == "cmdscale":
        y, e = cmdscale(D)
        y = y[:, :n]
    else:
        y = np.random.normal(0.0, 1.0, [N, n])

    one = np.ones([N, n])
    d = cdist(y, y) + np.eye(N)
    dinv = 1. / d
    delta = D - d
    E = ((delta**2) * Dinv).sum()

    for i in range(maxiter):

        # Compute gradient, Hessian and search direction (note it is actually
        # 1/4 of the gradient and Hessian, but the step size is just the ratio
        # of the gradient and the diagonal of the Hessian so it doesn't
        # matter).
        delta = dinv - Dinv
        deltaone = np.dot(delta, one)
        g = np.dot(delta, y) - (y * deltaone)
        dinv3 = dinv**3
        y2 = y**2
        H = np.dot(dinv3, y2) - deltaone - 2 * y * np.dot(dinv3, y) \
            + y2 * np.dot(dinv3, one)
        s = -g.flatten(order='F') / np.abs(H.flatten(order='F'))
        y_old = y

        # Use step-halving procedure to ensure progress is made
        for j in range(maxhalves):
            s_reshape = np.reshape(s, (-1, n), order='F')
            y = y_old + s_reshape
            d = cdist(y, y) + np.eye(N)
            dinv = 1 / d
            delta = D - d
            E_new = ((delta**2) * Dinv).sum()
            if E_new < E:
                break
            else:
                s = 0.5 * s

        if j == maxhalves - 1:
            print("Warning: maxhalves exceeded. Sammon mapping may not converge...")

        # Evaluate termination criterion
        if abs((E - E_new) / E) < tolfun:
            if display:
                print("TolFun exceeded: Optimisation terminated")
            break

        # Carry the accepted error forward to the next iteration
        E = E_new

    E = E * scale  # Fiddle stress

    return [y, E]
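

# Usage sketch for the precomputed-distance path (an illustrative addition,
# not part of the original file). With inputdist="distance", x must be a
# square matrix of pairwise distances with a zero diagonal, and init falls
# back to "cmdscale", so the local cmdscale module imported above is needed.
if __name__ == "__main__":
    pts = np.random.rand(20, 4)
    D_demo = cdist(pts, pts)
    y_map, stress = sammon(D_demo, 2, inputdist="distance", display=0)
    print("stress:", stress)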