def sammon(x,
           n,
           display=2,
           inputdist='raw',
           maxhalves=20,
           maxiter=500,
           tolfun=1e-9,
           init='default'):

    """Perform Sammon mapping on dataset x
    y = sammon(x) applies the Sammon nonlinear mapping procedure on
    multivariate data x, where each row represents a pattern and each column
    represents a feature.  On completion, y contains the corresponding
    co-ordinates of each point on the map.  By default, a two-dimensional
    map is created.  Note that if x contains any duplicated rows, sammon
    will fail (ungracefully).
    [y,E] = sammon(x) also returns the value of the cost function in E (i.e.
    the stress of the mapping).
    An N-dimensional output map is generated by y = sammon(x,n) .
    A set of optimisation options can be specified using optional
    arguments, y = sammon(x,n,[OPTS]):
       maxiter        - maximum number of iterations
       tolfun         - relative tolerance on objective function
       maxhalves      - maximum number of step halvings
       inputdist      - {'raw','distance'} if set to 'distance', x is
                        interpreted as a matrix of pairwise distances.
       display        - 0 to 2. 0 least verbose, 2 max verbose.
       init           - {'pca', 'cmdscale', 'random', 'default'}
                        'default' means 'pca' if inputdist is 'raw',
                        'cmdscale' if inputdist is 'distance'
    The defaults shown in the signature are used for any options that are
    not supplied.
    File        : sammon.py
    Date        : 18 April 2014
    Authors     : Tom J. Pollard ([email protected])
                : Ported from MATLAB implementation by 
                  Gavin C. Cawley and Nicola L. C. Talbot
    Description : Simple python implementation of Sammon's non-linear
                  mapping algorithm [1].
    References  : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data
                  Structure Analysis", IEEE Transactions on Computers,
                  vol. C-18, no. 5, pp 401-409, May 1969.
    Copyright   : (c) Dr Gavin C. Cawley, November 2007.
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
    """

    # Create distance matrix unless given by parameters
    if inputdist == 'distance':
        D = x
        if init == 'default':
            init = 'cmdscale'
    else:
        D = cdist(x, x)
        if init == 'default':
            init = 'pca'

    if inputdist == 'distance' and init == 'pca':
        raise ValueError(
            "Cannot use init == 'pca' when inputdist == 'distance'")

    if np.count_nonzero(np.diagonal(D)) > 0:
        raise ValueError(
            "The diagonal of the dissimilarity matrix must be zero")

    # Remaining initialisation
    N = x.shape[0]
    scale = 0.5 / D.sum()
    D = D + np.eye(N)

    if np.count_nonzero(D <= 0) > 0:
        raise ValueError(
            "Off-diagonal dissimilarities must be strictly positive")

    Dinv = 1 / D
    if init == 'pca':
        # Initialise from the first n principal axes via SVD
        # (note: x is used as-is, without mean-centring)
        UU, DD, _ = np.linalg.svd(x)
        y = UU[:, :n] * DD[:n]
    elif init == 'cmdscale':
        from cmdscale import cmdscale
        y, e = cmdscale(D)
        y = y[:, :n]
    else:
        y = np.random.normal(0.0, 1.0, [N, n])
    one = np.ones([N, n])
    d = cdist(y, y) + np.eye(N)
    dinv = 1. / d
    delta = D - d
    # Raw error: sum over i != j of (D_ij - d_ij)^2 / D_ij.  Multiplying by
    # `scale` (= 0.5 / sum of all D_ij) later yields Sammon's stress from [1].
    E = ((delta**2) * Dinv).sum()

    # Get on with it
    for i in range(maxiter):

        # Compute gradient, Hessian and search direction (note it is actually
        # 1/4 of the gradient and Hessian, but the step size is just the ratio
        # of the gradient and the diagonal of the Hessian so it doesn't
        # matter).
        delta = dinv - Dinv
        deltaone = np.dot(delta, one)
        g = np.dot(delta, y) - (y * deltaone)
        dinv3 = dinv**3
        y2 = y**2
        H = (np.dot(dinv3, y2) - deltaone - 2 * y * np.dot(dinv3, y)
             + y2 * np.dot(dinv3, one))
        s = -g.flatten(order='F') / np.abs(H.flatten(order='F'))
        y_old = y

        # Use step-halving procedure to ensure progress is made
        for j in range(maxhalves):
            s_reshape = np.reshape(s, (-1, n), order='F')
            y = y_old + s_reshape
            d = cdist(y, y) + np.eye(N)
            dinv = 1 / d
            delta = D - d
            E_new = ((delta**2) * Dinv).sum()
            if E_new < E:
                break
            s = 0.5 * s
        else:
            # No halving step reduced the error
            print(
                'Warning: maxhalves exceeded. Sammon mapping may not converge...'
            )

        # Evaluate termination criterion
        if abs((E - E_new) / E) < tolfun:
            if display:
                print('TolFun exceeded: Optimisation terminated')
            break

        # Report progress
        E = E_new
        if display > 1:
            print('epoch = %d : E = %12.10f' % (i + 1, E * scale))

    else:
        # The loop ran to maxiter without meeting the tolfun criterion
        print(
            'Warning: maxiter exceeded. Sammon mapping may not have converged...'
        )

    # Fiddle stress to match the original Sammon paper
    E = E * scale

    return [y, E]
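A minimal usage sketch (the three-cluster data, the random seed, and the
plotting choices below are illustrative assumptions, not part of the
original code):

import numpy as np
import matplotlib.pyplot as plt

# Illustrative data: three Gaussian blobs in 10 dimensions
rng = np.random.RandomState(0)
x = np.vstack([rng.normal(loc=c, scale=0.5, size=(30, 10)) for c in (0, 3, 6)])

# Two-dimensional Sammon map; display=0 suppresses per-epoch output
y, E = sammon(x, 2, display=0)
print('Sammon stress: %g' % E)

plt.scatter(y[:, 0], y[:, 1], c=np.repeat([0, 1, 2], 30))
plt.title('Sammon mapping (stress = %.4f)' % E)
plt.show()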
Example No. 2
        int(row[7]),
        int(row[8])
    ] for row in reader]

    random.shuffle(data)
    data_train, data_test = train_test_split(data, test_size=0.4)
data_train = np.array(data_train)
data_test = np.array(data_test)
cl = [0, 1]
#Compute Euclidean distances for all pairwise combinations and convert to a square-form matrix
print(list(itertools.combinations(range(len(data_train)), 2)))
D = distance.pdist(data_train[:, 0:8], 'euclidean')
print('D', D)
z = squareform(D)
#Applying classical multidimensional scaling
[Y, e] = cmdscale(z)
#Divide Y class-wise
Y_0_cl1, Y_0_cl2 = segregate(Y[:, 0], data_train[:, -1], cl)
Y_1_cl1, Y_1_cl2 = segregate(Y[:, 1], data_train[:, -1], cl)
#Plot the scattered points
plt.plot(Y_0_cl1, Y_1_cl1, '.', color='blue')
plt.plot(Y_0_cl2, Y_1_cl2, 'v', color='red')
plt.show()
#Training SVM model using sklearn library
SVMModel = svm.SVC(kernel='sigmoid', C=1.0)
SVMModel.fit(data_train[:, 0:8], data_train[:, -1])
#Cross-validated classification (5-fold)
CVSVMModel = cross_val_score(SVMModel,
                             data_train[:, 0:8],
                             data_train[:, -1],
                             cv=5)
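As a follow-up, one might summarise the cross-validation scores and score
the held-out split (a minimal sketch reusing the variables above):

# Mean and spread of the 5-fold cross-validation accuracy
print('CV accuracy: %.3f +/- %.3f' % (CVSVMModel.mean(), CVSVMModel.std()))

# Accuracy on the 40% test split created by train_test_split above
print('Test accuracy: %.3f' % SVMModel.score(data_test[:, 0:8], data_test[:, -1]))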
Example No. 3
def main():
    # Loop through HITS, concatenate response vector from each
    norm_cat_resp = np.array([normResp(n) for n in range(1, len(FACE_PAIR))])
    rank_cat_resp = np.array([rankResp(n) for n in range(1, len(FACE_PAIR))])

    # Plot responses
    plot_resps(norm_cat_resp, MAIN_DIR, 'normalized responses', 'norm_resps')
    plot_resps(rank_cat_resp, MAIN_DIR, 'ranked responses', 'rank_resps')

    # Calculate mean response
    norm_mean_resp = norm_cat_resp.mean(axis=0)
    rank_mean_resp = rank_cat_resp.mean(axis=0)

    # Reshape mean responses into similarity matrix
    norm_resp_mat = reshape_dsm(norm_mean_resp, FNAMES, FPAIR1)
    rank_resp_mat = reshape_dsm(rank_mean_resp, FNAMES, FPAIR1)

    # Plot similarity matrix
    plot_dsm(norm_resp_mat, FNAMES, FACE_URL, MAIN_DIR, 'normalized',
             'norm_dsm')
    plot_dsm(rank_resp_mat, FNAMES, FACE_URL, MAIN_DIR, 'ranked', 'rank_dsm')

    # Compute dissimilarity matrices
    norm_dsm = 1 - norm_resp_mat
    rank_dsm = 1 - rank_resp_mat

    # Classical multidimensional scaling
    norm_config_vals, norm_eigvals = cmdscale(norm_dsm)
    rank_config_vals, rank_eigvals = cmdscale(rank_dsm)

    # Plot MDS results
    plot_mds(norm_config_vals, norm_eigvals, FNAMES, MAIN_DIR, 'normalized',
             'norm_mds')
    plot_mds(rank_config_vals, rank_eigvals, FNAMES, MAIN_DIR, 'ranked',
             'rank_mds')

    # Create list of image filenames
    img_list = [
        MAIN_DIR + 'results/' + x + '.png'
        for x in ['norm_resps', 'norm_dsm', 'norm_mds']
    ]

    # Load images into a list (not a map iterator, which would be exhausted
    # by the zip below and leave nothing for the paste loop) and collect sizes
    images = [Image.open(fname) for fname in img_list]
    widths, heights = zip(*(i.size for i in images))

    # Create new (scaled) sizes
    new_height = 1000.0
    new_widths = widths * np.array([new_height / x for x in heights])

    # Create new image for others to be pasted into
    new_im = Image.new('RGB', (int(sum(new_widths)), int(new_height)))

    x_offset = 0

    for j, im in enumerate(images):
        # Scale and paste
        scaled_im = im.resize((new_widths.astype(int)[j], int(new_height)))
        new_im.paste(scaled_im, (x_offset, 0))
        x_offset += scaled_im.size[0]

    new_im.save(MAIN_DIR + 'results/summary.png')
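A standard entry-point guard (assumed here; the original snippet does not
show how main() is invoked):

if __name__ == '__main__':
    main()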
Example No. 4
temp_lims = (round(min(mean_resp) * 1000) / 1000,
             round(max(mean_resp) * 1000) / 1000)
ax1.set_xticklabels(temp_lims, color='white')

#plt.show()

fig.savefig(main_dir + 'results/face_dsm.png',
            dpi=200,
            facecolor=fig.get_facecolor(),
            edgecolor='none')
plt.close(fig)

# Compute dissimilarity matrix
dsm = 1 - resp_mat
# Classical multidimensional scaling
Y, e = cmdscale(dsm)
# Keep only the first two dimensions of the configuration matrix
scaled_coords = Y[:, 0:2]

# Plot faces based on first 2 dimensions of configuration matrix
fig = plt.figure(figsize=(7, 10))
ax = plt.subplot(211)
plt.scatter(Y[:, 0], Y[:, 1])
for i, txt in enumerate(FNAMES):
    plt.annotate(txt, (Y[i, 0], Y[i, 1]))

temp_xlim = ax.get_xlim()
temp_ylim = ax.get_ylim()

ax.plot(temp_xlim, [0, 0], color='black')
ax.plot([0, 0], temp_ylim, color='black')
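A hedged finishing touch: report how much of the dissimilarity structure the
first two cMDS dimensions capture, using the eigenvalues e returned by
cmdscale (this assumes e is sorted in descending order with at least two
positive entries):

# Fraction of the positive eigenvalue mass carried by the first two dimensions
explained = e[:2].sum() / e[e > 0].sum()
ax.set_title('cMDS embedding (%.0f%% of positive eigenvalue mass)' % (100 * explained))
plt.show()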
Example No. 5
import numpy as np
from scipy.spatial.distance import cdist
from cmdscale import cmdscale


def sammon(x,
           n,
           display=2,
           inputdist="raw",
           maxhalves=20,
           maxiter=500,
           tolfun=1e-9,
           init="default"):
    """
    Sammon mapping on a dataset x. Use the nonlinear mapping procedure on 
    multivariate data with rows of patterns and columns of features. 
    maxiter maximum number of iterations, tolfun relative tolerance for the objective
    function, maxhalves max number of step halvings, input can be "distance" to 
    use pairwise distances as input, display can be 0 to 2 for verbosity, 
    init is "pca" for raw input, "cmdscale" for distances, or can be "random"
    or "default".
    Reference : Sammon, John W. Jr., "A Nonlinear Mapping for Data
                  Structure Analysis", IEEE Transactions on Computers,
                  vol. C-18, no. 5, pp 401-409, May 1969.
    """
    if inputdist == "distance":
        D = x
        if init == "default":
            init = "cmdscale"
    else:
        D = cdist(x, x)
        if init == "default":
            init = "pca"
    N = x.shape[0]
    scale = 0.5 / D.sum()
    D = D + np.eye(N)
    Dinv = 1 / D
    if init == "pca":
        [UU, DD, _] = np.linalg.svd(x)
        y = UU[:, :n] * DD[:n]
    elif init == "cmdscale":
        y, e = cmdscale(D)
        y = y[:, :n]
    else:
        y = np.random.normal(0.0, 1.0, [N, n])
    one = np.ones([N, n])
    d = cdist(y, y) + np.eye(N)
    dinv = 1. / d
    delta = D - d
    E = ((delta**2) * Dinv).sum()
    for i in range(maxiter):
        # Compute gradient, Hessian and search direction (note it is actually
        # 1/4 of the gradient and Hessian, but the step size is just the ratio
        # of the gradient and the diagonal of the Hessian so it doesn't
        # matter).
        delta = dinv - Dinv
        deltaone = np.dot(delta, one)
        g = np.dot(delta, y) - (y * deltaone)
        dinv3 = dinv**3
        y2 = y**2
        H = (np.dot(dinv3, y2) - deltaone - 2 * y * np.dot(dinv3, y)
             + y2 * np.dot(dinv3, one))
        s = -g.flatten(order='F') / np.abs(H.flatten(order='F'))
        y_old = y
        # Use step-halving procedure to ensure progress is made
        for j in range(maxhalves):
            s_reshape = np.reshape(s, (-1, n), order='F')
            y = y_old + s_reshape
            d = cdist(y, y) + np.eye(N)
            dinv = 1 / d
            delta = D - d
            E_new = ((delta**2) * Dinv).sum()
            if E_new < E:
                break
            s = 0.5 * s
        else:
            # No halving step reduced the error
            print(
                "Warning: maxhalves exceeded. Sammon mapping may not converge..."
            )
        # Evaluate termination criterion
        if abs((E - E_new) / E) < tolfun:
            if display:
                print("TolFun exceeded: Optimisation terminated")
            break

        # Carry the improved error forward so the next iteration's halving
        # test and tolerance check compare against the current error
        E = E_new
    E = E * scale  # Fiddle stress to match the original Sammon paper
    return [y, E]
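This variant also accepts a precomputed distance matrix; a minimal sketch of
that code path (the random data is an illustrative assumption):

rng = np.random.RandomState(1)
pts = rng.rand(50, 5)          # illustrative points in 5 dimensions
D = cdist(pts, pts)            # precomputed pairwise distances

# With inputdist='distance', init defaults to the cmdscale embedding
y, E = sammon(D, 2, display=0, inputdist='distance')
print('Sammon stress: %g' % E)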