def Agglomerative(input_data, index_to_check):
    # ros = RandomOverSampler(random_state=0)
    # X, y = split_train_test(input_data, index_to_check)
    y = k_to_one(input_data[:, 7:10])
    print(y)
    X = input_data[:, :7]
    # X = StandardScaler().fit_transform(X)
    # X, y = ros.fit_sample(X, y)
    U, S, V = svd(X, full_matrices=False)
    # Project onto the 2nd and 3rd principal directions (rows 1 and 2 of V)
    datamatrix_projected = np.dot(X, V[1:3].T)
    N, M = X.shape

    # Perform hierarchical/agglomerative clustering on data matrix
    Maxclust = 6
    Methods = ['average', 'complete', 'single']
    Metrics = ['mahalanobis', 'euclidean']
    fignumber = 1
    for Method in Methods:
        for Metric in Metrics:
            Z = linkage(X, method=Method, metric=Metric)
            # Compute and display clusters by thresholding the dendrogram
            cls = fcluster(Z, criterion='maxclust', t=Maxclust)
            figure(fignumber)
            fignumber += 1
            clusterplot(datamatrix_projected, cls.reshape(cls.shape[0], 1), y=y)
            title(Method + ' ' + Metric)
            # Display dendrogram
            # max_display_levels = 6
            # figure(fignumber, figsize=(10, 4))
            # fignumber += 1
            # dendrogram(Z, truncate_mode='level', p=max_display_levels)
    show()
    print('Ran Exercise 10.2.1')
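# Agglomerative() above relies on a helper k_to_one() that collapses the
# one-hot-encoded class columns (here columns 7:10) into a single label vector.
# Its definition is not included in this collection; a minimal sketch of what
# it presumably does, consistent with the np.argmax usage in draw_GMM() later
# in this file (the name and exact behavior are assumptions):
import numpy as np

def k_to_one(one_hot):
    # Map each one-hot row, e.g. [0, 1, 0], to the index of its active column.
    return np.argmax(one_hot, axis=1)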
def hierarchical_cluster(remove_doc_index=None):
    '''
    :param remove_doc_index: The index of a doc instance to be removed.
                             Used for removing a potential outlier.
    :return:
    '''
    # `import *` is a SyntaxError inside a function; import the names this code uses
    from Reading_data import X, y

    # Normalize data
    X = stats.zscore(X)

    # Shuffle data
    # X_sparse = coo_matrix(X)
    # X, X_sparse, y = shuffle(X, X_sparse, y)

    if remove_doc_index is not None:
        X = np.delete(X, remove_doc_index, axis=0)
        y = np.delete(y, remove_doc_index, None)
    y = y.T

    # Perform hierarchical/agglomerative clustering on data matrix
    methods = ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']
    metrics = ['euclidean', 'cityblock', 'cosine']
    Method = 'single'
    Metric = 'euclidean'
    Maxclust = 2
    Z = linkage(X, method=Method, metric=Metric)

    # Compute and display clusters by thresholding the dendrogram
    cls = fcluster(Z, criterion='maxclust', t=Maxclust)
    F_Measure = mcs.f1_score(y, cls, average='weighted')

    figure(1)
    # If data is more than 2-dimensional it should first be projected
    # onto the first two principal components
    pca = PCA(n_components=2)
    X_r = pca.fit(X).transform(X)
    clusterplot(X_r, cls.reshape(cls.shape[0], 1), y=y, x_label="PCA1", y_label="PCA2")

    # Display dendrogram
    max_display_levels = 20
    figure(2)
    xlabel("doc instance")
    ylabel('Distance')
    title('F Measure: {0}'.format(round(F_Measure, 2)))
    dendrogram(Z, truncate_mode='level', p=max_display_levels)
    show()
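# A note on the F-measure above: fcluster() numbers clusters 1..Maxclust, while
# class labels in y typically start at 0, so f1_score() compares misaligned
# label sets. A hedged sketch of one common fix -- relabel each cluster by the
# majority class it contains before scoring. The helper name is an assumption,
# and mcs is presumed to alias sklearn.metrics:
import numpy as np
from sklearn import metrics as mcs

def align_cluster_labels(y_true, cls):
    # Replace every cluster id with the most frequent true label inside it.
    y_true = np.asarray(y_true).ravel().astype(int)
    aligned = np.empty_like(y_true)
    for c in np.unique(cls):
        mask = (cls == c)
        aligned[mask] = np.bincount(y_true[mask]).argmax()
    return aligned

# Usage: F_Measure = mcs.f1_score(y, align_cluster_labels(y, cls), average='weighted')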
def _clustering(self):
    # Get data
    print('Get cluster data..')
    H = np.asmatrix(np.loadtxt(factoredHMatrix)).T
    words = set(open(attributFile).read().split())
    y = list(range(len(words)))

    # Clustering parameters
    clusterNumber = 4
    runNumber = 10
    N, M = H.shape

    print('Calculate k-means..')
    # K-means clustering:
    centroids, cls, inertia = k_means(H, clusterNumber, n_init=runNumber)

    print('Plotting results..')
    # Plot results:
    figure(figsize=(14, 9))
    clusterplot(H, cls, centroids, y)
    show()
K_optimal = KRange[index_of_max]
print('The optimal number of clusters, according to GMM cross-validation, is {}'.format(K_optimal))

# Fit best Gaussian mixture model to X and plot result
gmm = GaussianMixture(n_components=K_optimal, covariance_type=covar_type,
                      n_init=reps).fit(X)
cls = gmm.predict(X)      # extract cluster labels
cds = gmm.means_          # extract cluster centroids (means of gaussians)
covs = gmm.covariances_   # extract cluster shapes (covariances of gaussians)

plt.figure(figsize=(12, 9))
clusterplot(X, clusterid=cls, centroids=cds, y=y, covars=covs)
plt.title('Gaussian Mixture Model using {} clusters'.format(K_optimal))
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.show()

# Evaluate GMM model
Rand_gmm, Jaccard_gmm, NMI_gmm = clusterval(y, cls)

print('###################################################')
print('#            HIERARCHICAL CLUSTERING              #')
print('###################################################')

Metric = 'euclidean'
Maxclust = K_optimal
max_display_levels = K_optimal
# We will try all of these linkage methods
Methods = ['single', 'complete', 'average', 'weighted', 'median', 'ward']
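# KRange and index_of_max in the snippet above come from a selection step that
# is not shown here. A minimal hedged sketch of one way they could be produced,
# assuming K is chosen by maximizing the cross-validated log-likelihood of a
# GaussianMixture (the KFold setup and candidate range are assumptions;
# covar_type and reps follow the snippet):
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import KFold

KRange = range(1, 11)
scores = []
for K in KRange:
    fold_scores = []
    for train_idx, test_idx in KFold(n_splits=10, shuffle=True).split(X):
        gmm_k = GaussianMixture(n_components=K, covariance_type=covar_type,
                                n_init=reps).fit(X[train_idx])
        # score() returns the mean per-sample log-likelihood on held-out data
        fold_scores.append(gmm_k.score(X[test_idx]))
    scores.append(np.mean(fold_scores))
index_of_max = int(np.argmax(scores))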
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth1.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Perform hierarchical/agglomerative clustering on data matrix
Method = 'single'
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)

# Compute and display clusters by thresholding the dendrogram
Maxclust = 4
cls = fcluster(Z, criterion='maxclust', t=Maxclust)
figure(1)
clusterplot(X, cls.reshape(cls.shape[0], 1), y=y)

# Display dendrogram
max_display_levels = 6
figure(2)
dendrogram(Z, truncate_mode='level', p=max_display_levels)
show()
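# fcluster() above cuts the tree into a fixed number of clusters. The same
# linkage matrix can instead be cut at a cophenetic-distance threshold; a small
# sketch on the Z computed above (the threshold value 2.0 is an arbitrary example):
cls_by_distance = fcluster(Z, criterion='distance', t=2.0)
print('Clusters at distance threshold 2.0:', len(set(cls_by_distance)))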
# exercise 10.1.1
from pylab import *
from scipy.io import loadmat
from toolbox_02450 import clusterplot
# sklearn.mixture.GMM was removed in scikit-learn 0.20; GaussianMixture replaces it
from sklearn.mixture import GaussianMixture

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth1.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Number of clusters
K = 4
cov_type = 'diag'  # type of covariance; you can try out 'full' as well
reps = 1           # number of fits with different initializations, best result will be kept

# Fit Gaussian mixture model
gmm = GaussianMixture(n_components=K, covariance_type=cov_type, n_init=reps).fit(X)
cls = gmm.predict(X)      # extract cluster labels
cds = gmm.means_          # extract cluster centroids (means of gaussians)
covs = gmm.covariances_   # extract cluster shapes (covariances of gaussians)

if cov_type == 'diag':
    # Expand the per-component diagonal vectors into full covariance matrices
    new_covs = np.zeros([K, M, M])
    count = 0
    for elem in covs:
        temp_m = np.zeros([M, M])
        for i in range(len(elem)):
            temp_m[i][i] = elem[i]
        new_covs[count] = temp_m
        count += 1
    covs = new_covs
# exercise 10.1.1
from pylab import *
from scipy.io import loadmat
from toolbox_02450 import clusterplot
from sklearn.cluster import k_means

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth1.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Number of clusters:
K = 4

# K-means clustering:
centroids, cls, inertia = k_means(X, K)

# Plot results:
figure(figsize=(14, 9))
clusterplot(X, cls, centroids, y)
show()
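# k_means() also returns the final inertia (sum of squared distances to the
# nearest centroid), the usual quantity for an elbow plot when the number of
# clusters is not known in advance. A hedged sketch on the same X (the
# candidate range 1..10 is an arbitrary example):
inertias = []
for k in range(1, 11):
    _, _, inertia_k = k_means(X, k)
    inertias.append(inertia_k)
figure()
plot(range(1, 11), inertias, '-o')
xlabel('K')
ylabel('Inertia')
show()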
# Perform hierarchical/agglomerative clustering on data matrix
# Method = 'single'
Method = 'complete'
# Method = 'centroid'
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)

# Compute and display clusters by thresholding the dendrogram
Maxclust = C
cls = fcluster(Z, criterion='maxclust', t=Maxclust)
figure(1, figsize=(15, 15))
xlabel('PCA1')
ylabel('PCA2')
clusterplot(X, cls.reshape(cls.shape[0], 1), y=y)

# Display dendrogram
max_display_levels = 6
figure(2, figsize=(10, 4))
dendrogram(Z, truncate_mode='level', p=max_display_levels)
show()

# Calculate accuracy
accuracy_hierarchical = sum(
    [cls[i] == y_classNames[i] for i in range(len(cls))]) / N
print("Accuracy of the hierarchical clustering:", accuracy_hierarchical)

################################################
# GAUSSIAN MIXTURE MODEL
from toolbox_02450 import clusterplot
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth1.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Perform hierarchical/agglomerative clustering on data matrix
Method = 'single'
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)

# Compute and display clusters by thresholding the dendrogram
Maxclust = 4
cls = fcluster(Z, criterion='maxclust', t=Maxclust)
figure(1)
clusterplot(X, cls.reshape(cls.shape[0], 1), y=y)

# Display dendrogram
max_display_levels = 6
figure(2)
dendrogram(Z, truncate_mode='level', p=max_display_levels)
show()
# Perform hierarchical/agglomerative clustering on data matrix
Method = 'ward'
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)

# Compute and display clusters by thresholding the dendrogram
Maxclust = 2
cls = fcluster(Z, criterion='maxclust', t=Maxclust)
# figure(1)
# clusterplot(X, cls.reshape(cls.shape[0],1), y=y)
figure(figsize=(14, 9))
idx = [4, 1]  # feature index: choose two features to use as x and y axis in the plot
clusterplot(X[:, idx], clusterid=cls, y=y)
# ylabel("glucose")
# xlabel("insulin")
show()

# Display dendrogram
max_display_levels = 6
figure(2, figsize=(10, 4))
dendrogram(Z, truncate_mode='level', p=max_display_levels)
show()

Rand, Jaccard, NMI = clusterval(y, cls)
print(Rand, Jaccard, NMI)

print('Ran Exercise 10.2.1')
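# clusterval() above comes from the course's toolbox_02450 package. If that
# package is unavailable, two of the three indices have close scikit-learn
# counterparts; a hedged sketch (the toolbox may define Rand and NMI slightly
# differently, and the pairwise Jaccard index has no direct sklearn equivalent):
import numpy as np
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

rand_like = adjusted_rand_score(np.asarray(y).ravel(), cls)
nmi_like = normalized_mutual_info_score(np.asarray(y).ravel(), cls)
print(rand_like, nmi_like)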
cov_type = 'full'  # e.g. 'full' or 'diag'
# Define the initialization procedure (initial value of means)
initialization_method = 'random'  # 'random' or 'kmeans'
reps = 20  # number of fits with different initializations, best result will be kept

# Fit Gaussian mixture model
gmm = GaussianMixture(n_components=K, covariance_type=cov_type,
                      n_init=reps, tol=1e-6, reg_covar=1e-6,
                      init_params=initialization_method).fit(X)
cls = gmm.predict(X)      # extract cluster labels
cds = gmm.means_          # extract cluster centroids (means of gaussians)
covs = gmm.covariances_   # extract cluster shapes (covariances of gaussians)

if cov_type.lower() == 'diag':
    # Expand the per-component diagonal vectors into full covariance matrices
    new_covs = np.zeros([K, M, M])
    count = 0
    for elem in covs:
        new_covs[count] = np.diag(elem)
        count += 1
    covs = new_covs

# Plot results:
figure(figsize=(14, 9))
clusterplot(X, clusterid=cls, centroids=cds, y=y, covars=covs)
show()

print('Ran Exercise 11.1.1')
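# K is assumed to be defined earlier in this script. When it is not known,
# GaussianMixture's built-in BIC is a common selection criterion; a minimal
# hedged sketch (the candidate range 1..10 is an arbitrary example):
import numpy as np
from sklearn.mixture import GaussianMixture

bics = [GaussianMixture(n_components=k, covariance_type=cov_type,
                        n_init=reps).fit(X).bic(X) for k in range(1, 11)]
best_K = int(np.argmin(bics)) + 1  # BIC is minimized; candidate k starts at 1
print('K selected by BIC:', best_K)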
classNames = ['Non diabetes', 'Diabetes']
N, M = X.shape
C = len(classNames)

# Perform hierarchical/agglomerative clustering on data matrix
Method = 'ward'  # alternatives: complete, average, weighted, centroid
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)

# Compute and display clusters by thresholding the dendrogram
Maxclust = 2
cls = fcluster(Z, criterion='maxclust', t=Maxclust)
clsHie = pd.DataFrame(cls)
clsHie.to_csv("clsHie.csv")

figure(1)
clusterplot(X[:, [0, 1]], cls.reshape(cls.shape[0], 1), y=y)
savefig('hierarchicalScatterPlot.png', dpi=300)
show()

# Display dendrogram
max_display_levels = 4
figure(2, figsize=(10, 4))
dendrogram(Z, truncate_mode='level', p=max_display_levels)
savefig('hierarchicalDendrogram.png', dpi=300)
show()

print('Ran Exercise 10.2.1')
N, M = X.shape
C = len(classNames)

# Number of clusters
K = 4
cov_type = 'diag'  # type of covariance; you can try out 'full' as well
reps = 1           # number of fits with different initializations, best result will be kept

# Fit Gaussian mixture model
# (sklearn's old GMM class with params='wmc' and covars_ was removed in
# scikit-learn 0.20; GaussianMixture is the replacement)
gmm = GaussianMixture(n_components=K, covariance_type=cov_type, n_init=reps).fit(X)
cls = gmm.predict(X)      # extract cluster labels
cds = gmm.means_          # extract cluster centroids (means of gaussians)
covs = gmm.covariances_   # extract cluster shapes (covariances of gaussians)

if cov_type == 'diag':
    # Expand the per-component diagonal vectors into full covariance matrices
    new_covs = np.zeros([K, M, M])
    count = 0
    for elem in covs:
        temp_m = np.zeros([M, M])
        for i in range(len(elem)):
            temp_m[i][i] = elem[i]
        new_covs[count] = temp_m
        count += 1
    covs = new_covs

# Plot results:
figure(figsize=(14, 9))
clusterplot(X, clusterid=cls, centroids=cds, y=y, covars=covs)
show()
"13. Private Room", "14. Entire Home", "15. Shared Room" ] for i in cols: print(x_labels[i - 1]) for i in range(len(cols)): cols[i] = cols[i] - 1 X = X[:, cols] # Do PCA for plot pca = PCA(n_components=2) PCASpace = pca.fit_transform(X) # Perform hierarchical/agglomerative clustering on data matrix Method = 'complete' Metric = 'euclidean' Z = linkage(X, method=Method, metric=Metric) # Compute and display clusters by thresholding the dendrogram Maxclust = 6 cls = fcluster(Z, criterion='maxclust', t=Maxclust) figure(1) clusterplot(PCASpace, cls.reshape(cls.shape[0], 1), y=Y) # Display dendrogram max_display_levels = 6 figure(2, figsize=(10, 4)) dendrogram(Z, truncate_mode='level', p=max_display_levels) show()
print "z Shape: " + str(z.shape) print "Y Shape: " + str(Y.shape) print "V Shape: " + str(V.shape) print "X Shape: " + str(X.shape) print "y Shape: " + str(y.shape) print "cls Shape: " + str(cls.shape) print "cds Shape: " + str(cds.shape) print "covs Shape: " + str(covs.shape) if cov_type == 'diag': new_covs = np.zeros([K, M, M]) count = 0 for elem in covs: temp_m = np.zeros([M, M]) for i in range(len(elem)): temp_m[i][i] = elem[i] new_covs[count] = temp_m count += 1 covs = new_covs np.savetxt("cls.txt", cls) result = [abs(cls[i] - y[i]) for i in range(len(cls))] np.savetxt("result.txt", result) print "Result mean: " + str(np.mean(result)) #Plot results: figure(figsize=(10, 6)) clusterplot(z, clusterid=cls, centroids=cds, y=y, covars=5) show()
if cov_type.lower() == 'diag':
    # Expand the per-component diagonal vectors into full covariance matrices
    new_covs = np.zeros([K, M, M])
    count = 0
    for elem in covs:
        new_covs[count] = np.diag(elem)
        count += 1
    covs = new_covs

## In case the number of features != 2, a subset of features must be plotted instead.
figure(figsize=(14, 9))
# idx = [3, 4]  # feature index: choose two features to use as x and y axis in the plot
# clusterplot(X[:, idx], clusterid=cls, centroids=cds[:, idx], y=y2, covars=covs[:, idx, :][:, :, idx])
clusterplot(X, clusterid=cls, centroids=cds, y=y2, covars=covs)
savefig('figures/GMM/clustering_GMM.png', bbox_inches='tight')
show()

# CLUSTERING 2
##########################################################################
# Perform hierarchical/agglomerative clustering on data matrix
# Method = 'single'
# Method = 'complete'
# Method = 'average'
Method = 'weighted'
# Method = 'centroid'
# Method = 'median'
# Method = 'ward'
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)
# extract cluster centroids (means of gaussians)
covs = gmm.covariances_   # extract cluster shapes (covariances of gaussians)

if cov_type.lower() == 'diag':
    # Expand the per-component diagonal vectors into full covariance matrices
    new_covs = np.zeros([K, M, M])
    count = 0
    for elem in covs:
        new_covs[count] = np.diag(elem)
        count += 1
    covs = new_covs

# Plot results:
# figure(figsize=(14,9))
# clusterplot(X, clusterid=cls, centroids=cds, y=y, covars=covs)
# show()
print(cds)

## In case the number of features != 2, a subset of features must be plotted instead.
figure(figsize=(14, 9))
idx = [0, 1]  # feature index: choose two features to use as x and y axis in the plot
clusterplot(X[:, idx], clusterid=cls, centroids=cds[:, idx], y=y,
            covars=covs[:, idx, :][:, :, idx])
show()

print(Rand, Jaccard, NMI)
def draw_GMM(input_data):
    X, y = split_train_test(input_data, 9)
    y = np.argmax(input_data[:, 7:], 1)
    U, S, V = svd(input_data[:, :], full_matrices=False)
    X = np.dot(input_data[:, :], V.T)
    # X = input_data
    N, M = X.shape

    # Number of clusters
    K = 5
    cov_type = 'full'  # type of covariance; you can try out 'diag' as well
    reps = 10          # number of fits with different initializations, best result will be kept

    # Fit Gaussian mixture model
    gmm = GaussianMixture(n_components=K, covariance_type=cov_type, n_init=reps).fit(X)
    cls = gmm.predict(X)      # extract cluster labels
    cds = gmm.means_          # extract cluster centroids (means of gaussians)
    covs = gmm.covariances_   # extract cluster shapes (covariances of gaussians)

    if cov_type == 'diag':
        new_covs = np.zeros([K, M, M])
    if cov_type == 'full':
        # Copy the per-component covariance matrices element-wise
        new_covs = np.zeros([K, M, M])
        count = 0
        for elem in covs:
            temp_m = np.zeros([M, M])
            for i in range(len(elem)):
                for j in range(len(elem)):
                    temp_m[i][j] = elem[i][j]
            new_covs[count] = temp_m
            count += 1
        covs = new_covs
    print(cds)

    # Plot results:
    # figure(figsize=(14, 9))
    # clusterplot(X, clusterid=cls, centroids=cds, y=y, covars=covs)
    # show()

    ## In case the number of features != 2, a subset of features must be plotted instead.
    figure(figsize=(14, 9))
    idx = [0, 1]  # feature index: choose two features to use as x and y axis in the plot
    clusterplot(X[:, idx], clusterid=cls, centroids=cds[:, idx], y=y,
                covars=covs[:, idx, :][:, :, idx])
    title('Clusterplot with GMM with origin')
    show()
eval = pickle.load(eval_f)
eval_f.close()
rand = eval[0]
jaccard = eval[1]
nmi = eval[2]

########################################################
########################################################

# PART 1 - GMM CLUSTERING
print('==============================================')
print('Best K: {0}'.format(bestK))
print('==============================================')

# Cluster plot figure
clusterplot(PC, clusterid=clsGMM, centroids=cdsGMM, covars=covsGMM, y=Y)
xticks(fontsize=14)
yticks(fontsize=14)
show()

# Plot CV error per K figure
plot(KRange, 2 * CVE, '-ok')
ylabel('Cross-validation Error', fontsize=14)
xticks(fontsize=14)
yticks(fontsize=14)
xlabel('Number of clusters (K)', fontsize=14)
show()

########################################################
for i, col_id in enumerate(range(2, 27)):
    X[:, i] = np.mat(doc.col_values(col_id, 2, 345)).T

# Compute values of N, M and C.
N = len(y)
M = len(attributeNames)
C = len(classNames)

# Perform hierarchical/agglomerative clustering on data matrix
Method = 'complete'
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)

# Project data onto the first two principal components for plotting
Y = X - np.ones((N, 1)) * X.mean(0)
U, S, V = svd(Y, full_matrices=False)
V = V.T
rho = (S * S) / (S * S).sum()  # variance explained by each component

# Compute and display clusters by thresholding the dendrogram
Maxclust = 9
cls = fcluster(Z, criterion='maxclust', t=Maxclust)
figure(1, figsize=(14, 9))
clusterplot((Y * V)[:, :2], cls.reshape(cls.shape[0], 1), y=y)

# Display dendrogram
max_display_levels = 50
figure(2, figsize=(10, 4))
dendrogram(Z, truncate_mode='level', p=max_display_levels)
show()
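# rho above is computed but never used in the snippet. A small hedged sketch
# of the usual follow-up -- plotting cumulative variance explained to check
# that two principal components are a reasonable plotting basis:
figure()
plot(range(1, len(rho) + 1), np.cumsum(rho), '-o')
xlabel('Number of principal components')
ylabel('Cumulative variance explained')
show()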
# exercise 9.1.1
from pylab import *
from scipy.io import loadmat
from toolbox_02450 import clusterplot
from sklearn.cluster import k_means

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth1.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Number of clusters:
K = 4

# K-means clustering:
centroids, cls, inertia = k_means(X, K)

# Plot results:
figure(figsize=(14, 9))
clusterplot(X, cls, centroids, y)
show()
    elif item - 1 == 0 and y[index] == 1:
        falseneg += 1
    elif item - 1 == 1 and y[index] == 0:
        falsepos += 1
    else:
        print("something weird", index, item)

print("Method:", Method)
print("truepos:", truepos)
print("trueneg:", trueneg)
print("falsepos:", falsepos)
print("falseneg:", falseneg)
print("Percent right:", (truepos + trueneg) / len(a) * 100)

# Plot clusters
plt.figure(1, figsize=(10, 8))
clusterplot(Z[:, 0:2], a, y=y)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title("Hierarchical clustering")

# Display dendrogram
max_display_levels = 4
plt.figure(2, figsize=(11, 4))
dendrogram(link, truncate_mode='level', p=max_display_levels)
plt.show()

#%% Testing all possibilities
highscores = []

def testAll():
    Maxclust = 2