# Target data; `UCData` is provided earlier in the script.
Y = UCData

# Attribute columns to keep: 1..50 plus every third column from 62 to 77.
# The ranges are turned into lists before concatenation because
# `range(...) + range(...)` raises TypeError on Python 3 (it only worked
# with Python 2's list-returning range).
attr_columns = list(range(1, 51)) + list(range(62, 78, 3))
attrind = np.array(attr_columns)
Field = [Field[i] for i in attr_columns]

# Select the attribute columns and replace missing values (NaN) with 0.
X = AttrData[:, attrind]
X[np.isnan(X)] = 0

# Standardise the attributes. The scaler is fitted once; transforming the
# same X yields the same result as the former fit + fit_transform pair.
scaler = preprocessing.StandardScaler().fit(X)
Xn = scaler.transform(X)

### cluster
# Candidate clustering models. Each assignment overwrites the previous one,
# so only the last uncommented `model` (MeanShift) is passed to SSRS.Cluster.
model = KMeans(init='k-means++', n_clusters=6, n_init=10, max_iter=1000)
model = AffinityPropagation(preference=-150, verbose=True)
#model = Birch(branching_factor=10, n_clusters=4, threshold=0.3, compute_labels=True)
model = MeanShift(
    bandwidth=estimate_bandwidth(X, quantile=0.1, n_samples=100),
    bin_seeding=True,
)
label = SSRS.Cluster(X, model)

### classification
# Candidate classifiers; again only the last assignment (SGDClassifier) is used.
model = tree.DecisionTreeClassifier()
model = GaussianNB()
model = svm.SVC()
model = SGDClassifier()
# NOTE(review): `XXn` is not defined in the visible part of the script --
# possibly a typo for `Xn`; confirm before relying on this call.
Tp = SSRS.Classification_cross(XXn, T=label, nfold=10, model=model)
SSRS.plotErrorMap(label, Tp)

### regression
# Candidate regressors; only the last assignment (DecisionTreeRegressor) survives.
regModel = linear_model.LinearRegression()
#regModel=svm.SVC()
regModel = KNeighborsRegressor(n_neighbors=10)
regModel = tree.DecisionTreeRegressor()
# Remove the rows listed in `indnan` from the attributes, the targets and
# the valid-sample index so that all three stay aligned along axis 0.
X, Y, indvalid = (np.delete(arr, indnan, 0) for arr in (X, Y, indvalid))

# Standardise the attributes; `scaler` ends up fitted on X.
scaler = preprocessing.StandardScaler()
Xn = scaler.fit_transform(X)

nind, nband = Y.shape
nind, nattr = X.shape

# test for k in kmean
# Scan k = 2..9 and record the silhouette score of each KMeans clustering of Y.
cluster_counts = range(2, 10)
score_cluster = np.zeros(len(cluster_counts))
for slot, i in enumerate(cluster_counts):
    print(i)
    nc = i
    model = KMeans(init='k-means++', n_clusters=nc, n_init=10, max_iter=1000)
    label, center = SSRS.Cluster(Y, model, doplot=0)
    score_cluster[slot] = metrics.silhouette_score(Y, label)
plt.plot(cluster_counts, score_cluster, '-*')

## cluster
# Final clustering of Y with a fixed number of clusters.
nc = 6
model = KMeans(
    init='k-means++',
    n_clusters=nc,
    n_init=15,
    max_iter=1000,
    tol=1e-15,
    verbose=True,
)
label, center = SSRS.Cluster(Y, model, doplot=0)

## PCA
# PCA configured with as many components as Y has columns.
pca = PCA(n_components=nband)