Ejemplo n.º 1
0
Y = UCData

# Attribute columns to keep: 1..50 plus every third column from 62 through 77.
# Each range is converted to a list before concatenation because ``range``
# objects cannot be added together on Python 3 (lists work on 2 and 3 alike).
attr_columns = list(range(1, 51)) + list(range(62, 78, 3))
attrind = np.array(attr_columns)
# Keep the field names aligned with the selected attribute columns.
Field = [Field[i] for i in attr_columns]

# Select the attribute columns, then replace NaNs in the selection with 0.
# NOTE(review): assumes AttrData is a NumPy array, in which case indexing with
# an index array yields a copy and AttrData itself is not modified -- confirm.
X = AttrData[:, attrind]
X[np.isnan(X)] = 0

# ``fit_transform`` both fits the scaler on X and returns the standardized
# data, so a separate ``fit`` call beforehand is not needed.
scaler = preprocessing.StandardScaler()
Xn = scaler.fit_transform(X)

### cluster
# Candidate clustering models. Every assignment below rebinds ``model``, so
# only the last one (MeanShift) is passed to SSRS.Cluster; comment out the
# later assignments to try an earlier candidate.
model = KMeans(n_clusters=6, init='k-means++', n_init=10, max_iter=1000)
model = AffinityPropagation(verbose=True, preference=-150)
# model = Birch(branching_factor=10, n_clusters=4, threshold=0.3, compute_labels=True)
bandwidth = estimate_bandwidth(X, quantile=0.1, n_samples=100)
model = MeanShift(bin_seeding=True, bandwidth=bandwidth)

# NOTE(review): both the bandwidth estimate and the clustering use X, not the
# standardized Xn -- confirm this is intended.
label = SSRS.Cluster(X, model)

### classification
# Candidate classifiers, constructed in the same order as before; the last
# entry is the one handed to the cross-validation call below.
classifiers = (
    tree.DecisionTreeClassifier(),
    GaussianNB(),
    svm.SVC(),
    SGDClassifier(),
)
model = classifiers[-1]

# The cluster labels serve as the classification target (T=label).
# NOTE(review): XXn is not defined in the visible part of this script (only X
# and Xn are) -- confirm it exists elsewhere and is not a typo for Xn.
Tp = SSRS.Classification_cross(XXn, model=model, nfold=10, T=label)
SSRS.plotErrorMap(label, Tp)

### regression
# Candidate regressors, constructed in the same order as before; the last
# entry (DecisionTreeRegressor) is the one bound to regModel.
regressors = (
    linear_model.LinearRegression(),
    # svm.SVC(),  # disabled candidate
    KNeighborsRegressor(n_neighbors=10),
    tree.DecisionTreeRegressor(),
)
regModel = regressors[-1]
Ejemplo n.º 2
0
# Remove the rows listed in indnan from X, Y and indvalid alike so the three
# arrays stay aligned row for row.
X = np.delete(X, indnan, axis=0)
Y = np.delete(Y, indnan, axis=0)
indvalid = np.delete(indvalid, indnan, axis=0)

# ``fit_transform`` both fits the scaler on X and returns the standardized
# data, so a separate ``fit`` call beforehand is not needed.
scaler = preprocessing.StandardScaler()
Xn = scaler.fit_transform(X)

# nind is bound twice; the value from X.shape is the one that remains.
# NOTE(review): presumably X and Y have the same number of rows -- confirm.
nind, nband = Y.shape
nind, nattr = X.shape

# test for k in kmean
# Cluster Y with k-means for every candidate cluster count and record the
# silhouette score of each labelling. The score array size and the plot axis
# are both derived from k_values, so changing the range cannot leave them out
# of sync.
k_values = range(2, 10)
score_cluster = np.zeros(len(k_values))
for slot, nc in enumerate(k_values):
    print(nc)
    model = KMeans(init='k-means++', n_clusters=nc, n_init=10, max_iter=1000)
    label, center = SSRS.Cluster(Y, model, doplot=0)
    score_cluster[slot] = metrics.silhouette_score(Y, label)
# Score versus number of clusters.
plt.plot(k_values, score_cluster, '-*')

## cluster
# k-means on Y with nc = 6 clusters; the estimator settings are collected in
# one mapping and expanded into the constructor call.
nc = 6
kmeans_options = dict(
    init='k-means++',
    n_clusters=nc,
    n_init=15,
    max_iter=1000,
    tol=1e-15,
    verbose=True,
)
model = KMeans(**kmeans_options)
label, center = SSRS.Cluster(Y, model, doplot=0)

## PCA
# Construct a PCA object requesting nband components; nband is the second
# entry of Y.shape as unpacked earlier in this script. The object is only
# created here -- nothing is fitted in this statement.
pca = PCA(n_components=nband)