# Example 1
                         '\nKmeans',
                         'nr estimators',
                         'silhouete',
                         percentage=False)

plt.show()

#%%
# K-Means (k=5) on the data reduced to 2 features, once per
# dimensionality-reduction algorithm; results go into a 2 x len(algs) grid.
n_clusters = 5
algs = ["PCA", "selectkbest"]
plt.figure()
fig, axs = plt.subplots(2, len(algs), figsize=(14, 8), squeeze=False)
for a, alg_name in enumerate(algs):
    # Reduce to 2 features; "class" and "id" are excluded from the reduction.
    datar = datapp.feature_reduction(df, "class", ["class", "id"],
                                     n_features=2, as_int=True, alg=alg_name)

    # Split the reduced frame into target vector and feature matrix.
    y: np.ndarray = datar[to_clf].values
    X: np.ndarray = datar.drop([to_clf, "id"], axis=1).values

    # Fixed random_state keeps the clustering reproducible across runs.
    kmeans_model = cluster.KMeans(n_clusters=n_clusters,
                                  random_state=rs).fit(X)
    labels = kmeans_model.labels_
    cluster_centers = kmeans_model.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)

print(sil[4])

# NOTE(review): `sil`, `ivalues` and `svalues` come from an earlier cell not
# shown here — presumably inertia/silhouette series; verify against that cell.
plot.multiple_line_chart(axs[0, 0], n_clusters, ivalues, '\nKmeans',
                         'nr estimators', 'inertia', percentage=False)
plot.multiple_line_chart(axs[0, 1], n_clusters, svalues, '\nKmeans',
                         'nr estimators', 'silhouete', percentage=False)

plt.show()

#%%
# K-Means with k=6 clusters, run once per feature-reduction algorithm and
# plotted per cluster. NOTE(review): this cell is truncated — the inner
# plotting loop body continues beyond what is visible here.
n_clusters=6
algs = ["PCA", "selectkbest"]
plt.figure()
fig, axs = plt.subplots(2 ,len(algs), figsize=(14, 8), squeeze=False)
for a in range(len(algs)):
    # Reduce to 2 features; categoric columns plus the target are excluded
    # from the reduction (presumably so they are not transformed — confirm
    # against datapp.feature_reduction's signature).
    datar = datapp.feature_reduction(df, to_clf,categoric+[to_clf], n_features=2, as_int=True, alg=algs[a])

    # Target vector and feature matrix from the reduced frame.
    y: np.ndarray = datar[to_clf].values
    X: np.ndarray = datar.drop([to_clf], axis=1).values

    # Fixed random_state (rs, defined elsewhere) for reproducible clustering.
    kmeans_model = cluster.KMeans(n_clusters=n_clusters, random_state=rs).fit(X)
    labels = kmeans_model.labels_
    cluster_centers = kmeans_model.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)

    #plot
    #plt.clf()
    # Cycle through colors, one per discovered cluster.
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        # Boolean mask selecting the points assigned to cluster k.
        my_members = labels == k
# Example 3
# Grid search: for each reduction algorithm, sweep the correlation-reduction
# threshold (x-axis) against the fraction of features kept (one line per
# fraction), scoring each combination with a Random Forest under k-fold CV.
# Top row of subplots: accuracy; bottom row: sensitivity.
thresholds = [1, 0.95, 0.90, 0.8]
selects = [1, 0.9, 0.8, 0.75, 0.6]
algs = ["selectkbest", "PCA"]
plt.figure()
fig, axs = plt.subplots(2, 2, figsize=(12, 7), squeeze=False)
for k, f in enumerate(algs):
    values = {}
    svalues = {}
    for d in selects:
        yvalues = []
        syvalues = []
        for tr in thresholds:
            # Re-run preprocessing with the current correlation threshold,
            # then reduce the surviving features with algorithm `f`.
            datared = datapp.preprocess_alt(data, "class", red_corr=True,
                                            tr=tr, n=5,
                                            normalization=normalization,
                                            ignore_classes=categoric,
                                            as_df=True)
            df = datapp.feature_reduction(datared, "class", ["class", "id"],
                                          d, alg=f)
            rf = RandomForestClassifier(random_state=rs)
            acc, sens, _ = eval.train_predict_kfold(df, "class", rf, bal=bal)
            yvalues.append(acc)
            syvalues.append(sens)
        values[d] = yvalues
        svalues[d] = syvalues
    plot.multiple_line_chart(axs[0, k], thresholds, values,
                             'Random Forests with %s reduction' % f,
                             'threshold of reduction', 'accuracy')
    plot.multiple_line_chart(axs[1, k], thresholds, svalues,
                             'Random Forests with %s reduction' % f,
                             'threshold of reduction', 'sensitivity',
                             percentage=False)

plt.show()
#%%
# Configuration for the following experiment (the cell continues beyond this
# view): correlation-reduction threshold and feature-selection algorithm.
tr=0.95
f= "selectkbest"