# Read the header-less letter-recognition CSV. pandas hands back a DataFrame,
# so it is converted to an ndarray straight away to allow [:, i] slicing.
df = np.array(pd.read_csv("letter-recognition.csv", header=None))

# Disabled in the original: build `dfs` by keeping only the rows whose first
# column is one of the letters 'A' through 'O' (an OR of df[:, 0] == <letter>
# masks used to index df).

# First column holds the label; all remaining columns are features.
labels = df[:, 0]
n_digits = len(np.unique(labels))  # count of distinct label values

# Rescale the features with scale() before decomposition.
# NOTE(review): scale is presumably sklearn.preprocessing.scale - confirm
# against the file's imports.
data = scale(df[:, 1:])
print(data.std())

# Fit FastICA (16 components, at most 200 iterations) and project the data,
# then report the shape and overall spread of the projection.
ica_model = FastICA(n_components=16, max_iter=200)
ica = ica_model.fit_transform(data)
print(ica.shape)
print(ica.std())

# ###############################################################################
# # Visualize the results on PCA-reduced data
#
# reduced_data = PCA(n_components=2).fit_transform(data)
# kmeans = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
# kmeans.fit(reduced_data)
#
# # Step size of the mesh. Decrease to increase the quality of the VQ.
# h = .02     # point in the mesh [x_min, x_max]x[y_min, y_max].
#
# # Plot the decision boundary. For that, we will assign a color to each
# x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
# y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
# xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Example #2
0
    tmp = tmp.kurt(axis=0)
    kurt.append(tmp.abs().mean())

# Plot ICA: draw the averaged absolute kurtosis values collected in `kurt`
# against `dims` as a solid blue line, on a fresh figure without a grid.
axes = plt.figure().gca()
axes.set_title("ICA Kurtosis: " + Dataset)
axes.set_xlabel("Independent Components")
axes.set_ylabel("Avg Kurtosis Across IC")
axes.plot(dims, kurt, 'b-')
axes.grid(False)
plt.show()

###################################################################
# ICA Dimensional Reduction
# Replace X1 with its 5-component ICA projection (seeded via random_state),
# then divide every component by its own standard deviation.
# NOTE(review): ICA is presumably an alias for sklearn's FastICA - confirm
# against the file's imports.
ica_reducer = ICA(n_components=5, random_state=5)
X1 = ica_reducer.fit_transform(X1)
X1 = X1 / X1.std(axis=0)
###################################################################

# Sweep the K-Means cluster count over the even values 2, 4, ..., 48 and
# collect one entry per k in the lists below.
# NOTE(review): km_homo_scores and km_inertia_scores are not appended to in
# the lines visible here - confirm they are filled further down the loop.
Kclusters = range(2, 50, 2)
km_sil_scores = []
km_homo_scores = []
km_inertia_scores = []
km_fitness_times = []

for k in Kclusters:
    # Time only the fit itself, not the scoring that follows.
    t1 = time.time()
    # n_jobs is deliberately not passed: KMeans deprecated it in
    # scikit-learn 0.23 and removed it in 1.0, where passing it raises
    # TypeError. The clustering result is unaffected (random_state is fixed).
    km = KMeans(n_clusters=k, n_init=10, random_state=100).fit(X1)
    t2 = time.time()

    km_fitness_times.append(t2 - t1)
    km_sil_scores.append(silhouette_score(X1, km.labels_))
Example #3
0
# https://scikit-learn.org/stable/datasets/index.html
from sklearn.datasets import load_digits, load_wine

# The digits bunch is loaded, but in the visible code it is only referenced
# by the disabled alternative below.
data_digits = load_digits()
# Alternative input (disabled):
#   X1, Y1 = pd.DataFrame(data_digits["data"]), pd.Series(data_digits["target"])
#   Dataset = "digits"

# Active input: the wine data, with features as a DataFrame whose columns
# carry the dataset's feature names, and the targets as a Series.
data_wine = load_wine()
X1 = pd.DataFrame(data_wine["data"], columns=data_wine.feature_names)
Y1 = pd.Series(data_wine["target"])
Dataset = "wine"


# Untransformed features; this is the same object as X1, not a copy.
Xraw = X1

# Three 5-component projections of X1, each seeded with random_state=5.
# NOTE(review): PCA / ICA / RCA are aliases defined outside this excerpt -
# confirm which estimators they refer to.
pca_model = PCA(n_components=5, random_state=5)
Xpca = pca_model.fit_transform(X1)

ica_model = ICA(n_components=5, random_state=5)
Xica = ica_model.fit_transform(X1)
Xica = Xica / Xica.std(axis=0)  # divide each component by its own std

rca_model = RCA(n_components=5, random_state=5)
Xrca = rca_model.fit_transform(X1)

# Run RFC
#rfc = RFC(n_estimators=500,min_samples_leaf=round(len(X1)*.01),random_state=5,n_jobs=-1)
#imp = rfc.fit(X1,Y1).feature_importances_
#imp = pd.DataFrame(imp,columns=['Feature Importance'])
#imp.sort_values(by=['Feature Importance'],inplace=True,ascending=False)
#imp['Cum Sum'] = imp['Feature Importance'].cumsum()
#imp = imp[imp['Cum Sum']<=0.35]
#top_cols = imp.index.tolist()
#Xrfc = X1[top_cols]



def MLP_classifier(X, Y, datasource):