# Convert pandas frames/series to plain numpy arrays.
# FIX: .as_matrix() was removed in pandas 1.0 — .to_numpy() is the
# documented replacement (available since pandas 0.24).
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

# Standardize features to zero mean / unit variance.
# (StandardScaler does NOT rescale to [0, 1]; the original comment was
# misleading.) Fit on the training split only to avoid test leakage.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means.
# NOTE(review): computed over range(3) while num_class below is 7, and
# never passed to em()/kmeans() here — confirm whether this was meant
# to be init_means=means_init as in the other dataset sections.
means_init = np.array([X[y == i].mean(axis=0) for i in range(3)])

##############################################################################
# Expected Maximization (Gaussian mixture) sweep over component counts.
em(X_train, X_test, y_train, y_test,
   component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
   num_class=7, file_no="phish")

##############################################################################
# KMeans sweep over the same component counts.
kmeans(X_train, X_test, y_train, y_test,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
       num_class=7, file_no="phish")
# Project the full dataset through the fitted ICA transform.
ica_new.fit(X1)
X_transformed_f = ica_new.transform(X1)

# Clustering after dimensionality reduction
print("Clustering ICA")

# Per-class means of the ICA-projected data (2 classes), used to seed
# the Gaussian mixture model.
means_init = np.array(
    [X_transformed_f[Y1 == i].mean(axis=0) for i in range(2)])

# Clustering experiments on the dimensionality-reduced train/test splits.
print("Expected Maximization")
component_list, array_aic, array_bic, array_homo_1, array_comp_1, \
    array_sil_1, array_avg_log = em(
        X_train_transformed, X_test_transformed, y_train, y_test,
        init_means=means_init,
        component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
        num_class=2, toshow=0)

print("KMeans")
component_list, array_homo_2, array_comp_2, array_sil_2, array_var = kmeans(
    X_train_transformed, X_test_transformed, y_train, y_test,
    init_means=means_init,
    component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
    num_class=2, toshow=0)
# ### 1. NBA games datasets

# In[7]:

# 80/20 train/test split for dataset 1 (NBA).
X_train, X_test, y_train, y_test = train_test_split(X1, Y1, test_size=0.2)

# Standardize features to zero mean / unit variance (StandardScaler does
# not rescale to [0, 1]); fit on the training split only.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class means of dataset 1 (2 classes), used to seed EM.
means_init = np.array([X1[Y1 == i].mean(axis=0) for i in range(2)])
em(X_train, X_test, y_train, y_test, init_means=means_init,
   component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11], num_class=2)

# ### 2. LOL games datasets

# In[8]:

# 80/20 train/test split for dataset 2 (LOL).
X_train, X_test, y_train, y_test = train_test_split(X2, Y2, test_size=0.2)

# Standardize features; fit on the training split only.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# FIX: the original computed these means from X1/Y1 (the NBA data) even
# though this section just split X2/Y2 — a copy-paste bug. Dataset 2's
# seed means must come from X2/Y2.
# NOTE(review): range(7) implies 7 classes here vs. 2 for dataset 1 —
# confirm against dataset 2's label set.
means_init = np.array([X2[Y2 == i].mean(axis=0) for i in range(7)])
# Convert label series to plain numpy arrays.
# FIX: .as_matrix() was removed in pandas 1.0 — .to_numpy() is the
# documented replacement (available since pandas 0.24).
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

# Standardize features to zero mean / unit variance (StandardScaler does
# not rescale to [0, 1]); fit on the training split only.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means used to seed EM.
# NOTE(review): computed over range(3) while num_class below is 7 —
# confirm the intended class count for this dataset.
means_init = np.array([X[y == i].mean(axis=0) for i in range(3)])

##############################################################################
# Expected Maximization (Gaussian mixture) sweep over component counts.
em(X_train, X_test, y_train, y_test, init_means=means_init,
   component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
   num_class=7, file_no="wine")

##############################################################################
# KMeans sweep over the same component counts.
kmeans(X_train, X_test, y_train, y_test, init_means=means_init,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
       num_class=7, file_no="wine")
scaler = preprocessing.StandardScaler().fit(X_train) X_train = scaler.transform(X_train) X_test = scaler.transform(X_test) # In[30]: means_init = np.array([X[y == i].mean(axis=0) for i in range(2)]) # In[31]: ############################################################################################################################## #For Expected Maximization em(dataset, X_train, X_test, y_train, y_test, init_means=means_init, component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], num_class=2) # In[32]: ############################################################################################################################# #For KMeans kmeans(dataset, X_train, X_test, y_train, y_test, init_means=means_init, component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
##############################################################################
# Clustering after dimensionality reduction
print("Clustering PCA")

# Re-fit PCA with the component count selected by the earlier grid search,
# then project the full dataset.
pca_new = PCA(
    n_components=gridSearch.best_estimator_.named_steps['pca'].n_components)
pca_new.fit(X)
# NOTE(review): X_transformed_f is never used below — the em/kmeans calls
# operate on X_train_transformed / X_test_transformed (defined elsewhere);
# confirm this is intentional.
X_transformed_f = pca_new.transform(X)

# Clustering experiments on the PCA-reduced train/test splits.
print("Expected Maximization")
component_list, array_aic, array_bic, array_homo_1, array_comp_1, \
    array_sil_1, array_avg_log = em(
        X_train_transformed, X_test_transformed, y_train, y_test,
        component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
        num_class=7, toshow=0, file_no="wine_pca")

print("KMeans")
component_list, array_homo_2, array_comp_2, array_sil_2, array_var = kmeans(
    X_train_transformed, X_test_transformed, y_train, y_test,
    component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
    num_class=7, toshow=0, file_no="wine_pca")

# Reshape every metric series into a column vector before writing to file.
component_list = np.array(component_list).reshape(-1, 1)
array_aic = np.array(array_aic).reshape(-1, 1)
array_bic = np.array(array_bic).reshape(-1, 1)
array_homo_1 = np.array(array_homo_1).reshape(-1, 1)
array_comp_1 = np.array(array_comp_1).reshape(-1, 1)
array_sil_1 = np.array(array_sil_1).reshape(-1, 1)
array_avg_log = np.array(array_avg_log).reshape(-1, 1)
array_homo_2 = np.array(array_homo_2).reshape(-1, 1)
# Convert label series to numpy arrays and shift 1-based labels to
# 0-based class ids (so they align with range(11) below).
# FIX: .as_matrix() was removed in pandas 1.0 — .to_numpy() is the
# documented replacement (available since pandas 0.24).
y_train = y_train.to_numpy() - 1
y_test = y_test.to_numpy() - 1

# Standardize features to zero mean / unit variance (StandardScaler does
# not rescale to [0, 1]); fit on the training split only.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means (11 classes) used to seed EM.
means_init = np.array([X[y == i].mean(axis=0) for i in range(11)])

##############################################################################
# Expected Maximization sweep over component counts (plots shown).
_ = em(X_train, X_test, y_train, y_test, init_means=means_init,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
       num_class=11, toshow=1)

##############################################################################
# KMeans sweep over the same component counts (plots shown).
_ = kmeans(X_train, X_test, y_train, y_test, init_means=means_init,
           component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
           num_class=11, toshow=1)