def ica(self, n_comp, data=None):
    """Fit FastICA with `n_comp` components on the training data and store
    the transformed train and test sets on the instance.

    Parameters
    ----------
    n_comp : int
        Number of independent components to extract.
    data : array-like, optional
        Training data; defaults to ``self.train``.

    Side effects: sets ``self.ica_train_data``, ``self.ica_test_data`` and
    keeps the fitted model in ``self.ICA``.
    """
    if data is None:
        data = self.train
    ica_features = ICA(n_comp)
    ica_features.fit(data)
    self.ica_train_data = ica_features.transform(data)
    self.ICA = ica_features
    # Bug fix: the test set was previously fit with a *separate* ICA
    # instance, producing components unrelated to the training projection.
    # Reuse the train-fitted model so train/test live in the same space.
    # (Also dropped a stale comment copy-pasted from the PCA variant.)
    self.ica_test_data = ica_features.transform(self.test)
def ica(tx, ty, rx, ry, dataset):
    """Run the ICA experiment suite: EM clustering on a 2-component
    projection, per-feature kurtosis report, reduction to half the feature
    count, 2-D/3-D scatter plots, then EM and the neural nets on the
    reduced data.

    tx/ty are the training features/labels, rx/ry the test split.
    Relies on module-level helpers em, nn, myNN (km is commented out).
    """
    reduced_data = ICA(n_components=2).fit_transform(tx)
    em(tx, ty, rx, ry, reduced_data, add="", times=4, dataset=dataset, alg="ICA")
    # Kurtosis of each original feature (fisher=False -> Pearson; normal == 3).
    x, y = tx.shape
    for i in range(0, y):
        print(kurtosis(tx[:, i], fisher=False))
    # Bug fix: tx[1].size / 2 is a float under Python 3 and FastICA requires
    # an integer n_components; use floor division.
    compressor = ICA(n_components=tx[1].size // 2, max_iter=10000, tol=0.001)  # for some people, whiten needs to be off
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    # km(newtx, ty, newrx, ry, [], add="", times=7, dataset=dataset, alg="ICA")
    # 2-D scatter of a fresh 2-component ICA projection (columns are named
    # 'PCA%i' only for plotting convenience; this is still ICA).
    pca = ICA(n_components=2)
    pca.fit(tx)
    result = pd.DataFrame(pca.transform(tx), columns=['PCA%i' % i for i in range(2)])
    my_color = pd.Series(ty).astype('category').cat.codes
    fig = plt.figure()
    # ax = fig.add_subplot(111, projection='2d')
    ax = fig.add_subplot(111)
    ax.scatter(result['PCA0'], result['PCA1'], c=my_color, cmap="Dark2_r", s=60)
    plt.show()
    # 3-D scatter of the first three reduced components.
    # NOTE(review): the 3-name column list assumes the half-width compressor
    # produced exactly 3 components (i.e. tx has 6 features) — confirm.
    result = pd.DataFrame(compressor.transform(tx), columns=['ICA%i' % i for i in range(3)])
    my_color = pd.Series(ty).astype('category').cat.codes
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(result['ICA0'], result['ICA1'], result['ICA2'], c=my_color, cmap="Dark2_r", s=60)
    # Draw the three coordinate axes through the origin in red.
    xAxisLine = ((min(result['ICA0']), max(result['ICA0'])), (0, 0), (0, 0))
    ax.plot(xAxisLine[0], xAxisLine[1], xAxisLine[2], 'r')
    yAxisLine = ((0, 0), (min(result['ICA1']), max(result['ICA1'])), (0, 0))
    ax.plot(yAxisLine[0], yAxisLine[1], yAxisLine[2], 'r')
    zAxisLine = ((0, 0), (0, 0), (min(result['ICA2']), max(result['ICA2'])))
    ax.plot(zAxisLine[0], zAxisLine[1], zAxisLine[2], 'r')
    ax.set_xlabel("IC1")
    ax.set_ylabel("IC2")
    ax.set_zlabel("IC3")
    ax.set_title("ICA on the Phishing data set")
    plt.show()
    print("-----------")
    # Kurtosis of each independent component after reduction.
    x, y = newtx.shape
    for i in range(0, y):
        print(kurtosis(newtx[:, i], fisher=False))
    em(newtx, ty, newrx, ry, add="wICAtr", times=21, dataset=dataset, alg="ICA")
    em(newtx, ty, newrx, ry, ICA(n_components=2).fit_transform(tx), add="wICAtr", times=9, dataset=dataset, alg="Ica")
    nn(newtx, ty, newrx, ry, add="wICAtr")
    myNN(newtx, ty, newrx, ry, "ica")
def reduce_dim(data, labels, n_components, **kwargs):
    """Perform dimensionality reduction.

    Parameters
    ----------
    data : array-like, the feature matrix.
    labels : array-like, class labels (used by LDA only).
    n_components : int, target dimensionality.
    kwargs : must contain 'method' in {'pca', 'lda', 'ica'}.

    Returns the reduced matrix, or None for an unrecognized method
    (preserving the original fall-through behavior).
    """
    if kwargs['method'] == 'pca':
        matrix = data
        pca = PCA(n_components=n_components, svd_solver='full')
        pca.fit(matrix)
        return pca.transform(matrix)
    if kwargs['method'] == 'lda':
        transformer = Normalizer()
        label = labels
        matrix = data
        transformer.fit(matrix)
        normalized = transformer.transform(matrix)
        lda = LDA(n_components=n_components)
        lda.fit(normalized, label)
        # Bug fix: the model was fit on the *normalized* matrix but the raw
        # matrix was transformed; apply it to the same representation.
        return lda.transform(normalized)
    if kwargs['method'] == 'ica':
        matrix = data
        ica = ICA(n_components=n_components, random_state=0)
        return ica.fit_transform(matrix)
def ica_analysis(X, y, plot_path):
    """Fit a 3-component FastICA on X and save a 3-D scatter of the
    components, colored by label, to ``plot_path + '/ica_3D'``.

    NOTE(review): X is a DataFrame and is mutated in place ('y', 'label'
    and the three ICA columns are added to the caller's object) — confirm
    that side effect is intended.
    """
    feat_cols = list(X)  # original feature column names, captured before mutation
    df = X  #pd.DataFrame(X,columns=feat_cols)
    df['y'] = y
    df['label'] = df['y'].apply(lambda i: str(i))
    # Drop the local references; the caller's objects live on through df.
    X, y = None, None
    print('Size of the dataframe: {}'.format(df.shape))
    # For reproducability of the results
    #np.random.seed(42)
    #rndperm = np.random.permutation(df.shape[0])
    label_list = df['y'].tolist()
    label_list = list(map(int, label_list))  # assumes labels are int-convertible — TODO confirm
    print(label_list)
    ica = ICA(n_components=3)
    ica_result = ica.fit_transform(df[feat_cols].values)
    df['ica-one'] = ica_result[:, 0]
    df['ica-two'] = ica_result[:, 1]
    df['ica-three'] = ica_result[:, 2]
    # NOTE(review): Figure.gca(projection=...) was removed in Matplotlib 3.6;
    # modern Matplotlib needs fig.add_subplot(projection='3d') instead.
    ax = plt.figure(figsize=(16, 10)).gca(projection='3d')
    ax.scatter(xs=df["ica-one"], ys=df["ica-two"], zs=df["ica-three"],
               c=label_list, cmap='tab10')
    ax.set_xlabel('ica-one')
    ax.set_ylabel('ica-two')
    ax.set_zlabel('ica-three')
    plt.savefig(plot_path + '/ica_3D')
def reduce_dim(data, labels, n_components, **kwargs):
    """Perform dimensionality reduction.

    Parameters
    ----------
    data : array-like, the feature matrix.
    labels : array-like, class labels (used by LDA only).
    n_components : int, target dimensionality.
    kwargs : must contain 'method' in {'pca', 'lda', 'ica'}.

    Returns the reduced matrix, or None for an unrecognized method
    (preserving the original fall-through behavior).
    """
    if kwargs['method'] == 'pca':
        matrix = data
        pca = PCA(n_components=n_components)
        pca.fit(matrix)
        return pca.transform(matrix)
    if kwargs['method'] == 'lda':
        label = labels
        matrix = data
        lda = LDA(n_components=n_components)
        lda.fit(matrix, label)
        # Bug fix: removed a second, discarded LDA(...) constructor call —
        # its result was never bound or used (dead code) — plus the stray
        # unreachable `pass` after the return.
        return lda.transform(matrix)
    if kwargs['method'] == 'ica':
        matrix = data
        ica = ICA(n_components=n_components, random_state=0)
        return ica.fit_transform(matrix)
def split_ica(combined_data, label_1, label_2):
    """Fit FastICA on `combined_data` and scatter the first two components:
    rows 0-99 as blue circles (label_1), rows 100-199 as red triangles
    (label_2). Returns the full transformed array."""
    projection = ICA().fit(combined_data).transform(combined_data)
    first_class = projection[:100]
    second_class = projection[100:200]
    plt.plot(first_class[:, 0], first_class[:, 1], 'o', markersize=7,
             color='blue', alpha=0.5, label=label_1)
    plt.plot(second_class[:, 0], second_class[:, 1], '^', markersize=7,
             color='red', alpha=0.5, label=label_2)
    plt.xlabel('x_values')
    plt.ylabel('y_values')
    plt.xlim([-0.3, 0.3])
    plt.ylim([-0.3, 0.3])
    plt.legend()
    #plt.title('Transformed samples with class labels from matplotlib.mlab.PCA()')
    plt.show()
    return projection
def ica(tx, ty, rx, ry):
    """Reduce the train/test splits with FastICA (all components) and run
    the EM, k-means and neural-net experiments on the reduced data."""
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    # Bug fix: the test split was previously re-fit with fit_transform,
    # yielding components unrelated to the training projection; reuse the
    # train-fitted model instead.
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICAtr")
def caller(tx, ty, rx, ry):
    """Sweep 4/8/12/16 components for PCA, ICA, random projection and
    SelectKBest; each reducer is fit on the training split, applied to both
    splits, and the NN results are tabulated under its algorithm label."""
    nums = [4, 8, 12, 16]

    def _sweep(header, make_compressor, alg):
        # Fit on train, apply to both splits, report under `alg`.
        for n in nums:
            print(header)
            print(n)
            compressor = make_compressor(n)
            compressor.fit(tx, y=ty)
            newtx = compressor.transform(tx)
            newrx = compressor.transform(rx)
            nnTable(newtx, ty, newrx, ry, alg=alg)

    _sweep("PCA", lambda n: PCA(n_components=n), "PCA")
    _sweep("ICA", lambda n: ICA(n_components=n), "ICA")
    # Bug fix: the last two sweeps were mislabeled alg="PCA" (copy-paste),
    # making their table rows indistinguishable from the real PCA results.
    _sweep("RandProj", lambda n: RandomProjection(n), "RandProj")
    _sweep("kbest", lambda n: best(k=n), "kbest")
def graphCallerNN(tx, ty, rx, ry):
    """Reduce to half the feature count with PCA/ICA/RP/KBest, relabel the
    training data through one EM clustering pass (oneem), and evaluate the
    neural nets on each reduced representation."""
    # Bug fix: tx[1].size / 2 is a float under Python 3 and the reducers
    # require an integer component count; use floor division.
    n = tx[1].size // 2
    compressor = PCA(n_components=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    myNN(newtx, ty, newrx, ry, "EM-PCA")
    # nnTable(newtx, ty, newrx, ry, alg="EM-PCA")
    compressor = ICA(n_components=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-ICA")
    myNN(newtx, ty, newrx, ry, "EM-Ica")
    compressor = RandomProjection(n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-RP")
    myNN(newtx, ty, newrx, ry, "EM-RP")
    compressor = best(k=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-KB")
    myNN(newtx, ty, newrx, ry, "EM-KB")
def ica_orth(A, r=None):
    """Project A through an r-component ICA of its transpose.

    When r is None the rank is estimated with pca_rank_est(A).
    Returns A @ P where P is the ICA projection of A.T.
    """
    if r is None:
        r = pca_rank_est(A)
    model = ICA(n_components=r).fit(A.T)
    projection = model.transform(A.T)
    return A @ projection
def apply_ica():
    """Average a connectivity matrix over several ICA reductions.

    Reads the module-level globals `data` and `num_components`; for each
    component count, reduces data.T with FastICA and accumulates
    connectivity(data, X_r.T). Returns (C, X_r) where C = 1 - mean
    connectivity and X_r is the reduction from the *last* component count.
    """
    n_features = data.shape[1]
    cons = np.mat(np.zeros((n_features, n_features)))
    for components in num_components:
        reducer = ICA(n_components=components)
        X_r = reducer.fit_transform(data.T)
        cons += connectivity(data, X_r.T)
    C = 1 - (cons / len(num_components))
    return C, X_r
def credit_risk_data():
    """Run the dimensionality-reduction / clustering experiment suite on the
    credit-risk dataset.

    Reads the module-level globals (credit_data, my_encoder, run_* helpers,
    EM, KMeans, ...); produces plots and printouts as side effects.
    """
    # Drop columns not used as predictors; 'class' is kept aside as the label.
    data_X = credit_data.drop([
        'credit_amount', 'other_parties', 'purpose', 'own_telephone',
        'foreign_worker'
    ], axis=1)
    data_y = credit_data[['class']]
    # Categorical columns to encode. NOTE(review): 'class' appears here too,
    # so the label column is encoded along with the features — confirm intended.
    features_to_encode = [
        'personal_status', 'checking_status', 'credit_history',
        'savings_status', 'employment', 'property_magnitude',
        'other_payment_plans', 'housing', 'job', 'class'
    ]
    enc = my_encoder()
    enc.fit(data_X, features_to_encode)
    X_train = enc.transform(data_X)
    # X_test = enc.transform(X_test)
    # Component-count sweeps for each reduction method.
    run_PCA(X_train, "Credit Data")
    run_ICA(X_train, "Credit Data")
    run_RCA(X_train, "Credit Data")
    # Fixed-size projections used by the clustering experiments below.
    pca_credit = PCA(n_components=3, random_state=5).fit_transform(X_train)
    ica_credit = ICA(n_components=2, random_state=5).fit_transform(X_train)
    rca_credit = RCA(n_components=29, random_state=5).fit_transform(X_train)
    run_kmeans(pca_credit, X_train, "KMEANS")
    run_kmeans(ica_credit, X_train, "KMEANS")
    run_kmeans(rca_credit, X_train, "KMEANS")
    run_EM(pca_credit, X_train, 'PCA Credit Risk Data')
    run_EM(ica_credit, X_train, 'ICA Credit Risk Data')
    run_EM(rca_credit, X_train, 'RCA Credit Risk Data')
    # Baseline clustering on the raw (encoded) features.
    km = KMeans(n_clusters=3, random_state=0)
    y_km = km.fit_predict(X_train)
    score = silhouette_score(X_train, km.labels_, metric='euclidean')
    print('Silhouetter Score: %.3f' % score)
    # kmeans_silhoutte_analysis(X_train)
    elbow_function(X_train)
    run_kmeans(X_train, y_km, "KMEANS")
    em = EM(n_components=2, covariance_type='spherical', random_state=100)
    y_em = em.fit_predict(X_train)
    plot_EM(em, X_train)
    run_EM(X_train, y_em, "EM")
    # evaluate_EM(em, X_train, y_em)
    # NOTE(review): this split is computed but never used within this block —
    # presumably consumed by code outside this view; verify.
    X_train, X_test, y_train, y_test = train_test_split(data_X, data_y,
                                                        test_size=0.2,
                                                        random_state=0)
def ica(tx, ty, rx, ry):
    """Reduce the train/test splits with FastICA, report the component
    kurtosis, and run the neural-net experiment on the reduced data."""
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    # Bug fix: the test split was previously re-fit with fit_transform,
    # yielding components unrelated to the training projection; reuse the
    # train-fitted model instead.
    newrx = compressor.transform(rx)
    kurtS = kurtosis(compressor.components_, axis=1)
    kurtIdx = np.argmax(kurtS)
    # Bug fix: Python 2 print statements converted to Python 3 calls.
    print(kurtS)
    print('Kurtosis: ' + str(kurtS[kurtIdx]))
    # em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    # km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICA")
def transform(X, factors, get_model, method, y=None):
    """Reduce X with the requested method (pca, lda, ica, cca, pls).

    Parameters
    ----------
    X : array-like; flattened to 2-D via rtm() before fitting.
    factors : int, falsy, or "full" — target dimensionality; falsy/"full"
        means all features (minus one for LDA).
    get_model : bool — when True, return (latent, model).
    method : str or None — "raw"/None returns X unchanged.
    y : labels; required for the supervised methods (lda, cca, pls).

    Raises RuntimeError for a non-string method or a missing y, ValueError
    for an unrecognized method name.
    """
    if method == "raw" or method is None:
        return X
    if not isinstance(method, str):
        raise RuntimeError("Please supply a method name (pca, lda, ica, cca, pls)")
    # Bug fix: lowercase *before* the factor adjustment so "LDA" also gets
    # its component count capped (previously only lowercase "lda" did).
    method = method.lower()
    if not factors or factors == "full":
        factors = np.prod(X.shape[1:])
        if method == "lda":
            factors -= 1
    if method == "pca":
        from sklearn.decomposition import PCA
        model = PCA(n_components=factors, whiten=True)
    elif method == "lda":
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
        model = LDA(n_components=factors)
    elif method == "ica":
        from sklearn.decomposition import FastICA as ICA
        model = ICA(n_components=factors)
    elif method == "cca":
        from sklearn.cross_decomposition import CCA
        model = CCA(n_components=factors)
    elif method == "pls":
        from sklearn.cross_decomposition import PLSRegression as PLS
        model = PLS(n_components=factors)
        # Bug fix: y.dtype was inspected before the y-is-None guard below,
        # so a missing y raised AttributeError here instead of the intended
        # RuntimeError; check for None first.
        if y is not None and str(y.dtype)[:3] not in ("flo", "int"):
            y = dummycode(y, get_translator=False)
    else:
        raise ValueError("Method {} unrecognized!".format(method))
    X = rtm(X)
    if method in ("lda", "cca", "pls"):
        if y is None:
            raise RuntimeError("y must be supplied for {}!".format(method))
        latent = model.fit_transform(X, y)
    else:
        if y is not None:
            warnings.warn("y supplied for {}. Ignoring!".format(method))
        latent = model.fit_transform(X)
    # CCA/PLS fit_transform returns (x_scores, y_scores); keep the X scores.
    if isinstance(latent, tuple):
        latent = latent[0]
    if get_model:
        return latent, model
    else:
        return latent
def find_ica(self, n_comp, data=None):
    """Sweep ICA component counts 2..n_comp and plot the mean absolute-sum
    kurtosis over 10 shuffle splits for the train and CV partitions.

    Parameters
    ----------
    n_comp : int
        Largest component count to try.
    data : DataFrame, optional
        Defaults to ``self.train``.
    """
    if data is None:
        data = self.train
    rows = []
    ss = ShuffleSplit(n_splits=10, test_size=0.2)
    for x in range(2, n_comp + 1):
        ica_kutosis_train = 0
        ica_kutosis_cv = 0
        for train, cv in ss.split(self.train):
            ica_features = ICA(n_components=x, algorithm='parallel', max_iter=500)
            ica_features.fit(data.iloc[train])
            ica_train_data = ica_features.transform(data.iloc[train])
            ica_kutosis_train += np.sum(np.abs(kurtosis(ica_train_data)))
            # NOTE(review): the CV fold is re-fit with its own ICA rather
            # than transformed by the train-fitted model — confirm intended.
            ica_features.fit(data.iloc[cv])
            ica_cv_data = ica_features.transform(data.iloc[cv])
            ica_kutosis_cv += np.sum(np.abs(kurtosis(ica_cv_data)))
        rows.append({
            "K": x,
            "Kurtosis_Train": ica_kutosis_train / 10,
            "Kurtosis_CV": ica_kutosis_cv / 10,
        })
    # Bug fix: DataFrame.append was removed in pandas 2.0; collect the rows
    # in a list and build the frame once.
    ica_kutosis = pd.DataFrame(rows)
    # Bug fix: Python 2 print statement converted to a Python 3 call.
    print(ica_kutosis)
    plt.plot(ica_kutosis["K"], ica_kutosis["Kurtosis_Train"], color='red', label="Train")
    plt.plot(ica_kutosis["K"], ica_kutosis["Kurtosis_CV"], color='blue', label="CV")
    plt.xlabel("N Components")
    plt.ylabel("Absolute Sum Kurtosis")
    plt.title("ICA: N Components vs Kurtosis")
    plt.grid()
    plt.legend()
    plt.show()
def ICA_experiment(X, y, title, folder=""):
    """Plot the mean absolute kurtosis of the independent components as the
    ICA component count sweeps 2..n_features-1; saves folder/ICA.png.

    `y` is accepted for signature parity but not used.
    """
    component_counts = list(np.arange(2, X.shape[1], 1))
    model = ICA(random_state=200)

    def _avg_abs_kurtosis(n):
        # Refit with n components, then average |kurtosis| over the columns.
        model.set_params(n_components=n)
        projected = pd.DataFrame(model.fit_transform(X))
        return projected.kurt(axis=0).abs().mean()

    kurtosis_scores = [_avg_abs_kurtosis(n) for n in component_counts]
    plt.figure()
    plt.title("ICA Kurtosis: " + title)
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(component_counts, kurtosis_scores)
    plt.savefig(folder + '/ICA.png')
    plt.close()
def run_ICA(X, title):
    """Sweep ICA dimensionality (2 to n_features-1 in steps of 3, plus the
    full width) and show the mean absolute component kurtosis per size."""
    dims = list(np.arange(2, (X.shape[1] - 1), 3))
    dims.append(X.shape[1])
    reducer = ICA(random_state=5)
    avg_kurtosis = []
    for n_comp in dims:
        reducer.set_params(n_components=n_comp)
        components = pd.DataFrame(reducer.fit_transform(X))
        avg_kurtosis.append(components.kurt(axis=0).abs().mean())
    plt.figure()
    plt.title("ICA Kurtosis: " + title)
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(dims, avg_kurtosis, 'b-')
    plt.grid(False)
    plt.show()
def do_ica(data, class_label):
    """Fit FastICA on `data` and scatter its first two components as blue
    circles labeled `class_label`."""
    model = ICA()
    transformed = model.fit(data).transform(data)
    plt.plot(transformed[:, 0], transformed[:, 1], 'o', markersize=7,
             color='blue', alpha=0.5, label=class_label)
    plt.xlabel('x_values')
    plt.ylabel('y_values')
    plt.xlim([-0.5, 0.5])
    plt.ylim([-0.5, 0.5])
    plt.legend()
    plt.title('Transformed samples versus original data')
    plt.show()
def run_ICA(X, y, plot_path):
    """Sweep ICA dimensionality and save the kurtosis curve to
    plot_path/ICA_DR. `y` is accepted for signature parity but unused."""
    dims = list(np.arange(2, (X.shape[1] - 1), 3))
    #dims = list(np.arange(2,80,3))
    dims.append(X.shape[1])
    # NOTE: max_iter=10 is very low — FastICA may not converge; kept as-is.
    reducer = ICA(random_state=1, max_iter=10)
    avg_kurt = []
    for dim in dims:
        print(dim)
        reducer.set_params(n_components=dim)
        components = pd.DataFrame(reducer.fit_transform(X))
        avg_kurt.append(components.kurt(axis=0).abs().mean())
    plt.figure()
    plt.title("ICA Kurtosis")
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(dims, avg_kurt, 'b-')
    plt.grid(False)
    plt.savefig(plot_path + '/ICA_DR')
def run_ICA(X, y, title):
    """Sweep ICA dimensionality and save the kurtosis curve under
    plotsdir/<title>/"ICA Kurtosis.png". Uses the module-level globals
    randomSeed and plotsdir. `y` is accepted but unused."""
    dims = list(np.arange(2, (X.shape[1] - 1), 3))
    dims.append(X.shape[1])
    reducer = ICA(random_state=randomSeed, whiten=True)
    avg_kurt = []
    for n_comp in dims:
        reducer.set_params(n_components=n_comp)
        components = pd.DataFrame(reducer.fit_transform(X))
        avg_kurt.append(components.kurt(axis=0).abs().mean())
    plt.figure()
    plt.title("ICA Kurtosis: " + title)
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(dims, avg_kurt, 'b-')
    plt.grid(False)
    out_dir = plotsdir + "/" + title
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    plt.savefig(out_dir + "/ICA Kurtosis.png")
# Parse comma-separated lines from `fin`: all but the last two fields are
# features; the second-to-last field is the integer label.
X = []
y = []
for l in fin:
    X.append(l.split(",")[:-2])
    y.append(int(l.split(",")[-2]))
X = np.array(X, dtype=np.float32)
# NOTE(review): duplicated conversion — redundant but harmless.
X = np.array(X, dtype=np.float32)
# Standardize, then project onto 4 independent components.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
ica = ICA(n_components=4)
ica.fit(X)
X = ica.transform(X)
#plot_bic(X)
# Fit a 2-component tied-covariance Gaussian mixture on the reduced data.
gmm = mixture.GaussianMixture(n_components=2, covariance_type='tied')
gmm.fit(X)
newX = []
# NOTE(review): the body of this loop is missing from this view — the
# snippet is truncated here.
for pt in X:
# Component-count sweeps for each reduction method on the diabetes data.
run_PCA(diabetes_X, diabetes_Y, "Diabetes Data")
run_ICA(diabetes_X, diabetes_Y, "Diabetes Data")
run_RCA(diabetes_X, diabetes_Y, "Diabetes Data")
# Same sweeps on a held-out split of the credit data.
X_train, X_test, y_train, y_test = train_test_split(np.array(creditX), np.array(creditY), test_size=0.2)
run_PCA(X_train, creditY, "Credit Data")
run_ICA(X_train, creditY, "Credit Data")
run_RCA(X_train, creditY, "Credit Data")
# Fixed-size projections used by the clustering experiments below.
imp_diabetes, topcols_diabetes = run_RFC(diabetes_X, diabetes_Y, df_diabetes)
pca_diabetes = PCA(n_components=3, random_state=5).fit_transform(diabetes_X)
ica_diabetes = ICA(n_components=5, random_state=5).fit_transform(diabetes_X)
# Bug fix: the "RCA" projection was computed with ICA (copy-paste); use the
# random-projection reducer that the variable name and result labels refer
# to, matching the sibling experiments in this file.
rca_diabetes = RCA(n_components=6, random_state=5).fit_transform(diabetes_X)
rfc_diabetes = df_diabetes[topcols_diabetes]
rfc_diabetes = np.array(rfc_diabetes.values, dtype='int64')
run_kmeans(pca_diabetes, diabetes_Y, 'PCA Diabetes Data')
run_kmeans(ica_diabetes, diabetes_Y, 'ICA Diabetes Data')
run_kmeans(rca_diabetes, diabetes_Y, 'RCA Diabetes Data')
run_kmeans(rfc_diabetes, diabetes_Y, 'RFC Diabetes Data')
run_EM(pca_diabetes, diabetes_Y, 'PCA Diabetes Data')
run_EM(ica_diabetes, diabetes_Y, 'ICA Diabetes Data')
run_EM(rca_diabetes, diabetes_Y, 'RCA Diabetes Data')
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import FastICA as ICA
import sklearn.model_selection as k

# Load the banking dataset and split off the target column.
r = pd.read_csv('bank_contacts.csv')
x = r.drop(columns='credit_application')
y = r['credit_application']

# Hold out 20% of the rows for testing.
train_x, test_x, train_y, test_y = k.train_test_split(
    x, y, test_size=0.2, random_state=42)

# Standardize and project onto 4 independent components; the scaler and
# the ICA model are fit on the training split only.
sc = StandardScaler()
train_x = sc.fit_transform(train_x)
test_x = sc.transform(test_x)
ica = ICA(n_components=4, random_state=42)
train_x = ica.fit_transform(train_x, train_y)
test_x = ica.transform(test_x)

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(max_depth=2, random_state=0)
classifier.fit(train_x, train_y)
pred = classifier.predict(test_x)

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
print(confusion_matrix(test_y, pred))
print('Accuracy:', accuracy_score(test_y, pred))

# Scatter the first three retained components against the predicted class.
plt.scatter(pred, test_x[:, 0], marker='o')
plt.scatter(pred, test_x[:, 1], marker='o')
plt.scatter(pred, test_x[:, 2], marker='o')
plt.show()
# NOTE(review): this fragment appears to start mid-loop — `i`, `ks`, `kur`,
# `scr` and `X` are defined outside this view.
ica = ICA(n_components = i)
Z = ica.fit_transform(X)
# Record mean and minimum component kurtosis for this component count.
kur[i-1] = np.mean(kurtosis(Z))
scr[i-1] = np.amin(kurtosis(Z))
plt.plot(ks,kur,ks,scr)
plt.title("Adult Income Data - ICA")
plt.xlabel("# of components")
plt.ylabel("Score")
plt.legend(["kurtosis-avg",'kurtosis-min axis'])
plt.show()
# NOTE(review): the triple quote below opens a string whose closing
# delimiter lies outside this view; everything after it is string content
# (apparently a commented-out KMeans/GMM clustering block) and is preserved
# verbatim.
""" pca = ICA(n_components=6) Z = pca.fit_transform(X) for k in ks: clust = KMeans(n_clusters=k).fit(Z) W = clust.predict(Z) ss[k - 1] = clust.inertia_ plt.plot(ks, ss) plt.title("Wine Quality Data - KM") plt.xlabel("# of clusters") plt.ylabel("Sum of Squares") plt.legend(["kmeans"]) plt.show() for k in ks: clust = GaussianMixture(n_components=k).fit(Z)
# play(song*3)
plt.plot(np.arange(1000) ** 2)
# Two sinusoidal sources at different frequencies.
signal1 = np.sin(np.arange(256) * 0.15)
signal2 = np.sin(np.arange(256) * 0.06)
# 200 random linear mixtures of the two sources, plus light uniform noise.
combinations = np.random.random((200, 2)) @ np.vstack((signal1, signal2))
combinations += np.random.random(combinations.shape) * .1
# Probe with PCA: smallest rank whose cumulative explained variance
# exceeds 99% becomes the ICA component count.
PCA_t = PCA(n_components=10)
PCA_t.fit(combinations)
ideal_n = 1 + np.where(np.cumsum(PCA_t.explained_variance_ratio_) > .99)[0][0]
ICA_t = ICA(n_components=ideal_n, tol=1e-8)
scores = ICA_t.fit_transform(combinations)
mixings = ICA_t.mixing_
loadings = ICA_t.components_
# tops = mixings
plt.close()
# 2x2 panel: the two sources on top, mixtures and recovered loadings below.
fig, ax = plt.subplots(2, 2)
ax[0, 0].plot(signal1)
ax[0, 1].plot(signal2)
for row in range(combinations.shape[0]):
    ax[1, 0].plot(combinations[row, :])
for comp in range(loadings.shape[0]):
    ax[1, 1].plot(loadings[comp, :])
def separateComponents(self):
    """Unmix self.signals with FastICA (one component per channel) and
    cache the components, their amplitude range, and a default selection
    of every component index."""
    n_channels = len(self.signals[0])
    self.ica = ICA(n_components=n_channels, max_iter=300)
    unmixed = self.ica.fit_transform(self.signals)
    # Store components row-wise (one row per component).
    self.components = np.matrix.transpose(unmixed)
    self.amUnits = [np.amax(self.components), np.amin(self.components)]
    self.selectedComponents = list(range(len(self.components)))
def get_ica_data(X, components):
    """Return X projected onto `components` independent components, seeded
    with the module-level randomSeed for reproducibility."""
    reducer = ICA(n_components=components, random_state=randomSeed)
    return reducer.fit_transform(X)
def main():
    """Compare an MLP classifier on the white-wine dataset under five
    feature representations: raw (standardized), PCA, ICA, random
    projection (RCA) and RFC-selected columns.

    Reads ../Dataset/winequality-white.csv; quality is binarized
    (<=6 -> 0, >6 -> 1).
    """
    df = pd.read_csv("../Dataset/winequality-white.csv", delimiter=";")
    seed = 200
    np.random.seed(seed)
    # Binarize the quality target.
    lowquality = df.loc[df['quality'] <= 6].index
    highquality = df.loc[df['quality'] > 6].index
    df.iloc[lowquality, df.columns.get_loc('quality')] = 0
    df.iloc[highquality, df.columns.get_loc('quality')] = 1
    X = np.array(df.iloc[:, 0:-1])
    wine_Y = np.array(df.iloc[:, -1])
    standardScalerX = StandardScaler()
    wine_x = standardScalerX.fit_transform(X)
    # Fixed-size projections for the comparison.
    pca_wine = PCA(n_components=7, random_state=seed).fit_transform(wine_x)
    ica_wine = ICA(n_components=9, random_state=seed).fit_transform(wine_x)
    rca_wine = RCA(n_components=8, random_state=seed).fit_transform(wine_x)
    imp_wine, top_columns_wine = run_RFC(wine_x, wine_Y, df)
    rfc_wine = np.array(df[top_columns_wine].values, dtype='int64')

    def _evaluate_on(features, title):
        # One fresh 70/30 split and a fresh MLP per representation — same
        # hyperparameters as the original straight-line code.
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(wine_Y), test_size=0.30)
        learner = MLPClassifier(hidden_layer_sizes=(22, ),
                                activation='relu',
                                learning_rate_init=0.0051,
                                random_state=seed)
        evaluate(learner, X_train, X_test, y_train, y_test, title=title)

    _evaluate_on(wine_x, "FullDataset")
    _evaluate_on(pca_wine, "PCA")
    _evaluate_on(ica_wine, "ICA")
    _evaluate_on(rca_wine, "RP")
    _evaluate_on(rfc_wine, "RFC")
# Banking data: hold-out split and the PCA sweep (other sweeps disabled).
ttX, ttY, bankX, bankY = import_data()
X_train, X_test, y_train, y_test = train_test_split(np.array(bankX), np.array(bankY), test_size=0.2)
run_PCA(X_train, y_train, "Banking Data")
#run_ICA(X_train,y_train,"Banking Data")
#run_RCA(bankX,bankY,"Banking Data")
#imp_bank, topcols_bank = run_RFC(X_train,y_train,df_bank)
#imp_bank

# In[15]:

# Titanic data: fixed-size projections for the clustering runs below.
ttX, ttY, bankX, bankY = import_data()
imp_tt, topcols_tt = run_RFC(ttX, ttY, df_tt)
pca_tt = PCA(n_components=10, random_state=5).fit_transform(ttX)
ica_tt = ICA(n_components=38, random_state=5).fit_transform(ttX)
# Bug fix: the "RCA" projection was computed with ICA (copy-paste); use the
# random-projection reducer the variable name and result labels refer to,
# matching the sibling experiments in this file.
rca_tt = RCA(n_components=29, random_state=5).fit_transform(ttX)
rfc_tt = df_tt[topcols_tt]
rfc_tt = np.array(rfc_tt.values, dtype='int64')

# In[26]:

print('pca')
run_kmeans(pca_tt, ttY, 'PCA titanic Data')
print('ica')
run_kmeans(ica_tt, ttY, 'ICA titanic Data')
print('rca')
run_kmeans(rca_tt, ttY, 'RCA titanic Data')
print('rfc')
# Component-count sweeps on the loan and telescope datasets.
run_PCA(loanX, loanY, "Loan Data")
run_ICA(loanX, loanY, "Loan Data")
run_RCA(loanX, loanY, "Loan Data")
imp_loan, topcols_loan = run_RFC(loanX, loanY, df_loan)
loanX, loanY, telescopeX, telescopeY = import_data()
run_PCA(telescopeX, telescopeY, "Telescope Data")
run_ICA(telescopeX, telescopeY, "Telescope Data")
run_RCA(telescopeX, telescopeY, "Telescope Data")
imp_telescope, topcols_telescope = run_RFC(telescopeX, telescopeY, df_telescope)
# Fixed-size projections for the clustering experiments below.
loanX, loanY, telescopeX, telescopeY = import_data()
imp_loan, topcols_loan = run_RFC(loanX, loanY, df_loan)
pca_loan = PCA(n_components=4, random_state=5).fit_transform(loanX)
ica_loan = ICA(n_components=8, random_state=5).fit_transform(loanX)
# Bug fix: the "RCA" projection was computed with ICA (copy-paste); use the
# random-projection reducer the variable name and result labels refer to,
# matching the sibling experiments in this file.
rca_loan = RCA(n_components=6, random_state=5).fit_transform(loanX)
rfc_loan = df_loan[topcols_loan]
rfc_loan = np.array(rfc_loan.values, dtype='int64')
run_kmeans(pca_loan, loanY, 'PCA loan Data')
run_kmeans(ica_loan, loanY, 'ICA loan Data')
run_kmeans(rca_loan, loanY, 'RCA loan Data')
run_kmeans(rfc_loan, loanY, 'RFC loan Data')
# NOTE(review): KMeans(n_jobs=...) was removed in scikit-learn 1.0 — these
# calls need updating on modern sklearn; kept as-is to preserve behavior.
evaluate_kmeans(KMeans(n_clusters=10, n_init=10, random_state=100, n_jobs=-1), pca_loan, loanY)
evaluate_kmeans(KMeans(n_clusters=10, n_init=10, random_state=100, n_jobs=-1), ica_loan, loanY)
evaluate_kmeans(KMeans(n_clusters=5, n_init=10, random_state=100, n_jobs=-1), rca_loan, loanY)