def run_dr_clustering():
    """Run every reduction + clustering combination on both datasets.

    Loads the deposit dataset via ``load_cleanse_data()`` and the income
    dataset via ``loadData()``, then calls the k-means and EM variants of
    the PCA, ICA, RP and UVFS pipelines in a fixed order.  The integer passed
    to each call is presumably the number of retained components/features
    for that technique -- confirm against the callee definitions.
    """
    deposit = load_cleanse_data()
    income = loadData()

    # Dataset label and distance metric passed along with each dataset.
    deposit_opts = ('deposit', 'manhattan')
    income_opts = ('income', 'euclidean')

    # PCA followed by k-means: deposit, then income.
    pcakMeans(deposit, 7, *deposit_opts)
    pcakMeans(income, 4, *income_opts)

    # PCA followed by EM: deposit, then income.
    pcaem(deposit, 7, *deposit_opts)
    pcaem(income, 4, *income_opts)

    # ICA (k-means, then EM) on the deposit data.
    icakMeans(deposit, 35, *deposit_opts)
    icaem(deposit, 35, *deposit_opts)

    # ICA (k-means, then EM) on the income data.
    icakMeans(income, 12, *income_opts)
    icaem(income, 12, *income_opts)

    # RP (k-means, then EM) on the deposit data.
    rpkMeans(deposit, 30, *deposit_opts)
    rpem(deposit, 30, *deposit_opts)

    # RP (k-means, then EM) on the income data.
    rpkMeans(income, 8, *income_opts)
    rpem(income, 8, *income_opts)

    # UVFS (k-means, then EM) on the deposit data.
    uvfskMeans(deposit, 30, *deposit_opts)
    uvfsem(deposit, 30, *deposit_opts)

    # UVFS (k-means, then EM) on the income data.
    uvfskMeans(income, 10, *income_opts)
    uvfsem(income, 10, *income_opts)
def performKmeansNN():
    """Compare an MLP trained on raw features with one trained on k-means labels.

    Steps:
      1. Load the deposit dataset via ``load_cleanse_data()``.
      2. Cross-validate (10 folds) an ``MLPClassifier`` on ``data['features']``
         to obtain the baseline mean accuracy and mean fit time.
      3. For every cluster count in ``target_clusters``, fit ``KMeans`` on the
         features and cross-validate the same MLP using the cluster label as
         the only input column.
      4. Print the per-k accuracies and fit times and save two line plots
         (accuracy and fit time versus number of clusters), each with the
         baseline drawn as a red horizontal line.

    Returns:
        None.  Results are printed and written under
        ``plots/kmeans_nn/deposit/``.
    """
    # load deposit dataset
    data = load_cleanse_data()
    features = data['features']
    labels = data['labels']

    # Select a range of k to check
    target_clusters = [7, 10, 15, 20, 25, 30, 35, 40, 41]

    mlp = MLPClassifier(hidden_layer_sizes=(15, 2), random_state=70,
                        activation='relu', max_iter=500)
    scoring = ['accuracy']

    # Baseline: the network trained directly on the original features.
    scores = cross_validate(mlp, features, labels, scoring=scoring, cv=10)
    print(scores)
    NN_fit_time = np.mean(scores['fit_time'])
    NN_accuracy = np.mean(scores['test_accuracy'])

    kmeans_nn_accuracy = []
    kmeans_nn_time = []
    for cluster in target_clusters:
        kmeans = KMeans(n_clusters=cluster, random_state=42)
        clusters = kmeans.fit_predict(features)
        # The cluster label is the single input feature, hence the reshape
        # into one column.
        scores = cross_validate(mlp, clusters.reshape(-1, 1), labels,
                                scoring=scoring, cv=10)
        kmeans_nn_accuracy.append(np.mean(scores['test_accuracy']))
        kmeans_nn_time.append(np.mean(scores['fit_time']))

    print(kmeans_nn_accuracy)
    print(kmeans_nn_time)

    # NOTE(review): the bare "seaborn" style name was deprecated in
    # Matplotlib 3.6 -- confirm it is still available in the pinned version.
    plt.style.use("seaborn")
    # One 8x8 figure is created and reused (cleared) for both plots.
    plt.figure(figsize=(8, 8))
    _save_kmeans_nn_plot(target_clusters, kmeans_nn_accuracy, NN_accuracy,
                         'NN Accuracy',
                         'plots/kmeans_nn/deposit/kmeans_nn_accuracy.png')
    _save_kmeans_nn_plot(target_clusters, kmeans_nn_time, NN_fit_time,
                         'NN Fit Time',
                         'plots/kmeans_nn/deposit/kmeans_nn_fit_time.png')


def _save_kmeans_nn_plot(cluster_counts, values, baseline, y_label, out_path):
    """Draw *values* against *cluster_counts* on the current figure and save it.

    Args:
        cluster_counts: x-axis values (numbers of k-means clusters).
        values: one y value per entry of ``cluster_counts``.
        baseline: y position of the red horizontal reference line.
        y_label: label for the y axis.
        out_path: file path handed to ``plt.savefig``.

    The current figure is cleared after saving so it can be reused.
    """
    import os

    # Make sure the target directory exists so savefig cannot fail on a
    # missing path after the expensive cross-validation has already run.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    plt.plot(cluster_counts, values)
    plt.xticks(cluster_counts)
    plt.axhline(y=baseline, color='r', linestyle='-')
    # The x axis always shows cluster counts (the fit-time plot used to be
    # labelled "Principal Components" by mistake).
    plt.xlabel("# Clusters")
    plt.ylabel(y_label)
    plt.grid(True)
    plt.savefig(out_path)
    plt.clf()
def performDepositPCA():
    """Run the PCA steps on the deposit dataset.

    Loads the data with ``load_cleanse_data()``, passes its ``'features'``
    entry to ``pca.perform_pca`` and then hands the whole dataset to
    ``pca.validate_pca_nn`` together with a list of integer settings
    (presumably component counts -- confirm in the ``pca`` module).
    """
    dataset_label = 'deposit'
    deposit = load_cleanse_data()

    pca.perform_pca(deposit['features'], dataset_label)

    candidate_sizes = [7, 10, 15, 20, 25, 30, 35, 40, 41]
    pca.validate_pca_nn(deposit, candidate_sizes, dataset_label)
def performDeposituvfs():
    """Run the UVFS validation step on the deposit dataset.

    Loads the data with ``load_cleanse_data()`` and forwards it to
    ``uvfs.validate_uvfs_nn`` with a list of integer settings (presumably
    numbers of features to keep -- confirm in the ``uvfs`` module) and the
    ``'deposit'`` label.
    """
    deposit = load_cleanse_data()
    candidate_sizes = [7, 10, 15, 20, 25, 30, 35, 40, 41]
    uvfs.validate_uvfs_nn(deposit, candidate_sizes, 'deposit')
def performDepositRandomProjection():
    """Run the random-projection steps on the deposit dataset.

    Loads the data with ``load_cleanse_data()``, calls
    ``randomprojection.apply_rp`` and then
    ``randomprojection.validate_rp_nn`` with the same integer settings
    (presumably projected dimensions -- confirm in the ``randomprojection``
    module).  The trailing ``2, 3`` arguments of ``apply_rp`` are passed
    through unchanged; their meaning is defined by that function.
    """
    dataset_label = 'deposit'
    sizes = (7, 10, 15, 20, 25, 30, 35, 40, 41)
    deposit = load_cleanse_data()

    # Each call receives its own fresh list, exactly as two separate list
    # literals would provide.
    randomprojection.apply_rp(deposit, list(sizes), dataset_label, 2, 3)
    randomprojection.validate_rp_nn(deposit, list(sizes), dataset_label)
def deposit_clustering():
    """Run the k-means estimation and validation steps on the deposit data.

    Loads the data with ``load_cleanse_data()``, then calls ``estimate_k``
    (with the ``'manhattan'`` metric) followed by ``validate_k``; both are
    given the ``'plots/kmeans/'`` path and the ``'deposit'`` label.
    """
    plot_dir = 'plots/kmeans/'
    dataset_label = 'deposit'
    deposit = load_cleanse_data()

    estimate_k(deposit, dataset_label, plot_dir, 'manhattan')
    # validate_k takes the path before the label, unlike estimate_k.
    validate_k(deposit, plot_dir, dataset_label)
def deposit_em():
    """Run the EM estimation and validation steps on the deposit data.

    Loads the data with ``load_cleanse_data()``, then calls
    ``estimate_em_k`` (with the ``'manhattan'`` metric) followed by
    ``validate_em_k``; both are given the ``'plots/em/'`` path and the
    ``'deposit'`` label.
    """
    plot_dir = 'plots/em/'
    dataset_label = 'deposit'
    deposit = load_cleanse_data()

    estimate_em_k(deposit, dataset_label, plot_dir, 'manhattan')
    # validate_em_k takes the path before the label, unlike estimate_em_k.
    validate_em_k(deposit, plot_dir, dataset_label)