def run_dr_clustering():
    """Run every reduction-then-clustering experiment on both datasets.

    The deposit data comes from ``load_cleanse_data()`` and the income data
    from ``loadData()``. Each helper is called positionally with
    ``(data, <int>, <dataset label>, <metric name>)``; the integer is
    presumably the number of components/features kept by the reduction
    step -- confirm against the helper definitions.
    """
    deposit = load_cleanse_data()
    income = loadData()

    # Trailing (label, metric) arguments shared by every call per dataset.
    deposit_tail = ('deposit', 'manhattan')
    income_tail = ('income', 'euclidean')

    # PCA followed by k-means: deposit, then income.
    pcakMeans(deposit, 7, *deposit_tail)
    pcakMeans(income, 4, *income_tail)

    # PCA followed by EM: deposit, then income.
    pcaem(deposit, 7, *deposit_tail)
    pcaem(income, 4, *income_tail)

    # ICA with k-means and EM on the deposit data.
    icakMeans(deposit, 35, *deposit_tail)
    icaem(deposit, 35, *deposit_tail)

    # ICA with k-means and EM on the income data.
    icakMeans(income, 12, *income_tail)
    icaem(income, 12, *income_tail)

    # "rp" helpers (presumably random projection) on the deposit data.
    rpkMeans(deposit, 30, *deposit_tail)
    rpem(deposit, 30, *deposit_tail)

    # "rp" helpers on the income data.
    rpkMeans(income, 8, *income_tail)
    rpem(income, 8, *income_tail)

    # "uvfs" helpers (presumably univariate feature selection -- verify)
    # on the deposit data, then on the income data.
    uvfskMeans(deposit, 30, *deposit_tail)
    uvfsem(deposit, 30, *deposit_tail)
    uvfskMeans(income, 10, *income_tail)
    uvfsem(income, 10, *income_tail)
Ejemplo n.º 2
0
def _use_seaborn_style():
    """Activate the seaborn plot style under the name this Matplotlib knows."""
    try:
        plt.style.use("seaborn")
    except OSError:
        # Newer Matplotlib releases renamed the bundled seaborn style sheets
        # and no longer accept the bare "seaborn" alias.
        plt.style.use("seaborn-v0_8")


def _save_cluster_curve(cluster_counts, values, baseline, y_label, out_path):
    """Plot *values* against *cluster_counts* with a red baseline and save it.

    Draws on the current figure, writes it to *out_path* (creating the
    parent directory when missing) and then clears the figure so the next
    plot starts from a blank canvas.
    """
    import os

    plt.plot(cluster_counts, values)
    plt.xticks(cluster_counts)
    plt.axhline(y=baseline, color='r', linestyle='-')
    plt.xlabel("# Clusters")
    plt.ylabel(y_label)
    plt.grid(True)
    # savefig does not create missing directories on its own.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)
    plt.clf()


def performKmeansNN():
    """Compare an MLP trained on raw features with one trained on k-means labels.

    A baseline ``MLPClassifier`` is scored with 10-fold cross-validation on
    ``data['features']`` / ``data['labels']`` of the deposit dataset. Then,
    for each candidate cluster count, k-means is fitted on the features and
    the same network is cross-validated using only the assigned cluster id
    as its single input column. Mean accuracy and mean fit time per cluster
    count are printed and plotted against the baseline (red horizontal
    line) under ``plots/kmeans_nn/deposit/``.
    """
    # load deposit dataset
    data = load_cleanse_data()
    # Select a range of k to check
    target_clusters = [7, 10, 15, 20, 25, 30, 35, 40, 41]
    mlp = MLPClassifier(hidden_layer_sizes=(15, 2),
                        random_state=70,
                        activation='relu',
                        max_iter=500)
    scoring = ['accuracy']

    # Baseline: the network on the full feature set.
    scores = cross_validate(mlp,
                            data['features'],
                            data['labels'],
                            scoring=scoring,
                            cv=10)
    print(scores)
    NN_fit_time = np.mean(scores['fit_time'])
    NN_accuracy = np.mean(scores['test_accuracy'])

    kmeans_nn_accuracy = []
    kmeans_nn_time = []

    for cluster in target_clusters:
        kmeans = KMeans(n_clusters=cluster, random_state=42)
        cluster_ids = kmeans.fit_predict(data['features'])
        # The cluster id is fed to the network as one numeric column.
        scores = cross_validate(mlp,
                                cluster_ids.reshape(-1, 1),
                                data['labels'],
                                scoring=scoring,
                                cv=10)
        kmeans_nn_accuracy.append(np.mean(scores['test_accuracy']))
        kmeans_nn_time.append(np.mean(scores['fit_time']))

    print(kmeans_nn_accuracy)
    print(kmeans_nn_time)

    # The style must be active before the figure is created.
    _use_seaborn_style()
    plt.figure(figsize=(8, 8))
    _save_cluster_curve(target_clusters, kmeans_nn_accuracy, NN_accuracy,
                        'NN Accuracy',
                        'plots/kmeans_nn/deposit/kmeans_nn_accuracy.png')
    # Both curves share the same x-axis (cluster counts); the fit-time plot
    # was previously mislabelled "Principal Components".
    _save_cluster_curve(target_clusters, kmeans_nn_time, NN_fit_time,
                        'NN Fit Time',
                        'plots/kmeans_nn/deposit/kmeans_nn_fit_time.png')
Ejemplo n.º 3
0
def performDepositPCA():
    """Run PCA on the deposit features, then the PCA + NN validation.

    ``pca.perform_pca`` receives only the feature matrix, while
    ``pca.validate_pca_nn`` receives the whole dataset plus a list of
    integers (presumably the component counts to evaluate -- confirm
    against the ``pca`` module).
    """
    dataset_name = 'deposit'
    deposit = load_cleanse_data()
    pca.perform_pca(deposit['features'], dataset_name)
    candidate_sizes = [7, 10, 15, 20, 25, 30, 35, 40, 41]
    pca.validate_pca_nn(deposit, candidate_sizes, dataset_name)
Ejemplo n.º 4
0
def performDeposituvfs():
    """Run the ``uvfs`` + NN validation on the deposit dataset.

    The integer list is passed straight to ``uvfs.validate_uvfs_nn``;
    presumably these are the feature counts to evaluate -- confirm against
    the ``uvfs`` module.
    """
    deposit = load_cleanse_data()
    candidate_sizes = [7, 10, 15, 20, 25, 30, 35, 40, 41]
    uvfs.validate_uvfs_nn(deposit, candidate_sizes, 'deposit')
Ejemplo n.º 5
0
def performDepositRandomProjection():
    """Apply random projection to the deposit data, then validate it with the NN.

    Both ``randomprojection`` helpers get the dataset, a list of integers
    (presumably the target dimensionalities -- confirm) and the dataset
    label; ``apply_rp`` additionally receives the positional values 2 and 3,
    whose meaning is defined by that helper.
    """
    dataset_name = 'deposit'
    candidate_sizes = (7, 10, 15, 20, 25, 30, 35, 40, 41)
    deposit = load_cleanse_data()
    # Hand each helper its own list object so neither can affect the other.
    randomprojection.apply_rp(deposit, list(candidate_sizes), dataset_name, 2, 3)
    randomprojection.validate_rp_nn(deposit, list(candidate_sizes), dataset_name)
Ejemplo n.º 6
0
def deposit_clustering():
    """Run the k-means estimation and validation steps on the deposit dataset.

    ``estimate_k`` is called with the 'manhattan' metric name; both helpers
    are given the ``plots/kmeans/`` directory, presumably as their output
    location -- confirm against their definitions.
    """
    plot_dir = 'plots/kmeans/'
    dataset_name = 'deposit'
    deposit = load_cleanse_data()
    estimate_k(deposit, dataset_name, plot_dir, 'manhattan')
    validate_k(deposit, plot_dir, dataset_name)
Ejemplo n.º 7
0
def deposit_em():
    """Run the EM estimation and validation steps on the deposit dataset.

    ``estimate_em_k`` is called with the 'manhattan' metric name; both
    helpers are given the ``plots/em/`` directory, presumably as their
    output location -- confirm against their definitions.
    """
    plot_dir = 'plots/em/'
    dataset_name = 'deposit'
    deposit = load_cleanse_data()
    estimate_em_k(deposit, dataset_name, plot_dir, 'manhattan')
    validate_em_k(deposit, plot_dir, dataset_name)