예제 #1
0
def fzclustering(users_skills, n_clusters_range, fuzzpar, plot=False):
    """Fit fuzzy c-means for several cluster counts and keep the best one.

    One ``FCM`` model (``max_iter=50``, ``error=1e-5``, ``random_state=37``)
    is fitted per entry of ``n_clusters_range``.  The winner is the model
    with the highest partition coefficient; on ties the earliest candidate
    is kept.

    NOTE(review): a later definition named ``fzclustering`` appears further
    down in this file; if both live in the same module, that one rebinds the
    name and this version becomes unreachable -- confirm which is intended.

    Args:
        users_skills: Data handed unchanged to ``FCM.fit`` / ``FCM.predict``.
        n_clusters_range: Iterable of cluster counts to try.
        fuzzpar: Value forwarded as the ``m`` argument of ``FCM``.
        plot: When true, save a line plot of the partition coefficient per
            cluster count to ``clustering_Fuzzy_1.png``.

    Returns:
        ``(best, times)`` where ``best`` is the tuple
        ``(centers, labels, partition_coefficient, fitted_model)`` of the
        winning candidate and ``times`` lists the fit duration in seconds of
        every candidate, in iteration order.

    Raises:
        ValueError: If ``n_clusters_range`` is empty.
    """
    X = users_skills
    n_clusters_range = list(n_clusters_range)
    if not n_clusters_range:
        # Fail early with a clear message instead of max() on an empty dict.
        raise ValueError(
            "n_clusters_range must contain at least one cluster count")

    fzmodels = {}
    times = []
    fpcs = []

    # Find the best number of clusters
    for n_clusters_ in n_clusters_range:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments.
        start = time.perf_counter()
        fuzzy_fcm = FCM(n_clusters=n_clusters_,
                        max_iter=50,
                        m=fuzzpar,
                        error=1e-5,
                        random_state=37)
        fuzzy_fcm.fit(X)
        times.append(time.perf_counter() - start)

        fcm_centers = fuzzy_fcm.centers
        fcm_labels = fuzzy_fcm.predict(X)
        fuzzy_clustering_coeff = fuzzy_fcm.partition_coefficient

        fpcs.append(fuzzy_clustering_coeff)
        fzmodels[n_clusters_] = (fcm_centers, fcm_labels,
                                 fuzzy_clustering_coeff, fuzzy_fcm)

    # Index 2 of every stored tuple is the partition coefficient.
    best_centers = max(fzmodels.values(), key=lambda x: x[2])

    if plot:
        plt.figure()
        plt.title("Fuzzy c-means over number of clusters")
        plt.xlabel("Number of clusters")
        plt.xticks(n_clusters_range)
        plt.ylabel("Fuzzy partition coefficient")
        plt.plot(n_clusters_range, fpcs)
        plt.tight_layout()
        plt.savefig("clustering_Fuzzy_1.png")
        plt.close()

    return best_centers, times
def fzclustering(users_skills, n_clusters_range, plot=False, fuzzpar=1.2):
    """Fit fuzzy c-means for several cluster counts and keep the best one.

    One ``FCM`` model (``max_iter=50``, ``error=1e-5``, ``random_state=88``)
    is fitted per entry of ``n_clusters_range``.  The winner is the model
    with the highest partition coefficient; on ties the earliest candidate
    is kept.

    NOTE(review): an earlier definition named ``fzclustering`` with the
    signature ``(users_skills, n_clusters_range, fuzzpar, plot=False)``
    appears above in this file; if both live in the same module, this one
    shadows it and a positional third argument is read as ``plot`` here --
    confirm which is intended.

    Args:
        users_skills: Data handed unchanged to ``FCM.fit`` / ``FCM.predict``.
        n_clusters_range: Iterable of cluster counts to try.
        plot: When true, save a line plot of the partition coefficient per
            cluster count under the file name ``Fuzzy partition coefficient``.
        fuzzpar: Value forwarded as the ``m`` argument of ``FCM``.  Defaults
            to the previously hard-coded ``1.2``.

    Returns:
        The tuple ``(centers, labels, partition_coefficient)`` of the
        winning candidate.

    Raises:
        ValueError: If ``n_clusters_range`` is empty.
    """
    X = users_skills
    n_clusters_range = list(n_clusters_range)
    if not n_clusters_range:
        # Fail early with a clear message instead of max() on an empty dict.
        raise ValueError(
            "n_clusters_range must contain at least one cluster count")

    fzmodels_2 = {}
    fpcs_2 = []

    # Find the best number of clusters
    for n_clusters_ in n_clusters_range:
        fuzzy_fcm = FCM(n_clusters=n_clusters_,
                        max_iter=50,
                        m=fuzzpar,
                        error=1e-5,
                        random_state=88)
        fuzzy_fcm.fit(X)

        fcm_centers = fuzzy_fcm.centers
        fcm_labels = fuzzy_fcm.predict(X)
        fuzzy_clustering_coeff = fuzzy_fcm.partition_coefficient

        fpcs_2.append(fuzzy_clustering_coeff)
        fzmodels_2[n_clusters_] = (fcm_centers, fcm_labels,
                                   fuzzy_clustering_coeff)

    # Index 2 of every stored tuple is the partition coefficient.
    best_centers_2 = max(fzmodels_2.values(), key=lambda x: x[2])

    if plot:
        plt.figure()
        plt.title("Fuzzy c-means over number of clusters")
        plt.xlabel("Number of clusters")
        plt.xticks(n_clusters_range)
        plt.ylabel("Fuzzy partition coefficient (FPC)")
        plt.plot(n_clusters_range, fpcs_2)
        plt.tight_layout()
        plt.savefig("Fuzzy partition coefficient")
        plt.close()

    return best_centers_2
class RBF:
    """Radial-basis-function classifier with fuzzy c-means hidden units.

    ``fit`` clusters the training data with ``FCM``, builds one fuzzy
    covariance matrix per cluster, and solves a linear least-squares problem
    for the output weights.  ``predict`` evaluates new samples against the
    centers and inverse covariances learned during ``fit``.
    """

    def __init__(self, gamma=0.1):
        """Store the kernel scale; all learned state starts as ``None``."""
        self.gamma = gamma  # factor applied to the distance inside exp()
        self.G = None  # (n_train, k) hidden activations of the last fit
        self.C = None  # (k, m, m) pseudo-inverses of the fuzzy covariances
        self.W = None  # (k, n_classes) least-squares output weights
        self.fcm = None  # FCM model fitted on the training data
        self.classes = None  # sorted unique training labels

    def _activations(self, X):
        """Return the (n_samples, k) hidden-layer outputs for ``X``.

        Entry ``[j, i]`` is ``exp(-gamma * d^T C[i] d)`` with
        ``d = X[j] - centers[i]``.
        """
        X = np.asarray(X, dtype=float)
        centers = np.asarray(self.fcm.centers, dtype=float)
        # diff[j, i] = X[j] - centers[i]
        diff = X[:, np.newaxis, :] - centers[np.newaxis, :, :]
        dist = np.einsum("nkd,kde,nke->nk", diff, self.C, diff)
        return np.exp(-self.gamma * dist)

    def fit(self, k, X_train, y_train):
        """Train the network on ``X_train`` / ``y_train`` with ``k`` units.

        Args:
            k: Number of fuzzy clusters, i.e. hidden units.
            X_train: 2-D array of shape (n_samples, n_features).
            y_train: Class labels, one per sample; either 1-D or a single
                column.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If the number of labels differs from the number of
                samples.
        """
        X_train = np.asarray(X_train, dtype=float)
        n = X_train.shape[0]

        labels = np.asarray(y_train).ravel()
        if labels.shape[0] != n:
            raise ValueError(
                "y_train must provide exactly one label per training sample")

        self.fcm = FCM(n_clusters=k)
        # Do not rely on fit() returning the model: read the fitted state
        # from the instance itself.
        self.fcm.fit(X_train)
        centers = np.asarray(self.fcm.centers, dtype=float)
        memberships = np.asarray(self.fcm.u, dtype=float)

        # Memberships are weighted by the FCM fuzzifier ``m``.  The previous
        # code raised them to the number of features because a local ``m``
        # (taken from X_train.shape) shadowed the intended exponent.
        weights = memberships ** self.fcm.m

        diff = X_train[:, np.newaxis, :] - centers[np.newaxis, :, :]
        cov = np.einsum("nk,nkd,nke->kde", weights, diff, diff)
        cov /= weights.sum(axis=0)[:, np.newaxis, np.newaxis]
        # pinv equals inv for regular matrices and still works when a
        # covariance is singular (e.g. a constant feature).
        self.C = np.array([np.linalg.pinv(c) for c in cov])

        self.G = self._activations(X_train)

        # One-hot targets; columns follow the sorted unique labels.
        self.classes, class_idx = np.unique(labels, return_inverse=True)
        Y = np.eye(self.classes.size)[class_idx]

        # Least-squares weights; pinv(G) == inv(G^T G) G^T whenever the
        # latter exists, without failing on a singular Gram matrix.
        self.W = np.linalg.pinv(self.G).dot(Y)
        return self

    def predict(self, X_test):
        """Predict class indices for ``X_test``.

        Args:
            X_test: 2-D array of shape (n_samples, n_features).

        Returns:
            1-D integer array; each entry indexes the sorted unique labels
            seen during ``fit`` (identical to the label itself when the
            labels are ``0..n_classes-1``).

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.fcm is None or self.C is None or self.W is None:
            raise RuntimeError("RBF.predict() called before fit()")
        # Reuse the trained centers and inverse covariances; nothing is
        # re-estimated from the test data.
        G = self._activations(X_test)
        y_pred = np.argmax(G.dot(self.W), axis=1)
        return y_pred

    def get_accuracy(self, y_test, y_pred):
        """Return the fraction of entries where ``y_test`` equals ``y_pred``.

        Both inputs are flattened before the element-wise comparison.
        """
        return np.mean(
            np.equal(np.asarray(y_test).ravel(), np.asarray(y_pred).ravel()))