def fzclustering(users_skills, n_clusters_range, fuzzpar, plot=False):
    """Fit fuzzy c-means once per candidate cluster count and keep the best.

    One ``FCM`` model is fitted for every value in ``n_clusters_range``; the
    candidate with the largest ``partition_coefficient`` is selected.

    NOTE(review): Python keeps only the last module-level binding of a name.
    If this module defines another ``fzclustering`` further down, that one
    replaces this function -- confirm which variant callers actually get.

    Args:
        users_skills: Data handed unchanged to ``FCM.fit`` and ``FCM.predict``.
        n_clusters_range: Iterable of candidate cluster counts (non-empty).
        fuzzpar: Value forwarded to ``FCM`` as its ``m`` argument.
        plot: When true, save the coefficient-vs-cluster-count curve to
            ``clustering_Fuzzy_1.png``.

    Returns:
        A pair ``(best, times)``. ``best`` is the tuple
        ``(centers, predict_output, partition_coefficient, model)`` of the
        winning candidate; ``times`` lists the seconds spent constructing and
        fitting each candidate, in iteration order.

    Raises:
        ValueError: If ``n_clusters_range`` yields no candidates.
    """
    candidates = list(n_clusters_range)
    if not candidates:
        # Fail with a readable message instead of the bare error max() would
        # raise on an empty sequence further down.
        raise ValueError(
            "n_clusters_range must contain at least one cluster count"
        )

    models = {}
    times = []
    fpcs = []

    for n_clusters in candidates:
        # perf_counter is monotonic, so a system clock adjustment cannot
        # distort the measured duration.
        started = time.perf_counter()
        model = FCM(
            n_clusters=n_clusters,
            max_iter=50,
            m=fuzzpar,
            error=1e-5,
            random_state=37,
        )
        model.fit(users_skills)
        times.append(time.perf_counter() - started)

        fpc = model.partition_coefficient
        fpcs.append(fpc)
        models[n_clusters] = (
            model.centers,
            model.predict(users_skills),
            fpc,
            model,
        )

    # Index 2 of every entry is the partition coefficient.
    best = max(models.values(), key=lambda entry: entry[2])

    if plot:
        plt.figure()
        plt.title("Fuzzy c-means over number of clusters")
        plt.xlabel("Number of clusters")
        plt.xticks(candidates)
        plt.ylabel("Fuzzy partition coefficient")
        plt.plot(candidates, fpcs)
        plt.tight_layout()
        plt.savefig("clustering_Fuzzy_1.png")
        plt.close()

    return best, times
def fzclustering(users_skills, n_clusters_range, plot=False, *,
                 fuzzpar=1.2, random_state=88):
    """Fit fuzzy c-means once per candidate cluster count and keep the best.

    One ``FCM`` model is fitted for every value in ``n_clusters_range``; the
    candidate with the largest ``partition_coefficient`` is selected.

    NOTE(review): Python keeps only the last module-level binding of a name.
    If this module also defines another ``fzclustering``, whichever appears
    later wins -- confirm which variant callers actually get.

    Args:
        users_skills: Data handed unchanged to ``FCM.fit`` and ``FCM.predict``.
        n_clusters_range: Iterable of candidate cluster counts (non-empty).
        plot: When true, save the coefficient-vs-cluster-count curve through
            ``plt.savefig("Fuzzy partition coefficient")`` (the call gives no
            file extension).
        fuzzpar: Keyword-only. Value forwarded to ``FCM`` as ``m``.
        random_state: Keyword-only. Value forwarded to ``FCM`` as
            ``random_state``.

    Returns:
        The tuple ``(centers, predict_output, partition_coefficient)`` of the
        winning candidate.

    Raises:
        ValueError: If ``n_clusters_range`` yields no candidates.
    """
    candidates = list(n_clusters_range)
    if not candidates:
        # Fail with a readable message instead of the bare error max() would
        # raise on an empty sequence further down.
        raise ValueError(
            "n_clusters_range must contain at least one cluster count"
        )

    models = {}
    fpcs = []

    for n_clusters in candidates:
        model = FCM(
            n_clusters=n_clusters,
            max_iter=50,
            m=fuzzpar,
            error=1e-5,
            random_state=random_state,
        )
        model.fit(users_skills)

        fpc = model.partition_coefficient
        fpcs.append(fpc)
        models[n_clusters] = (model.centers, model.predict(users_skills), fpc)

    # Index 2 of every entry is the partition coefficient.
    best = max(models.values(), key=lambda entry: entry[2])

    if plot:
        plt.figure()
        plt.title("Fuzzy c-means over number of clusters")
        plt.xlabel("Number of clusters")
        plt.xticks(candidates)
        plt.ylabel("Fuzzy partition coefficient (FPC)")
        plt.plot(candidates, fpcs)
        plt.tight_layout()
        plt.savefig("Fuzzy partition coefficient")
        plt.close()

    return best
class RBF:
    """Radial-basis-function classifier with units placed by fuzzy c-means.

    ``fit`` clusters the training data with ``FCM``, builds one Gaussian unit
    per cluster (cluster centre plus the inverse of a membership-weighted
    covariance matrix) and solves a linear least-squares problem for the
    output weights. ``predict`` evaluates the same units on new samples.

    Attributes set by ``fit``:
        G: Training activations, shape ``(n_samples, k)``.
        C: Inverse weighted covariance per cluster, shape
            ``(k, n_features, n_features)``.
        W: Output weights mapping activations to one-hot columns.
        fcm: The ``FCM`` instance fitted on the training data.
    """

    def __init__(self, gamma=0.1):
        """Store the kernel width factor ``gamma``; nothing is fitted yet."""
        self.gamma = gamma
        self.G = None
        self.C = None
        self.W = None
        self.fcm = None

    def _activations(self, X, centers, inv_covs):
        """Return the ``(n_samples, k)`` matrix of Gaussian unit outputs."""
        columns = []
        for center, inv_cov in zip(centers, inv_covs):
            diff = X - center
            # Row-wise quadratic form diff_j^T * inv_cov * diff_j.
            dist2 = np.sum(diff.dot(inv_cov) * diff, axis=1)
            columns.append(np.exp(-self.gamma * dist2))
        return np.column_stack(columns)

    def fit(self, k, X_train, y_train):
        """Train the network with ``k`` units.

        Args:
            k: Number of clusters / hidden units.
            X_train: 2-D array of shape ``(n_samples, n_features)``.
            y_train: Targets handed unchanged to
                ``OneHotEncoder.fit_transform``.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            numpy.linalg.LinAlgError: If a cluster covariance matrix or
                ``G.T @ G`` is singular.
        """
        X = np.asarray(X_train)
        n_features = X.shape[1]

        self.fcm = FCM(n_clusters=k)
        fitted = self.fcm.fit(X_train)
        # Some FCM implementations return None from fit() instead of the
        # model; fall back to the instance that was just fitted.
        fcm = self.fcm if fitted is None else fitted
        centers = np.asarray(fcm.centers)
        memberships = np.asarray(fcm.u)  # indexed [sample, cluster]

        # NOTE(review): the exponent applied to the memberships is the
        # feature count (the original code reused ``m`` from
        # ``X_train.shape``). Fuzzy covariance formulas normally use the FCM
        # fuzzifier here -- confirm which one is intended.
        weights = memberships ** n_features

        covs = np.empty((k, n_features, n_features))
        for i in range(k):
            diff = X - centers[i]
            w = weights[:, i]
            # Sum of w_j * outer(diff_j, diff_j), normalised by sum of w_j.
            covs[i] = (diff * w[:, np.newaxis]).T.dot(diff) / w.sum()
        self.C = np.linalg.inv(covs)

        self.G = self._activations(X, centers, self.C)

        # ``sparse`` was renamed to ``sparse_output`` in newer scikit-learn
        # releases; try the new spelling first and fall back to the old one.
        try:
            encoder = OneHotEncoder(sparse_output=False)
        except TypeError:
            encoder = OneHotEncoder(sparse=False)
        Y = encoder.fit_transform(y_train)

        # Least-squares output weights via the normal equations.
        self.W = np.linalg.inv(self.G.T.dot(self.G)).dot(self.G.T).dot(Y)
        return self

    def predict(self, X_test):
        """Predict the winning output column for every row of ``X_test``.

        The activations use the centres and inverse covariances learned in
        ``fit``, so the prediction for a sample does not depend on which
        other samples share the batch.

        Args:
            X_test: 2-D array of shape ``(n_samples, n_features)``.

        Returns:
            1-D integer array with the index of the largest output per
            sample. The index refers to the one-hot columns built in ``fit``;
            presumably it equals the class label only when labels are
            ``0..K-1`` -- verify against the caller.
        """
        X = np.asarray(X_test)
        G = self._activations(X, np.asarray(self.fcm.centers), self.C)
        return np.argmax(G.dot(self.W), axis=1)

    def get_accuracy(self, y_test, y_pred):
        """Return the fraction of positions where the two arrays agree.

        Both inputs are flattened first, so column vectors and flat arrays
        can be mixed.
        """
        truth = np.asarray(y_test).ravel()
        guess = np.asarray(y_pred).ravel()
        return np.mean(np.equal(truth, guess))