def create_cluster_from_neuralgasnetwork(model: model, a=0.5, passes=80, distance_toremove_edge=8):
    """Cluster the measures of *model* with a Growing Neural Gas network.

    The model is renamed to a label built from ``distance_toremove_edge`` and
    ``passes``. When ``model.load_cluster()`` reports that nothing could be
    loaded, a ``GrowingNeuralGas`` is fitted on ``model.mesures().values`` and
    its clustering is handed to ``model.clusters_from_real``.

    Args:
        model: object exposing ``mesures``, ``setname``, ``load_cluster``,
            ``start_treatment``, ``end_treatment`` and ``clusters_from_real``.
        a: forwarded to ``GrowingNeuralGas.fit_network`` as ``a``.
        passes: forwarded to ``fit_network`` as ``passes``.
        distance_toremove_edge: forwarded to ``fit_network`` under the same name.

    Returns:
        The same ``model`` object that was passed in.
    """
    data = model.mesures().values
    model.setname("NEURALGAS avec distance_toremove=" + str(distance_toremove_edge) + " passes=" + str(passes))

    # NOTE(review): the name above does not encode ``a``; if load_cluster() keys
    # its lookup on the name, runs with different ``a`` would share one entry --
    # confirm against the model implementation.
    if not model.load_cluster():
        model.start_treatment()
        gng = GrowingNeuralGas(data)
        # ``a`` used to be hard-coded to 0.5 here, silently ignoring the
        # function's own parameter; the default keeps the old value.
        gng.fit_network(
            e_b=0.05,
            e_n=0.006,
            distance_toremove_edge=distance_toremove_edge,
            l=100,
            a=a,
            d=0.995,
            passes=passes,
            plot_evolution=False,
        )
        model.end_treatment()
        print('Found %d clusters.' % gng.number_of_clusters())
        model.clusters_from_real(gng.cluster_data(), "NEURALGAS_")

    return model
def evaluate():
    """Cluster the normalized weekly sales columns and print the grouping.

    Reads ``Sales_Transactions_Dataset_Weekly.csv`` from the working
    directory, fits a ``GrowingNeuralGas`` on the selected "Normalized N"
    columns, plots the clusters and prints a list of lists in which entry
    ``k`` holds the 1-based positions of the rows assigned to cluster ``k``.
    """
    ds = pd.read_csv("Sales_Transactions_Dataset_Weekly.csv")

    # Same selection as the original literal list: "Normalized 0" .. "Normalized 49"
    # followed by "Normalized 51".
    # NOTE(review): "Normalized 50" is not selected -- confirm this is intended.
    columns = ["Normalized %d" % week for week in range(50)] + ["Normalized 51"]
    data = ds[columns]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same array.
    gng = GrowingNeuralGas(data.values, output_folder="visualization")
    gng.fit_network(e_b=0.05, e_n=0.006, a_max=8, l=100, a=0.5, d=0.995, passes=10, plot_evolution=True)

    # Computed once and reused for plotting and grouping (the network is not
    # refitted in between; assumes cluster_data() itself has no side effects).
    clustered_data = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))
    gng.plot_clusters(clustered_data)

    clusters = [[]]
    for position, (_, cluster_id) in enumerate(clustered_data, start=1):
        if len(clusters) <= cluster_id:
            # Grows by cluster_id + 1 slots, exactly as before, so the printed
            # structure (including trailing empty lists) is unchanged.
            clusters.extend([] for _ in range(int(cluster_id + 1)))
        clusters[int(cluster_id)].append(position)
    print(clusters)
def evaluate_on_digits():
    """Fit a Growing Neural Gas on the digits dataset and report its quality.

    Prints the number of connected components of the fitted network and the
    homogeneity score of the inferred clusters against the digit labels, then
    plots the clustering after ``reduce_dimension``.
    """
    digits = datasets.load_digits()
    samples, labels = digits.data, digits.target

    network = GrowingNeuralGas(samples)
    network.fit_network(
        e_b=0.05,
        e_n=0.006,
        a_max=8,
        l=100,
        a=0.5,
        d=0.995,
        passes=5,
        plot_evolution=False,
    )

    assignments = network.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(network.network))

    # Each entry is an (observation, cluster) pair; only the cluster is scored.
    inferred = [cluster for _, cluster in assignments]
    print(metrics.homogeneity_score(labels, inferred))

    network.plot_clusters(network.reduce_dimension(network.cluster_data()))
def evaluate(e_b, e_n, a_max, l, a, d, passes):
    """Cluster the raw weekly sales columns with a Growing Neural Gas.

    Reads ``Sales_Transactions_Dataset_Weekly.csv`` from the working
    directory, fits a ``GrowingNeuralGas`` on the selected "W<n>" columns,
    plots the clusters, prints every non-empty cluster and the value of
    ``gng.compute_global_error()``.

    Args:
        e_b, e_n, a_max, l, a, d, passes: forwarded unchanged, under the same
            keyword names, to ``GrowingNeuralGas.fit_network``.

    Returns:
        A list of lists; entry ``k`` holds the 1-based positions of the rows
        assigned to cluster ``k``. Empty lists may be present.
    """
    ds = pd.read_csv("Sales_Transactions_Dataset_Weekly.csv")

    # Same selection as the original literal list: "W0" .. "W49" followed by "W51".
    # NOTE(review): "W50" is not selected -- confirm this is intended.
    columns = ["W%d" % week for week in range(50)] + ["W51"]
    data = ds[columns]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same array.
    gng = GrowingNeuralGas(data.values, output_folder="visualization")
    gng.fit_network(e_b=e_b, e_n=e_n, a_max=a_max, l=l, a=a, d=d, passes=passes, plot_evolution=True)

    # Computed once and reused for plotting and grouping (the network is not
    # refitted in between; assumes cluster_data() itself has no side effects).
    clustered_data = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))
    gng.plot_clusters(clustered_data)

    clusters = [[]]
    for position, (_, cluster_id) in enumerate(clustered_data, start=1):
        if len(clusters) <= cluster_id:
            # Grows by cluster_id + 1 slots, exactly as before, so the returned
            # structure (including trailing empty lists) is unchanged.
            clusters.extend([] for _ in range(int(cluster_id + 1)))
        clusters[int(cluster_id)].append(position)

    # The printed number counts the non-empty clusters in order; it is not
    # necessarily the cluster's index in ``clusters``.
    shown = 0
    for members in clusters:
        if not members:
            continue
        print('Cluster №' + str(shown) + ' size: (' + str(len(members)) + ') contains data:')
        shown += 1
        print(members)

    print('Global error all network(on euclidean distance): ' + str(gng.compute_global_error()))
    return clusters
values = normalize(values).values # returning values return values if __name__ == '__main__': if os.path.exists('visualization/sequence'): shutil.rmtree('visualization/sequence') os.makedirs('visualization/sequence') n_samples = 1500 data = None #data = dataset() # in case the user wants to use a specific dataset #data = datasets.load_breast_cancer() #data = datasets.load_iris(n_samples=n_samples, random_state=8) #data = datasets.make_blobs(n_samples=n_samples, random_state=8) data = datasets.make_moons(n_samples=n_samples, noise=.05) #data = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05) #data = StandardScaler().fit_transform(data.data) # depends on the data variable data = StandardScaler().fit_transform(data[0]) # depends on the data variable print('Done.') print('Fitting neural network...') gng = GrowingNeuralGas(data) gng.fit_network(e_b=0.1, e_n=0.006, a_max=10, l=200, a=0.5, d=0.995, passes=5, plot_evolution=True) print('Found %d clusters.' % gng.number_of_clusters()) gng.plot_clusters(gng.cluster_data())
import os
import shutil

from sklearn import datasets
from sklearn.preprocessing import StandardScaler

from gng import GrowingNeuralGas

__authors__ = 'Adrien Guille'
__email__ = '*****@*****.**'

if __name__ == '__main__':
    # Recreate 'visualization/sequence' as an empty directory.
    if os.path.exists('visualization/sequence'):
        shutil.rmtree('visualization/sequence')
    os.makedirs('visualization/sequence')

    n_samples = 2000
    dataset_type = 'moons'

    print('Preparing data...')
    # Lazy dispatch table: only the selected generator is actually called.
    generators = {
        'blobs': lambda: datasets.make_blobs(n_samples=n_samples, random_state=8),
        'moons': lambda: datasets.make_moons(n_samples=n_samples, noise=.05),
        'circles': lambda: datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05),
    }
    make_dataset = generators.get(dataset_type)
    # An unrecognised dataset_type leaves ``data`` as None, as before.
    data = make_dataset() if make_dataset is not None else None

    # Element 0 of the generator result is what gets standardized.
    data = StandardScaler().fit_transform(data[0])
    print('Done.')

    print('Fitting neural network...')
    gng = GrowingNeuralGas(data)
    gng.fit_network(
        e_b=0.1,
        e_n=0.006,
        a_max=10,
        l=200,
        a=0.5,
        d=0.995,
        passes=8,
        plot_evolution=True,
    )
    print('Found %d clusters.' % gng.number_of_clusters())
    gng.plot_clusters(gng.cluster_data())