def templateClusterAllocationOneDimensionData(self, ccore_flag):
    """Check that ROCK allocates four equal-size clusters from 1-D data.

    Builds four well-separated groups of ten random points each along a
    single axis, runs ROCK asking for four clusters, and asserts the
    allocation: exactly four clusters of ten points apiece.
    """
    # Group centers: 0, 3, 5 and 8 — far enough apart for radius 1.
    offsets = [0.0, 3.0, 5.0, 8.0]
    input_data = [[random() + shift] for shift in offsets for _ in range(10)]

    rock_instance = rock(input_data, 1, 4, 0.5, ccore_flag)
    rock_instance.process()
    clusters = rock_instance.get_clusters()

    assert len(clusters) == 4
    for cluster in clusters:
        assert len(cluster) == 10
def get_modelo(self, algoritmo, eps, neig):
    """Run the requested clustering algorithm and return flat labels.

    Builds the pyclustering model selected by ``algoritmo``, processes
    ``self.amostras``, and converts the cluster allocation into a flat
    label array via ``self.get_lista_agrupada``. When ``neig`` is
    non-zero and more groups than ``self.numero_clusters`` are produced,
    retries recursively with a larger neighborhood (``neig + 1``).

    :param algoritmo: Algorithm name: 'AGNES', 'BIRCH', 'CLARANS',
        'CURE', 'DBSCAN', 'FCM', 'KMEANS', 'KMEDOIDS', 'OPTICS' or 'ROCK'.
    :param eps: Connectivity radius for the radius-based algorithms.
    :param neig: Neighbor count / minpts for DBSCAN and OPTICS.
    :return: numpy array with one cluster label per sample.
    :raises ValueError: If ``algoritmo`` is not a recognized name.
    """
    print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))
    instance = None
    if algoritmo == 'AGNES':
        instance = agglomerative(self.amostras, self.numero_clusters, link=None)
    elif algoritmo == 'BIRCH':
        instance = birch(self.amostras, self.numero_clusters, entry_size_limit=10000)
    elif algoritmo == 'CLARANS':
        instance = clarans(self.amostras, self.numero_clusters, numlocal=100, maxneighbor=1)
    elif algoritmo == 'CURE':
        instance = cure(self.amostras, self.numero_clusters, number_represent_points=5, compression=0.5)
    elif algoritmo == 'DBSCAN':
        instance = dbscan(self.amostras, eps=eps, neighbors=neig)
    elif algoritmo == 'FCM':
        initial_centers = kmeans_plusplus_initializer(
            self.amostras, self.numero_clusters).initialize()
        instance = fcm(self.amostras, initial_centers)
    elif algoritmo == 'KMEANS':
        initial_centers = kmeans_plusplus_initializer(
            self.amostras, self.numero_clusters).initialize()
        instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
    elif algoritmo == 'KMEDOIDS':
        # TODO: the medoid count is hard-coded to seven initial indices;
        # adjust it to match self.numero_clusters.
        instance = kmedoids(self.amostras,
                            initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                            tolerance=0.0001)
    elif algoritmo == 'OPTICS':
        instance = optics(self.amostras, eps=eps, minpts=neig)
    elif algoritmo == 'ROCK':
        instance = rock(self.amostras, eps=eps,
                        number_clusters=self.numero_clusters, threshold=0.5)
    else:
        # Previously this fell through with instance = None and crashed
        # later with an opaque AttributeError; fail fast instead.
        raise ValueError('Unknown clustering algorithm: ' + str(algoritmo))

    instance.process()
    lista_agrupada = self.get_lista_agrupada(instance.get_clusters())
    lista_agrupada = np.array(lista_agrupada)

    if (neig != 0):
        n_grupos = len(np.unique(lista_agrupada))
        # Too many groups: widen the neighborhood and try again.
        if n_grupos > self.numero_clusters:
            lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
    return lista_agrupada
def template_clustering(path, radius, cluster_numbers, threshold, draw = True, ccore = True):
    """Cluster a sample file with ROCK, print timing, optionally draw.

    Reads the sample at ``path``, runs ROCK with the given connectivity
    radius, cluster count and merge threshold (using the C++ core when
    ``ccore`` is true), prints the execution time, and draws the
    resulting clusters when ``draw`` is enabled.
    """
    sample = read_sample(path)
    instance = rock(sample, radius, cluster_numbers, threshold, ccore)

    ticks, _ = timedcall(instance.process)
    allocated = instance.get_clusters()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")
    if (draw == True):
        draw_clusters(sample, allocated)
def rock(cls, data, eps, number_clusters, threshold=0.5, ccore=False):
    """Alternate constructor wrapping the ROCK clustering algorithm.

    :param data: Input data set; passed through ``cls.input_preprocess``
        before the model is built.
    :param eps: Connectivity radius (similarity threshold) — two points
        are neighbors when their distance is below it.
    :param number_clusters: Number of clusters to allocate.
    :param threshold: Degree of normalization influencing cluster merges.
    :param ccore: Use the CCORE (C++ pyclustering) backend when True.
    :return: Instance of ``cls`` wrapping the configured ROCK model.
    """
    preprocessed = cls.input_preprocess(data)
    return cls(rock(preprocessed, eps, number_clusters, threshold, ccore))
def templateLengthProcessData(self, path_to_file, radius, cluster_numbers, threshold, expected_cluster_length, ccore = False):
    """Run ROCK on a sample file and verify the cluster size profile.

    Asserts that every input point lands in exactly one cluster and that
    the sorted cluster sizes match ``expected_cluster_length``.
    """
    sample = read_sample(path_to_file)

    instance = rock(sample, radius, cluster_numbers, threshold, ccore)
    instance.process()
    allocated = instance.get_clusters()

    # All points must be accounted for across the allocated clusters.
    assert len(sample) == sum(len(cluster) for cluster in allocated)

    actual_sizes = sorted(len(cluster) for cluster in allocated)
    expected_cluster_length.sort()
    assert actual_sizes == expected_cluster_length
def rocAlgo(filename, col_name,
            output_path="C:/Users/Nupura Hajare/Desktop/flask_app/web/static/img/Roc.png"):
    """Cluster a single CSV column with ROCK and save the plot.

    :param filename: Path of the CSV file to read.
    :param col_name: Name of the column to cluster.
    :param output_path: Where the cluster image is written (defaults to
        the previously hard-coded location, so existing callers keep
        their behavior).
    """
    df = pd.read_csv(filename, usecols=[col_name])
    # BUG FIX: the original passed the column *name* (a string) to
    # rock(); the algorithm needs the actual data points as rows.
    data = df[[col_name]].values.tolist()

    rock_instance = rock(data, 1.0, 100)
    # Run cluster analysis.
    rock_instance.process()
    # Obtain results of clustering.
    clusters = rock_instance.get_clusters()
    print(clusters)
    print(timeit.timeit('"-".join(str(n) for n in range(100))', number=10000))

    # Visualize clusters and persist the figure for the web app.
    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, data)
    visualizer.show(display=False)
    plt.savefig(output_path)
def testCoreInterfaceIntInputData(self):
    """Two separated integer 1-D groups yield two clusters via CCORE."""
    # Renamed the local from the misleading 'optics_instance' — this is ROCK.
    rock_instance = rock([[1], [2], [3], [20], [21], [22]], 3, 2, 0.5, True)
    rock_instance.process()
    assert len(rock_instance.get_clusters()) == 2
def process_rock(sample):
    """Run ROCK once on *sample* and return the measured tick count."""
    instance = rock(sample, 1, NUMBER_CLUSTERS, 0.5)
    ticks, _ = timedcall(instance.process)
    return ticks
df = pd.read_excel('chosen12345610percent.xlsx')
features = df.columns
print(features)
f = ['RPy', 'edu', 'role', 'industry', 'most_often']

# Replace missing values with 0 in each feature column. The original
# repeated one df.loc[...] = 0 statement per column; iterate the
# already-declared column list instead.
for col in f:
    df.loc[df[col].isna(), col] = 0

# Scale every feature into [0, 1] before clustering.
data = df[features]
scaler = MinMaxScaler().fit(data)
x = pd.DataFrame(scaler.transform(data))

# Create instance of ROCK algorithm for cluster analysis.
# Seven clusters should be allocated.
rock_instance = rock(x.values, 1.0, 7)
# Run cluster analysis.
rock_instance.process()
# Obtain results of clustering.
clusters = rock_instance.get_clusters()
print(clusters)

# Visualize clustering results.
#visualizer = cluster_visualizer()
#visualizer.append_clusters(clusters, x.values)
#visualizer.show()
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.rock import rock
from pyclustering.samples.definitions import FCPS_SAMPLES
from pyclustering.utils import read_sample

# Load the Hepta sample shipped with pyclustering.
sample = read_sample(FCPS_SAMPLES.SAMPLE_HEPTA)

# ROCK with connectivity radius 1.0; seven clusters expected for Hepta.
rock_instance = rock(sample, 1.0, 7)
rock_instance.process()
allocated_clusters = rock_instance.get_clusters()

# Draw the clustering result.
visualizer = cluster_visualizer()
visualizer.append_clusters(allocated_clusters, sample)
visualizer.show()