Exemple #1
0
 def templateClusterAllocationOneDimensionData(self, ccore_flag):
     """Verify that ROCK splits four well-separated 1-D groups into four
     equal clusters.

     :param ccore_flag: When True the C++ (CCORE) implementation is used,
         otherwise the pure-Python one.
     """
     # Four groups of 10 random points centred near 0, 3, 5 and 8 —
     # far enough apart (radius 1) that ROCK cannot merge them.
     input_data = ([[random()] for _ in range(10)]
                   + [[random() + 3] for _ in range(10)]
                   + [[random() + 5] for _ in range(10)]
                   + [[random() + 8] for _ in range(10)])

     rock_instance = rock(input_data, 1, 4, 0.5, ccore_flag)
     rock_instance.process()
     clusters = rock_instance.get_clusters()

     assert len(clusters) == 4
     for cluster in clusters:
         assert len(cluster) == 10
 def templateClusterAllocationOneDimensionData(self, ccore_flag):
     """Check ROCK cluster allocation on four separated 1-D point groups.

     :param ccore_flag: Use the CCORE (C++) backend when True.
     """
     # 10 points around each of the offsets 0, 3, 5, 8 — clearly separated
     # relative to the connectivity radius of 1 used below.
     input_data = ([[random()] for _ in range(10)]
                   + [[random() + 3] for _ in range(10)]
                   + [[random() + 5] for _ in range(10)]
                   + [[random() + 8] for _ in range(10)])

     rock_instance = rock(input_data, 1, 4, 0.5, ccore_flag)
     rock_instance.process()
     clusters = rock_instance.get_clusters()

     # Expect exactly the four groups back, each intact.
     assert len(clusters) == 4
     for cluster in clusters:
         assert len(cluster) == 10
    def get_modelo(self, algoritmo, eps, neig):
        """Build and run the clustering algorithm named by *algoritmo* over
        ``self.amostras`` and return the flat label array.

        :param algoritmo: One of 'AGNES', 'BIRCH', 'CLARANS', 'CURE',
            'DBSCAN', 'FCM', 'KMEANS', 'KMEDOIDS', 'OPTICS', 'ROCK'.
        :param eps: Connectivity radius — used only by DBSCAN, OPTICS, ROCK.
        :param neig: Neighbour / min-points parameter for DBSCAN and OPTICS;
            also drives the retry loop at the bottom of this method.
        :return: numpy array with one cluster label per sample.
        :raises ValueError: If *algoritmo* is not a recognised name.
        """
        print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))
        instance = None

        if algoritmo == 'AGNES':
            instance = agglomerative(self.amostras,
                                     self.numero_clusters,
                                     link=None)
        elif algoritmo == 'BIRCH':
            instance = birch(self.amostras,
                             self.numero_clusters,
                             entry_size_limit=10000)
        elif algoritmo == 'CLARANS':
            instance = clarans(self.amostras,
                               self.numero_clusters,
                               numlocal=100,
                               maxneighbor=1)
        elif algoritmo == 'CURE':
            instance = cure(self.amostras,
                            self.numero_clusters,
                            number_represent_points=5,
                            compression=0.5)
        elif algoritmo == 'DBSCAN':
            instance = dbscan(self.amostras, eps=eps, neighbors=neig)
        elif algoritmo == 'FCM':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = fcm(self.amostras, initial_centers)
        elif algoritmo == 'KMEANS':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
        elif algoritmo == 'KMEDOIDS':
            # TODO(review): medoid count is hard-coded to 7 — derive it from
            # self.numero_clusters instead (original note: "ajustar o n_de cluster").
            instance = kmedoids(self.amostras,
                                initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                                tolerance=0.0001)
        elif algoritmo == 'OPTICS':
            instance = optics(self.amostras, eps=eps, minpts=neig)
        elif algoritmo == 'ROCK':
            instance = rock(self.amostras,
                            eps=eps,
                            number_clusters=self.numero_clusters,
                            threshold=0.5)
        else:
            # BUG FIX: the original fell through with instance = None and then
            # crashed below on instance.process() with an opaque AttributeError.
            raise ValueError('unknown algorithm: ' + str(algoritmo))

        instance.process()
        lista_agrupada = self.get_lista_agrupada(instance.get_clusters())
        lista_agrupada = np.array(lista_agrupada)

        if neig != 0:
            # If more groups were produced than requested, retry with a wider
            # neighbourhood (neig + 1) until the count fits.
            n_grupos = len(np.unique(lista_agrupada))
            if n_grupos > self.numero_clusters:
                lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
        return lista_agrupada
Exemple #4
0
def template_clustering(path, radius, cluster_numbers, threshold, draw = True, ccore = True):
    """Run ROCK over the sample stored at *path*, print the timing and
    optionally draw the allocated clusters.

    :param path: Path to a sample file readable by read_sample().
    :param radius: Connectivity radius (similarity threshold) for ROCK.
    :param cluster_numbers: Number of clusters to allocate.
    :param threshold: Normalisation degree used when merging clusters.
    :param draw: When True the clusters are rendered with draw_clusters().
    :param ccore: When True the C++ (CCORE) backend is used.
    """
    sample = read_sample(path)

    rock_instance = rock(sample, radius, cluster_numbers, threshold, ccore)
    # Only the elapsed ticks are reported; process() has no useful return.
    (ticks, _) = timedcall(rock_instance.process)

    clusters = rock_instance.get_clusters()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    if draw:
        draw_clusters(sample, clusters)
Exemple #5
0
    def rock(cls, data, eps, number_clusters, threshold=0.5, ccore=False):
        """Create a wrapped ROCK cluster-analysis model.

        :param eps: Connectivity radius (similarity threshold); points are
            neighbors when their distance is below it
        :param number_clusters: Number of clusters to allocate from the data
        :param threshold: Degree of normalization influencing which clusters
            are merged during processing
        :param ccore: Whether to use the CCORE (C++ pyclustering) backend
            instead of pure Python
        :return: The resulting clustering.rst object
        """
        prepared = cls.input_preprocess(data)
        return cls(rock(prepared, eps, number_clusters, threshold, ccore))
 def templateLengthProcessData(self, path_to_file, radius, cluster_numbers, threshold, expected_cluster_length, ccore = False):
     """Run ROCK over the sample in *path_to_file* and assert that the
     allocated cluster sizes match *expected_cluster_length*.

     :param path_to_file: Sample file readable by read_sample().
     :param radius: Connectivity radius for ROCK.
     :param cluster_numbers: Number of clusters to allocate.
     :param threshold: Merge-normalisation degree.
     :param expected_cluster_length: Expected cluster sizes (any order).
     :param ccore: Use the CCORE (C++) backend when True.
     """
     sample = read_sample(path_to_file)

     rock_instance = rock(sample, radius, cluster_numbers, threshold, ccore)
     rock_instance.process()
     clusters = rock_instance.get_clusters()

     # Every point must be assigned to exactly one cluster.
     assert len(sample) == sum(len(cluster) for cluster in clusters)

     # BUG FIX: the original sorted expected_cluster_length in place,
     # mutating the caller's list; sorted() leaves the argument untouched.
     obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
     assert obtained_cluster_sizes == sorted(expected_cluster_length)
Exemple #7
0
 def templateLengthProcessData(self, path_to_file, radius, cluster_numbers, threshold, expected_cluster_length, ccore = False):
     """Cluster the sample in *path_to_file* with ROCK and check that the
     resulting cluster-size distribution equals *expected_cluster_length*.

     :param path_to_file: Sample file readable by read_sample().
     :param radius: Connectivity radius for ROCK.
     :param cluster_numbers: Number of clusters to allocate.
     :param threshold: Merge-normalisation degree.
     :param expected_cluster_length: Expected cluster sizes (any order).
     :param ccore: Use the CCORE (C++) backend when True.
     """
     sample = read_sample(path_to_file)

     rock_instance = rock(sample, radius, cluster_numbers, threshold, ccore)
     rock_instance.process()
     clusters = rock_instance.get_clusters()

     # All points must be covered by the allocated clusters.
     assert len(sample) == sum(len(cluster) for cluster in clusters)

     # BUG FIX: avoid the original's in-place .sort() on the caller's
     # expected_cluster_length list — compare sorted copies instead.
     assert sorted(len(cluster) for cluster in clusters) == sorted(expected_cluster_length)
def rocAlgo(filename, col_name,
            output_path="C:/Users/Nupura Hajare/Desktop/flask_app/web/static/img/Roc.png"):
    """Cluster one CSV column with ROCK and save a visualisation image.

    :param filename: CSV file to read.
    :param col_name: Name of the column to cluster.
    :param output_path: Where the rendered figure is saved (defaults to the
        original hard-coded location for backward compatibility).
    """
    df = pd.read_csv(filename, usecols=[col_name])
    # BUG FIX: the original passed the column *name* string to rock();
    # the algorithm needs the data points themselves as a list of vectors.
    data = [[value] for value in df[col_name]]
    rock_instance = rock(data, 1.0, 100)
    # Run cluster analysis
    rock_instance.process()
    # Obtain results of clustering
    clusters = rock_instance.get_clusters()
    print(clusters)
    print(timeit.timeit('"-".join(str(n) for n in range(100))', number=10000))
    # Visualize clusters (append the actual points, not the column name):
    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, data)
    visualizer.show(display=False)
    plt.savefig(output_path)
Exemple #9
0
 def testCoreInterfaceIntInputData(self):
     """Smoke-test the CCORE ROCK interface on integer 1-D input: two
     well-separated point groups must yield exactly two clusters."""
     instance = rock([[1], [2], [3], [20], [21], [22]], 3, 2, 0.5, True)
     instance.process()
     assert len(instance.get_clusters()) == 2
def process_rock(sample):
    """Run ROCK once over *sample* and return the elapsed ticks."""
    model = rock(sample, 1, NUMBER_CLUSTERS, 0.5)
    ticks, _ = timedcall(model.process)
    return ticks
def process_rock(sample):
    """Time one ROCK clustering pass over *sample*; returns the tick count."""
    algorithm = rock(sample, 1, NUMBER_CLUSTERS, 0.5)
    elapsed, _ = timedcall(algorithm.process)
    return elapsed
Exemple #12
0
# Load the survey sample and cluster it with ROCK.
df = pd.read_excel('chosen12345610percent.xlsx')

features = df.columns
print(features)
f = ['RPy', 'edu', 'role', 'industry', 'most_often']

# Replace missing values with 0, column by column — the original repeated
# this line five times verbatim; iterate over the declared column list instead.
for column in f:
    df.loc[df[column].isna(), column] = 0

data = df[features]

# Scale every feature into [0, 1] so distances are comparable.
scaler = MinMaxScaler().fit(data)
x = pd.DataFrame(scaler.transform(data))

# Create instance of ROCK algorithm for cluster analysis. Seven clusters should be allocated.
rock_instance = rock(x.values, 1.0, 7)
# Run cluster analysis.
rock_instance.process()
# Obtain results of clustering.
clusters = rock_instance.get_clusters()

print(clusters)
# Visualize clustering results.
#visualizer = cluster_visualizer()
#visualizer.append_clusters(clusters, x.values)
#visualizer.show()
Exemple #13
0
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.rock import rock
from pyclustering.samples.definitions import FCPS_SAMPLES
from pyclustering.utils import read_sample
# Load the Hepta sample that ships with pyclustering.
sample = read_sample(FCPS_SAMPLES.SAMPLE_HEPTA)

# Allocate seven clusters with ROCK (connectivity radius 1.0).
model = rock(sample, 1.0, 7)
model.process()
found_clusters = model.get_clusters()

# Render the clustering result.
canvas = cluster_visualizer()
canvas.append_clusters(found_clusters, sample)
canvas.show()