def templateLengthProcessData(path_to_file, start_centers, expected_cluster_length, ccore, **kwargs):
        sample = read_sample(path_to_file)

        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
        itermax = kwargs.get('itermax', 200)
        
        kmeans_instance = kmeans(sample, start_centers, 0.001, ccore, metric=metric, itermax=itermax)
        kmeans_instance.process()
        
        clusters = kmeans_instance.get_clusters()
        centers = kmeans_instance.get_centers()
        wce = kmeans_instance.get_total_wce()

        if itermax == 0:
            assertion.eq(start_centers, centers)
            assertion.eq([], clusters)
            assertion.eq(0.0, wce)
            return

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
        assertion.eq(len(sample), sum(obtained_cluster_sizes))
        
        assertion.eq(len(clusters), len(centers))
        for center in centers:
            assertion.eq(len(sample[0]), len(center))
        
        if expected_cluster_length is not None:
            obtained_cluster_sizes.sort()
            expected_cluster_length.sort()
            assertion.eq(obtained_cluster_sizes, expected_cluster_length)
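A possible call of this test template is sketched below. It assumes pyclustering's bundled SIMPLE_SAMPLES definitions and treats the template as a plain function (in pyclustering's test suite it is usually a static method on a template class); the sample, the initial centers and the expected cluster sizes are assumptions for illustration, not taken from this listing.

from pyclustering.samples.definitions import SIMPLE_SAMPLES

# SAMPLE_SIMPLE1 is assumed to contain two well-separated groups of five 2D points each.
templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
                          [[3.7, 5.5], [6.7, 7.5]],  # initial centers near the two groups
                          [5, 5],                    # expected cluster sizes (assumption)
                          False)                     # use the pure-Python implementation

# itermax=0 exercises the early-return branch above: the centers stay untouched,
# no clusters are allocated and the WCE is reported as 0.0.
templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
                          [[3.7, 5.5], [6.7, 7.5]], None, False, itermax=0)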
Example #2
    def cluster_kmeans(self, xs, ys):
        POI = []
        for i in range(len(xs)):
            POI.append([xs[i], ys[i]])
        POI = np.array(POI)

        # pick 14 random points as initial centers (randint is inclusive, so use len(POI) - 1)
        rand = []
        for i in range(14):
            r = randint(0, len(POI) - 1)
            rand.append(POI[r])

        kmeans_instance = kmeans(POI, rand, 4)
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()

        if self.visualize:
            vis = cluster_visualizer()
            vis.append_clusters(clusters, POI)
            vis.show()
        ret = []

        for i in range(len(clusters)):
            ret.append([])
            for j in range(len(clusters[i])):
                ret[i].append(POI[clusters[i][j]])

        return ret
Example #3
def elbow_analysis(sample_file_path, kmin, kmax, **kwargs):
    initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    sample = read_sample(sample_file_path)

    elbow_instance = elbow(sample, kmin, kmax, initializer=initializer)
    elbow_instance.process()

    amount_clusters = elbow_instance.get_amount()
    wce = elbow_instance.get_wce()

    centers = kmeans_plusplus_initializer(sample, amount_clusters).initialize()
    kmeans_instance = kmeans(sample, centers)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample '%s': Obtained amount of clusters: '%d'." % (sample_file_path, amount_clusters))

    figure = plt.figure(1)
    ax = figure.add_subplot(111)
    ax.plot(range(kmin, kmax), wce, color='b', marker='.')
    ax.plot(amount_clusters, wce[amount_clusters - kmin], color='r', marker='.', markersize=10)
    ax.annotate("Elbow", (amount_clusters + 0.1, wce[amount_clusters - kmin] + 5))
    ax.grid(True)
    plt.ylabel("WCE")
    plt.xlabel("K")
    plt.show()

    kmeans_visualizer.show_clusters(sample, clusters, centers)
Example #4
def gt_boxes_cluster(gt_boxes, centers=5):
    """
    聚类gt boxes长宽
    :param gt_boxes: numpy数组 [n,(y1,x1,y2,x2)]
    :param centers: 聚类中心个数
    :return: 聚类后的高度和宽度
    """

    height = gt_boxes[:, 2] - gt_boxes[:, 0]
    width = gt_boxes[:, 3] - gt_boxes[:, 1]
    hw = np.stack([height, width], axis=1)
    # save the height/width data
    np.save('/tmp/gt_height_width.npy', hw)

    # k-means clustering with a user-defined IoU distance metric
    metric = distance_metric(type_metric.USER_DEFINED, func=iou_distance)
    init_centers = hw[np.random.choice(len(hw), centers, replace=False)]
    m = kmeans(hw, init_centers, metric=metric)
    m.process()
    cluster_centers = np.array(m.get_centers())

    # round the cluster centers to two decimals
    height = np.array([round(h, 2) for h in cluster_centers[:, 0]])
    width = np.array([round(w, 2) for w in cluster_centers[:, 1]])
    # sort the results by height before returning
    sort_indices = np.argsort(height)
    height = height[sort_indices]
    width = width[sort_indices]

    return height, width
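The user-defined iou_distance referenced above is not defined in this snippet. A plausible sketch for (height, width) pairs, in the spirit of anchor-box clustering, is given below; it treats both boxes as anchored at the same origin and returns 1 - IoU. This is an assumption for illustration, not the original author's implementation.

def iou_distance(point_a, point_b):
    # both points are (height, width) pairs; the boxes are assumed to share the same origin
    h_a, w_a = point_a[0], point_a[1]
    h_b, w_b = point_b[0], point_b[1]
    intersection = min(h_a, h_b) * min(w_a, w_b)
    union = h_a * w_a + h_b * w_b - intersection
    return 1.0 - intersection / union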
Example #5
    def __improve_parameters(self, centers, available_indexes = None):
        """!
        @brief Performs k-means clustering in the specified region.
        
        @param[in] centers (list): Centers of clusters.
        @param[in] available_indexes (list): Indexes that define which points can be used for k-means clustering; if None, all points are used.
        
        @return (tuple) List of allocated clusters (each cluster contains indexes of objects in the data list) and the corresponding centers.
        
        """

        if available_indexes and len(available_indexes) == 1:
            index_center = available_indexes[0]
            return [ available_indexes ], self.__pointer_data[index_center]

        local_data = self.__pointer_data
        if available_indexes:
            local_data = [ self.__pointer_data[i] for i in available_indexes ]

        local_centers = centers
        if centers is None:
            local_centers = kmeans_plusplus_initializer(local_data, 2, kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE).initialize()

        kmeans_instance = kmeans(local_data, local_centers, tolerance=self.__tolerance, ccore=False)
        kmeans_instance.process()

        local_centers = kmeans_instance.get_centers()
        
        clusters = kmeans_instance.get_clusters()
        if available_indexes:
            clusters = self.__local_to_global_clusters(clusters, available_indexes)
        
        return clusters, local_centers
Example #6
def kmeansWithScores(filenameData, filenameSilhMean, filenameDBS, filenameCHS,
                     kClusters):
    data = read_sample(str(root) + '\\' + filenameData)

    #kClusters = canoc(data, kmin, kmax)

    initial_centers = rci(data, kClusters).initialize()
    kmeans_instance = kmeans(data, initial_centers, metric=metricResearch)

    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    predicted = kmeans_instance.predict(data)

    silhouetteScore = silhouette(data, clusters).process().get_score()
    meanSilhouetteScore = np.mean(silhouetteScore)
    witTXT(meanSilhouetteScore,
           filenameSilhMean,
           filepath=root,
           note='k: ' + str(kClusters))

    dbsScore = dbs(data, predicted)
    witTXT(dbsScore, filenameDBS, filepath=root, note='k: ' + str(kClusters))

    chsScore = chs(data, predicted)
    witTXT(chsScore, filenameCHS, filepath=root, note='k: ' + str(kClusters))
Example #7
    def __search_optimial_parameters(self, local_data):
        """!
        @brief Splits the data of the region into two clusters and tries to find the global optimum by running k-means
                clustering several times (defined by the 'repeat' argument).

        @param[in] local_data (list): Points of a region that should be split into two clusters.

        @return (tuple) List of allocated clusters, list of centers and total WCE (clusters, centers, wce).

        """
        optimal_wce, optimal_centers, optimal_clusters = float('+inf'), None, None

        for _ in range(self.__repeat):
            candidates = 5
            if len(local_data) < candidates:
                candidates = len(local_data)

            local_centers = kmeans_plusplus_initializer(
                local_data, 2, candidates).initialize()

            kmeans_instance = kmeans(local_data,
                                     local_centers,
                                     tolerance=self.__tolerance,
                                     ccore=False)
            kmeans_instance.process()

            local_wce = kmeans_instance.get_total_wce()
            if local_wce < optimal_wce:
                optimal_centers = kmeans_instance.get_centers()
                optimal_clusters = kmeans_instance.get_clusters()
                optimal_wce = local_wce

        return optimal_clusters, optimal_centers, optimal_wce
Example #8
    def _search_optimal_parameters(self, data, amount):
        """!
        @brief Performs cluster analysis for the specified data several times to find the clustering result with
                the lowest WCE.

        @param[in] data (array_like): Input data that should be clustered.
        @param[in] amount (uint): Amount of clusters that should be allocated.

        @return (tuple) Optimal clustering result: (clusters, centers, wce).

        """
        best_wce, best_clusters, best_centers = float('+inf'), [], []
        for _ in range(self.__repeat):
            initial_centers = kmeans_plusplus_initializer(
                data, amount, random_state=self.__random_state).initialize()
            solver = kmeans(data,
                            initial_centers,
                            tolerance=self.__tolerance,
                            ccore=False).process()

            candidate_wce = solver.get_total_wce()
            if candidate_wce < best_wce:
                best_wce = candidate_wce
                best_clusters = solver.get_clusters()
                best_centers = solver.get_centers()

            if len(initial_centers) == 1:
                break  # No need to rerun clustering for one initial center.

        return best_clusters, best_centers, best_wce
Example #9
    def get_kmeans_clusters(data, count_centers):
        rows = data.getRows()
        input_data = list()
        result_clusters = list()
        for row in rows:
            input_data.append(row.getDataArray())
        SST = calculate_sst(input_data)

        # initialize initial centers using K-Means++ method
        initial_centers = kmeans_plusplus_initializer(
            input_data, count_centers).initialize()
        # create instance of K-Means algorithm with prepared centers
        kmeans_instance = kmeans(input_data, initial_centers)
        # run cluster analysis and obtain results
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()
        colorRange = Constants.DEFAULT_COLOR_SET
        SSB = 0
        SSW = 0
        for i, cluster in enumerate(clusters):
            result_cluster = Cluster(
                KMeansWindow.get_rows_kmeans(data, cluster))
            ro = KMeansWindow.get_rows_kmeans(data, cluster)
            f = [x._dataArray for x in ro]
            SSW = SSW + calculate_ssw(f)
            colour = random.choice(colorRange)
            result_cluster.setName(colour)
            result_cluster.setColor(colour)
            result_clusters.append(result_cluster)
        SSB = calculate_ssb(SST, SSW)
        RS_RESULT.append(SSB / SST)

        print(RS_RESULT)
        return result_clusters
def elbow_analysis(sample_file_path, kmin, kmax, **kwargs):
    initializer = kwargs.get('initializer', kmeans_plusplus_initializer)
    sample = read_sample(sample_file_path)

    elbow_instance = elbow(sample, kmin, kmax, initializer=initializer)
    elbow_instance.process()

    amount_clusters = elbow_instance.get_amount()
    wce = elbow_instance.get_wce()

    centers = kmeans_plusplus_initializer(sample, amount_clusters).initialize()
    kmeans_instance = kmeans(sample, centers)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample '%s': Obtained amount of clusters: '%d'." %
          (sample_file_path, amount_clusters))

    figure = plt.figure(1)
    ax = figure.add_subplot(111)
    ax.plot(range(kmin, kmax), wce, color='b', marker='.')
    ax.plot(amount_clusters,
            wce[amount_clusters - kmin],
            color='r',
            marker='.',
            markersize=10)
    ax.annotate("Elbow",
                (amount_clusters + 0.1, wce[amount_clusters - kmin] + 5))
    ax.grid(True)
    plt.ylabel("WCE")
    plt.xlabel("K")
    plt.show()

    kmeans_visualizer.show_clusters(sample, clusters, centers)
def subcluster(dataset):
    kmin = 1
    kmax = 20
    if kmax > len(dataset):
        kmax = len(dataset)
    optimal_clusters = 1
    # Determine the number of clusters.
    # The elbow method is potentially an inefficient technique here: instead of
    # elbow we could repeat what is done in the main clustering, sweeping K values
    # and choosing the one with the lowest error from calcError, but that could be
    # very time intensive.
    if kmax - kmin <= 3:
        optimal_clusters = int((kmin + kmax) / 2)
    else:
        elbow_inst = elbow(dataset, kmin, kmax)
        elbow_inst.process()
        optimal_clusters = elbow_inst.get_amount()
    if optimal_clusters > len(dataset):
        optimal_clusters = len(dataset)
    initial_centers = kmeans_plusplus_initializer(
        dataset, optimal_clusters).initialize()
    metric = distance_metric(type_metric.EUCLIDEAN)
    kmeans_instance = kmeans(dataset, initial_centers, metric=metric)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()

    return clusters
Example #12
    def templateLengthProcessData(path_to_file, start_centers,
                                  expected_cluster_length, ccore, **kwargs):
        sample = read_sample(path_to_file)

        metric = kwargs.get('metric',
                            distance_metric(type_metric.EUCLIDEAN_SQUARE))

        kmeans_instance = kmeans(sample,
                                 start_centers,
                                 0.025,
                                 ccore,
                                 metric=metric)
        kmeans_instance.process()

        clusters = kmeans_instance.get_clusters()
        centers = kmeans_instance.get_centers()

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
        assertion.eq(len(sample), sum(obtained_cluster_sizes))

        assertion.eq(len(clusters), len(centers))
        for center in centers:
            assertion.eq(len(sample[0]), len(center))

        if expected_cluster_length is not None:
            obtained_cluster_sizes.sort()
            expected_cluster_length.sort()
            assertion.eq(obtained_cluster_sizes, expected_cluster_length)
Example #13
def kmeansWithScores(nameData, nameSilhouetteMean, nameDBS, nameCHS, k_clusters, measure, kmin, kmax):
    data = read_sample(str(root)+'\\'+nameData)

    #initial_centers = kppi(data, k_clusters).initialize()
    initial_centers = rci(data, k_clusters).initialize()
    kmeans_instance = kmeans(data, initial_centers, metric = measure)

    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    predicted = kmeans_instance.predict(data)

#    final_centers = kmeans_instance.get_centers()

    silhouetteScore = silhouette(data, clusters).process().get_score()
    meanSilhouetteScore = np.mean(silhouetteScore)
    #wlitCSV(silhouetteScore, filenameSilhouette, '', root)
    #witCSV(meanSilhouetteScore, nameSilhouetteMean, '', root)

    dbsScore = dbs(data, predicted)
    #witCSV(dbsScore, nameDBS, '', root)

    chsScore = chs(data, predicted)
    #witCSV(chsScore, nameCHS, '', root)

    elbow_instance = elbow(data, kmin, kmax)
    elbow_instance.process()
    amount_clusters = elbow_instance.get_amount()
    wce = elbow_instance.get_wce()
Example #14
def template_clustering(start_centers, path, tolerance=0.25, ccore=False):
    sample = read_sample(path)
    dimension = len(sample[0])

    metric = distance_metric(type_metric.MANHATTAN)

    observer = kmeans_observer()
    kmeans_instance = kmeans(sample,
                             start_centers,
                             tolerance,
                             ccore,
                             observer=observer,
                             metric=metric)
    (ticks, _) = timedcall(kmeans_instance.process)

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    visualizer = cluster_visualizer_multidim()
    visualizer.append_clusters(clusters, sample)
    visualizer.show()

    if dimension > 3:
        kmeans_visualizer.show_clusters(sample, clusters, centers,
                                        start_centers)
        kmeans_visualizer.animate_cluster_allocation(sample, observer)
Example #15
def calculate_fitness(ind, df, X, target, func_set, operations,
                      number_of_clusters):
    ind_exp = ind.unroll_expression([])

    def fitness_distance(data1, data2):
        """
        input:
            point1 e point2 = pontos utilizados no cálculo da distância
        output:
            result = distância entre os dois pontos
        """
        result = eval(data1, data2, func_set, operations, ind_exp)
        return result

    # distance function
    fitness_metric = distance_metric(type_metric.USER_DEFINED,
                                     func=fitness_distance)

    k = number_of_clusters

    initial_centers = kmeans_plusplus_initializer(X, k).initialize()
    kmeans_instance = kmeans(X, initial_centers, metric=fitness_metric)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()

    for i in range(len(clusters)):
        df.loc[clusters[i], 'y_pred'] = i

    score = v_measure_score(target, df.y_pred)
    # reset the dataframe by dropping the temporary prediction column
    df = df.drop(['y_pred'], axis=1)

    return score
def process_kmeans(sample):
    instance = kmeans(
        sample, [[random() + (multiplier * 5),
                  random() + (multiplier + 5)]
                 for multiplier in range(NUMBER_CLUSTERS)])
    (ticks, _) = timedcall(instance.process)
    return ticks
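The helper above relies on a module-level NUMBER_CLUSTERS constant and on pyclustering's timedcall utility. A minimal driver is sketched below; NUMBER_CLUSTERS, the sample size and the value range are assumptions chosen for illustration.

from random import random

from pyclustering.cluster.kmeans import kmeans
from pyclustering.utils import timedcall

NUMBER_CLUSTERS = 3  # assumed value; the original defines this constant elsewhere

# synthetic 2D points for the timing run
sample = [[random() * NUMBER_CLUSTERS * 5, random() * NUMBER_CLUSTERS * 5] for _ in range(1000)]
print("k-means execution time:", process_kmeans(sample))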
Example #17
    def __improve_parameters(self, centers, available_indexes=None):
        """!
        @brief Performs k-means clustering in the specified region.
        
        @param[in] centers (list): Cluster centers; if None, two centers are generated automatically using a center initialization method.
        @param[in] available_indexes (list): Indexes that define which points can be used for k-means clustering; if None, all points are used.
        
        @return (tuple) List of allocated clusters, list of centers and total WCE.
        
        """

        if available_indexes and len(available_indexes) == 1:
            index_center = available_indexes[0]
            return [available_indexes], self.__pointer_data[index_center], 0.0

        local_data = self.__pointer_data
        if available_indexes:
            local_data = [self.__pointer_data[i] for i in available_indexes]

        local_centers = centers
        if centers is None:
            local_centers = kmeans_plusplus_initializer(local_data, 2, kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE).initialize()

        kmeans_instance = kmeans(local_data, local_centers, tolerance=self.__tolerance, ccore=False)
        kmeans_instance.process()

        local_wce = kmeans_instance.get_total_wce()
        local_centers = kmeans_instance.get_centers()
        
        clusters = kmeans_instance.get_clusters()
        if available_indexes:
            clusters = self.__local_to_global_clusters(clusters, available_indexes)
        
        return clusters, local_centers, local_wce
Example #18
    def templateAnimateClusteringResultNoFailure(filename, initial_centers, ccore_flag):
        sample = read_sample(filename)

        observer = kmeans_observer()
        kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer)
        kmeans_instance.process()

        kmeans_visualizer.animate_cluster_allocation(sample, observer)
Example #19
def template_segmentation_image(source, start_centers):
    data = read_image(source)

    kmeans_instance = kmeans(data, start_centers)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()
    draw_image_mask_segments(source, clusters)
Example #20
def clustering_random_points(amount_points, amount_centers, ccore):
    sample = [ [ random.random(), random.random() ] for _ in range(amount_points) ]
    centers = [ [ random.random(), random.random() ] for _ in range(amount_centers) ]
    
    kmeans_instance = kmeans(sample, centers, 0.0001, ccore)
    (ticks, _) = timedcall(kmeans_instance.process)
    
    print("Execution time ("+ str(amount_points) +" 2D-points):", ticks)
Example #22
    def templateAnimateClusteringResultNoFailure(filename, initial_centers, ccore_flag):
        sample = read_sample(filename)

        observer = kmeans_observer()
        kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer)
        kmeans_instance.process()

        kmeans_visualizer.animate_cluster_allocation(sample, observer)
def template_segmentation_image(source, start_centers):
    data = read_image(source)

    kmeans_instance = kmeans(data, start_centers)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()
    draw_image_mask_segments(source, clusters)
Example #24
    def testDrawSegmentationResultNoFailure(self):
        data = utils.read_image(IMAGE_SIMPLE_SAMPLES.IMAGE_SIMPLE01)

        kmeans_instance = kmeans(data, [[255, 0, 0], [0, 0, 255], [180, 136, 0], [255, 255, 255]])
        kmeans_instance.process()

        clusters = kmeans_instance.get_clusters()
        utils.draw_image_mask_segments(IMAGE_SIMPLE_SAMPLES.IMAGE_SIMPLE01, clusters)
        utils.draw_image_color_segments(IMAGE_SIMPLE_SAMPLES.IMAGE_SIMPLE01, clusters)
def clusterData(data, numberOfConversations, getData):
    initial_centers = kmeans_plusplus_initializer(data, numberOfConversations).initialize()
    instance = kmeans(data, initial_centers)
    instance.process()
    #kmeans_visualizer.show_clusters(data, instance.get_clusters(), instance.get_centers(), initial_centers)
    if getData:  # returns list that specifies which messages belong to which conversation
        return instance.get_clusters()
    else:        # returns a list of times indicating when each conversation occurred
        return instance.get_centers()
Example #26
    def templateShowClusteringResultNoFailure(filename, initial_centers, ccore_flag):
        sample = read_sample(filename)

        kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag)
        kmeans_instance.process()

        clusters = kmeans_instance.get_clusters()
        centers = kmeans_instance.get_centers()

        kmeans_visualizer.show_clusters(sample, clusters, centers, initial_centers)
Example #27
def kmeansRun(sample, k, specMetric):
    initial_centers = rci(sample, k).initialize()
    kmeans_instance = kmeans(sample,
                             initial_centers,
                             metric=distance_metric(specMetric))

    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    predicted = kmeans_instance.predict(sample)
    return (clusters, predicted)
def template_clustering(start_centers, path, tolerance=0.25, ccore=True):
    sample = read_sample(path)

    kmeans_instance = kmeans(sample, start_centers, tolerance, ccore)
    (ticks, result) = timedcall(kmeans_instance.process)

    clusters = kmeans_instance.get_clusters()
    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    draw_clusters(sample, clusters)
Example #30
    def templateShowClusteringResultNoFailure(filename, initial_centers, ccore_flag):
        sample = read_sample(filename)

        kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag)
        kmeans_instance.process()

        clusters = kmeans_instance.get_clusters()
        centers = kmeans_instance.get_centers()

        kmeans_visualizer.show_clusters(sample, clusters, centers, initial_centers)
Example #31
    def templateClusterAllocationOneDimensionData(ccore_flag):
        input_data = ([[random()] for _ in range(10)] + [[random() + 3] for _ in range(10)]
                      + [[random() + 5] for _ in range(10)] + [[random() + 8] for _ in range(10)])

        kmeans_instance = kmeans(input_data, [[0.0], [3.0], [5.0], [8.0]], 0.025, ccore_flag)
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()

        assertion.eq(4, len(clusters))
        for cluster in clusters:
            assertion.eq(10, len(cluster))
Example #32
    def templateClusterAllocationOneDimensionData(self, ccore_flag):
        input_data = ([[random()] for i in range(10)] + [[random() + 3] for i in range(10)]
                      + [[random() + 5] for i in range(10)] + [[random() + 8] for i in range(10)])

        kmeans_instance = kmeans(input_data, [[0.0], [3.0], [5.0], [8.0]], 0.025, ccore_flag)
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()

        assert len(clusters) == 4
        for cluster in clusters:
            assert len(cluster) == 10
Example #33
    def templateClusterAllocationOneDimensionData(ccore_flag):
        input_data = ([[random()] for _ in range(10)] + [[random() + 3] for _ in range(10)]
                      + [[random() + 5] for _ in range(10)] + [[random() + 8] for _ in range(10)])

        kmeans_instance = kmeans(input_data, [[0.0], [3.0], [5.0], [8.0]], 0.025, ccore_flag)
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()

        assertion.eq(4, len(clusters))
        for cluster in clusters:
            assertion.eq(10, len(cluster))
    def get_modelo(self, algoritmo, eps, neig):
        print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))
        instance = None

        if algoritmo == 'AGNES':
            instance = agglomerative(self.amostras,
                                     self.numero_clusters,
                                     link=None)
        elif algoritmo == 'BIRCH':
            instance = birch(self.amostras,
                             self.numero_clusters,
                             entry_size_limit=10000)
        elif algoritmo == 'CLARANS':
            instance = clarans(self.amostras,
                               self.numero_clusters,
                               numlocal=100,
                               maxneighbor=1)
        elif algoritmo == 'CURE':
            instance = cure(self.amostras,
                            self.numero_clusters,
                            number_represent_points=5,
                            compression=0.5)
        elif algoritmo == 'DBSCAN':
            instance = dbscan(self.amostras, eps=eps, neighbors=neig)
        elif algoritmo == 'FCM':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = fcm(self.amostras, initial_centers)
        elif algoritmo == 'KMEANS':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
        elif algoritmo == 'KMEDOIDS':
            instance = kmedoids(self.amostras,
                                initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                                tolerance=0.0001)  # adjust the number of clusters
        elif algoritmo == 'OPTICS':
            instance = optics(self.amostras, eps=eps, minpts=neig)
        elif algoritmo == 'ROCK':
            instance = rock(self.amostras,
                            eps=eps,
                            number_clusters=self.numero_clusters,
                            threshold=0.5)
        else:
            pass

        instance.process()
        lista_agrupada = self.get_lista_agrupada(instance.get_clusters())
        lista_agrupada = np.array(lista_agrupada)

        if (neig != 0):
            n_grupos = len(np.unique(lista_agrupada))
            if n_grupos > self.numero_clusters:
                lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
        return lista_agrupada
Example #35
    def _perform_clustering(self):
        """!
        @brief Performs cluster analysis using K-Means algorithm using current centers are initial.

        @param[in] data (array_like): Input data for cluster analysis.

        """
        solver = kmeans(self.__data, self.__centers, tolerance=self.__tolerance, ccore=False).process()
        self.__clusters = solver.get_clusters()
        self.__centers = solver.get_centers()
        self.__total_wce = solver.get_total_wce()
Example #36
    def templateDrawClustersNoFailure(self, data_path, amount_clusters):
        sample = read_sample(data_path)

        initial_centers = kmeans_plusplus_initializer(sample, amount_clusters).initialize()
        # note: the third positional argument of kmeans() is the tolerance, so the cluster
        # amount is conveyed only through the initial centers
        kmeans_instance = kmeans(sample, initial_centers)

        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()

        ax = draw_clusters(sample, clusters)
        assert ax is not None
Example #37
def model_train_euclidian():
    # define the number of clusters
    k = 3
    # initialize centroids using the K-Means++ method
    initial_centers = kmeans_plusplus_initializer(X, k).initialize()
    # create a K-Means instance using the (default) Euclidean distance
    kmeans_instance = kmeans(X, initial_centers)
    # run cluster analysis and obtain results
    kmeans_instance.process()
    # retrieve the generated clusters
    clusters = kmeans_instance.get_clusters()
Example #38
def template_segmentation_image_amount_colors(source, amount):
    data = read_image(source)

    centers = kmeans_plusplus_initializer(
        data, amount,
        kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE).initialize()
    kmeans_instance = kmeans(data, centers)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()
    draw_image_mask_segments(source, clusters)
Example #39
def elbow_k_means(key_word, model_path):
    logger = Logger(model_path)
    model = logger.model
    result = model.most_similar(key_word, topn=100)

    word_vectors = []
    num_clusters = 8
    word_names = []
    word_correlation = []
    for r in result:
        word_vectors.append(model.wv[r[0]])
        word_names.append(r[0])
        word_correlation.append(r[1])

    # despite the variable name, this performs PCA (not t-SNE) to reduce the vectors to 2D
    tsne = PCA(n_components=2)

    X_tsne = tsne.fit_transform(word_vectors)

    kmin, kmax = 1, 10
    elbow_instance = elbow(X_tsne, kmin, kmax)

    elbow_instance.process()
    amount_clusters = elbow_instance.get_amount()
    wce = elbow_instance.get_wce()

    centers = kmeans_plusplus_initializer(X_tsne,
                                          amount_clusters,
                                          amount_candidates=kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE).initialize()
    k_means_instance = kmeans(X_tsne, centers)
    k_means_instance.process()

    clusters = k_means_instance.get_clusters()
    centers = k_means_instance.get_centers()

    index_to_word = [[] for i in range(len(clusters))]
    index_to_correlation = [[] for i in range(len(clusters))]
    idx = 0
    cluster_list = []
    for c in clusters:
        words_list = []
        for i in c:
            word_dict = dict()
            word_dict["text"] = word_names[i]
            word_dict["correlation"] = word_correlation[i]
            t_dict = dict()
            t_dict["word"] = word_dict
            words_list.append(t_dict)
        words_dict = dict()
        words_dict["words"] = words_list
        cluster_list.append(words_dict)
        idx += 1

    return len(clusters), cluster_list
Example #40
    def templateLengthProcessData(self, path_to_file, start_centers, expected_cluster_length, ccore=False):
        sample = read_sample(path_to_file)

        kmeans_instance = kmeans(sample, start_centers, 0.025, ccore)
        kmeans_instance.process()

        clusters = kmeans_instance.get_clusters()

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
        assert len(sample) == sum(obtained_cluster_sizes)

        obtained_cluster_sizes.sort()
        expected_cluster_length.sort()
        assert obtained_cluster_sizes == expected_cluster_length
Example #41
    def templateCollectEvolution(filename, initial_centers, number_clusters, ccore_flag):
        sample = read_sample(filename)

        observer = kmeans_observer()
        kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer)
        kmeans_instance.process()

        assertion.le(1, len(observer))
        for i in range(len(observer)):
            assertion.le(1, len(observer.get_centers(i)))
            for center in observer.get_centers(i):
                assertion.eq(len(sample[0]), len(center))

            assertion.le(1, len(observer.get_clusters(i)))
Example #42
    def __process_by_python(self):
        """!
        @brief Performs processing using python implementation.

        """
        for amount in range(self.__kmin, self.__kmax):
            centers = self.__initializer(self.__data, amount).initialize()
            instance = kmeans(self.__data, centers, ccore=True)
            instance.process()

            self.__wce.append(instance.get_total_wce())

        self.__calculate_elbows()
        self.__find_optimal_kvalue()
Example #43
    def templateEncoderProcedures(filename, initial_centers, number_clusters, ccore_flag):
        sample = read_sample(filename)

        kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag)
        kmeans_instance.process()

        clusters = kmeans_instance.get_clusters()
        encoding = kmeans_instance.get_cluster_encoding()

        encoder = cluster_encoder(encoding, clusters, sample)
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
        encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LIST_SEPARATION)

        assertion.eq(number_clusters, len(clusters))
def template_clustering(start_centers, path, tolerance=0.25, ccore=True):
    sample = read_sample(path)

    kmeans_instance = kmeans(sample, start_centers, tolerance, ccore)
    (ticks, result) = timedcall(kmeans_instance.process)

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, sample)
    visualizer.append_cluster(start_centers, marker="*", markersize=20)
    visualizer.append_cluster(centers, marker="*", markersize=20)
    visualizer.show()
def find_optimal_amout_clusters(sample_path, kmin, kmax, algorithm):
    sample = read_sample(sample_path)
    search_instance = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm).process()

    amount = search_instance.get_amount()
    scores = search_instance.get_scores()

    print("Sample: '%s', Scores: '%s'" % (sample_path, str(scores)))

    initial_centers = kmeans_plusplus_initializer(sample, amount).initialize()
    kmeans_instance = kmeans(sample, initial_centers).process()

    clusters = kmeans_instance.get_clusters()

    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, sample)
    visualizer.show()
Example #46
    def __initialize_kmeans(self):
        initial_centers = kmeans_plusplus_initializer(self.__sample, self.__amount).initialize()
        kmeans_instance = kmeans(self.__sample, initial_centers, ccore=True)
        kmeans_instance.process()

        means = kmeans_instance.get_centers()

        covariances = []
        initial_clusters = kmeans_instance.get_clusters()
        for initial_cluster in initial_clusters:
            if len(initial_cluster) > 1:
                cluster_sample = [self.__sample[index_point] for index_point in initial_cluster]
                covariances.append(numpy.cov(cluster_sample, rowvar=False))
            else:
                dimension = len(self.__sample[0])
                covariances.append(numpy.zeros((dimension, dimension)) + random.random() / 10.0)

        return means, covariances
Example #47
def template_clustering(start_centers, path, tolerance = 0.25, ccore = False):
    sample = read_sample(path)
    dimension = len(sample[0])

    metric = distance_metric(type_metric.MANHATTAN)

    observer = kmeans_observer()
    kmeans_instance = kmeans(sample, start_centers, tolerance, ccore, observer=observer, metric=metric)
    (ticks, _) = timedcall(kmeans_instance.process)
    
    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()
    
    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    visualizer = cluster_visualizer_multidim()
    visualizer.append_clusters(clusters, sample)
    visualizer.show()

    if dimension > 3:
        kmeans_visualizer.show_clusters(sample, clusters, centers, start_centers)
        kmeans_visualizer.animate_cluster_allocation(sample, observer)
Example #48
    def testDifferentDimensions(self):
        kmeans_instance = kmeans([[0, 1, 5], [0, 2, 3]], [[0, 3]])
        self.assertRaises(NameError, kmeans_instance.process)
Example #49
    def testCoreInterfaceIntInputData(self):
        kmeans_instance = kmeans([[1], [2], [3], [20], [21], [22]], [[2], [21]], 0.025, True)
        kmeans_instance.process()
        assert len(kmeans_instance.get_clusters()) == 2
Example #50
def process_kmeans(sample):
    instance = kmeans(sample, [ [random() + (multiplier * 5), random() + (multiplier + 5)] for multiplier in range(NUMBER_CLUSTERS) ])
    (ticks, _) = timedcall(instance.process)
    return ticks
Example #51
    def testDifferentDimensions(self):
        kmeans_instance = kmeans([[0, 1, 5], [0, 2, 3]], [[0, 3]], ccore=False)
        self.assertRaises(ValueError, kmeans_instance.process)