Example #1
 def __init__(self, publisher_node=None):
     super().__init__()
     self.data_6d = []
     self.data_9d = []
     self.k_means6 = k_means(self.data_6d)
     self.k_means9 = k_means(self.data_9d)
     self.pub_node = publisher_node
     self.judge_msg = 0
Example #2
def imgWithKmeans(imgFile, k):

    rawData = mtimg.imread(imgFile)

    fig = plt.figure()
    ax1 = fig.add_subplot(121)
    ax1.set_title('original image')
    ax2 = fig.add_subplot(122)
    ax2.set_title('k = {}'.format(k))

    ax1.imshow(rawData)

    # Flatten the H x W x C image into an (H*W) x C array of pixel rows
    pixelArray = np.concatenate([i for i in rawData], axis=0)
    imgSize = rawData.shape

    pixelClass, _, _, clusterIndexs = k_means(pixelArray, k, False)

    infos = zip(pixelClass, clusterIndexs)
    for info in infos:

        pixel, indexs = info
        pixelArray[indexs] = pixel

    newImg = pixelArray.reshape(imgSize)
    ax2.imshow(newImg)

    plt.show()
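The imports are elided here, as in most of these listings; a plausible set for this snippet, inferred from the names used (mtimg would be matplotlib.image, and k_means the project-local routine):

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mtimg
from k_means import k_means  # project-local clustering routine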
Example #3
def main():
    N = 100
    
    group_means = [(0, 0), (4, 5), (5, 0)]
    
    all_points = []
    
    for group_x_coord, group_y_coord in group_means:
        group_point_x_coords = np.random.randn(N) + group_x_coord
        group_point_y_coords = np.random.randn(N) + group_y_coord
        
        group_points = zip(group_point_x_coords, group_point_y_coords)
        all_points.extend(group_points)
    
    group_point_lists = k_means.k_means(all_points, 3)
    
    axes = plt.subplots(2, 1, sharex=True, sharey=True)[1]
    
    x_coordinates, y_coordinates = zip(*all_points)
    axes[0].plot(x_coordinates, y_coordinates, ".")
    
    for group_points in group_point_lists:
        if len(group_points):
            x_coordinates, y_coordinates = zip(*group_points)
            axes[1].plot(x_coordinates, y_coordinates, ".", mew=0)
    
    plt.show()
Example #4
def biKmeans(dataSet, k, distMeas=dist_eclud):
    """Bisecting k-means: repeatedly split the cluster whose 2-means split
    yields the lowest total SSE, until k clusters exist."""
    m = dataSet.shape[0]
    clusterAssment = np.matrix(np.zeros((m, 2)))
    centroid0 = np.mean(dataSet, axis=0)
    centList = [centroid0]
    for j in range(m):
        clusterAssment[j, 1] = distMeas(np.matrix(centroid0), dataSet[j, :]) ** 2
    while len(centList) < k:
        lowestSSE = np.inf
        for i in range(len(centList)):
            ptsInCurrCluster = dataSet[np.nonzero(clusterAssment[:, 0].A == i)[0], :]
            centroidMat, splitClustAss = k_means(ptsInCurrCluster, 2, distMeas)
            sseSplit = sum(splitClustAss[:, 1])
            sseNotSplit = sum(clusterAssment[np.nonzero(clusterAssment[:, 0].A != i)[0], 1])
            print("sseSplit, and notSplit: ", sseSplit, sseNotSplit)
            if (sseSplit + sseNotSplit) < lowestSSE:
                bestCentToSplit = i
                bestNewCents = centroidMat
                bestClustAss = splitClustAss.copy()
                lowestSSE = sseSplit + sseNotSplit
        bestClustAss[np.nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList)
        bestClustAss[np.nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit
        print('the bestCentToSplit is: ', bestCentToSplit)
        print('the len of bestClustAss is: ', len(bestClustAss))
        centList[bestCentToSplit] = bestNewCents[0, :]
        centList.append(bestNewCents[1, :])
        clusterAssment[np.nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss
    return centList, clusterAssment
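biKmeans relies on two helpers defined elsewhere: a base k_means(dataSet, k, distMeas) returning (centroids, assignments), and the distance measure dist_eclud. A minimal sketch of the latter, assuming the usual Euclidean definition:

import numpy as np

def dist_eclud(vec_a, vec_b):
    # Euclidean distance between two row vectors (a sketch; the name and
    # signature are inferred from the call sites above)
    return np.sqrt(np.sum(np.power(vec_a - vec_b, 2)))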
Example #5
def main():
    file_name = 'transfusion.csv'
    headers, points = read_csv(file_name)
    #points = km.gen_points(4)
    centroids = km.k_means(5, points, point_labels=False)
    print("The headers are {}".format(headers))
    centroids_to_graph(centroids, headers, 0, 1)
Example #6
 def __init__(self, points, k, idx, phi):
     self.points = points
     self.k = k
     self.idx = idx
     self.phi = phi
     self.optCenters, self.optLabels = k_means.k_means(self.points, self.k)
     self.mdp = MeanDist(self.points, self.optCenters, self.optLabels)
     self.pars = [Partition(self.optCenters, self.optLabels)]
Example #7
def my_cluster_my_kmeans(data, n_cluster=10):
    # Run k-means directly on the raw data and return the cluster labels.
    print("Begin my clustering on raw data...")
    print("Data shape = ", data.shape)
    start = time.time()
    labels, _, _, _ = k_means(data, n_clusters=n_cluster, max_iter=300)
    end = time.time()
    print("Clustering on raw data, using time = ", end - start)
    return labels
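The 4-tuple return is specific to this project's k_means; with scikit-learn, a standard alternative producing the same labels would be:

from sklearn.cluster import KMeans

labels = KMeans(n_clusters=n_cluster, max_iter=300).fit_predict(data)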
Example #8
def main():
    # test = k_means([[250,250],[1,1],[1,2],[2,1],[10,10],[240,240]], 2)
    # print(test)

    image = Image.open("a.jpg")
    data = __decompose(image)
    res = k_means(data, 4)
    __compose(res[0], res[1], image)
    image.save("b.jpg")
    return 0
Example #9
def elbow_method(data):
    sum_of_squared_distances = []
    for k in range(1, 15):
        centers, clusters = k_means.k_means(data, k)
        ssd = 0
        for i in range(k):
            ssd += np.sum(pairwise_distances(data[clusters == i],
                                             [centers[i]]))
        sum_of_squared_distances.append(ssd)

    return sum_of_squared_distances
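Example #10 below pairs elbow_method with em.plot_elbow; a minimal sketch of such a plotting helper (hypothetical, inferred from the call):

import matplotlib.pyplot as plt

def plot_elbow(sum_of_squared_distances):
    # Plot SSD against k; the bend ("elbow") suggests a reasonable k
    ks = range(1, len(sum_of_squared_distances) + 1)
    plt.plot(ks, sum_of_squared_distances, 'bx-')
    plt.xlabel('k')
    plt.ylabel('sum of squared distances')
    plt.title('Elbow method')
    plt.show()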
Example #10
def main():
    # construct the clusters and plot them
    samples, c = 800, 5
    data = km.generate_random_blobs(samples, c)
    km.plot_blobs(data)
    centers, clusters = km.k_means(data, c)
    km.plot_clusters(centers, clusters, data)

    # Validate the choice of k with the elbow method and plot the result
    sum_of_squared_distances = em.elbow_method(data)
    em.plot_elbow(sum_of_squared_distances)
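generate_random_blobs is project-local; with scikit-learn, equivalent test data could be produced by (a substitute, not the author's helper):

from sklearn.datasets import make_blobs

data, _ = make_blobs(n_samples=800, centers=5, random_state=0)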
Example #11
def inertia_plot(n_runs, n_init_range, features, number_of_clusters, labels,
                 label_names, random_seed):
    plt.figure()
    plots = []
    legends = []
    km_util = k_means(random_seed)
    cases = [(KMeans, 'k-means++', {}), (KMeans, 'random', {}),
             (KMeans, 'custom', {}),
             (MiniBatchKMeans, 'k-means++', {
                 'max_no_improvement': 3
             }),
             (MiniBatchKMeans, 'random', {
                 'max_no_improvement': 3,
                 'init_size': 500
             })]

    for factory, init, params in cases:
        print("Evaluation of %s with %s init" % (factory.__name__, init))
        inertia = np.empty((len(n_init_range), n_runs))
        for run_id in range(n_runs):
            for i, n_init in enumerate(n_init_range):
                if init == 'custom':
                    custom_inertias = []
                    for j in range(n_init):
                        centers = km_util.initialize_centers(
                            features, labels, label_names)
                        km = factory(n_clusters=number_of_clusters,
                                     init=centers,
                                     random_state=run_id,
                                     n_init=1,
                                     **params).fit(features)
                        custom_inertias.append(km.inertia_)
                    inertia[i, run_id] = min(custom_inertias)
                else:
                    km = factory(n_clusters=number_of_clusters,
                                 init=init,
                                 random_state=run_id,
                                 n_init=n_init,
                                 **params).fit(features)
                    inertia[i, run_id] = km.inertia_
        p = plt.errorbar(n_init_range, inertia.mean(axis=1),
                         inertia.std(axis=1))
        plots.append(p[0])
        legends.append("%s with %s init" % (factory.__name__, init))

    plt.xlabel('n_init')
    plt.ylabel('inertia')
    plt.legend(plots, legends)
    plt.title("Mean inertia for various k-means init across %d runs" % n_runs)
    plt.show()
Example #12
def compress_image(file, k):
    img = cv2.imread(file)
    Z = img.reshape((-1, 3))
    Z = np.float32(Z)

    _, labels2, centroids2 = k_means(k, Z)

    centroids2 = np.uint8(centroids2)
    res = [centroids2[l] for l in labels2]
    res2 = np.array(res).reshape(img.shape)

    cv2.imwrite('imagenes/perrito_K{0}.jpg'.format(k), res2)
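The project-local k_means(k, Z) plays the role OpenCV's built-in would; for reference, an equivalent call with the standard cv2.kmeans API (a sketch, not the author's code):

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
_, labels, centers = cv2.kmeans(Z, k, None, criteria, 10,
                                cv2.KMEANS_RANDOM_CENTERS)
res2 = np.uint8(centers)[labels.flatten()].reshape(img.shape)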
Example #13
    def maintenance(self, node: QTreeNode):
        """
        Maintain Quad Tree. Look up LARS maintenance for details.
        :param node: Node to be updated.
        """
        if node.dirty():
            # bool() sidesteps an extra None check that a direct comparison
            # would need in the else branch, given the loose type hints.
            if bool(node.children):
                if self.check_merge(node, node.children):
                    node.elements = [
                        element for child in node.children
                        for element in child.elements
                    ]
                    node.children.clear()
                    node.dirty_size = 0
            else:
                clusters, centroids = k_means(node.elements, 4)
                nodes = [
                    QTreeNode(cluster, centroid)
                    for cluster, centroid in zip(clusters, centroids)
                ]
                if self.check_split(node, nodes):
                    node.addChildren(nodes)
                    for child in node.children:
                        self.maintenance(child)
                    node.dirty_size = 0

        if not bool(node.children):
            clusters, centroids = k_means(node.elements, 4)
            nodes = [
                QTreeNode(cluster, centroid)
                for cluster, centroid in zip(clusters, centroids)
            ]
            if self.check_split(node, nodes):
                node.addChildren(nodes)
                for child in node.children:
                    self.maintenance(child)
                node.dirty_size = 0
Example #14
def compare_to_sklearn():
    n_dim = 2
    n_clusters = 3
    n_samples = 5
    random_state = check_random_state(0)
    X = np.empty((n_clusters * n_samples, n_dim))
    initial_centers = random_state.rand(n_clusters, n_dim)*10
    for i in range(0, n_clusters):
        x_current = random_state.multivariate_normal(initial_centers[i, :], [[1, 0], [0, 1]], n_samples)
        X[i*n_samples:(i+1)*n_samples, :] = x_current
    closest_center, initial_centers = k_means.k_means(X, n_clusters)
    k_means_scikit_learn = sklearn.cluster.KMeans(n_clusters=n_clusters, random_state=random_state)
    closest_center_sklearn = k_means_scikit_learn.fit_predict(X)
    # Align sklearn's arbitrary cluster numbering with ours before comparing
    _swap_values_in_ndarray(closest_center_sklearn, 2, 0)
    numpy.testing.assert_array_equal(closest_center, closest_center_sklearn)
Example #15
    def fit_transform(self, X, cluster_number, epochs):
        data_number, feature_number = X.shape
        self.__cluster_number = cluster_number

        model = k_means.k_means()
        y = model.fit_transform(X, cluster_number, 1,
                                distance.euclidean_distance)

        classes = np.unique(y)

        self.__pis = np.zeros((1, cluster_number))
        self.__means = np.zeros((cluster_number, feature_number))
        self.__sigma = np.zeros(
            (cluster_number, feature_number, feature_number))
        for i in range(cluster_number):
            self.__pis[:, i] = np.mean(y == classes[i])

            indexes = np.flatnonzero(y == classes[i])

            self.__means[i] = np.mean(X[indexes], axis=0)
            self.__sigma[i] = np.cov(X[indexes].T)

        for _ in range(epochs):
            y_probs = self.score(X)

            number_classes = np.sum(y_probs, axis=0, keepdims=True)

            for i in range(cluster_number):
                self.__means[i] = np.sum(y_probs[:, i].reshape((-1, 1)) * X,
                                         axis=0) / number_classes[:, i]

                diff1 = (X - self.__means[i])[:, :, np.newaxis]
                diff2 = np.transpose(
                    diff1, axes=(0, 2, 1)) * y_probs[:, i].reshape(-1, 1, 1)
                self.__sigma[i] = np.tensordot(
                    diff1, diff2, axes=(0, 0)).reshape(
                        (feature_number, feature_number)) / number_classes[:,
                                                                           i]
                '''
                for j in range(data_number):
                    diff = (X[j] - self.__means[i]).reshape(-1, 1)
                    self.__sigma[i] += y_probs[j, i] * diff.dot(diff.T)
                self.__sigma[i] /= number_classes[:, i]
                '''

            self.__pis = number_classes / data_number

        return np.argmax(y_probs, axis=1)
Example #16
def launch_k_means(X):
    clusters = []
    for k in n_clusters:
        print(f"{k} clusters")
        # 'Compress' image using K-means
        _, clustered = k_means(X,
                               k=k,
                               max_iterations=max_iterations,
                               launch_count=launch_count)
        clusters.append(clustered)

        # Save the result for this k to file
        # clustered_compressed = np.array(clustered).astype(np.uint8)
        # np.savetxt(f"{out_data_path}clustered_{k}.txt", X=clustered_compressed, delimiter='\t', fmt='%1d')

    print("Done clustering.")
    return np.array(clusters)
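launch_k_means reads several module-level settings that are not shown; illustrative (hypothetical) values might be:

n_clusters = [2, 4, 8, 16]    # values of k to sweep
max_iterations = 100          # cap on k-means iterations per launch
launch_count = 5              # random restarts per k; best result kept
out_data_path = 'out/'        # target for the commented-out savetxt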
Example #18
    def to_quad_tree(self, elements: List[Tuple[int, Tuple[float, float]]]):
        """
        Convert given data to a QuadTree
        :param elements: List of user id and user location coordinate tuple
        """
        break_now = True
        # Initialize root at the centroid of all elements
        rep_lat = 0.0
        rep_lon = 0.0
        for element in elements:
            rep_lat += element[1][0]
            rep_lon += element[1][1]
        rep_lat = rep_lat / max(len(elements), 1)
        rep_lon = rep_lon / max(len(elements), 1)
        self.root = QTreeNode(elements, (rep_lat, rep_lon))
        work_level = [self.root]

        level_no = 1
        while True:
            print("Building level", level_no)
            level_no += 1
            new_level = []
            # Split each node at current height level
            level_built_pct = 0

            for node in work_level:
                if len(node.elements) >= 4:
                    clusters, centroids = k_means(node.elements, 4)
                    nodes = [
                        QTreeNode(cluster, centroid)
                        for cluster, centroid in zip(clusters, centroids)
                    ]
                    if self.check_split(node, nodes):
                        break_now = False
                        new_level.extend(nodes)
                        node.addChildren(nodes)
                level_built_pct += 100 / len(work_level)
                print("Level built {0}%".format(level_built_pct))
            # No node was split. Algorithm converges.
            if break_now:
                break
            work_level = new_level
            break_now = True
Example #19
 def __init__(self, center, labels, k_means_flag, terminal, data, k):
     '''
     Constructor.
     center      : preset centroid vectors
     labels      : preset labels
     k_means_flag: whether to use k_means for initialization
     terminal    : number of iterations
     data        : the data to cluster
     k           : target number of clusters
     '''
     self.data = data
     self.k = k
     self.terminal = terminal
     if k_means_flag:
         func = k_means(data, k)
         self.labels, self.center = func.k_means()
     else:
         self.labels = labels
         self.center = center
Example #20
def test():
    data_set = []
    data_set.append(np.array([1.2, 2.3, 3.14]))
    data_set.append(np.array([1.3, 3.01, 4.0]))
    data_set.append(np.array([1.4, 3.1, 3.22]))
    data_set.append(np.array([1.5, 2.3, 3.64]))
    data_set.append(np.array([1.6, 2.33, 4.2]))
    data_set.append(np.array([1.7, 2.7, 3.76]))
    data_set.append(np.array([1.8, 2.3, 3.72]))
    data_set.append(np.array([1.88, 3.3, 4.1]))
    data_set.append(np.array([1.9, 3.0, 3.95]))

    data_set.append(np.array([7.2, 8.1, 19.99]))
    data_set.append(np.array([7, 8.24, 20]))
    data_set.append(np.array([7.54, 7.9, 18.5]))
    data_set.append(np.array([7.25, 8.1, 19.8]))
    data_set.append(np.array([7, 8.24, 20]))
    data_set.append(np.array([7.77, 7.6, 18.6]))
    data_set.append(np.array([7.66, 8.2, 19.6]))
    data_set.append(np.array([7, 8.24, 20]))
    data_set.append(np.array([7.52, 7.8, 18.2]))

    data_set.append(np.array([11.8, 21.7, -34.1]))
    data_set.append(np.array([12.1, 20.7, -33.6]))
    data_set.append(np.array([10.4, 19.4, -33.8]))
    data_set.append(np.array([11.9, 22.5, -34.2]))
    data_set.append(np.array([12.1, 20.7, -33.6]))
    data_set.append(np.array([10.9, 19.8, -32.9]))
    data_set.append(np.array([11.2, 22.0, -34.3]))
    data_set.append(np.array([12.2, 20.9, -43.0]))
    data_set.append(np.array([10.5, 19.9, -23.5]))

    C = k_means.k_means(3, dist, *data_set)

    # output
    for cluster in C:
        for vec in cluster:
            print(vec)
        mean_vec = reduce(lambda x, y: x + y, cluster) / len(cluster)
        print("The mean vector is:", mean_vec)
        print()
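The mean vector is built with functools.reduce (an import the snippet leaves implicit); since the cluster members are NumPy arrays, the same quantity reads more directly as:

mean_vec = np.mean(cluster, axis=0)  # elementwise mean of the cluster's vectors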
Example #21
def main():
    # Read input image
    image = np.array(Image.open(input_image_file))
    X = image.reshape((image.shape[0] * image.shape[1], image.shape[2]))

    for k in n_clusters:
        print(f"{k} clusters")
        # 'Compress' image using K-means
        centroids, clustered = k_means(X,
                                       k=k,
                                       max_iterations=max_iterations,
                                       launch_count=launch_count)
        new_X = np.array(
            [centroids[cluster_index] for cluster_index in clustered])
        new_X = new_X.astype(np.uint8)

        # Write output image
        new_image = new_X.reshape(image.shape)
        output_image_name = f"{output_image_prefix}_{k}.jpg"
        Image.fromarray(new_image).save(output_image_name)
        print(f"Saved {output_image_name}")

    print("Done.")
Example #22
def launch_k_means():
    image = np.array(Image.open(image_name))
    X = image.reshape((image.shape[0] * image.shape[1], image.shape[2]))

    for k in n_clusters:
        print(f"{k} clusters")
        # 'Compress' image using K-means
        centroids, clustered = k_means(X,
                                       k=k,
                                       max_iterations=max_iterations,
                                       launch_count=launch_count)

        # Save the result for this k to file
        np.savetxt(f"{data_path}centroids_{k}.txt",
                   X=centroids,
                   delimiter='\t')
        clustered_compressed = np.array(clustered).astype(np.uint8)
        np.savetxt(f"{data_path}clustered_{k}.txt",
                   X=clustered_compressed,
                   delimiter='\t',
                   fmt='%1d')

    print("Done clustering.")
Example #23
def divide_list_of_children_by_k_means(k, point_list, list_of_all_children):
    children_coordinates = []
    divide_list_of_children = []
    temp_list_of_children = []

    for i in point_list:
        children_coordinates.append(list(i))
    children_coordinates.remove([0, 0])

    clusters, means = k_means(children_coordinates, k)

    for i in clusters:
        for j in i:
            x1, y1 = j
            index = point_list.index((x1, y1))
            temp = list_of_all_children[index]
            temp_list_of_children.append(temp)

        new_list = temp_list_of_children.copy()
        divide_list_of_children.append(new_list)
        temp_list_of_children.clear()

    return divide_list_of_children, means, clusters
Example #24
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import k_means as km
import least_squares as ls  # local module assumed to provide least_squares()

if __name__ == "__main__":
    save_path = 'toy_data.tsv'
    data = pd.read_csv(save_path, delimiter='\t', header=None)
    labels = data[0].to_numpy()
    labels = np.reshape(labels,(np.size(labels), 1))
    x = data[[1,2]].to_numpy()
    # x1 = np.reshape(x[:,0],(np.size(labels), 1))
    # x2 = np.reshape(x[:,1],(np.size(labels), 1))
    # print(x[:,0])
    ones = np.ones((len(data),1))
    A = np.concatenate((ones,x), 1)
    theta = ls.least_squares(A, labels)
    theta_0 = theta[0]
    theta = np.delete(theta, 0, axis=0)
    print(theta, theta_0)
    # plt.scatter(x[0:100,0], x[0:100,1])
    # plt.scatter(x[100:,0], x[100:,1])
    # axes = plt.gca()
    # x_vals = np.array(axes.get_xlim())
    # y_vals = theta_0 + (-1*theta[0]) * x_vals
    # plt.plot(x_vals, y_vals, '--')
    # plt.show()
    # z = np.dot(theta, x1[120]) + theta_0
    # print(z)
    theta_numpy, res, rank, s = np.linalg.lstsq(A, labels, rcond=None)
    print(theta_numpy)
    km.k_means(x, labels)
Example #25
 def setUp(self):
     self.centers = k_means(C)
Example #26
                    P.append(ps[j])
        if P:
            hull = ConvexHull(P)
            x = [P[l][0] for l in hull.vertices]
            y = [P[l][1] for l in hull.vertices]
            orig_len = len(x)
            x = x[-3:-1] + x + x[1:3]
            y = y[-3:-1] + y + y[1:3]
            t = np.arange(len(x))
            ti = np.linspace(2, orig_len + 1, 10 * orig_len)
            xi = interp1d(t, x, kind='cubic')(ti)
            yi = interp1d(t, y, kind='cubic')(ti)
            plt.fill(xi, yi, alpha=0.2)
        
if __name__ == '__main__':
    ps = np.random.rand(300, 2)
    plt.subplot(211)
    plot(ps)
    draw_regs(ps, km.k_means(ps, k), 0)
    plt.xlim(-0.1, 1.1)
    plt.ylim(-0.1, 1.1)

    plt.subplot(212)
    plot(ps)
    draw_regs(ps, cm.c_means(ps, k, m), 1)
    plt.xlim(-0.1, 1.1)
    plt.ylim(-0.1, 1.1)

    plt.show()
Example #27
def two_points():
    X = np.asarray([(0, 0), (0, 1)])
    clusters, centers = k_means.k_means(X, 2)
    numpy.testing.assert_array_equal(clusters.tolist(), [0, 1])
    numpy.testing.assert_array_equal(centers, X)
Example #28
    # .ix is long deprecated; .iloc selects the first bootstrap row by position
    percetion_m_childTest = percetion_m_childTest.append(bootstrap.Bootstrap(data2, loops).iloc[0])

percetion_m_childTest = percetion_m_childTest.reset_index(drop=True)
percetion_m_childTest.columns = ['mux', 'muy', 'sigmax', 'sigmay']
# print(percetion_m_childTest)
percetion_m_childTest.to_excel('3.xlsx')

s3 = [np.mean(percetion_m_childTest['mux']), np.mean(percetion_m_childTest['muy']),
      np.mean(percetion_m_childTest['sigmax']), np.mean(percetion_m_childTest['sigmay'])]
print('Mean of all bootstrap estimates, averaged directly:')
print(s3)



# 4. Clustering
s4 = k_means.k_means(percetion_m_childTest)
s4 = pd.DataFrame(s4)
print('After k_means clustering:')
print(s4)


# 5 KNN
# s5 = KNN.KNN(percetion_m_childTest)
# print('s5 ,KNN:',s5)

# 6. Sum of squared errors
aa = [100,120,10,15]
D =[]
for i in range(4):
    d = (aa[i] - s3[i]) ** 2
    D.append(d)
Example #29
    except FileNotFoundError:
        print(
            "ERROR: File", input_filename,
            "does not exist, please input the filename WITHOUT .ppm or check your directory"
        )
        print()
        file_found = False

while True:
    k_num = input("Enter number of colors to filter (whole numbers only):")

    # isnumeric() already rejects '.', so no separate decimal check is needed
    if k_num.isnumeric():
        break
    print("ERROR: Please enter an integer")

out_filename = input("Enter output image name (w/o .ppm):")

new_image = k_means(image, int(k_num))
save_ppm(out_filename + ".ppm", new_image)

if os.path.isfile(os.getcwd() + "/" + out_filename + ".ppm"):
    print("Image saved in", os.getcwd() + "/" + out_filename, end=".ppm\n")
elif os.path.isfile(out_filename + ".ppm"):
    print("Image saved in", out_filename, end=".ppm\n")
else:
    print("ERROR: Image process error, please restart the program")
Example #30
# Test set
TESTING_DATA = [
    (1.3, 4, 27, 1, 1, 1),
    (1.1, 6, 30, 10, 0, 0),
    (1.0, 4, 22, 11, 1, 1),
    (0.8, 10, 32, 1, 1, 0),
    (0.8, 10, 32, 6, 1, 1),
    (3.3, 5, 32, 15, 1, 0),
]

# Extract the clusters and assign each one a risk level
print("tuple has the form:")
print("(auto cost, experience, age, number of accidents, sex, married)")

clusters = k_means(TRAINING_DATA, 3)
lvls = {}
for idx in range(len(clusters)):
    cluster = clusters[idx]
    print("%s)" % idx)
    print(cluster)

    lvl = input("Please, input danger level:")
    lvls[idx] = lvl


# Testing and evaluation
for testing_data in TESTING_DATA:
    print("testing data:", testing_data)
    idx = find_nearest_cluster(testing_data, clusters)
    print("Man has %s danger level" % lvls[idx])
Example #31
def k_medians(points, k, initialization_method):
    if k <= 0 or len(points) <= k:
        return False
    # initialize k centers with zeroes
    k_centers = np.zeros((k, len(points[0])), dtype = np.float64)
    
    # initialization
    if initialization_method == FIRST_K_POINTS:
        print "FIRST_K_POINTS"
        
        k_centers = points[0:k]
    
    elif initialization_method == UNIFORMLY_K_POINTS:
        print "UNIFORMLY_K_POINTS"
        
        random_array = np.zeros(len(points), dtype = np.int)
        for i in range(random_array.size - 1):
            random_array[i + 1] =  random_array[i] + 1
        # permute to generate random array
        for i in range(random_array.size):
            j = random.randint(0, random_array.size - 1)
            e = random_array[i]
            random_array[i] = random_array[j]
            random_array[j] = e
            
        for i in range(len(k_centers)):
            k_centers[i] = points[random_array[i]]
                
    elif initialization_method == K_MEANS_PLUS_PLUS:
        print "K_MEANS_PLUS_PLUS"
        
        c0_index = random.randint(0, len(points) - 1)
        k_centers[0] = points[c0_index]
        
        distribution = np.zeros(len(points), dtype = np.float64)
        
        for r in range(1, len(k_centers)):
            for i in range(len(points)):
                nearest_center_index, nearest_distance = find_nearest_point(k_centers[0: r], points[i])
                distribution[i] = nearest_distance ** 2
            
            # normalization distribution
            sum_distances = np.sum(distribution)
            distribution /= sum_distances
            
            # accumulate distribution
            accumulate_distribution = np.zeros(len(distribution), dtype = np.float64)
            accumulate_distribution[0] = distribution[0]
            for i in range(1, len(distribution)):
                accumulate_distribution[i] = distribution[i] + accumulate_distribution[i - 1]
            
            random_number = random.random()
            for i in range(len(accumulate_distribution)):
                if random_number <= accumulate_distribution[i] and distribution[i] != 0:
                    k_centers[r] = points[i]
                    break
    
    elif initialization_method == GONZALES_ALGORITHM:
        print "GONZALES_ALGORITHM"
        
        # c0_index = random.randint(0, len(points) - 1)
        # k_centers[0] = points[c0_index]
        k_centers[0] = points[0]
        
        for t in range(1, len(k_centers)):
            
            nearest_center_index, cost_function = find_nearest_point(k_centers[0: t], points[0])
            t_th_center_index = 0
            for i in range(1, len(points)):
                nearest_center_index, nearest_distance = find_nearest_point(k_centers[0: t], points[i])
                         
                if nearest_distance > cost_function:
                    t_th_center_index = i
                    cost_function = nearest_distance
             
            k_centers[t] = points[t_th_center_index]
    elif initialization_method == K_MEANS_PLUS_PLUS_RESULT:
        k_centers, points_labels, k_means_cost_function_values = k_means.k_means(points, k, K_MEANS_PLUS_PLUS)
    else:
        return False
          
    # clustering
    # initialize k clusters, i.e., label array
    points_labels = np.zeros(len(points), dtype=int)
    k_medians_cost_function_values = []
    while True:
        # assignment
        for i in range(len(points)):
            nearest_center_index, nearest_distance = find_nearest_point(k_centers, points[i])
            points_labels[i] = nearest_center_index
        
        # compute k-means cost functions
        k_medians_cost_function_values.append(k_medians_cost_function(points, k_centers, points_labels))
        
        # update
        new_k_centers = np.zeros((len(k_centers), len(points[0])), dtype = np.float64)
        k_clusters = [[] for i in range(len(new_k_centers))]
        for j in range(len(points_labels)):
            k_clusters[points_labels[j]].append(points[j])
        
        # compute k-medians instead of k-means of each cluster
        # k-means
        # for i in range(len(new_k_centers)):
        #    new_k_centers[i] = np.mean(np.array(k_clusters[i]), axis = 0)
        # k-medians
        for i in range(len(new_k_centers)):
            new_k_centers[i] = np.median(np.array(k_clusters[i]), axis = 0)
            
        if np.linalg.norm(np.linalg.norm(new_k_centers - k_centers, axis = 1)) <= 10.0 ** (-10):
            k_centers = new_k_centers
            k_medians_cost_function_values.append(k_medians_cost_function(points, k_centers, points_labels))
            break
        else:
            k_centers = new_k_centers
    
    return k_centers, points_labels, k_medians_cost_function_values
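A hedged usage sketch (the initialization constants and find_nearest_point live in the surrounding module; the data here is illustrative):

import numpy as np

points = np.random.rand(200, 2)
centers, labels, costs = k_medians(points, 3, K_MEANS_PLUS_PLUS)
print('final cost:', costs[-1])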
Example #32
    # (requires Pillow on Python 3, PIL on Python 2)
    image = Image.open(imageName)
    pixels = image.load()

    # Get the image size
    width, height = image.size

    # Get the image name and extension
    name, extension = imageName.split(".")

    # Initialize a width x height pixel list
    pixelList = [[(0, 0, 0) for i in range(height)] for j in range(width)]

    # Flatten the pixel matrix into a single list
    for i in range(width):
        for j in range(height):
            pixelList[i][j] = pixels[i, j]

    flattenedPixels = [i for sublist in pixelList for i in sublist]

    # Run k_means for each K, remap the pixels, and save the image
    # (the flattened pixel list itself is left unchanged).
    for i in [2, 4, 8, 16, 32, 64, 128]:
        clusters, mapping = k_means(i, flattenedPixels, compression=True)
        for w in range(width):
            for h in range(height):
                pixels[w, h] = clusters[mapping[w * height + h]]
        image.save("ImagenesComprimidas/" + name + "K" + str(i) + "." +
                   extension)
Example #33
# plt.plot(costs[:,0],costs[:,1])
# plt.xlabel("Number of Clusters")
# plt.ylabel("Log Likelihood")
# plt.title("MoG Clusters")
# plt.grid()
# plt.savefig('results/mog_2d_ksweep.pdf')
# plt.close()


'''
PART 2.2.4
'''
costs = []
for k in range(1,10):
    # vcost = train_mog_model(dataset2, k, validation2, 3)[-1]
    clusters = k_means(dataset2, k)[0]
    kvcost = get_cost(validation2, clusters)
    costs.append([k,0,kvcost])
    # print "%d - %.3f - %.3f" %(k, vcost, kvcost)

# costs = np.array(costs)
# np.savetxt('results/costs100d.csv', costs, fmt="%d, %.3f, %.3f")
#
# plt.plot(costs[:,0],costs[:,1])
# plt.xlabel("Number of Clusters")
# plt.ylabel("Log Likelihood")
# plt.title("MoG Clusters")
# plt.grid()
# plt.savefig('results/mog_100d.pdf')
# plt.close()
Example #34
def four_points_3d():
    # More dimensions in the ndarray wouldn't help here: it can be reshaped to 2D anyway
    X = np.asarray([(0, 1, 1), (0, 1, 2), (0, 1, 10), (0, 1, 11)])
    clusters, centers = k_means.k_means(X, 2)
    numpy.testing.assert_array_equal(clusters.tolist(), [0, 0, 1, 1])
Example #35
eig_vector = eig_vector.T
req_eigen_vectors = [[0.0 for i in range(0, no_of_hidden_neurons)]
                     for j in range(0, no_of_output_neurons)]
req_eigen_vectors = np.array(req_eigen_vectors)

# Sort the eigenvectors by their eigenvalues (bubble sort)
for i in range(0, len(eig_value) - 1):
    for j in range(0, len(eig_value) - i - 1):
        if (eig_value[j] > eig_value[j + 1]):
            eig_value[j], eig_value[j + 1] = eig_value[j + 1], eig_value[j]
            eig_vector[j], eig_vector[j + 1] = eig_vector[j + 1], eig_vector[j]

# Take the eigenvectors of the n0 smallest eigenvalues and normalize them
for i in range(0, no_of_output_neurons):
    req_eigen_vectors[i] = eig_vector[i]

    req_eigen_vectors[i] = np.divide(
        req_eigen_vectors[i],
        np.linalg.norm(np.dot(hidden_matrix, req_eigen_vectors[i].T)))

hidden_matrix = np.array(hidden_matrix)
req_eigen_vectors = np.array(req_eigen_vectors)

output_matrix = np.dot(hidden_matrix, (req_eigen_vectors.T))

i = 0
print("Final Weights")
print(req_eigen_vectors)

k.k_means(output_matrix, no_of_clusters)
Example #36
def separate_center():
    X = np.asarray([(0, 1), (0, 2), (0, 3), (0, 10)])
    clusters, centers = k_means.k_means(X, 2, np.asarray([(0, 2), (0, 10)]))
    numpy.testing.assert_array_equal(clusters.tolist(), [0, 0, 0, 1])
    numpy.testing.assert_array_equal(centers.tolist(), [[0, 2], [0, 10]])
Example #37
    X[i*n_samples:(i+1)*n_samples, :] = x_current
    if not perform_profiling:
        plt.scatter(x_current[:, 0], x_current[:, 1], s=1000, c=plt.cm.viridis(clr), marker=markers[i % len(markers)])

if not perform_profiling:
    print("Points:")
    print(X)

if perform_profiling:
    pr = cProfile.Profile()
    pr.enable()

print('\nCalculating clusters\n')
asked_for_n_clusters = 3
colors = np.linspace(0, 1, asked_for_n_clusters)
closest_center, initial_centers = k_means.k_means(X, asked_for_n_clusters)

if perform_profiling:
    pr.disable()
    s = io.StringIO()
    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
    ps.print_stats()
    print(s.getvalue())
else:
    print("Closest centers:")
    print(closest_center)

    print("Initial centers:")
    print(initial_centers)

    print("Plotting the calculated clusters")
Example #38
# Get the test data and apply the scaler and PCA
test_ids = np.asarray(fe.get_test_dataset_song_ids())
test_features, test_genres = read_data(
    fe, test_ids)  #read_echonest_data(fe, test_ids)
test_features = scaler.transform(test_features)
test_features = pca.transform(test_features)

# The number of clusters equals the number of genres
number_of_clusters = get_number_of_clusters(fe)
# Get the list of all genres
all_genre_names = fe.get_all_genres()
'''
Step 2: K-means Clustering
'''
km = k_means(RANDOM_SEED)

# Generate an inertia plot comparing initialization strategies
n_run = 20
n_init_range = np.array([1, 5, 10, 15, 20])
inertia_plot(n_run, n_init_range, train_features, number_of_clusters,
             train_genres, all_genre_names, RANDOM_SEED)

# Initialize
n_trials = 100
km_models = []
evaluation_values = []
ls_train_genres = []
ls_validatation_genres = []
ls_train_centers = []
Example #39
import numpy as np
import string
from word_indexing import word_indexer
from k_means import k_means

data_path = "//home//sh//Desktop//june_project//data_quine//all_texts//1953e_On Mental Entities_Quine (1).txt"
data_points = word_indexer(data_path)
print(data_points)

test1 = k_means(3, data_points)
initialize = test1.cluster_centroid_initialization(2)
print(initialize)
clustering_step = test1.clustering(2, initialize)
print(clustering_step)
runit = test1.find_clusters(2)
print(runit)
cost = test1.distorsion_function(2, runit)
print(cost)
Example #40
def four_points_2d():
    X = np.asarray([(0, 1), (0, 2), (0, 10), (0, 11)])
    clusters, centers = k_means.k_means(X, 2)
    numpy.testing.assert_array_equal(clusters.tolist(), [0, 0, 1, 1])
Example #42
def extract_features(pubs,authors):
	m = len(conferences)
	n = len(authors)
	X = np.zeros((m,n))
	for p in pubs:
		aus = pubs[p]['authors']
		venue = pubs[p]['venue']
		for i in range(m):
			if conferences[i] in venue:
				for j in range(n):
					if authors[j] in aus:
						X[i][j] = X[i][j] + 1
	# remove duplicate
	X[kdd_i] = X[kdd_i] - X[pakdd_i] - X[pkdd_i]
	X[sdm_i] = X[sdm_i] - X[wsdm_i]
	return X

def member_print(mem):
	for m in mem:
		for i in range(len(m)):
			if m[i] == 1:
				print(i + 1)

if __name__ == '__main__':
	pubs = load_data(TRAIN_FILE)
	top_aus = top_authors(pubs)
	X = extract_features(pubs,top_aus)
	member = km.k_means(X,4)
	#member_print(member)
	print "Purity is %f" %ca.purity(ground_truth_label,member)
	print "NMI is %f" %ca.nmi(ground_truth_label,member)
Example #43
 def test_k_means(self):
     result = k_means(self.points, self.k)
     print(result)
Example #44
def covered(circle: tuple, r: float) -> int:
    # dist is assumed to return squared distance, hence the comparison with r**2
    return sum(dist(circle, point) <= r**2 for point in points)


def add(u1: set, u2: set) -> int:
    return len(u2) - len(u2 & u1)


N = int(input())
points = [tuple(map(int, input().split())) for i in range(N)]
M = int(input())
radii = [int(input()) for i in range(M)]
circle_order = sorted(range(M), key=lambda i: radii[i])

centers, ids, groups = k_means(M, points)


def f(i):
    return -sum(dist(point, centers[i]) for point in groups[i])


center_order = sorted(range(M), key=f)

active = set(points)
circles = [None] * M
for i in range(M):
    # print(i)
    t = kdTree(list(active))
    rk, k = circle_order[i], center_order[i]
    candidates = [centers[k]] + groups[k]