Beispiel #1
0
 def main(self, args):
     """Run DBSCAN over the three benchmark datasets.

     For each dataset: load it, shuffle deterministically, plot its
     k-distance curve, then run DBSCAN with an epsilon that was chosen
     manually by reading that curve.
     """
     seed = 71
     # (file name, manually tuned epsilon, dataset index); the original
     # repeated this whole body three times with only these values changed.
     runs = [("dataset1.txt", 0.49, 1),
             ("dataset2.txt", 0.6, 2),
             ("dataset3.txt", 0.2, 3)]
     for fileName, eps, index in runs:
         # Blank line between datasets, matching the original output.
         print(("" if index == 1 else "\n") + "For dataset" + str(index))
         dataSet = KMeans.readDataSet(fileName)
         # Deterministic shuffle so results are reproducible run to run.
         random.Random(seed).shuffle(dataSet)
         self.noOfLabels = DataPoints.getNoOFLabels(dataSet)
         self.getEpsilonFromCurve(dataSet)
         # Epsilon is set manually from the curve rather than computed;
         # self.getEpsilon(dataSet) was the automatic alternative.
         self.e = eps
         print("Esp :" + str(self.e))
         self.dbscan(dataSet, index)
Beispiel #2
0
def main():
    """Parse CLI options, run K-Means on a CSV dataset and plot the result."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('path', type=str, help="path to dataset")
    arg_parser.add_argument('--k', type=int, default=3,
                            help="quantity of clusters (default 3)")
    arg_parser.add_argument('--it', type=int, default=100,
                            help="max iterations (default 100)")
    arg_parser.add_argument('--tol', type=float, default=0.001,
                            help="tolerance (default 0.001)")
    options = arg_parser.parse_args()

    manager = CSVManager()
    frame = manager.read(options.path)
    frame = manager.replaceNan(frame)

    cleaned = manager.deleteObjectColumns(frame)
    matrix = manager.convertCSVToMatrix(cleaned)

    model = KMeans(options.k, options.it, options.tol)
    model.fit(matrix)

    # Cluster centres as large black circles.
    for key in model.centroids:
        plt.scatter(model.centroids[key][0], model.centroids[key][1],
                    marker="o", color="k", s=150, linewidths=5)

    # Members of each cluster as crosses in a random colour.
    for label in model.classifications:
        shade = randomColor()
        for point in model.classifications[label]:
            plt.scatter(point[0], point[1],
                        marker="x", color=shade, s=60, linewidths=2)

    plt.show()
Beispiel #3
0
    def main(self, args):
        """Fit a Gaussian mixture model on each of the three benchmark datasets.

        For each dataset: load it, read the number of true labels as K, reset
        the responsibility matrix W (one row per point, one column per
        component) and the mixture weights w, then run EM via self.GMM().
        """
        for index in range(1, 4):
            # Triple blank line between datasets, matching the original output.
            print(("" if index == 1 else "\n\n\n") + "For dataset" + str(index))
            self.dataSet = KMeans.readDataSet("dataset" + str(index) + ".txt")
            self.K = DataPoints.getNoOFLabels(self.dataSet)
            # W: per-point component responsibilities; w: mixture weights.
            self.W = [[0.0 for _ in range(self.K)]
                      for _ in range(len(self.dataSet))]
            self.w = [0.0 for _ in range(self.K)]
            self.GMM()
Beispiel #4
0
    def main(self, args):
        """Run DBSCAN with an automatically estimated epsilon on three datasets.

        The original repeated the same seven statements for each dataset;
        this loops over the dataset index instead.
        """
        seed = 71
        for index in range(1, 4):
            # Blank line between datasets, matching the original output.
            print(("" if index == 1 else "\n") + "For dataset" + str(index))
            dataSet = KMeans.readDataSet("dataset%d.txt" % index)
            # Deterministic shuffle so results are reproducible run to run.
            random.Random(seed).shuffle(dataSet)
            self.noOfLabels = DataPoints.getNoOFLabels(dataSet)
            # Epsilon is estimated from the data here (no manual override).
            self.e = self.getEpsilon(dataSet)
            print("Esp :" + str(self.e))
            self.dbscan(dataSet)
Beispiel #5
0
    def fit(self, data):
        """Cluster *data* by refining K-Means centroids with a PSO search.

        A plain K-Means run seeds one PSO candidate (its centroids flattened
        into a single vector); the swarm then optimises the objective and the
        best particle is reshaped back into self.centroids.
        """
        seed_model = KMeans(n_clusters=self.n_clusters)
        seed_model.fit(data)

        # Flatten the K-Means centroids into one vector: the PSO seed.
        seed_vector = np.array([seed_model.centroids[key]
                                for key in seed_model.centroids]).ravel()

        self.dim = data.shape[1]
        self.pso = PSO(dim=self.dim * self.n_clusters,
                       minf=0,
                       maxf=1,
                       swarm_size=self.swarm_size,
                       n_iter=self.n_iter,
                       w=self.w,
                       lb_w=self.lb_w,
                       c1=self.c1,
                       c2=self.c2)
        self.pso.set_candidate(seed_vector)
        self.pso.optimize(self.__objective_function,
                          customizable=True,
                          dim=self.dim,
                          n_clusters=self.n_clusters,
                          data=data)

        # Reshape the best particle back into one centroid per row and
        # store them keyed by cluster index.
        best = self.pso.global_optimum.pos.reshape((self.n_clusters, self.dim))
        self.centroids = {index: row for index, row in enumerate(best)}
Beispiel #6
0
def main():
    """Run K-Means for k in 2..4 on a CSV dataset, plot each run and save purity."""
    path = sys.argv[1]

    manager = CSVManager()
    frame = manager.read(path)
    frame = manager.replaceNan(frame)

    cleaned = manager.deleteObjectColumns(frame)
    matrix = manager.convertCSVToMatrix(cleaned)

    try:
        for k in range(2, 5):
            model = KMeans(k)
            model.fit(matrix)

            # Cluster centres as large black circles.
            for key in model.centroids:
                plt.scatter(model.centroids[key][0], model.centroids[key][1],
                            marker="o", color="k", s=150, linewidths=5)

            # Members of each cluster as crosses in a random colour.
            for label in model.classifications:
                shade = randomColor()
                for point in model.classifications[label]:
                    plt.scatter(point[0], point[1],
                                marker="x", color=shade, s=60, linewidths=2)

            plt.show()

            confusionMatrix, purity = model.purity()
            saveData(confusionMatrix, purity, path, k)
    except Exception:
        print("An empty cluster was found, please run the program again. This program does not handle empty clusters")
Beispiel #7
0
def main():
    """Compute the simplified silhouette (SSWC) of K-Means runs for k in 2..4.

    Reads the CSV given on the command line, cleans it, fits K-Means for each
    k, and writes one "K = k; SSWC = value" line per run to result/result.txt.
    """
    path = sys.argv[1]

    csvManager = CSVManager()
    df = csvManager.read(path)
    df = csvManager.replaceNan(df)

    # Drop non-numeric columns once (the original called this twice in a row,
    # which was redundant work on the same frame).
    formattedCSV = csvManager.deleteObjectColumns(df)
    matrix = csvManager.convertCSVToMatrix(formattedCSV)

    try:
        with open('result/result.txt', 'w') as file:
            res = ''
            for k in range(2, 5):
                kmeans = KMeans(k)
                kmeans.fit(matrix)

                simplifiedSilhouette = SimplifiedSilhouette(
                    formattedCSV, kmeans)
                sswc = simplifiedSilhouette.calculate()
                res += 'K = ' + str(k) + '; ' + 'SSWC = ' + str(sswc) + '\n'
            file.write(res)

    except Exception:
        print("An empty cluster was found, please run the program again. This program does not handle empty clusters")
Beispiel #8
0
def test_KMeans_convert():
    """
    Test that KMeans has a working test abstract method
    """
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    model = KMeans(frame)
    assert model.convert(3)
Beispiel #9
0
def train_KMeans_train():
    """
    Test that KMeans has a working train abstract method
    """
    # NOTE(review): the name starts with `train_`, not `test_`, so pytest will
    # never collect this function; rename to `test_KMeans_train` to run it.
    some = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

    m = KMeans(some)
    assert m.train()
Beispiel #10
0
def test_KMeans_init():
    """
    Given a pandas dataframe, test the creation of a KMeans class.
    """
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    model = KMeans(frame)
    # The stored data must be the frame we constructed the model with.
    assert frame.equals(model.getData())
Beispiel #11
0
def test_KMeans_distance():
    """
    test that finding the sum of squared distance is correct
    """
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    model = KMeans(frame)

    a = pd.Series([1, 2])
    b = pd.Series([1, 4])

    # Only the second component differs: (2-4)^2 = 4 over 2 dimensions.
    assert model.distance(a, b) == np.sqrt(4 / 2)
Beispiel #12
0
def treeClassification(data):
    """Cluster the raw feature values with K-Means and print a cluster report."""
    model = KMeans(n_clusters=6, max_iter=200)
    model.fit(data.values, True)
    cluster_report(data, model.prediction)
Beispiel #13
0
def main():
    """PCA-reduce the wine dataset, cluster it with K-Means, evaluate, plot, save."""
    dim = 2
    num_class = 3
    dataset_dir = '../input/wine.csv'

    train_x, train_y, raw_data = data_loader(dataset_dir)

    # Project onto the first `dim` principal components.
    pca = PCA(first_k=dim, use_threshold=False, threshold=0.5)
    projected = pca.fit(train_x)

    kmeans = KMeans(K=num_class)
    centres, predicted = kmeans.fit(projected)

    result = evaluate(projected, train_y, predicted, k=num_class)
    visualization(centres, projected, predicted, dim)
    save_to_csv(raw_data, predicted)
    print(result)
Beispiel #14
0
def attr_analysis(data):
    """Box-plot every attribute of each K-Means cluster, one figure per cluster."""
    model = KMeans(n_clusters=6, max_iter=200)
    model.fit(data.values, True)

    for cluster in model.clusters:
        # One subplot per attribute, laid out on a 3x6 grid.
        for col_idx in range(len(cluster.data[0])):
            values = _column(cluster.data, col_idx)
            axis = plt.subplot(3, 6, col_idx + 1)
            axis.set_title(data.columns[col_idx], {'fontsize': 6})
            plt.boxplot(values)

        plt.show()
def loadFileKMeans(file, classNameIndex):
    """Build a KMeans instance from a CSV file of iris-style records.

    file: path to the data file; classNameIndex: 1-based index of the class
    name column, which is dropped before the numeric conversion.
    Returns the populated KMeans instance, or None if reading fails.
    """
    k = KMeans(constants.getN())
    fileHelper = FileHelper()

    # Two hard-coded seed classes with manually chosen centre vectors.
    c1 = KMeansClass(0, "Iris-setosa")
    c1.setVCenter([4.6, 3.0, 4.0, 0.0])
    c2 = KMeansClass(1, "Iris-versicolor")
    c2.setVCenter([6.8, 3.4, 4.6, 0.7])

    k.addClass(c1)
    k.addClass(c2)
    try:
        f = fileHelper.openReadOnlyFile(file)

        lineas = f.readlines()
        uMatrix = constants.getKMeansInitializeUMAtrix(len(lineas))
        k.setUMatrix(uMatrix)

        for linea in lineas:
            xVector = linea.strip("\r\n").split(",")
            # Drop the class-name column (1-based index) before converting.
            del xVector[classNameIndex - 1]
            xVector = [float(x) for x in xVector]

            k.addXVector(xVector)

        return k
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit are
        # no longer swallowed; still returns None on failure, as before.
        print("Error al leer el fichero")
Beispiel #16
0
def sse_plot(X, start=2, stop=20):
    """Plot the K-Means SSE (inertia) for every cluster count in [start, stop)."""
    sse_values = []
    for n_clusters in range(start, stop):
        print("====ITERATION:", n_clusters)
        model = KMeans(n_clusters=n_clusters, max_iter=1000)
        model.fit(X, True)
        sse_values.append(model.sum_squared_error())

    # Elbow plot: SSE against the number of clusters.
    plt.figure(figsize=(12, 6))
    plt.plot(range(start, stop), sse_values, marker='o')
    plt.xlabel('Number of Clusters')
    plt.ylabel('SSE')
    plt.title('Inertia plot with K')
    plt.xticks(list(range(start, stop)))
    plt.show()
 def __init__(self,data, ansdict,numCen, tabuindex, pheromap, alpha, beta, max_itter, decay):
     # Agent for an ACO-style k-means search (the names suggest ant colony
     # optimisation: pheromone map, tabu list, alpha/beta weights, decay).
     # Seed centroids by sampling numCen random points from data; each point
     # is copied so centroid updates do not mutate the data points themselves.
     self.centroid = [data[random.randrange(0,len(data))].copy()for i in range(numCen)]
     # NOTE(review): list.copy() is shallow -- defCentroid shares the inner
     # centroid lists with self.centroid, so in-place edits to a centroid show
     # up in both.  Confirm whether a deep copy was intended here.
     self.defCentroid = self.centroid.copy()
     self.clust = KMeans(data, self.centroid, ansdict)
     self.max_itter = max_itter
     self.alpha = alpha
     self.beta = beta
     self.pheromap = pheromap
     # Tabu memory; populated later by the search.
     self.tabu = {}
     # Pheromone decay rate.
     self.decay = decay
     self.fitness = 0
     self.numCen = numCen
     self.data = data
     self.tabuIndex = tabuindex
    def computeGMeans(self, X):
        """Recursively split X while the split passes the Gaussian test.

        Splits X into two children with KMeans.split, projects the points onto
        the axis joining the two child centres, and keeps splitting while the
        projection fails to look Gaussian; otherwise records X's mean as a
        final centroid.
        """
        centre = np.mean(X, axis=0)
        splitter = KMeans()
        splitter.split(dataSet=X)

        # Project each point onto the axis joining the two child centres.
        direction = splitter.c_0 - splitter.c_1
        projected = scale(X.dot(direction) / (direction.dot(direction)))

        if GMeans.checkGaussianStatistic(projected, self.strickLevel):
            self.computeGMeans(splitter.cluster_0)
            self.computeGMeans(splitter.cluster_1)
        else:
            self.centroids.append(centre)
 def initialize_(self, X):
     """Initialise the responsibility matrix cond_prob_ (shape n x k_).

     'kmeans' initialisation gives a hard one-hot assignment from a single
     K-Means run; anything else gives a random soft assignment with each row
     normalised to sum to one.
     """
     n, p = X.shape
     if self.initialization_ == 'kmeans':
         # Hard assignment: a one-hot row per point from K-Means labels.
         clusterer = KMeans(nr_clusters=self.k_, n_init=1)
         clusterer.fit(X)
         labels = clusterer.labels_
         self.cond_prob_ = np.zeros((n, self.k_))
         for row, label in enumerate(labels):
             self.cond_prob_[row, int(label)] = 1
     else:
         # Random soft assignment, rows normalised to probabilities.
         rand = np.random.rand(n, self.k_)
         self.cond_prob_ = rand / np.sum(rand, axis=1)[:, np.newaxis]
Beispiel #20
0
def main():
    """Cluster data2.txt with k-means and visualise points, centres and seeds."""
    # Load the data.
    X = handle_data('data2.txt')
    model = KMeans(5)
    model.fit(X)

    # One colour per cluster label, repeated so any k is covered.
    palette = 10 * [
        'gold', 'mediumseagreen', 'orangered', 'lightpink', 'coral',
        'mediumslateblue', 'violet', 'magenta'
    ]
    plt.figure(figsize=(10, 10))

    # Scatter every point in its cluster's colour.
    for label in model.classes:
        shade = palette[label]
        for point in model.classes[label]:
            plt.scatter(point[0], point[1], color=shade, s=10)

    # Final cluster centres as black crosses.
    for key in model.centroids:
        plt.scatter(model.centroids[key][0],
                    model.centroids[key][1],
                    c='k',
                    s=100,
                    marker="x")

    # Initial random seed points as black stars.
    for idx in range(model.k):
        plt.scatter(model.randoms[idx][0],
                    model.randoms[idx][1],
                    marker='*',
                    c='k',
                    s=100)

    # Plot decoration.
    plt.legend(['* = Initial random points', 'X = Final cluster centers'])
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title('k-Means')
    plt.show()
    print('\t\t\tIteration:', model.iterations)
    print('\n\t\t\tk value: ', model.k)
    def main():
        """Cluster a CSV with k-means and k-means++ and plot both via PCA.

        Expects ``script.py <csv_path> <K>`` on the command line.
        """
        if len(sys.argv) < 3:
            # Was ``assert Error(...)``: an assert on a truthy value never
            # fires, and asserts vanish under ``python -O``; raise instead.
            raise Error("need input argument.")

        _, csv_path, K = sys.argv
        K = int(K)
        X = pd.read_csv(csv_path).values
        X = PCAHelper.parse_data(X)  # Steps 1-5

        # k-means and k-means++ execution
        km_clusters, km_centroids, km_distances = KMeans.execute(X, K)
        kmpp_clusters, kmpp_centroids, kmpp_distances = KMeanspp.execute(X, K)
        print("km dist={}, kmpp dist={}".format(km_distances[-1],
                                                kmpp_distances[-1]))

        # Project down to two principal components for plotting.
        X = PCAHelper.pca_helper(X, 2)

        # Plot each clustering over the projected data.
        Utils.plot_data2(X,
                         K,
                         km_clusters,
                         title="K-means clustering with PCA",
                         xaxis="First Principal Component",
                         yaxis="Second Principal Component")
        Utils.plot_data2(X,
                         K,
                         kmpp_clusters,
                         title="K-means++ clustering with PCA",
                         xaxis="First Principal Component",
                         yaxis="Second Principal Component")
Beispiel #22
0
 def extract(self, image_path, max_colors=6, method="kmeans", save_output=True, show_img=True):
     # Extract a colour theme of up to max_colors colours from the image at
     # image_path and paint it onto self.canvas, one swatch per colour.
     # NOTE: this block is Python 2 (print statement syntax).
     print "path: ", image_path
     source = cv2.imread(image_path)
     # Choose the quantizer: k-means clustering or median-cut (MMCQ).
     if method == "kmeans":
         km = KMeans(source, max_colors)
     else:
         km = MMCQ(source, max_colors)
     theme = km.quantize()
     for i, color in enumerate(theme):
         self.addToCanvas(color, i, max_colors)
     if show_img:
         self.canvas.show()
     if save_output:
         # Save under ../swatches/ using the source image's file name.
         img_name = image_path.split("/")[-1]
         swatch_path = "../swatches/" + img_name
         self.canvas.save(swatch_path)
Beispiel #23
0
 def __init__(self, initializer='support', cov_type='full'):
     """Set up an (initially unfitted) Gaussian mixture model wrapper.

     initializer: 'support' or 'uniform' initialisation scheme.
     cov_type: 'full', 'tied', 'diag' or 'spherical' covariance structure.
     Raises ValueError for any other value.
     """
     # Validate explicitly: the original used ``assert``, which is stripped
     # under ``python -O`` and so silently skips validation there.
     if initializer not in ['support', 'uniform']:
         raise ValueError('Please select initialization scheme as support or uniform')
     if cov_type not in ['full', 'tied', 'diag', 'spherical']:
         raise ValueError('Please select covariance type as full, tied, diag, or spherical')
     self.kmeans_cls_ = KMeans()
     self.means_ = None
     self.cov_ = None
     self.mixture_weights_ = None
     self.membership_weights_ = None
     self.k_ = None
     self.ll_graph_ = []
     self.initializer_ = initializer
     self.cov_type_ = cov_type
Beispiel #24
0
def main(argv):
    """Cluster movie data with the init strategy chosen in argv.

    init == "random": plain k-means; "k-means++": k-means++ seeding;
    "1d": PCA to one dimension followed by exact 1-d k-means.  Each branch
    writes the resulting cluster assignment to output.csv.
    """
    X, K, init, movie_ids = DataProcessing.process_input(argv)

    if init == "random":
        clusters, centroids, distances = KMeans.execute(X, K)
        print(
            "Ran k-means. Start Distance={:.0f}, End Distance={:.0f}. Clusters = {}."
            .format(distances[0], distances[-1], clusters))

        Utils.write_output_csv(clusters, "output.csv", movie_ids)
    elif init == "k-means++":
        clusters, centroids, distances = KMeanspp.execute(X, K)
        print(
            "Ran k-means++. Start Distance={:.0f}, End Distance={:.0f}. Clusters = {}."
            .format(distances[0], distances[-1], clusters))
        Utils.write_output_csv(clusters, "output.csv", movie_ids)
    elif init == "1d":
        X = PCAHelper.pca_helper(X, 1)
        # NOTE(review): astype returns a copy, so this line has no effect as
        # written -- confirm whether ``X = X.astype(np.float16)`` was intended.
        X.astype(np.float16)
        distances_by_k, cluster, centroids = OneDKmeans(
            X, K).run()  # KMeans.execute(X, k)
        print("Ran 1d K-means. Distance={}".format(distances_by_k[-1]))
        # plot_data_opt_k3(distances_by_k, list(range(1,K+1)))
        Utils.write_output_csv(cluster, "output.csv", movie_ids)
    else:
        # Was ``assert Error(...)``: an assert on a truthy value never fires
        # and is stripped under ``python -O``; raise explicitly instead.
        raise Error("init parameter was not inputted correctly!")
Beispiel #25
0
def test_KMeans_dtype():
    """
    Test that the initialization of a KMeans class throws a type error for
    things that are not pandas dataframes
    """
    bad_input = "A wrong data type of type string"
    with pytest.raises(TypeError):
        KMeans(bad_input)
Beispiel #26
0
 def execute(X, K):
     """Run k-means++ on X with K clusters.

     Returns (clusters as ndarray, centroids, per-iteration distances).
     """
     # Seed centroids with the k-means++ strategy, then refine with the
     # standard Lloyd iteration for a fixed 15 rounds.
     seeds = KMeanspp.initialize_centroids(X, K)
     clusters, centroids, k_means_dist = KMeans.run_k_means(X,
                                                            seeds,
                                                            n_iter=15)
     return np.array(clusters), centroids, k_means_dist
Beispiel #27
0
def main():
    """Demo: fit KMeans on synthetic blobs and plot the resulting clustering."""
    random_seed = 0
    iteration = 50
    init_method = 'kmeans++'

    # Four well-separated gaussian blobs.
    X, y_true = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=random_seed)
    plt.scatter(X[:, 0], X[:, 1], s=4, c='blue')

    kmeans = KMeans()
    # kmeans.fit_range(X, list(range(3, 7)), random_seed=random_seed, iteration=iteration, init_method=init_method)
    kmeans.fit(X, 4, random_seed=random_seed, iteration=iteration, init_method=init_method)
    y_pred = kmeans.predict(X)

    # Predicted labels coloured by cluster, centroids overlaid in red.
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(X[:, 0], X[:, 1], c=y_pred, s=4, cmap='viridis')
    centres = kmeans.centroids
    axes.scatter(centres[:, 0], centres[:, 1], c='red', s=15, alpha=0.5)
    plt.show()
def runRegressionAlgorithms(dataset):
    """Run KNN for several k values on the dataset, then K-Means sized to it."""
    # KNN with each candidate neighbourhood size.
    for k_value in (5, 10, 15):
        print("Running KNN with K of {}".format(k_value))
        dataset.runAlgorithm(KNearestNeighbor(k_value))

    # K-Means with k proportional to the dataset size (a quarter, rounded up).
    cluster_count = math.ceil(len(dataset.data) / 4)
    KMeans(dataset, cluster_count)
def main():
    """Build rescue plans from k-means clusters of test.txt.

    NOTE(review): this block is Python 2 (print statements).  `plan` and
    `saved_num` are assigned but never used below, and the final loop's
    `num` is unused -- the block may be truncated in this chunk.
    """
    kmeans = KMeans("test.txt")
    clusters = kmeans.k_means()
    plan = {}
    saved_num = 0
    ambu_id_start = 1
    plans = []
    # Process clusters in deterministic order of their cluster id.
    sorted_cluster = sorted(clusters.items(), key=lambda x: x[0].id)
    for item in sorted_cluster:
        # item[0] is the cluster object, item[1] its member points.
        cluster_plan = RescuePlan(item[0], item[1], ambu_id_start)
        print item[0].result_print()
        # Advance the starting ambulance id by this cluster's ambulance count.
        ambu_id_start += item[0].ambu
        plans.append(cluster_plan)

    print ""

    for plan in plans:
        num = plan.plan()
 def clustering(self, x_data):
     """Spectral clustering of x_data.

     Builds a similarity graph, forms the unnormalised Laplacian L = D - W,
     embeds the points into the K eigenvectors of the smallest eigenvalues,
     and clusters that embedding with k-means.

     Returns (clusters, final_grouping): per-cluster lists of point indices
     and the one-hot membership matrix produced by k-means.
     """
     # Affinity matrix W of the transposed data (one node per column of x_data).
     similarity_matrix = self.similarity_matrix(x_data.T)
     # Degree of each node = sum of its edge weights.
     degree_vector = []
     for weight_row in similarity_matrix:
         degree = np.sum(weight_row)
         degree_vector.append(degree)
     degree_matrix = np.diag(np.array(degree_vector))
     # Unnormalised graph Laplacian.
     laplacian_matrix = degree_matrix - similarity_matrix
     eig_values, eig_vectors = np.linalg.eig(laplacian_matrix)
     # Sort the eigenpairs ascending and keep the K eigenvectors with the
     # smallest eigenvalues as the spectral embedding.
     idx = eig_values.argsort()
     eig_values = eig_values[idx]
     eig_vectors = eig_vectors[:, idx]
     eig_vectors_smallest = eig_vectors[:, 0:self.K]
     # print(eig_vectors_smallest)
     # cluster1 = []
     # cluster2 = []
     # for i in range(len(eig_vectors_smallest[:, 0])):
     #     if eig_vectors_smallest[i, 0] == 0:
     #         print("dsjfndfj")
     #         cluster1.append(eig_vectors_smallest[i])
     #     else:
     #         cluster2.append(eig_vectors_smallest[i])
     # cluster1 = np.array(cluster1)
     # cluster2 = np.array(cluster2)
     # X11 = cluster1[:, 0]
     # X21 = cluster1[:, 1]
     # X12 = cluster2[:, 0]
     # X22 = cluster2[:, 1]
     # plt.title("Clustering of 2-dim(axes found using Spectral Clustering) data using Spectral Clustering on dataset2")
     # plt.scatter(X11, X21, color="r")
     # plt.scatter(X12, X22, color="b")
     # plt.show()
     # Cluster the (real part of the) embedding with k-means; np.real guards
     # against the tiny imaginary parts np.linalg.eig can return.
     kmeans = KMeans(K=self.K, L=1)
     centroids, num_of_points_group, grouping, final_grouping = kmeans.clustering(
         np.real(eig_vectors_smallest.T))
     # Convert the one-hot grouping matrix into index lists per cluster.
     clusters = []
     for j in range(len(final_grouping[0, :])):
         cluster_points = []
         for i in range(len(final_grouping[:, j])):
             if final_grouping[i, j] == 1:
                 cluster_points.append(i)
         clusters.append(cluster_points)
     return clusters, final_grouping
Beispiel #31
0
 def test(self, k, data, random=False):
     # Run k-means (via kmeans.iterate) and plot each cluster's members and
     # its representative vector.
     # NOTE(review): listDX / listDY in the plt.plot call below are not
     # defined in this method or any visible scope -- presumably module-level
     # reference data; as written this raises NameError unless they exist
     # globally.  Confirm before relying on this method.
     kmeans = KMeans()
     book = kmeans.iterate(k, data, random)
     print('---Start---')
     for i in range(len(book)):
         # book[i] == (representative vector, member vectors).
         b = book[i]
         print(b[0])
         listX = []
         listY = []
         repVecX = [b[0][0]]
         repVecY = [b[0][1]]
         for vec in b[1]:
             listX.append(vec[0])
             listY.append(vec[1])
         # red circles: listDX/listDY, green triangles: members,
         # blue squares: the representative vector.
         plt.plot(listDX, listDY, 'ro', listX, listY, 'g^', repVecX,
                  repVecY, 'bs')
         plt.axis([-5, 30, -5, 30])
         plt.show()
         plt.clf()
     print('---End---')
Beispiel #32
0
def visualization_2d(data):
    """Project data to 2-D with PCA, cluster it, and scatter-plot the clusters."""
    # Reduce dimensions of the dataset based on data variance (PCA).
    pca = PCA(n_components=2)
    reduced = pca.fit_transform(data)

    # K-Means on the PCA projection (n_clusters 6 or 7).
    model = KMeans(n_clusters=6, max_iter=200)
    model.fit(reduced, True)

    palette = ['red', 'green', 'blue', 'purple', 'orange', 'yellow', 'gray']
    for idx, cluster in enumerate(model.clusters):
        first_pc = [row[0] for row in cluster.data]
        second_pc = [row[1] for row in cluster.data]
        plt.scatter(first_pc, second_pc, c=palette[idx], label='cluster ' + str(idx))

    plt.show()
Beispiel #33
0
def main():
    """Generate Gaussian clusters, run KMeans on them, and save comparison plots."""
    logging.basicConfig(filename="result/log.txt",
                        filemode='w',
                        format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S',
                        level=logging.DEBUG)
    logging.getLogger().setLevel(logging.INFO)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-n_clusters', type=int, default=5)
    arg_parser.add_argument('-n_points', type=int, default=100)
    opt = arg_parser.parse_args()

    tester = Tester(n_gaussian_clusters=opt.n_clusters)

    # Sample points from n_clusters different 2-d multivariate gaussians.
    data, labels = tester.generate_2d_gaussian_points(
        how_many_per_each_gaussian=opt.n_points)
    logging.info(" Generated {} data points from {} different 2 dimensional "
                 "multivariate gaussian distributions. ({} data points for "
                 "each cluster.)".format(opt.n_clusters * opt.n_points,
                                         opt.n_clusters, opt.n_points))

    # Plot the raw data and the ground-truth gaussian assignment.
    utils.draw(data, labels, without_label_color=True, means=None,
               title="Data", save="result/raw.png", show=False)
    utils.draw(data, labels, without_label_color=False, means=tester.means,
               title="Gaussian", save="result/gaussian.png", show=False)

    # Plot the KMeans prediction.
    kmeans = KMeans(n_cluster=opt.n_clusters)
    predicted_labels, predicted_centers = kmeans.fit(data)
    utils.draw(data, predicted_labels, without_label_color=False,
               means=predicted_centers, title="KMeans",
               save="result/kmeans.png", show=False)

    # Stitch the three images into one summary picture.
    utils.concatenate_pngs(
        ["result/raw.png", "result/gaussian.png", "result/kmeans.png"],
        "result/final.png")
Beispiel #34
0
def main():
    """Fit K-Means on the iris measurements and print a prediction per row."""
    km = KMeans(3)

    iris = pd.read_csv("iris.csv")
    data = np.array(
        iris[["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"]].values.tolist()
    )

    km.fit(data)

    print("cluster centers: %s" % km.cluster_centers)

    for d in iris.values:
        # d[0] is the CSV row index, d[1]..d[4] the four measurements and
        # d[5] the species label.  The original passed d[2] twice and
        # skipped d[1] (Sepal.Length), so predictions used the wrong
        # feature vector.
        prediction = km.predict([[
            d[1],
            d[2],
            d[3],
            d[4]
        ]])
        print(d[5]+" - "+str(prediction[0]))
def kmeans_test(vid_src):
    """Run the KMeans background-subtraction model over a video source.

    Reads frames from *vid_src* until the stream ends or 'q' is pressed,
    showing the grayscale frame and the extracted foreground mask.
    """
    _, frame = vid_src.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Model is seeded with the first grayscale frame; 3 is presumably the
    # number of clusters/modes per pixel -- confirm against KMeans.
    model = KMeans(gray, 3)

    # applying background detection
    while frame is not None:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        fg = model.apply(gray)

        cv2.imshow('img', gray)
        cv2.imshow('fg', fg)

        # The original copied the frame into an unused `prev_frame` here;
        # the copy had no effect and was removed.
        _, frame = vid_src.read()

        # print model.get_background_model().get_density_range(used_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Beispiel #36
0
"""
A working demo using KMeans

"""

import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt

data = sio.loadmat('data.mat')
X = np.array(data['X'])

from KMeans import KMeans

k = 3
est = KMeans(k)

c = est.train(X)

colors=np.array(['green', 'red', 'blue'])
# lets plot on matplotlib
for i in range(k):
    x = X[np.where(c == i)[0]]
    plt.scatter(x[:, 0], x[:, 1], color=colors[i])

# plt.savefig('clustering_example.png')
plt.show()
Beispiel #37
0
from scipy import io

############# FILE STUFF ############# 
# Path to the MNIST images .mat file.
trainFileMNIST = "./mnist_data/images.mat"

trainMatrix = io.loadmat(trainFileMNIST)  # Dictionary

############# GET DATA ############# 
# NOTE(review): this block is Python 2 (print statements); np, KMeans and
# visualize must be defined elsewhere in the original file.
print 20 * "#", "Getting Data", 20 * "#"
imageData = np.array(trainMatrix['images'])
imageData = np.rollaxis(imageData, 2, 0)  # move the index axis to be the first 

dataShape = np.shape(imageData)
print "Image Data Shape", dataShape

# Flatten each image into a 1-D feature vector for clustering.
imageDataFlat = []
for elem in imageData:
    imageDataFlat.append(elem.flatten())

dataShape = np.shape(imageDataFlat)
print "Image Data Flat Shape", dataShape

# Fit and visualise k-means for several cluster counts.
num_clusters = [5, 10, 20]

for cluster in num_clusters:
    print 20 * "#", "Num Clusters:", cluster, 20 * "#"
    KM = KMeans(cluster, max_iter=10)
    KM.fit(imageDataFlat)
    visualize(KM.cluster_centers_, cluster)
Beispiel #38
0
from KMeans import KMeans

# Build the k-means model for id '2295420' -- presumably a dataset/user
# identifier; confirm against build_kMeans.
kmeans_obj = KMeans()
kmeans_obj.build_kMeans('2295420')
'''
Created on 21-May-2015

@author: amilgeorge
'''
import numpy as np
from KMeans import KMeans

if __name__ == '__main__':
    # NOTE(review): this block is Python 2 (print statement).  `mean`/`cov`
    # are leftovers from the commented-out sampling below; cov is 2x3 and
    # would not be a valid covariance matrix anyway.
    mean = [0,0,0]
    cov = [[1,1,1],[0,1,0]]

    import matplotlib.pyplot as plt
    #x = np.random.multivariate_normal(mean,cov,5000)
    # Smoke-test: initialise 2 centroids from a 2x3 array of ones.
    k=KMeans()
    s = np.ones((2,3))
    k.init_centroids(2, s)
    #plt.plot(x,y,'x'); plt.axis('equal'); plt.show()
    print "Theheheh"
import random
import numpy as np
import matplotlib.pyplot as plt
import scipy.spatial as sp

# Three noisy diagonal segments of 100 points each.
RAND = 4
X = [(i+random.random()*RAND, i+random.random()*RAND) for i in range(100)]
X += [(i+random.random()*RAND, i+random.random()*RAND) for i in range(100)]
X += [(i+random.random()*RAND, i+random.random()*RAND) for i in range(100)]

# NOTE(review): giveit() is not defined in this chunk, and this call
# overwrites the synthetic X built above -- confirm which dataset is intended.
data, X = giveit()

plt.figure(0)
plt.plot([e[0] for e in X], [e[1] for e in X], 'ro')

# 100 random initial centres sampled from a [0,250]x[0,100] box.
k = KMeans(X, [[random.randint(0, 250), random.randint(0, 100)] for e in range(100)])
new_centers = k.compute()

# NOTE(review): Python 2 syntax (print statement).
print new_centers
# Keep only the centres whose cluster ended up non-empty (the second
# assignment rebinds `k` from the KMeans instance to an index list).
clus = k.get_clusters()
clus = [k for k,v in k.get_clusters().items() if len(v) > 0]

new_centers = np.array(new_centers)[clus]

plt.figure(0)
plt.plot([e[0] for e in new_centers], [e[1] for e in new_centers], 'gd')

# NOTE(review): the lines below look truncated at the chunk boundary.
new_centers = X

k = 6