# Benchmark script: time the project's three K-Means variants
# ("KMeans", "biKMeans", "KMeans++") with k=4 on `train.txt`.

# LIB must be on sys.path before the project-local `Cluster` import below.
sys.path.insert(0, LIB)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

from Cluster import KMeans as kmeans

# Load the whitespace-separated table (no header row) as a NumPy array.
# `sep=r'\s+'` is the supported spelling of the `delim_whitespace=True`
# flag, which is deprecated since pandas 2.2.
trainData = np.array(
    pd.read_table(
        os.path.join(DAT, 'train.txt'),
        header=None,
        encoding='gb2312',
        sep=r'\s+',
    )
)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() can jump if the system clock is
# adjusted while a run is in progress.

# --- plain K-Means ---------------------------------------------------------
time_start1 = time.perf_counter()
clf1 = kmeans(k=4, cluster_type="KMeans")
pred1 = clf1.train(trainData)
time_end1 = time.perf_counter()
print("Runtime of KMeans:", time_end1 - time_start1)

# --- bisecting K-Means -----------------------------------------------------
# NOTE(review): the result is bound to `pred` (not `pred2`); the name is kept
# as-is because code outside this excerpt may refer to it.
time_start2 = time.perf_counter()
clf2 = kmeans(k=4, cluster_type="biKMeans")
pred = clf2.train(trainData)
time_end2 = time.perf_counter()
print("Runtime of biKMeans:", time_end2 - time_start2)

# --- K-Means++ -------------------------------------------------------------
time_start3 = time.perf_counter()
clf3 = kmeans(k=4, cluster_type="KMeans++")
pred3 = clf3.train(trainData)
time_end3 = time.perf_counter()
print("Runtime of KMeans++:", time_end3 - time_start3)
# Benchmark script: time the project's K-Means and DBSCAN against
# scikit-learn's DBSCAN on a synthetic two-circles data set.

# LIB is the parent directory of the directory where this program resides;
# DAT is the `dataset/dataset2` directory underneath it.
LIB = os.path.join(os.path.dirname(__file__), '..')
DAT = os.path.join(LIB, 'dataset', 'dataset2')

# LIB must be on sys.path before the project-local `Cluster` imports below.
sys.path.insert(0, LIB)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.cluster import DBSCAN

from Cluster import KMeans as kmeans
from Cluster import DBSCAN as dbscan

# Generate 5000 noisy concentric-circle samples and keep the first 1000
# as the data every algorithm is run on.
X1, y1 = datasets.make_circles(n_samples=5000, factor=.6, noise=.05)
trainData = X1[0:1000]

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() can jump if the system clock is
# adjusted while a run is in progress.

# --- project K-Means (k=4) -------------------------------------------------
time_start1 = time.perf_counter()
clf1 = kmeans(k=4, cluster_type="KMeans")
pred1 = clf1.train(trainData)
time_end1 = time.perf_counter()
print("Runtime of KMeans:", time_end1 - time_start1)

# --- project DBSCAN (constructor defaults) ---------------------------------
# NOTE(review): the result is bound to `pred` (not `pred2`); the name is kept
# as-is because code outside this excerpt may refer to it.
time_start2 = time.perf_counter()
clf2 = dbscan()
pred = clf2.train(trainData)
time_end2 = time.perf_counter()
print("Runtime of DBSCAN:", time_end2 - time_start2)

# --- scikit-learn DBSCAN ---------------------------------------------------
# The timed span covers both fitting and reading back the labels.
time_start3 = time.perf_counter()
clf3 = DBSCAN(eps=0.1, min_samples=10)
clf3.fit(trainData)
pred3 = clf3.labels_
time_end3 = time.perf_counter()