def main():
    """Run the jump test on a two-component point cloud and plot the result.

    The point cloud is the concatenation of the output of
    ``sgt.genAnnulusCluster`` and ``sgt.genCluster``, both called with the
    centre (0.5, 0.5).  Its distance matrix is clustered with single linkage
    and passed to ``sst.JumpTest``; the figure is written to
    ``../media/jumpRing.png``.
    """
    n_outer = 200
    # Arguments of sst.JumpTest -- presumably the maximum number of clusters
    # and the number of null realisations; confirm against sst.JumpTest.
    k_max = 10
    n_null = 5
    centre = (0.5, 0.5)

    # Generate fake data (outer component first, then the inner one).
    outer_pts = sgt.genAnnulusCluster(centre, 0.4, 0.3, n_outer)
    inner_pts = sgt.genCluster(centre, 0.1, 100)
    data_mat = np.concatenate((outer_pts, inner_pts), axis=0)

    # Pairwise distance matrix.
    dist_mat = snt.getDistanceMatrix(data_mat)

    # Two-cluster single-linkage clustering.  The result is only consumed by
    # the optional plotData call listed further down.
    single_link = AgglomerativeClustering(n_clusters=2,
                                          linkage='single',
                                          affinity='precomputed')
    clustering = single_link.fit(dist_mat)

    # Optional alternative test:
    #   elb_obj = sst.ElbowTest(k_max).fit(dist_mat, linkage='single')
    jump_test = sst.JumpTest(k_max, n_null)
    jump_obj = jump_test.fit(dist_mat, linkageMethod='single')

    # Optional alternative plots:
    #   spt.plotData(data_mat, labels=clustering.labels_,
    #                fname='../media/ring.singleLink1.png')
    #   spt.plotElbowTest(data_mat, elb_obj)
    spt.plotJumpTest(data_mat, jump_obj, fname='../media/jumpRing.png')
def main():
    """Run the dendrogram test on a point cloud and plot the result.

    Four clusters are generated with ``sgt.genCluster`` and concatenated,
    but the data set actually analysed is the output of
    ``sgt.genPoisson``, which overwrites them.  The figure is written to
    ``../media/dendro3.png``.
    """
    # Program constants.
    max_clusters = 10  # max number of clusters
    n_reals = 100  # number of null realisations created by gap test (not used below)
    n_points = 100  # number of points in the input data cloud

    # Cluster sizes: the last cluster takes whatever is left of n_points.
    sizes = [20, 20, 10]
    sizes.append(n_points - sum(sizes))
    centres = [(0.3, 0.7), (0.1, 0.3), (0.7, 0.5), (0.5, 0.1)]

    # Generate the clusters, in the order listed above.
    cluster_vec = [
        sgt.genCluster(centre, 0.1, size)
        for centre, size in zip(centres, sizes)
    ]

    # Create the data set.
    data_mat = np.concatenate(cluster_vec, axis=0)
    # NOTE(review): the clustered data set is replaced right away by a
    # uniform point cloud -- confirm that this override is intentional.
    data_mat = sgt.genPoisson(n_points)

    # Convert data to network.
    dist_mat = snt.getDistanceMatrix(data_mat)

    dendro_test = sst.DendroTest(max_clusters)
    den_obj = dendro_test.fit(dist_mat)

    # Plot the top p levels of the dendrogram.
    spt.plotDendroTest(data_mat, den_obj, fname='../media/dendro3.png')
def main():
    """Run the gap test on three generated clusters and plot the result.

    The clusters come from ``sgt.genCluster``; their concatenation is turned
    into a distance matrix and passed to ``sst.GapTest``.  The figure is
    written to ``../media/gap.png``.
    """
    # Program constants.
    max_clusters = 10  # max number of clusters
    n_reals = 100  # number of null realisations created by gap test
    n_points = 100  # number of points in the input data cloud

    # Cluster sizes: the third cluster takes whatever is left of n_points.
    sizes = [30, 20]
    sizes.append(n_points - sum(sizes))
    centres = [(0.3, 0.7), (0.1, 0.3), (0.7, 0.5)]

    # Generate the clusters, in the order listed above.
    cluster_vec = [
        sgt.genCluster(centre, 0.1, size)
        for centre, size in zip(centres, sizes)
    ]

    # Create the data set.
    data_mat = np.concatenate(cluster_vec, axis=0)
    # Optional alternative (uniform point cloud):
    #   data_mat = sgt.genPoisson(n_points)

    # Convert data to network.
    dist_mat = snt.getDistanceMatrix(data_mat)

    # Perform the gap test and plot it.
    gap_test = sst.GapTest(max_clusters, n_reals)
    gap_obj = gap_test.fit(dist_mat)
    spt.plotGapTest(data_mat, gap_obj, fname='../media/gap.png')
def main():
    """Plot a generated point cloud together with its density filter values.

    The data come from ``mgt.genS``.  The pairwise distance matrix is used
    to compute ``mst.filterDensity`` values and to print a statistics table;
    the figure is written to ``../media/s.filterHist.den.png``.
    """
    n_points = 200  # number of points in each cluster
    centre = (0.5, 0.5)  # center of cluster

    # Generate the data.  Alternative generators that can be swapped in:
    #   points = mgt.genCigar(0.3, 0.05, centre, n_points, rot_angle=0.3*np.pi)
    #   points = mgt.genDisk(0.3, centre, n_points)
    #   points = mgt.genAnnulus(0.1, 0.3, centre, n_points)
    #   points = mgt.genRect(0.2, 0.5, centre, n_points)
    #   points = mgt.genCross(0.8, 0.6, 0.1, centre, n_points, rot_angle=7.)
    #   points = mgt.genEight(0.6, 0.1, centre, n_points)
    #   points = mgt.genGauss(centre, 0.1, 0.02, n_points, rot_angle=10.)
    points = mgt.genS(0.6, 0.1, centre, n_points)

    # Pairwise separation matrix.
    separations = snt.getDistanceMatrix(points)

    # Filter values.  Alternative filters:
    #   filter_values = mst.filterEccentric(separations, 3)
    #   filter_values = mst.filterLinfinity(separations)
    filter_values = mst.filterDensity(separations, 0.1)

    # Print stats.
    stats_table = mst.StatsTable(separations)
    stats_table.printStatsTable()

    # Plot the data.  Alternative plots:
    #   mpt.plotPoints(points, labels=filter_values,
    #                  fname='../media/cigar.ecc.png')
    #   mpt.plotShapeHist(points, separations,
    #                     fname='../media/cigar.shapeHist.png')
    mpt.plotPointsFilterHist(points,
                             filter_values,
                             fname='../media/s.filterHist.den.png')
def main():
    """Cluster three small generated point groups and plot clusters and graph.

    The first command-line argument is forwarded to
    ``spt.plotClustersAndGraph`` as ``flabel``; output goes to the folder
    ``../media/``.

    Raises
    ------
    SystemExit
        If no command-line argument is supplied.  The check runs before any
        data is generated, so a missing argument is reported immediately
        with a usage message rather than as an IndexError once the
        clustering has already been computed.
    """
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "script"
        raise SystemExit("usage: {} <flabel>".format(prog))
    flabel = sys.argv[1]

    # Generate clusters (order matters for the row order of data_mat).
    group_a = sgt.genCluster((0.2, 0.5), 0.15, 6)
    group_b = sgt.genCluster((0.6, 0.7), 0.15, 5)
    group_c = sgt.genCluster((0.7, 0.2), 0.15, 3)
    data_mat = np.concatenate((group_a, group_b, group_c), axis=0)

    # Convert data to network.
    dist_mat = snt.getDistanceMatrix(data_mat)

    # Average-linkage clustering on the precomputed distance matrix.
    # NOTE(review): scikit-learn deprecated `affinity` in favour of `metric`
    # in release 1.2 -- confirm which version this project pins.
    clustering = AgglomerativeClustering(n_clusters=3,
                                         linkage='average',
                                         affinity='precomputed').fit(dist_mat)

    # Plot it.
    spt.plotClustersAndGraph(data_mat,
                             clustering.labels_,
                             ffolder='../media/',
                             flabel=flabel)
def main():
    """Run the mapper pipeline on a generated point cloud and plot it.

    The data come from ``mgt.genCigar``.  A statistics table is printed,
    ``mst.filterLinfinity`` values are computed, and an ``mst.Mapper`` is
    fitted over the range of those values.  Two figures are written:
    ``../media/cigar.filterHist.lin.png`` and
    ``../media/cigar.mapperGraph.png``.
    """
    n_points = 200  # number of points in each cluster
    n_cover = 3  # number of sets in the cover
    max_clusters = 5  # max number of clusters in each cover member set
    n_reals = 100  # number of null realisations created each gap test
    centre = (0.5, 0.5)  # center of cluster

    # Generate the data.  Alternative generators that can be swapped in:
    #   points = mgt.genDisk(0.3, centre, n_points)
    #   points = mgt.genAnnulus(0.1, 0.3, centre, n_points)
    #   points = mgt.genRect(0.2, 0.5, centre, n_points)
    #   points = mgt.genCross(0.8, 0.6, 0.1, centre, n_points, rot_angle=0.)
    #   points = mgt.genS(0.6, 0.1, centre, n_points)
    #   points = mgt.genEight(0.6, 0.1, centre, n_points)
    #   points = mgt.genGauss(centre, 0.1, 0.02, n_points, rot_angle=10.)
    points = mgt.genCigar(0.3, 0.05, centre, n_points, rot_angle=0.3 * np.pi)

    # Pairwise separation matrix.
    separations = snt.getDistanceMatrix(points)

    # Print stats.
    stats_table = mst.StatsTable(separations)
    stats_table.printStatsTable()

    # Filter values.  Alternative filters:
    #   filter_values = mst.filterDensity(separations, 0.1)
    #   filter_values = mst.filterEccentric(separations, 3)
    filter_values = mst.filterLinfinity(separations)
    eps = 0.4

    # Initialise the mapper object over the observed filter range and fit it.
    filter_min = min(filter_values)
    filter_max = max(filter_values)
    mapper = mst.Mapper(filter_min, filter_max, n_cover, eps, max_clusters,
                        n_reals)
    mapper.fit(points, separations, filter_values)

    # Plot the data.  Alternative plots:
    #   mpt.plotPoints(points, labels=filter_values,
    #                  fname='../media/cigar.ecc.png')
    #   mpt.plotShapeHist(points, separations,
    #                     fname='../media/cigar.shapeHist.png')
    mpt.plotPointsFilterHist(points,
                             filter_values,
                             fname='../media/cigar.filterHist.lin.png')
    mpt.plotGraph(mapper.clusterGraph,
                  '../media/cigar.mapperGraph.png',
                  colorAtt='avfvalue')
def fit(self, dist_mat, linkageMethod='average'):
    """Fit the test to a precomputed distance matrix.

    The full agglomerative dendrogram of ``dist_mat`` is built and the
    log10 of the last ``self.maxclusters`` entries of column 2 of its
    linkage matrix (taken in reverse order) is stored.  The same quantity is
    computed for ``self.nreals`` null data sets drawn with
    ``sgt.genPoisson`` and summarised by its mean and error per level.

    Parameters
    ----------
    dist_mat : 2-D array
        Pairwise distance matrix; passed to ``AgglomerativeClustering`` with
        ``affinity='precomputed'``.
    linkageMethod : str, optional
        Forwarded as the ``linkage`` argument of ``AgglomerativeClustering``.

    Returns
    -------
    self
        With ``linkage_mat``, ``w_log_vec``, ``null_linkage_mat`` (last null
        realisation only), ``wnull_log_average_vec``, ``wnull_log_err_vec``
        and ``gap`` set.

    Notes
    -----
    The error vector is ``sqrt(var * (1 + 1/nreals))``, i.e. the standard
    deviation across null realisations times ``sqrt(1 + 1/nreals)``.  The
    previous expression ``var * (1 + 1/nreals)**0.5`` applied the square
    root to the second factor only (``**`` binds tighter than ``*``), so a
    variance in squared log units was compared against ``gap``.
    """
    n_points = dist_mat.shape[0]
    print(n_points)  # diagnostic output retained from the original

    # Slice stop selecting the last `maxclusters` rows, newest merge first.
    p = self.maxclusters + 1

    def _linkage_and_log_distances(matrix):
        """Return (linkage matrix, log10 of its top column-2 entries)."""
        # NOTE(review): scikit-learn deprecated `affinity` in favour of
        # `metric` in release 1.2 -- confirm which version this project pins.
        model = AgglomerativeClustering(linkage=linkageMethod,
                                        affinity='precomputed',
                                        distance_threshold=0,
                                        n_clusters=None).fit(matrix)
        linkage_mat = get_linkage_matrix(model)
        return linkage_mat, np.log10(linkage_mat[:, 2][:-p:-1])

    # Dendrogram of the data.
    self.linkage_mat, self.w_log_vec = _linkage_and_log_distances(dist_mat)

    # Null realisations: one column per realisation.
    wnull_log_mat = np.empty([self.maxclusters, self.nreals])
    for b in range(self.nreals):
        distnull_mat = snt.getDistanceMatrix(sgt.genPoisson(n_points))
        self.null_linkage_mat, wnull_log_mat[:, b] = \
            _linkage_and_log_distances(distnull_mat)

    self.wnull_log_average_vec = wnull_log_mat.mean(1)
    # Square root over the whole product (see Notes).
    self.wnull_log_err_vec = np.sqrt(
        wnull_log_mat.var(1) * (1 + 1 / self.nreals))
    self.gap = self.wnull_log_average_vec - self.w_log_vec

    # Estimate optimal k: first level whose gap is not exceeded by the next
    # level's gap minus that level's error.
    for i in range(self.maxclusters - 1):
        if self.gap[i] >= self.gap[i + 1] - self.wnull_log_err_vec[i + 1]:
            print(f"optimal k: {i+1}")
            break
    return self
def main():
    """Cluster three generated point groups and plot their shape histograms.

    The groups come from ``sgt.genCluster``; their distance matrix is
    clustered into three clusters with average linkage and the labels are
    passed to ``mpt.plotShapeHist``.
    """
    # Generate the data: three groups of 20 points, in this order.
    centres = ((0.2, 0.2), (0.6, 0.7), (0.8, 0.2))
    groups = tuple(sgt.genCluster(centre, 0.1, 20) for centre in centres)
    data = np.concatenate(groups, axis=0)
    # Optional alternatives:
    #   data = sgt.genPoisson(60)
    #   spt.plotData(data)

    # Compute distance matrix.
    dist = snt.getDistanceMatrix(data)

    # Cluster the data.
    average_link = AgglomerativeClustering(linkage='average',
                                           affinity='precomputed',
                                           n_clusters=3)
    clustering = average_link.fit(dist)

    # For each cluster, print cluster and shape histogram.
    # Optional alternative:
    #   spt.plotData(data, labels=clustering.labels_)
    mpt.plotShapeHist(data, dist, labels=clustering.labels_)
def main():
    """Run the elbow test on three generated clusters and plot the result.

    The clusters come from ``sgt.genCluster``; their concatenation is turned
    into a distance matrix and passed to ``sst.ElbowTest``.  The figure is
    written to ``../media/elbow.png``.
    """
    max_clusters = 10  # max number of clusters

    # Fake data parameters, one entry per cluster.
    sizes = (30, 20, 15)  # number of points in each cluster
    radii = (0.3, 0.1, 0.1)  # max radius of each cluster
    centres = ((0.3, 0.7), (0.1, 0.3), (0.7, 0.5))

    # Generate clusters, in the order listed above.
    clusters = [
        sgt.genCluster(centre, radius, size)
        for centre, radius, size in zip(centres, radii, sizes)
    ]

    # Create data set.
    n = sum(sizes)  # number of points in fake data set
    X = np.concatenate(clusters, axis=0)
    # Optional alternative (uniform point cloud):
    #   X = sgt.genPoisson(n)

    # Convert data to network.
    dist_mat = snt.getDistanceMatrix(X)

    # Perform elbow test and plot it.
    elbow_test = sst.ElbowTest(max_clusters)
    elbow_obj = elbow_test.fit(dist_mat)
    spt.plotElbowTest(X, elbow_obj, fname='../media/elbow.png')
def main():
    """Print statistics and plot the shape histogram of a generated cloud.

    The data come from ``mgt.genDisk``.  The figure is written to
    ``../media/disk.hist.png``.
    """
    n_points = 200  # number of points in each cluster
    centre = (0.5, 0.5)  # center of cluster

    # Generate the data.  Alternative generators that can be swapped in:
    #   points = mgt.genCigar(0.3, 0.05, centre, n_points, rot_angle=0.3*np.pi)
    #   points = mgt.genAnnulus(0.1, 0.3, centre, n_points)
    #   points = mgt.genRect(0.2, 0.5, centre, n_points)
    #   points = mgt.genCross(0.8, 0.6, 0.1, centre, n_points, rot_angle=7.)
    #   points = mgt.genS(0.6, 0.1, centre, n_points)
    #   points = mgt.genEight(0.6, 0.1, centre, n_points)
    points = mgt.genDisk(0.3, centre, n_points)

    # Pairwise separation matrix.
    separations = snt.getDistanceMatrix(points)

    # Print stats.
    stats_table = mst.StatsTable(separations)
    stats_table.printStatsTable()

    # Plot the data.  Optional alternative:
    #   mpt.plotPoints(points, fname='../media/cigar.png')
    mpt.plotShapeHist(points, separations, fname='../media/disk.hist.png')