def do_kr(x, y, nclusters=4, verbose=1, use_global_attr_count=1, n_init=10):
    # k-representatives clustering; `k_center1` and `evaluation` are project-local
    # modules, homogeneity_completeness_v_measure comes from sklearn.metrics.
    kr = k_center1.KRepresentative(n_clusters=nclusters, init='random',
                                   n_init=n_init, verbose=verbose,
                                   use_global_attr_count=use_global_attr_count)
    kr.fit_predict(x)

    ari = evaluation.rand(kr.labels_, y)  # computed for reference only, not returned
    nmi = evaluation.nmi(kr.labels_, y)
    purity = evaluation.purity(kr.labels_, y)
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, kr.labels_)

    if verbose == 1:
        print("Purity = {:8.3f}".format(purity))
        print("NMI = {:8.3f}".format(nmi))
        print("Homogeneity = {:8.3f}".format(homogeneity))
        print("Completeness = {:8.3f}".format(completeness))
        print("V-measure = {:8.3f}".format(v_measure))

    return [round(purity, 3), round(nmi, 3), round(homogeneity, 3),
            round(completeness, 3), round(v_measure, 3)]

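# A minimal usage sketch, assuming a purely categorical CSV whose last column is
# the class label; the file name "soybean.csv" and k=4 are illustrative
# placeholders, not taken from the original code.
if __name__ == "__main__":
    import pandas as pd
    raw = pd.read_csv("soybean.csv", header=None, dtype=str).values
    x, y = raw[:, :-1], raw[:, -1]
    scores = do_kr(x, y, nclusters=4, verbose=1, use_global_attr_count=1, n_init=10)
    print(scores)  # [purity, nmi, homogeneity, completeness, v_measure]
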
def do_kr(x, y, nclusters=4, verbose=1, n_init=10):
    # k-modes clustering (Huang initialisation); `kmodes` and `evaluation` are
    # assumed to be imported at module level.
    kr = kmodes.KModes(n_clusters=nclusters, max_iter=1, init='Huang',
                       n_init=n_init, verbose=verbose)
    kr.fit_predict(x)

    ari = evaluation.rand(kr.labels_, y)  # computed for reference only, not returned
    nmi = evaluation.nmi(kr.labels_, y)
    purity = evaluation.purity(kr.labels_, y)
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, kr.labels_)

    if verbose == 1:
        print("Purity = {:8.3f}".format(purity))
        print("NMI = {:8.3f}".format(nmi))
        print("Homogeneity = {:8.3f}".format(homogeneity))
        print("Completeness = {:8.3f}".format(completeness))
        print("V-measure = {:8.3f}".format(v_measure))

    return [round(purity, 3), round(nmi, 3), round(homogeneity, 3),
            round(completeness, 3), round(v_measure, 3)]

def do_kr(x, y, nclusters, verbose, use_global_attr_count, n_init):
    # KCMM clustering of mixed data with timing and memory tracking; requires
    # time(), timedelta, tracemalloc, sklearn's homogeneity_completeness_v_measure,
    # and the project-local KCMM and `evaluation` modules.
    start_time = time()
    tracemalloc.start()

    # Indices of the categorical attributes in x.
    categorical = [0, 3, 4, 5, 6, 8, 9, 11, 12]
    kr = KCMM(categorical, n_clusters=nclusters, init='random', n_init=n_init,
              verbose=verbose, use_global_attr_count=use_global_attr_count)
    kr.fit_predict(x)
    # print(kr.labels_)

    ari = evaluation.rand(kr.labels_, y)  # computed for reference only, not returned
    nmi = evaluation.nmi(kr.labels_, y)
    purity = evaluation.purity(kr.labels_, y)
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, kr.labels_)

    end_time = time()
    elapsedTime = timedelta(seconds=end_time - start_time).total_seconds()
    # NOTE: get_tracemalloc_memory() reports tracemalloc's own overhead;
    # tracemalloc.get_traced_memory()[1] would give the traced peak instead.
    memoryUsage = tracemalloc.get_tracemalloc_memory() / 1024 / 1024

    if verbose == 1:
        print("Purity = {:8.3f}".format(purity))
        print("NMI = {:8.3f}".format(nmi))
        print("Homogeneity = {:8.3f}".format(homogeneity))
        print("Completeness = {:8.3f}".format(completeness))
        print("V-measure = {:8.3f}".format(v_measure))
        print("Elapsed Time = {:8.3f} secs".format(elapsedTime))
        print("Memory usage = {:8.3f} MB".format(memoryUsage))

    tracemalloc.stop()
    return [round(purity, 3), round(nmi, 3), round(homogeneity, 3),
            round(completeness, 3), round(v_measure, 3),
            round(elapsedTime, 3), round(memoryUsage, 3)]

def do_kr(x, y, nclusters, verbose, n_init):
    # k-prototypes on mixed data; requires time(), timedelta, tracemalloc,
    # pandas (pd), numpy (np), statistics.mean, sklearn's
    # homogeneity_completeness_v_measure, and the project-local `kpro` and
    # `evaluation` modules.
    start_time = time()
    tracemalloc.start()

    # Fill in missing values ("?") in the numeric attributes in advance.
    xDataFrame = pd.DataFrame(x)
    attrList = [0, 3, 4, 5, 6, 8, 9, 11, 12]  # categorical column indices
    numOfRows = x.shape[0]
    numOfCols = x.shape[1]
    for i in range(0, numOfCols):
        if i not in attrList:
            colTmp = x[:, i].copy()
            colTmp.sort()  # numeric strings sort before "?", so missing values end up last
            if "?" not in colTmp:
                continue
            missIndex = colTmp.tolist().index("?")
            colTmp = list(map(float, colTmp[0:missIndex]))
            average = round(mean(colTmp), 2)
            for j in range(0, numOfRows):
                if xDataFrame.iloc[j, i] == "?":
                    xDataFrame.iloc[j, i] = average
    x = np.asarray(xDataFrame)

    kr = kpro.KPrototypes(n_clusters=nclusters, max_iter=1, init='random',
                          n_init=n_init, verbose=verbose)
    kr.fit_predict(x, categorical=attrList)

    ari = evaluation.rand(kr.labels_, y)  # computed for reference only, not returned
    nmi = evaluation.nmi(kr.labels_, y)
    purity = evaluation.purity(kr.labels_, y)
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, kr.labels_)

    end_time = time()
    elapsedTime = timedelta(seconds=end_time - start_time).total_seconds()
    # NOTE: get_tracemalloc_memory() reports tracemalloc's own overhead;
    # tracemalloc.get_traced_memory()[1] would give the traced peak instead.
    memoryUsage = tracemalloc.get_tracemalloc_memory() / 1024 / 1024

    if verbose == 1:
        print("Purity = {:8.3f}".format(purity))
        print("NMI = {:8.3f}".format(nmi))
        print("Homogeneity = {:8.3f}".format(homogeneity))
        print("Completeness = {:8.3f}".format(completeness))
        print("V-measure = {:8.3f}".format(v_measure))
        print("Elapsed Time = {:8.3f} secs".format(elapsedTime))
        print("Memory usage = {:8.3f} MB".format(memoryUsage))

    # snapshot = tracemalloc.take_snapshot()
    # top_stats = snapshot.statistics('lineno')
    # print("[ Top 10 ]")
    # for stat in top_stats[:10]:
    #     print(stat)

    tracemalloc.stop()
    return [round(purity, 3), round(nmi, 3), round(homogeneity, 3),
            round(completeness, 3), round(v_measure, 3),
            round(elapsedTime, 3), round(memoryUsage, 3)]

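# Standalone sketch of the same "?"-imputation idea on a tiny made-up matrix
# (the values and column indices below are illustrative only): every numeric
# column gets its "?" entries replaced by the mean of the observed values.
if __name__ == "__main__":
    import numpy as np
    from statistics import mean
    demo = np.array([["a", "1.0"],
                     ["b", "?"],
                     ["a", "3.0"]], dtype=object)
    categorical_cols = [0]
    for col in range(demo.shape[1]):
        if col in categorical_cols:
            continue
        observed = [float(v) for v in demo[:, col] if v != "?"]
        fill = round(mean(observed), 2)
        demo[demo[:, col] == "?", col] = fill
    print(demo)  # the "?" in the second row becomes 2.0
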
# Min-max normalise each attribute and dump the normalised tuples; `dataset`,
# `cls_idx`, `mins`, `maxs` and `f_norm` are defined earlier in the script, and
# `dbscan` and `evaluation` are project-local modules.
for d in dataset:
    for i in range(0, cls_idx):
        d.tuple[i] = float(d.tuple[i] - mins[i].tuple[i]) / (maxs[i].tuple[i] - mins[i].tuple[i])
    f_norm.write(str(d.tuple))
    f_norm.write('\n')

# DBSCAN parameters come from the command line: minPts (k) and radius (eps).
k = int(sys.argv[1])
eps = float(sys.argv[2])
cluster = dbscan.dbscan(dataset, eps, k)

if len(cluster) == 0:
    print('k:', k, 'no. of cluster:', len(cluster))
    print()

pure = evaluation.purity(cluster, len(dataset))
NMI = evaluation.NMI(cluster, dataset)
RI = evaluation.RI(cluster, dataset)

# Write each point's cluster id and true class to output.txt.
cp = [len(c) for c in cluster]
f_out = open("output.txt", 'w')
for i in range(0, len(cluster)):
    print('cluster:', i, 'no. of pt. in cluster:', cp[i])
    for c in cluster[i]:
        s = str(i) + " " + str(c.cls)
        f_out.write(s)
        f_out.write('\n')
f_out.close()

            # (continuation of the predict_data loop: assign sample i to its nearest centre)
            min_index = 0
            min_value = 1e100
            data = self.data[i, :]
            for j in range(0, len(self.center)):
                square = self.distance(self.center[j], data)
                if square < min_value:
                    min_index = j
                    min_value = square
            predict_index[i] = min_index
        return predict_index


if __name__ == '__main__':
    # Requires the project-local data_reader, PCA and evaluation modules and the
    # k_means class above.
    clusters = 20
    data, label_family, label_genus, label_species, label_record = data_reader.read_frog_data()
    data = PCA.pca(data, 10)
    print(data.shape)
    label = np.argmin(-label_family, axis=1)  # argmin of the negated one-hot labels, i.e. argmax

    predictor = k_means(data, label, clusters)
    predictor.init_center()
    iteration = 100
    for i in range(0, iteration):
        predictor.update()
        predict = predictor.predict_data()
        acc = evaluation.multi_label_accuracy(predict, label, clusters)
        p = evaluation.purity(predict, label, clusters)
        F_score = evaluation.F_score(predict, label, clusters)
        F_score_output = evaluation.format_F_score(F_score)
        print("the acc in iteration %d is %.4f %.4f" % (i, acc, p))
        print("the Fscore is " + F_score_output)
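
# Side note (illustrative, not part of the original class): the nearest-centre
# loop in predict_data above can also be written with NumPy broadcasting,
# assuming `centers` is a (k, d) array and `data` the (n, d) sample matrix:
#
#     dists = ((data[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
#     predict_index = dists.argmin(axis=1)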