def do_kr(x, y, nclusters=4, verbose=1, use_global_attr_count=1, n_init=10):
    """Cluster ``x`` with ``k_center1.KRepresentative`` and score it against ``y``.

    The model is created with ``init='random'`` and fitted on ``x``; its
    ``labels_`` are then compared with ``y`` through ``evaluation.nmi``,
    ``evaluation.purity`` and ``homogeneity_completeness_v_measure``.

    Args:
        x: Data handed unchanged to ``fit_predict``.
        y: Reference labels the predicted labels are scored against
            (presumably the ground truth -- confirm with the caller).
        nclusters: Forwarded to the model as ``n_clusters``.
        verbose: Forwarded to the model; the report is printed only when
            this equals 1.
        use_global_attr_count: Forwarded to the model.
        n_init: Forwarded to the model.

    Returns:
        list: ``[purity, nmi, homogeneity, completeness, v_measure]``, each
        rounded to 3 decimals.
    """
    model = k_center1.KRepresentative(
        n_clusters=nclusters,
        init='random',
        n_init=n_init,
        verbose=verbose,
        use_global_attr_count=use_global_attr_count,
    )
    model.fit_predict(x)

    # The Rand score is evaluated but is neither printed nor returned.
    evaluation.rand(model.labels_, y)
    nmi = evaluation.nmi(model.labels_, y)
    purity = evaluation.purity(model.labels_, y)
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, model.labels_)

    # One (format template, value) pair per metric, in output order.
    report = (
        ("Purity = {:8.3f}", purity),
        ("NMI = {:8.3f}", nmi),
        ("Homogenity = {:8.3f}", homogeneity),
        ("Completeness = {:8.3f}", completeness),
        ("V-measure = {:8.3f}", v_measure),
    )

    if verbose == 1:
        for template, value in report:
            print(template.format(value))

    return [round(value, 3) for _, value in report]
def do_kr(x, y, nclusters, verbose, use_global_attr_count, n_init,
          categorical=None):
    """Cluster ``x`` with ``KCMM``, score it against ``y`` and profile the run.

    The model is created with ``init='random'`` and fitted on ``x``; its
    ``labels_`` are then compared with ``y`` through ``evaluation.nmi``,
    ``evaluation.purity`` and ``homogeneity_completeness_v_measure``.
    Wall-clock time and traced memory are measured around the fit and the
    scoring.

    Args:
        x: Data handed unchanged to ``fit_predict``.
        y: Reference labels the predicted labels are scored against
            (presumably the ground truth -- confirm with the caller).
        nclusters: Forwarded to the model as ``n_clusters``.
        verbose: Forwarded to the model; the report is printed only when
            this equals 1.
        use_global_attr_count: Forwarded to the model.
        n_init: Forwarded to the model.
        categorical: Column indices passed to ``KCMM`` as its first
            argument. Defaults to ``[0, 3, 4, 5, 6, 8, 9, 11, 12]``, the
            list this function previously hard-coded.

    Returns:
        list: ``[purity, nmi, homogeneity, completeness, v_measure,
        elapsed_seconds, peak_memory_mb]``, each rounded to 3 decimals.
    """
    if categorical is None:
        categorical = [0, 3, 4, 5, 6, 8, 9, 11, 12]
    else:
        categorical = list(categorical)

    start_time = time()
    tracemalloc.start()
    try:
        model = KCMM(
            categorical,
            n_clusters=nclusters,
            init='random',
            n_init=n_init,
            verbose=verbose,
            use_global_attr_count=use_global_attr_count,
        )
        model.fit_predict(x)

        # The Rand score is evaluated but is neither printed nor returned;
        # the call is kept so the measured work is unchanged.
        evaluation.rand(model.labels_, y)
        nmi = evaluation.nmi(model.labels_, y)
        purity = evaluation.purity(model.labels_, y)
        homogeneity, completeness, v_measure = (
            homogeneity_completeness_v_measure(y, model.labels_))

        elapsed_seconds = time() - start_time
        # get_traced_memory() reports the allocations made since start();
        # get_tracemalloc_memory() would only report tracemalloc's own
        # bookkeeping overhead. The peak is used as the memory figure.
        _, peak_bytes = tracemalloc.get_traced_memory()
    finally:
        # Always switch tracing off, even when fitting or scoring fails.
        tracemalloc.stop()

    memory_mb = peak_bytes / 1024 / 1024

    # One (format template, value) pair per figure, in output order.
    report = (
        ("Purity = {:8.3f}", purity),
        ("NMI = {:8.3f}", nmi),
        ("Homogenity = {:8.3f}", homogeneity),
        ("Completeness = {:8.3f}", completeness),
        ("V-measure = {:8.3f}", v_measure),
        ("Elapsed Time = {:8.3f} secs", elapsed_seconds),
        ("Memory usage = {:8.3f} MB", memory_mb),
    )

    if verbose == 1:
        for template, value in report:
            print(template.format(value))

    return [round(value, 3) for _, value in report]
def do_kr(x, y, nclusters=4, verbose=1, n_init=10):
    """Cluster ``x`` with ``kmodes.KModes`` and score the result against ``y``.

    The model is created with ``init='Huang'`` and ``max_iter=1`` and fitted
    on ``x``; its ``labels_`` are then compared with ``y`` through
    ``evaluation.nmi``, ``evaluation.purity`` and
    ``homogeneity_completeness_v_measure``.

    Args:
        x: Data handed unchanged to ``fit_predict``.
        y: Reference labels the predicted labels are scored against
            (presumably the ground truth -- confirm with the caller).
        nclusters: Forwarded to the model as ``n_clusters``.
        verbose: Forwarded to the model; the report is printed only when
            this equals 1.
        n_init: Forwarded to the model.

    Returns:
        list: ``[purity, nmi, homogeneity, completeness, v_measure]``, each
        rounded to 3 decimals.
    """
    model_options = {
        "n_clusters": nclusters,
        "max_iter": 1,
        "init": 'Huang',
        "n_init": n_init,
        "verbose": verbose,
    }
    model = kmodes.KModes(**model_options)
    model.fit_predict(x)

    # The Rand score is evaluated but is neither printed nor returned.
    evaluation.rand(model.labels_, y)
    nmi = evaluation.nmi(model.labels_, y)
    purity = evaluation.purity(model.labels_, y)
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, model.labels_)

    scores = [purity, nmi, homogeneity, completeness, v_measure]

    if verbose == 1:
        templates = [
            "Purity = {:8.3f}",
            "NMI = {:8.3f}",
            "Homogenity = {:8.3f}",
            "Completeness = {:8.3f}",
            "V-measure = {:8.3f}",
        ]
        for template, score in zip(templates, scores):
            print(template.format(score))

    return [round(score, 3) for score in scores]
def _fill_missing_numeric(x, categorical):
    """Replace "?" in the non-categorical columns of ``x`` by the column mean."""
    frame = pd.DataFrame(x)
    categorical_columns = set(categorical)
    for col in range(x.shape[1]):
        if col in categorical_columns:
            continue
        column = x[:, col]
        # np.asarray keeps this an array even on NumPy versions that answer
        # a numeric-vs-string comparison with a single plain bool.
        missing = np.asarray(column == "?", dtype=bool)
        if not missing.any():
            continue
        # Average the observed entries only; selecting them with the mask
        # avoids relying on how "?" sorts relative to the numeric strings.
        observed = [float(value) for value in column[~missing]]
        average = round(mean(observed), 2)
        frame.iloc[np.flatnonzero(missing), col] = average
    # Always return the DataFrame round-trip, as the clustering step
    # received before, whether or not anything was filled in.
    return np.asarray(frame)


def do_kr(x, y, nclusters, verbose, n_init, categorical=None):
    """Cluster ``x`` with ``kpro.KPrototypes``, score it and profile the run.

    Missing entries (the string ``"?"``) in the non-categorical columns are
    first replaced by the mean of the remaining values of that column,
    rounded to 2 decimals. The model is then created with ``init='random'``
    and ``max_iter=1`` and fitted; its ``labels_`` are compared with ``y``
    through ``evaluation.nmi``, ``evaluation.purity`` and
    ``homogeneity_completeness_v_measure``. Wall-clock time and traced
    memory are measured around imputation, fit and scoring.

    Args:
        x: Two-dimensional NumPy array (it is indexed as ``x[:, i]`` and its
            ``shape`` is read). It is not modified; a filled copy is used.
        y: Reference labels the predicted labels are scored against
            (presumably the ground truth -- confirm with the caller).
        nclusters: Forwarded to the model as ``n_clusters``.
        verbose: Forwarded to the model; the report is printed only when
            this equals 1.
        n_init: Forwarded to the model.
        categorical: Indices of the categorical columns; they are skipped
            by the imputation and passed to ``fit_predict``. Defaults to
            ``[0, 3, 4, 5, 6, 8, 9, 11, 12]``, the list this function
            previously hard-coded.

    Returns:
        list: ``[purity, nmi, homogeneity, completeness, v_measure,
        elapsed_seconds, peak_memory_mb]``, each rounded to 3 decimals.
    """
    if categorical is None:
        categorical = [0, 3, 4, 5, 6, 8, 9, 11, 12]
    else:
        categorical = list(categorical)

    start_time = time()
    tracemalloc.start()
    try:
        # Fill in missing values of the numeric attributes in advance.
        filled = _fill_missing_numeric(x, categorical)

        model = kpro.KPrototypes(
            n_clusters=nclusters,
            max_iter=1,
            init='random',
            n_init=n_init,
            verbose=verbose,
        )
        model.fit_predict(filled, categorical=categorical)

        # The Rand score is evaluated but is neither printed nor returned;
        # the call is kept so the measured work is unchanged.
        evaluation.rand(model.labels_, y)
        nmi = evaluation.nmi(model.labels_, y)
        purity = evaluation.purity(model.labels_, y)
        homogeneity, completeness, v_measure = (
            homogeneity_completeness_v_measure(y, model.labels_))

        elapsed_seconds = time() - start_time
        # get_traced_memory() reports the allocations made since start();
        # get_tracemalloc_memory() would only report tracemalloc's own
        # bookkeeping overhead. The peak is used as the memory figure.
        _, peak_bytes = tracemalloc.get_traced_memory()
    finally:
        # Always switch tracing off, even when a step above fails.
        tracemalloc.stop()

    memory_mb = peak_bytes / 1024 / 1024

    # One (format template, value) pair per figure, in output order.
    report = (
        ("Purity = {:8.3f}", purity),
        ("NMI = {:8.3f}", nmi),
        ("Homogenity = {:8.3f}", homogeneity),
        ("Completeness = {:8.3f}", completeness),
        ("V-measure = {:8.3f}", v_measure),
        ("Elapsed Time = {:8.3f} secs", elapsed_seconds),
        ("Memory usage = {:8.3f} MB", memory_mb),
    )

    if verbose == 1:
        for template, value in report:
            print(template.format(value))

    return [round(value, 3) for _, value in report]