예제 #1
0
def do_kr(x, y, nclusters=4, verbose=1, use_global_attr_count=1, n_init=10):
    """Cluster ``x`` with k-representatives and score the result against ``y``.

    Fits ``k_center1.KRepresentative`` (``init='random'``) on ``x`` and
    compares the resulting ``labels_`` with the reference labels ``y``.

    Args:
        x: Data passed unchanged to ``KRepresentative.fit_predict``.
        y: Reference labels the cluster assignments are compared with.
        nclusters: Number of clusters requested from the model.
        verbose: Forwarded to the model; the metrics are additionally
            printed only when this is exactly ``1``.
        use_global_attr_count: Forwarded unchanged to the model.
        n_init: Forwarded to the model as ``n_init``.

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure]``, each
        rounded to three decimals.
    """
    kr = k_center1.KRepresentative(
        n_clusters=nclusters,
        init='random',
        n_init=n_init,
        verbose=verbose,
        use_global_attr_count=use_global_attr_count,
    )
    kr.fit_predict(x)

    nmi = evaluation.nmi(kr.labels_, y)
    purity = evaluation.purity(kr.labels_, y)
    # Note the argument order: reference labels first, predictions second.
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, kr.labels_)

    # Labels are kept exactly as printed before (including the historical
    # "Homogenity" spelling) so existing log consumers keep working.
    scores = [
        ("Purity", purity),
        ("NMI", nmi),
        ("Homogenity", homogeneity),
        ("Completeness", completeness),
        ("V-measure", v_measure),
    ]
    if verbose == 1:
        for label, value in scores:
            print("{} = {:8.3f}".format(label, value))

    return [round(value, 3) for _, value in scores]
예제 #2
0
def do_kr(x, y, nclusters, verbose, use_global_attr_count, n_init,
          categorical=None):
    """Cluster ``x`` with ``KCMM`` and report quality, run time and memory.

    Args:
        x: Data passed unchanged to ``KCMM.fit_predict``.
        y: Reference labels the cluster assignments are compared with.
        nclusters: Number of clusters requested from the model.
        verbose: Forwarded to the model; the metrics are additionally
            printed only when this is exactly ``1``.
        use_global_attr_count: Forwarded unchanged to the model.
        n_init: Forwarded to the model as ``n_init``.
        categorical: Column indices handed to ``KCMM`` as its first
            argument. Defaults to ``[0, 3, 4, 5, 6, 8, 9, 11, 12]``
            (presumably the categorical columns of the benchmark data set
            this script was written for -- confirm against the caller).

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure,
        elapsed_seconds, peak_memory_mb]``, each rounded to three decimals.
    """
    if categorical is None:
        categorical = [0, 3, 4, 5, 6, 8, 9, 11, 12]

    start_time = time()
    tracemalloc.start()
    try:
        kr = KCMM(categorical, n_clusters=nclusters, init='random',
                  n_init=n_init, verbose=verbose,
                  use_global_attr_count=use_global_attr_count)
        kr.fit_predict(x)

        nmi = evaluation.nmi(kr.labels_, y)
        purity = evaluation.purity(kr.labels_, y)
        # Note the argument order: reference labels first, predictions second.
        homogeneity, completeness, v_measure = \
            homogeneity_completeness_v_measure(y, kr.labels_)

        elapsed_time = time() - start_time
        # get_traced_memory() reports the traced allocations as
        # (current, peak) in bytes. get_tracemalloc_memory() would only
        # report the size of tracemalloc's own bookkeeping structures.
        _, peak_bytes = tracemalloc.get_traced_memory()
    finally:
        # Always switch tracing off, even when fitting or scoring fails,
        # so later code is not slowed down by a left-over tracer.
        tracemalloc.stop()

    memory_usage = peak_bytes / 1024 / 1024  # bytes -> MiB

    if verbose == 1:
        print("Purity = {:8.3f}".format(purity))
        print("NMI = {:8.3f}".format(nmi))
        print("Homogenity = {:8.3f}".format(homogeneity))
        print("Completeness = {:8.3f}".format(completeness))
        print("V-measure = {:8.3f}".format(v_measure))
        print("Elapsed Time = {:8.3f} secs".format(elapsed_time))
        print("Memory usage = {:8.3f} MB".format(memory_usage))

    return [
        round(purity, 3),
        round(nmi, 3),
        round(homogeneity, 3),
        round(completeness, 3),
        round(v_measure, 3),
        round(elapsed_time, 3),
        round(memory_usage, 3),
    ]
예제 #3
0
파일: k_modes.py 프로젝트: ClarkDinh/k-CMM
def do_kr(x, y, nclusters=4, verbose=1, n_init=10):
    """Cluster ``x`` with k-modes and score the result against ``y``.

    Fits ``kmodes.KModes`` (``init='Huang'``) on ``x`` and compares the
    resulting ``labels_`` with the reference labels ``y``.

    Args:
        x: Data passed unchanged to ``KModes.fit_predict``.
        y: Reference labels the cluster assignments are compared with.
        nclusters: Number of clusters requested from the model.
        verbose: Forwarded to the model; the metrics are additionally
            printed only when this is exactly ``1``.
        n_init: Forwarded to the model as ``n_init``.

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure]``, each
        rounded to three decimals.
    """
    # NOTE(review): max_iter is pinned to 1 here -- presumably a deliberate
    # benchmark setting; confirm before changing.
    kr = kmodes.KModes(
        n_clusters=nclusters,
        max_iter=1,
        init='Huang',
        n_init=n_init,
        verbose=verbose,
    )
    kr.fit_predict(x)

    nmi = evaluation.nmi(kr.labels_, y)
    purity = evaluation.purity(kr.labels_, y)
    # Note the argument order: reference labels first, predictions second.
    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, kr.labels_)

    # Labels are kept exactly as printed before (including the historical
    # "Homogenity" spelling) so existing log consumers keep working.
    scores = [
        ("Purity", purity),
        ("NMI", nmi),
        ("Homogenity", homogeneity),
        ("Completeness", completeness),
        ("V-measure", v_measure),
    ]
    if verbose == 1:
        for label, value in scores:
            print("{} = {:8.3f}".format(label, value))

    return [round(value, 3) for _, value in scores]
예제 #4
0
def _fill_missing_numeric(x, categorical):
    """Replace "?" in the non-categorical columns of ``x`` by the column mean.

    The mean is taken over the entries of the column that are not "?",
    converted with ``float`` and rounded to two decimals. Columns listed in
    ``categorical`` are left untouched. Returns the data as an array built
    from a ``pandas.DataFrame`` of ``x``.
    """
    frame = pd.DataFrame(x)
    categorical = set(categorical)
    for col in range(x.shape[1]):
        if col in categorical:
            continue
        # Locate the "?" markers directly instead of relying on how "?"
        # happens to sort relative to numeric strings.
        missing = x[:, col] == "?"
        if not np.any(missing):
            continue
        observed = [float(value) for value in x[~missing, col]]
        average = round(mean(observed), 2)
        frame.iloc[missing, col] = average
    return np.asarray(frame)


def do_kr(x, y, nclusters, verbose, n_init, categorical=None):
    """Cluster ``x`` with k-prototypes and report quality, time and memory.

    Missing values (marked "?") in the non-categorical columns are first
    replaced by the column mean, then ``kpro.KPrototypes``
    (``init='random'``) is fitted and its ``labels_`` are compared with the
    reference labels ``y``.

    Args:
        x: Two-dimensional array (indexed as ``x[:, i]``) holding the data.
        y: Reference labels the cluster assignments are compared with.
        nclusters: Number of clusters requested from the model.
        verbose: Forwarded to the model; the metrics are additionally
            printed only when this is exactly ``1``.
        n_init: Forwarded to the model as ``n_init``.
        categorical: Indices of the categorical columns, passed to
            ``fit_predict``. Defaults to ``[0, 3, 4, 5, 6, 8, 9, 11, 12]``
            (presumably the categorical columns of the benchmark data set
            this script was written for -- confirm against the caller).

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure,
        elapsed_seconds, peak_memory_mb]``, each rounded to three decimals.
    """
    if categorical is None:
        categorical = [0, 3, 4, 5, 6, 8, 9, 11, 12]

    start_time = time()
    tracemalloc.start()
    try:
        # Imputation is part of the measured work, as it was originally.
        x = _fill_missing_numeric(x, categorical)

        # NOTE(review): max_iter is pinned to 1 here -- presumably a
        # deliberate benchmark setting; confirm before changing.
        kr = kpro.KPrototypes(
            n_clusters=nclusters,
            max_iter=1,
            init='random',
            n_init=n_init,
            verbose=verbose,
        )
        kr.fit_predict(x, categorical=categorical)

        nmi = evaluation.nmi(kr.labels_, y)
        purity = evaluation.purity(kr.labels_, y)
        # Note the argument order: reference labels first, predictions second.
        homogeneity, completeness, v_measure = \
            homogeneity_completeness_v_measure(y, kr.labels_)

        elapsed_time = time() - start_time
        # get_traced_memory() reports the traced allocations as
        # (current, peak) in bytes. get_tracemalloc_memory() would only
        # report the size of tracemalloc's own bookkeeping structures.
        _, peak_bytes = tracemalloc.get_traced_memory()
    finally:
        # Always switch tracing off, even when fitting or scoring fails,
        # so later code is not slowed down by a left-over tracer.
        tracemalloc.stop()

    memory_usage = peak_bytes / 1024 / 1024  # bytes -> MiB

    if verbose == 1:
        print("Purity = {:8.3f}".format(purity))
        print("NMI = {:8.3f}".format(nmi))
        print("Homogenity = {:8.3f}".format(homogeneity))
        print("Completeness = {:8.3f}".format(completeness))
        print("V-measure = {:8.3f}".format(v_measure))
        print("Elapsed Time = {:8.3f} secs".format(elapsed_time))
        print("Memory usage = {:8.3f} MB".format(memory_usage))

    return [
        round(purity, 3),
        round(nmi, 3),
        round(homogeneity, 3),
        round(completeness, 3),
        round(v_measure, 3),
        round(elapsed_time, 3),
        round(memory_usage, 3),
    ]