예제 #1
0
def do_kr(x, y, nclusters=4, verbose=1, use_global_attr_count=1, n_init=10):
    """Cluster ``x`` with k-representatives and score the result against ``y``.

    Fits ``k_center1.KRepresentative`` with random initialisation on ``x`` and
    compares the resulting ``labels_`` with the reference labels ``y``.

    Args:
        x: Data handed to ``fit_predict``.
        y: Reference labels used by every evaluation metric.
        nclusters: Number of clusters requested from the estimator.
        verbose: Forwarded to the estimator; when it equals 1 the scores are
            printed as well.
        use_global_attr_count: Forwarded unchanged to the estimator.
        n_init: Forwarded unchanged to the estimator.

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure]`` with every
        entry rounded to three decimals.
    """
    kr = k_center1.KRepresentative(n_clusters=nclusters,
                                   init='random',
                                   n_init=n_init,
                                   verbose=verbose,
                                   use_global_attr_count=use_global_attr_count)
    kr.fit_predict(x)
    labels = kr.labels_

    # ``evaluation.rand`` is intentionally not called: its score is neither
    # printed nor part of the returned list.
    purity = evaluation.purity(labels, y)
    nmi = evaluation.nmi(labels, y)
    homogenity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, labels)

    # Display name / value pairs, in the order they are printed and returned.
    scores = (
        ("Purity", purity),
        ("NMI", nmi),
        ("Homogenity", homogenity),
        ("Completeness", completeness),
        ("V-measure", v_measure),
    )
    if verbose == 1:
        for label, value in scores:
            print("{} = {:8.3f}".format(label, value))

    return [round(value, 3) for _, value in scores]
예제 #2
0
파일: k_modes.py 프로젝트: ClarkDinh/k-CMM
def do_kr(x, y, nclusters=4, verbose=1, n_init=10):
    """Cluster ``x`` with k-modes and score the result against ``y``.

    Fits ``kmodes.KModes`` (``init='Huang'``, ``max_iter=1``) on ``x`` and
    compares the resulting ``labels_`` with the reference labels ``y``.

    Args:
        x: Data handed to ``fit_predict``.
        y: Reference labels used by every evaluation metric.
        nclusters: Number of clusters requested from the estimator.
        verbose: Forwarded to the estimator; when it equals 1 the scores are
            printed as well.
        n_init: Forwarded unchanged to the estimator.

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure]`` with every
        entry rounded to three decimals.
    """
    kr = kmodes.KModes(n_clusters=nclusters,
                       max_iter=1,
                       init='Huang',
                       n_init=n_init,
                       verbose=verbose)
    kr.fit_predict(x)
    labels = kr.labels_

    # ``evaluation.rand`` is intentionally not called: its score is neither
    # printed nor part of the returned list.
    purity = evaluation.purity(labels, y)
    nmi = evaluation.nmi(labels, y)
    homogenity, completeness, v_measure = homogeneity_completeness_v_measure(
        y, labels)

    # Display name / value pairs, in the order they are printed and returned.
    scores = (
        ("Purity", purity),
        ("NMI", nmi),
        ("Homogenity", homogenity),
        ("Completeness", completeness),
        ("V-measure", v_measure),
    )
    if verbose == 1:
        for label, value in scores:
            print("{} = {:8.3f}".format(label, value))

    return [round(value, 3) for _, value in scores]
예제 #3
0
def do_kr(x, y, nclusters, verbose, use_global_attr_count, n_init):
    """Cluster ``x`` with KCMM, score it against ``y`` and profile the run.

    Wall-clock time and traced memory cover the estimator construction, the
    fit and the metric computation.

    Args:
        x: Data handed to ``fit_predict``.
        y: Reference labels used by every evaluation metric.
        nclusters: Number of clusters requested from the estimator.
        verbose: Forwarded to the estimator; when it equals 1 the scores and
            the profiling figures are printed as well.
        use_global_attr_count: Forwarded unchanged to the estimator.
        n_init: Forwarded unchanged to the estimator.

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure, elapsed_seconds,
        memory_mb]`` with every entry rounded to three decimals.
        ``memory_mb`` is the peak size of the memory blocks traced by
        ``tracemalloc`` during the run, divided by 1024**2.
    """
    start_time = time()
    tracemalloc.start()
    try:
        # Column indices handed to KCMM as the categorical attributes.
        categorical = [0, 3, 4, 5, 6, 8, 9, 11, 12]
        kr = KCMM(categorical,
                  n_clusters=nclusters,
                  init='random',
                  n_init=n_init,
                  verbose=verbose,
                  use_global_attr_count=use_global_attr_count)
        kr.fit_predict(x)
        labels = kr.labels_

        # ``evaluation.rand`` is intentionally not called: its score is
        # neither printed nor part of the returned list.
        purity = evaluation.purity(labels, y)
        nmi = evaluation.nmi(labels, y)
        homogenity, completeness, v_measure = (
            homogeneity_completeness_v_measure(y, labels))

        elapsedTime = time() - start_time
        # get_traced_memory() reports the blocks allocated by the traced code
        # as (current, peak); get_tracemalloc_memory() would only report the
        # tracer's own bookkeeping overhead.
        _, peak_bytes = tracemalloc.get_traced_memory()
    finally:
        # Always switch tracing off again, even when clustering fails.
        tracemalloc.stop()
    memoryUsage = peak_bytes / 1024 / 1024

    # Display name / value pairs, in the order they are printed and returned.
    scores = (
        ("Purity", purity),
        ("NMI", nmi),
        ("Homogenity", homogenity),
        ("Completeness", completeness),
        ("V-measure", v_measure),
    )
    if verbose == 1:
        for label, value in scores:
            print("{} = {:8.3f}".format(label, value))
        print("Elapsed Time = {:8.3f} secs".format(elapsedTime))
        print("Memory usage = {:8.3f} MB".format(memoryUsage))

    result = [round(value, 3) for _, value in scores]
    result.extend([round(elapsedTime, 3), round(memoryUsage, 3)])
    return result
예제 #4
0
def _fill_missing_numeric(x, categorical):
    """Return ``x`` with "?" in non-categorical columns replaced by the mean.

    For every column index not listed in ``categorical``, the entries equal to
    "?" are replaced by the mean of the remaining entries of that column
    (converted with ``float`` and rounded to two decimals). Columns without
    any "?" are left untouched.
    """
    frame = pd.DataFrame(x)
    for col in range(x.shape[1]):
        if col in categorical:
            continue
        column = x[:, col]
        # np.asarray keeps this an array even if the comparison collapses to
        # a scalar (e.g. for a purely numeric column).
        missing = np.asarray(column == "?")
        if not missing.any():
            continue
        observed = [float(value) for value in column[~missing]]
        frame.iloc[missing, col] = round(mean(observed), 2)
    return np.asarray(frame)


def do_kr(x, y, nclusters, verbose, n_init):
    """Cluster ``x`` with k-prototypes, score it against ``y`` and profile it.

    Missing values ("?") in the non-categorical columns are first replaced by
    the column mean, then ``kpro.KPrototypes`` (``init='random'``,
    ``max_iter=1``) is fitted. Wall-clock time and traced memory cover the
    imputation, the fit and the metric computation.

    Args:
        x: Two-dimensional array indexed as ``x[:, i]``; missing entries are
            marked with the string "?".
        y: Reference labels used by every evaluation metric.
        nclusters: Number of clusters requested from the estimator.
        verbose: Forwarded to the estimator; when it equals 1 the scores and
            the profiling figures are printed as well.
        n_init: Forwarded unchanged to the estimator.

    Returns:
        ``[purity, nmi, homogeneity, completeness, v_measure, elapsed_seconds,
        memory_mb]`` with every entry rounded to three decimals.
        ``memory_mb`` is the peak size of the memory blocks traced by
        ``tracemalloc`` during the run, divided by 1024**2.
    """
    start_time = time()
    tracemalloc.start()
    try:
        # Column indices passed to fit_predict as the categorical attributes.
        attrList = [0, 3, 4, 5, 6, 8, 9, 11, 12]
        # Fill in missing values of the numeric attributes in advance.
        x = _fill_missing_numeric(x, attrList)

        kr = kpro.KPrototypes(n_clusters=nclusters,
                              max_iter=1,
                              init='random',
                              n_init=n_init,
                              verbose=verbose)
        kr.fit_predict(x, categorical=attrList)
        labels = kr.labels_

        # ``evaluation.rand`` is intentionally not called: its score is
        # neither printed nor part of the returned list.
        purity = evaluation.purity(labels, y)
        nmi = evaluation.nmi(labels, y)
        homogenity, completeness, v_measure = (
            homogeneity_completeness_v_measure(y, labels))

        elapsedTime = time() - start_time
        # get_traced_memory() reports the blocks allocated by the traced code
        # as (current, peak); get_tracemalloc_memory() would only report the
        # tracer's own bookkeeping overhead.
        _, peak_bytes = tracemalloc.get_traced_memory()
    finally:
        # Always switch tracing off again, even when clustering fails.
        tracemalloc.stop()
    memoryUsage = peak_bytes / 1024 / 1024

    # Display name / value pairs, in the order they are printed and returned.
    scores = (
        ("Purity", purity),
        ("NMI", nmi),
        ("Homogenity", homogenity),
        ("Completeness", completeness),
        ("V-measure", v_measure),
    )
    if verbose == 1:
        for label, value in scores:
            print("{} = {:8.3f}".format(label, value))
        print("Elapsed Time = {:8.3f} secs".format(elapsedTime))
        print("Memory usage = {:8.3f} MB".format(memoryUsage))

    result = [round(value, 3) for _, value in scores]
    result.extend([round(elapsedTime, 3), round(memoryUsage, 3)])
    return result
예제 #5
0
for d in dataset:
    for i in xrange(0, cls_idx):
        d.tuple[i] = float(d.tuple[i] - mins[i].tuple[i]) / (maxs[i].tuple[i] -
                                                             mins[i].tuple[i])
    f_norm.write(str(d.tuple))
    f_norm.write('\n')

k = int(sys.argv[1])
eps = float(sys.argv[2])

cluster = dbscan.dbscan(dataset, eps, k)
if len(cluster) == 0:
    print 'k:', k, 'no. of cluster:', len(cluster)
    print

pure = evaluation.purity(cluster, len(dataset))
NMI = evaluation.NMI(cluster, dataset)
RI = evaluation.RI(cluster, dataset)

cp = [len(c) for c in cluster]

f_out = open("output.txt", 'w')

for i in xrange(0, len(cluster)):
    print 'cluster:', i, 'no. of pt. in cluster:', cp[i]
    for c in cluster[i]:
        s = str(i) + " " + str(c.cls)
        f_out.write(s)
        f_out.write('\n')

f_out.close()
예제 #6
0
            min_index=0
            min_value=1e100
            data=self.data[i,:]
            for j in range(0,len(self.center)):
                square=self.distance(self.center[j],data)
                if(square<min_value):
                    min_index=j
                    min_value=square
            predict_index[i]=min_index

        return predict_index

if __name__ == '__main__':
    # Driver: run k-means on the PCA-reduced frog data and print the
    # evaluation scores after every update step.
    clusters = 20
    iteration = 100

    (data, label_family, label_genus,
     label_species, label_record) = data_reader.read_frog_data()
    data = PCA.pca(data, 10)
    print(data.shape)

    # Per-row index of the smallest entry of the negated family matrix.
    label = np.argmin(-label_family, axis=1)

    predictor = k_means(data, label, clusters)
    predictor.init_center()

    for step in range(iteration):
        predictor.update()
        predict = predictor.predict_data()

        accuracy = evaluation.multi_label_accuracy(predict, label, clusters)
        purity_score = evaluation.purity(predict, label, clusters)
        F_score = evaluation.F_score(predict, label, clusters)
        F_score_output = evaluation.format_F_score(F_score)

        print("the acc in it %d is %.4f %.4f" % (step, accuracy, purity_score))
        print("the Fscore is " + F_score_output)