def Limeng(n, b):
    """Add bounded Laplace noise to sorted trajectory-pattern counts and score it.

    Pipeline:
      1. Load the trajectories with ``readData()`` and, for each of the first
         20 time steps, cluster columns ``2:4`` with ``KMeans`` (``n`` clusters,
         ``random_state=9``).
      2. Encode every trajectory as a string such as ``"L3L0L7..."`` built from
         its 20 cluster labels and count how often each string occurs.
      3. Pad the count table with randomly generated zero-count patterns until
         it holds as many distinct patterns as there are trajectories.
      4. Sort the counts in descending order, add one bounded Laplace sample to
         each, and smooth the noisy counts with a non-increasing isotonic
         regression.
      5. Compare the sorted true counts with the smoothed noisy counts.

    Args:
        n: Number of KMeans clusters per time step; also the number of distinct
            label values used when generating random padding patterns.
        b: Scale argument passed to ``np.random.laplace``.

    Returns:
        Tuple ``(NMI, hau_dis)``: the value of
        ``metrics.normalized_mutual_info_score`` and of ``hausdorff_distance``
        (euclidean) between the sorted true counts and the smoothed noisy
        counts, each passed to ``hausdorff_distance`` as a column vector.

    Raises:
        ValueError: If fewer than ``len(readData())`` distinct patterns can be
            formed from ``n`` labels over 20 steps, in which case the padding
            step could never finish.
    """
    num_steps = 20  # number of time steps clustered per trajectory
    # NOTE(review): assumes readData() returns a NumPy array indexable as
    # [trajectory, step, feature] with >= 20 steps and >= 4 features -- confirm.
    trajectories = readData()
    # Derive the trajectory count from the data instead of hard-coding it, so
    # the label matrix always matches what KMeans returns.
    num_paths = len(trajectories)

    # Allocate integer labels directly rather than reinterpreting a float
    # buffer by assigning to ``.dtype``.
    labels = np.zeros((num_paths, num_steps), dtype=np.int64)
    for step in range(num_steps):
        clf = KMeans(n_clusters=n, random_state=9)
        labels[:, step] = clf.fit_predict(trajectories[:, step, 2:4])

    # One pattern string per trajectory, e.g. "L3L0L7...".
    newpaths = ["".join("L" + str(label) for label in row) for row in labels]
    newpathsdict = dict(Counter(newpaths))

    # u: mean count over the patterns that were actually observed.
    u = mean(list(newpathsdict.values()))

    # Pad with randomly generated zero-count patterns up to num_paths entries.
    # NOTE(review): the guard assumes `random` is the standard-library module,
    # whose randint includes the upper bound -- confirm.
    if n ** num_steps < num_paths:
        raise ValueError(
            "n=%r yields fewer than %d distinct patterns; cannot pad the count table"
            % (n, num_paths)
        )
    while len(newpathsdict) < num_paths:
        key = "".join(
            "L" + str(random.randint(0, n - 1)) for _ in range(num_steps)
        )
        newpathsdict.setdefault(key, 0)

    valueslist = sorted(newpathsdict.values(), reverse=True)

    # Draw the noise: Laplace samples located at u, rejected until they fall
    # inside [0, 2u].
    lapnoise = []
    for _ in range(num_paths):
        ln = np.random.laplace(u, b)
        while ln > 2 * u or ln < 0:
            ln = np.random.laplace(u, b)
        lapnoise.append(ln)

    # Add the noise to the descending true counts.
    vcarr = np.array(valueslist)
    noisecount = vcarr + np.array(lapnoise)

    # Isotonic regression: force the noisy counts to be non-increasing again.
    x = np.arange(num_paths)
    y_ = IsotonicRegression(increasing=False).fit_transform(x, noisecount)

    NMI = metrics.normalized_mutual_info_score(vcarr, y_)

    # Column vectors: every count is treated as a one-dimensional point.
    # reshape() replaces the in-place resize(), which can fail on arrays that
    # are referenced elsewhere or do not own their data.
    hau_dis = hausdorff_distance(
        vcarr.reshape(-1, 1), y_.reshape(-1, 1), distance="euclidean"
    )
    return NMI, hau_dis
# Example no. 2
# 0
def myMechanism(n, e):
    """Perturb trajectory-pattern counts through a Haar-tree transform and score it.

    Pipeline:
      1. Load the trajectories with ``readData()`` and, for each of the first
         20 time steps, cluster columns ``2:4`` with ``KMeans`` (``n`` clusters,
         ``random_state=9``).
      2. Encode every trajectory as a string such as ``"L3L0L7..."`` built from
         its 20 cluster labels and count how often each string occurs.
      3. Pad the count table with randomly generated zero-count patterns until
         it holds as many distinct patterns as there are trajectories.
      4. Zero-pad the counts to a power-of-two length, transform them with
         ``buildHaarTreeList``, add ``addNoise(len, e)`` element-wise and
         rebuild noisy counts with ``rebuildHaarTreeList``.
      5. Rank the patterns with ``list_sort_by_value`` on the noisy counts,
         take the true counts in that order and fit a non-increasing isotonic
         regression to them.
      6. Compare the reordered true counts with their isotonic fit.

    Args:
        n: Number of KMeans clusters per time step; also the number of distinct
            label values used when generating random padding patterns.
        e: Second argument forwarded to ``addNoise`` (presumably the privacy
            budget epsilon -- TODO confirm against ``addNoise``).

    Returns:
        Tuple ``(NMI, hau_dis, mapeyy, maeyy)``: normalized mutual information,
        euclidean ``hausdorff_distance`` (each array passed as a single row),
        ``mape`` and ``metrics.mean_absolute_error`` between the reordered true
        counts and their isotonic fit.

    Raises:
        ValueError: If fewer than ``len(readData())`` distinct patterns can be
            formed from ``n`` labels over 20 steps, in which case the padding
            step could never finish.
    """
    num_steps = 20  # number of time steps clustered per trajectory
    # NOTE(review): assumes readData() returns a NumPy array indexable as
    # [trajectory, step, feature] with >= 20 steps and >= 4 features -- confirm.
    trajectories = readData()
    # Derive the trajectory count from the data instead of hard-coding it, so
    # the label matrix always matches what KMeans returns.
    num_paths = len(trajectories)

    # Allocate integer labels directly rather than reinterpreting a float
    # buffer by assigning to ``.dtype``.
    labels = np.zeros((num_paths, num_steps), dtype=np.int64)
    for step in range(num_steps):
        clf = KMeans(n_clusters=n, random_state=9)
        labels[:, step] = clf.fit_predict(trajectories[:, step, 2:4])

    # One pattern string per trajectory, e.g. "L3L0L7...".
    newpaths = ["".join("L" + str(label) for label in row) for row in labels]
    newpathsdict = dict(Counter(newpaths))

    # Pad with randomly generated zero-count patterns up to num_paths entries.
    # NOTE(review): the guard assumes `random` is the standard-library module,
    # whose randint includes the upper bound -- confirm.
    if n ** num_steps < num_paths:
        raise ValueError(
            "n=%r yields fewer than %d distinct patterns; cannot pad the count table"
            % (n, num_paths)
        )
    while len(newpathsdict) < num_paths:
        key = "".join(
            "L" + str(random.randint(0, n - 1)) for _ in range(num_steps)
        )
        newpathsdict.setdefault(key, 0)

    # Zero-pad to a power-of-two length for the Haar transform and rebuild.
    # bit_length() gives the smallest power of two strictly greater than the
    # current length -- the same rule as 2 ** (int(log2(len)) + 1), but without
    # floating-point rounding.
    values = list(newpathsdict.values())
    padded_len = 1 << len(values).bit_length()
    values.extend([0] * (padded_len - len(values)))

    temp = buildHaarTreeList(values)
    noise = addNoise(len(temp), e)
    c = [temp[i] + noise[i] for i in range(len(temp))]
    noisecounts = rebuildHaarTreeList(c)

    # Keep the true counts and build the noisy table in the same key order;
    # the i-th pattern receives the i-th rebuilt count. A shallow copy is
    # enough because the table maps str -> int.
    true_counts = dict(newpathsdict)
    noisy_counts = {
        key: noisecounts[i] for i, key in enumerate(newpathsdict)
    }

    # Rank the patterns by noisy count, then read the true counts in that order.
    newpathslist = list_sort_by_value(noisy_counts)
    truecounts = [true_counts.get(item) for item in newpathslist]

    # Consistency constraint: isotonic (non-increasing) regression over the
    # true counts taken in noisy-rank order.
    y = np.array(truecounts)
    x = np.arange(len(y))
    y_ = IsotonicRegression(increasing=False).fit_transform(x, y)

    NMI = metrics.normalized_mutual_info_score(y_, y)
    mapeyy = mape(y_, y)
    maeyy = metrics.mean_absolute_error(y, y_)

    # Hausdorff distance with each count vector passed as one row, i.e. as a
    # single point. reshape() replaces the in-place resize(), which can fail on
    # arrays that are referenced elsewhere or do not own their data.
    hau_dis = hausdorff_distance(
        y.reshape(1, -1), y_.reshape(1, -1), distance="euclidean"
    )

    return NMI, hau_dis, mapeyy, maeyy
def Limeng(n, b):
    """Perturb trajectory-pattern counts with bounded Laplace noise and score it.

    Pipeline:
      1. Load the trajectories with ``readData()`` and, for each of the first
         20 time steps, cluster columns ``2:4`` with ``KMeans`` (``n`` clusters,
         ``random_state=9``).
      2. Encode every trajectory as a string such as ``"L3L0L7..."`` built from
         its 20 cluster labels and count how often each string occurs.
      3. Pad the count table with randomly generated zero-count patterns until
         it holds as many distinct patterns as there are trajectories.
      4. Add one bounded Laplace sample to every count, rank the patterns with
         ``list_sort_by_value`` on the noisy counts, take the true counts in
         that order and fit a non-increasing isotonic regression to them.
      5. Compare the reordered true counts with their isotonic fit.

    Args:
        n: Number of KMeans clusters per time step; also the number of distinct
            label values used when generating random padding patterns.
        b: Scale argument passed to ``np.random.laplace``.

    Returns:
        Tuple ``(NMI, hau_dis, maeyy)``: normalized mutual information,
        euclidean ``hausdorff_distance`` (each array passed as a single row)
        and ``metrics.mean_absolute_error`` between the reordered true counts
        and their isotonic fit.

    Raises:
        ValueError: If fewer than ``len(readData())`` distinct patterns can be
            formed from ``n`` labels over 20 steps, in which case the padding
            step could never finish.
    """
    num_steps = 20  # number of time steps clustered per trajectory
    # NOTE(review): assumes readData() returns a NumPy array indexable as
    # [trajectory, step, feature] with >= 20 steps and >= 4 features -- confirm.
    trajectories = readData()
    # Derive the trajectory count from the data instead of hard-coding it, so
    # the label matrix always matches what KMeans returns.
    num_paths = len(trajectories)

    # Allocate integer labels directly rather than reinterpreting a float
    # buffer by assigning to ``.dtype``.
    labels = np.zeros((num_paths, num_steps), dtype=np.int64)
    for step in range(num_steps):
        clf = KMeans(n_clusters=n, random_state=9)
        labels[:, step] = clf.fit_predict(trajectories[:, step, 2:4])

    # One pattern string per trajectory, e.g. "L3L0L7...".
    newpaths = ["".join("L" + str(label) for label in row) for row in labels]
    newpathsdict = dict(Counter(newpaths))

    # u: mean count over the patterns that were actually observed.
    u = mean(list(newpathsdict.values()))

    # Pad with randomly generated zero-count patterns up to num_paths entries.
    # NOTE(review): the guard assumes `random` is the standard-library module,
    # whose randint includes the upper bound -- confirm.
    if n ** num_steps < num_paths:
        raise ValueError(
            "n=%r yields fewer than %d distinct patterns; cannot pad the count table"
            % (n, num_paths)
        )
    while len(newpathsdict) < num_paths:
        key = "".join(
            "L" + str(random.randint(0, n - 1)) for _ in range(num_steps)
        )
        newpathsdict.setdefault(key, 0)

    # Draw the noise: Laplace samples located at u, rejected until they fall
    # inside [0, 2u].
    lapnoise = []
    for _ in range(num_paths):
        ln = np.random.laplace(u, b)
        while ln > 2 * u or ln < 0:
            ln = np.random.laplace(u, b)
        lapnoise.append(ln)

    # Keep the true counts and build the noisy table in the same key order.
    # A shallow copy is enough because the table maps str -> int.
    true_counts = dict(newpathsdict)
    noisy_counts = {
        key: value + extra
        for (key, value), extra in zip(newpathsdict.items(), lapnoise)
    }

    # Rank the patterns by noisy count, then read the true counts in that order.
    newpathslist = list_sort_by_value(noisy_counts)
    truecountlist = [true_counts.get(item) for item in newpathslist]

    # Isotonic (non-increasing) regression over the reordered true counts.
    y = np.array(truecountlist)
    x = np.arange(len(y))
    y_ = IsotonicRegression(increasing=False).fit_transform(x, y)

    maeyy = metrics.mean_absolute_error(y_, y)
    NMI = metrics.normalized_mutual_info_score(y, y_)

    # Hausdorff distance with each count vector passed as one row, i.e. as a
    # single point. reshape() replaces the in-place resize(), which can fail on
    # arrays that are referenced elsewhere or do not own their data.
    hau_dis = hausdorff_distance(
        y.reshape(1, -1), y_.reshape(1, -1), distance="euclidean"
    )
    return NMI, hau_dis, maeyy