예제 #1
0
# Build dicts_sets: for every key of dicts_set, the set of codes returned by
# mygeohash.encode for that key's cleaned records.
# NOTE(review): inputName, usertupledict, dicts_set, clean and mygeohash are
# not defined in this fragment; they must already exist when it runs.
dicts_sets = {}

# Group the first four comma-separated fields of each line by the third field.
with open(inputName) as fr:
    for line in fr:
        lines = line.strip('\n').split(",")
        tuples = (lines[0], lines[1], lines[2], lines[3])
        usertupledict.setdefault(lines[2], []).append(tuples)

# Run every group through clean().
for i, grouped in usertupledict.items():
    dicts_set[i] = clean(grouped)

# Encode each cleaned record; the set drops duplicate codes within a group.
# The third argument (2) is presumably the geohash precision - TODO confirm.
for i, cleaned in dicts_set.items():
    locationSet = {
        mygeohash.encode(float(j[1]), float(j[0]), 2) for j in cleaned
    }
    dicts_sets[i] = locationSet


def noisyCount(sensitivety, epsilon):
    """Draw one sample of Laplace noise with scale ``sensitivety / epsilon``.

    Two uniform draws are used: ``u1`` picks the sign and ``u2`` is pushed
    through the inverse CDF of an exponential distribution to obtain the
    magnitude.  Despite the name, only the noise value is returned; no count
    is read or modified here.

    Args:
        sensitivety: Numerator of the noise scale.
        epsilon: Denominator of the noise scale; must be non-zero.

    Returns:
        A finite float: positive or zero when ``u1 <= 0.5``, negative or
        zero otherwise.
    """
    beta = sensitivety / epsilon
    u1 = np.random.random()
    u2 = np.random.random()
    # np.random.random() samples the half-open interval [0, 1), so u2 can be
    # exactly 0 while 1 - u2 never is.  Taking the log of 1 - u2 for both
    # signs keeps the result finite; log(u2) would give -inf for u2 == 0.
    magnitude = -beta * np.log(1. - u2)
    if u1 <= 0.5:
        return magnitude
    return -magnitude


def noisyCounts(sensitivety, epsilon):
def getL1(k,e):
    """Score a Laplace-noised histogram of code tallies with ``countL1``.

    Reads ``G://data//map1_14_<k>.txt`` (``k=40`` reads
    ``G://data//map1_14_40.txt``), groups its comma-separated records by
    their third field, passes each group through ``clean`` and encodes the
    resulting records with ``mygeohash.encode(..., 2)``.  Codes whose first
    character is one of ``mygeohash.neighbors('d').values()`` are tallied
    (one vote per group containing the code).  The tallies are turned into a
    histogram - how many codes were seen exactly ``c`` times, ordered by
    ``c`` - and that histogram and its running total are perturbed with
    ``laplace_mech`` and clamped at zero.

    Args:
        k: File-name suffix; the sensitivity passed on is ``int(2 * k)``.
        e: Epsilon forwarded to ``laplace_mech``.

    Returns:
        The result of ``countL1(y, y_noise)`` where ``y`` is the histogram
        and ``y_noise`` its noised, non-negative counterpart.
    """
    from collections import Counter
    from itertools import accumulate

    input_name = "G://data//map1_14_" + str(k) + ".txt"
    sensitivity = int(2 * k)
    epsilon = e

    # Group the first four fields of every record by the third field.
    records_by_key = {}
    with open(input_name) as fr:
        for line in fr:
            if not line.strip():
                # A blank (e.g. trailing) line holds no record; indexing its
                # fields would raise IndexError.
                continue
            fields = line.strip('\n').split(",")
            record = (fields[0], fields[1], fields[2], fields[3])
            records_by_key.setdefault(fields[2], []).append(record)

    cleaned_by_key = {
        key: clean(records) for key, records in records_by_key.items()
    }

    # One set of codes per key, so a code counts at most once per group.
    # The third argument (2) is presumably the geohash precision - TODO confirm.
    codes_by_key = {
        key: {
            mygeohash.encode(float(rec[1]), float(rec[0]), 2)
            for rec in cleaned
        }
        for key, cleaned in cleaned_by_key.items()
    }

    # The neighbour lookup does not depend on the loop variables: do it once.
    allowed_prefixes = tuple(mygeohash.neighbors('d').values())
    code_counts = Counter(
        code
        for codes in codes_by_key.values()
        for code in codes
        if code[0] in allowed_prefixes
    )

    # Histogram of the tallies (tally -> number of codes with that tally),
    # ordered by tally; keys are unique so sorting the items sorts by key.
    histogram = sorted(Counter(code_counts.values()).items())
    y = [n_codes for _, n_codes in histogram]
    z = list(accumulate(y))  # running total of y

    y_noise = laplace_mech(y, sensitivity, epsilon)
    # NOTE(review): z_noise does not feed the return value; the call is kept
    # so the sequence of laplace_mech invocations is unchanged.
    z_noise = laplace_mech(z, sensitivity, epsilon)

    # Noised counts cannot be negative: clamp in place.
    for noisy in (z_noise, y_noise):
        for idx in range(len(noisy)):
            if noisy[idx] < 0:
                noisy[idx] = 0

    return countL1(y, y_noise)
예제 #3
0
# Print summary statistics for one input file: number of lines, number of
# distinct values in the third field, number of values in the third field
# overall, and the number of distinct codes mygeohash.encode returns when its
# third argument is 2, 3 and 4.
inputName = "G://data//map1_14_20.txt"

userset, usercount = set(), []
locationset = set()  # declared but never filled below
locationset2, locationset3, locationset4 = set(), set(), set()

# NOTE(review): this module-level name shadows the builtin sum().
sum = 0
with open(inputName) as fr:
    for sum, line in enumerate(fr, start=1):
        lines = line.strip('\n').split(",")

        # Third argument is presumably the geohash precision - TODO confirm.
        for precision, cells in (
            (2, locationset2),
            (3, locationset3),
            (4, locationset4),
        ):
            cells.add(
                mygeohash.encode(float(lines[1]), float(lines[0]), precision)
            )

        userset.add(lines[2])
        usercount.append(lines[2])

print(sum)
print(len(userset))
print(len(usercount))

print("------------------")
for cells in (locationset2, locationset3, locationset4):
    print(len(cells))