dicts_sets = {} with open(inputName) as fr: for line in fr.readlines(): lines = line.strip('\n').split(",") tuples = (lines[0], lines[1], lines[2], lines[3]) if lines[2] not in usertupledict: usertupledict[lines[2]] = [] usertupledict[lines[2]].append(tuples) for i in usertupledict.keys(): dicts_set[i] = clean(usertupledict[i]) for i in dicts_set.keys(): locationSet = set() for j in dicts_set[i]: locationSet.add(mygeohash.encode(float(j[1]), float(j[0]), 2)) dicts_sets[i] = locationSet def noisyCount(sensitivety, epsilon): beta = sensitivety / epsilon u1 = np.random.random() u2 = np.random.random() if u1 <= 0.5: n_value = -beta * np.log(1. - u2) else: n_value = beta * np.log(u2) return n_value def noisyCounts(sensitivety, epsilon):
def getL1(k, e):
    """Return ``countL1`` of the true and the Laplace-noised cell-count histogram.

    Steps:

    1. Read ``G://data//map1_14_<k>.txt`` (comma-separated, at least four
       fields per line) and group the records by their third field
       (presumably a user id -- confirm against the data format).
    2. Pass each group through ``clean`` and geohash every resulting record
       at precision 2, giving one set of cells per group.
    3. Keep only cells whose first character is among the values of
       ``mygeohash.neighbors('d')`` and count, for each such cell, how many
       groups contain it.
    4. Build the histogram "count value -> number of cells with that count",
       ordered by count value, perturb it with ``laplace_mech`` and clamp
       negative noisy entries to 0.

    Args:
        k: Suffix of the input file name; the sensitivity passed to
            ``laplace_mech`` is ``int(2 * k)``.
        e: Epsilon passed to ``laplace_mech``.

    Returns:
        The value of ``countL1(y, y_noise)`` for the true histogram ``y`` and
        its noisy, clamped counterpart.

    Raises:
        OSError: If the input file cannot be opened.
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If a coordinate field cannot be parsed as a float.
    """
    from collections import Counter

    input_name = "G://data//map1_14_" + str(k) + ".txt"
    sensitivety = int(2 * k)
    epsilon = e

    # Group the raw 4-field records by their third field.
    records_by_user = {}
    with open(input_name) as fr:
        for line in fr:
            line = line.strip('\n')
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = line.split(",")
            record = (fields[0], fields[1], fields[2], fields[3])
            records_by_user.setdefault(fields[2], []).append(record)

    # One set of precision-2 geohash cells per group, from the cleaned records.
    # NOTE(review): assumes clean() yields items indexable like the input
    # tuples, with field 1 / field 0 being the two coordinates encode() expects.
    cells_by_user = {}
    for user, records in records_by_user.items():
        cells_by_user[user] = {
            mygeohash.encode(float(rec[1]), float(rec[0]), 2)
            for rec in clean(records)
        }

    # The neighbour lookup does not depend on the loop variables; do it once.
    neighbor_cells = list(mygeohash.neighbors('d').values())

    # cell -> number of groups whose cell set contains it.  Each group holds a
    # set, so a cell contributes at most once per group.
    counts = Counter(
        cell
        for cells in cells_by_user.values()
        for cell in cells
        if cell[0] in neighbor_cells
    )

    # count value -> number of cells with that count, ordered by count value.
    zone_counts = Counter(counts.values())
    y = [zone_counts[value] for value in sorted(zone_counts)]

    # Perturb a copy: laplace_mech is defined elsewhere, and if it adds noise
    # in place, passing ``y`` itself would make ``y`` and ``y_noise`` the same
    # list and the comparison below meaningless.
    y_noise = laplace_mech(list(y), sensitivety, epsilon)
    for idx, value in enumerate(y_noise):
        if value < 0:
            y_noise[idx] = 0

    return countL1(y, y_noise)
# Summary statistics for one input file: number of records, number of distinct
# values of the third field, and number of distinct geohash cells at
# precisions 2, 3 and 4.
inputName = "G://data//map1_14_20.txt"
# Named line_count rather than ``sum`` so the builtin sum() stays usable in
# the rest of the module.
line_count = 0
userset = set()
usercount = []
# Not populated in this section; kept in case later code fills or reads it.
locationset = set()
locationset2 = set()
locationset3 = set()
locationset4 = set()
with open(inputName) as fr:
    for line in fr:
        line = line.strip('\n')
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        line_count += 1
        lines = line.split(",")
        # Parse the coordinates once per record instead of once per precision.
        # Presumably (latitude, longitude) -- mygeohash.encode's signature is
        # not visible here; the argument order matches the original calls.
        lat, lon = float(lines[1]), float(lines[0])
        locationset2.add(mygeohash.encode(lat, lon, 2))
        locationset3.add(mygeohash.encode(lat, lon, 3))
        locationset4.add(mygeohash.encode(lat, lon, 4))
        userset.add(lines[2])
        usercount.append(lines[2])
print(line_count)
print(len(userset))
print(len(usercount))
print("------------------")
print(len(locationset2))
print(len(locationset3))
print(len(locationset4))