def expKM():
    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + " START")

    p = Params(1000)
    p.select_dataset()
    p.locs, p.users, p.locDict = readCheckins(p)
    p.debug()

    copy_locs = copy.deepcopy(p.locs)
    copy_locDict = copy.deepcopy(p.locDict)

    pool = Pool(processes=len(eps_list))  # only used by the commented-out parallel path below
    params = []
    for M in M_list:
        p.M = M
        global_sen = sensitivity_add(p.C, float(p.C) / p.K)[2] * p.M
        p.locs, p.users = cellStats(p, copy_locs, copy_locDict, global_sen)
        E_actual = shannonEntropy(p.locs)
        param = (p, global_sen, E_actual)
        evalLimitKM(param)
        # params.append((p, global_sen, E_actual))

    # pool.map(evalLimitKM, params)
    # pool.join()

    createGnuData(p, "evalLimitKM", M_list)
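# Note on the commented-out parallel path above: if it were re-enabled,
# multiprocessing.Pool requires close() (or terminate()) before join(),
# and map() already blocks until all results arrive. A minimal sketch,
# assuming evalLimitKM accepts the same (p, global_sen, E_actual) tuples:
#
#   pool = Pool(processes=len(M_list))
#   pool.map(evalLimitKM, params)
#   pool.close()
#   pool.join()
#
# The same pattern applies to the commented-out pool in expM() below.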
def expSensitivity():
    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + " START")

    p = Params(1000)
    p.select_dataset()
    p.locs, p.users, p.locDict = readCheckins(p)
    evalActualSensitivity(p)
def post(self, param_id):
    """Update worker locations and rebuild the worker PSD index."""
    global datasets, tree, all_data
    global eps, percent, com_range, mar, arf, utl, heuristic, subcell, localness, constraint

    workers = tornado.escape.json_decode(self.request.body)
    # print simplejson.dumps(workers)
    # np.fromiter(json.loads(workers), dtype)

    # save data to a file
    # tmp_workers_file = "../../dataset/tmp_workers_file.dat"
    # data = np.genfromtxt("../../dataset/yelp.dat", unpack=True)

    print "Start updating worker locations"
    i = 0
    all_workers = []
    for worker in workers:
        i += 1
        if i % 1000 == 0:
            print "Updated ", i, " workers"
        pair = [worker['k'], worker['B']]
        all_workers.append(pair)

    data = np.array(all_workers)
    np.savetxt('../../dataset/update.txt', data, delimiter='\t')

    data = data.transpose()
    Params.NDIM, Params.NDATA = data.shape[0], data.shape[1]
    Params.LOW, Params.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
    print Params.NDIM, Params.NDATA
    print Params.LOW, Params.HIGH

    p = Params(1000)
    print "Creating WorkerPSD..."
    dataset = self.get_argument("dataset", default=Params.DATASET)
    Params.DATASET = dataset
    p.select_dataset()
    print dataset

    tree = Grid_adaptive(data, p)
    tree.buildIndex()
    bounds = np.array([[Params.x_min, Params.y_min], [Params.x_max, Params.y_max]])
    print bounds
    all_data[dataset] = (tree, bounds, p.NDATA)

    self.write(json.dumps({"status": "update successfully"}, sort_keys=True))
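# Example request body this handler (and the Grid_adaptiveM variant below)
# expects, inferred from the worker['k'] / worker['B'] accesses above; the
# coordinate values are illustrative only:
#
#   [{"k": 34.0204, "B": -118.2899},
#    {"k": 40.7589, "B": -73.9851}]
#
# 'k' and 'B' appear to carry the two spatial coordinates of a worker.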
def post(self, param_id):
    """Update worker locations and rebuild the worker PSD index (multi-level grid variant)."""
    global datasets, tree, all_data
    global eps, percent, com_range, mar, arf, utl, heuristic, subcell, localness, constraint

    workers = tornado.escape.json_decode(self.request.body)
    # print simplejson.dumps(workers)
    # np.fromiter(json.loads(workers), dtype)

    # save data to a file
    # tmp_workers_file = "../../dataset/tmp_workers_file.dat"
    # data = np.genfromtxt("../../dataset/yelp.dat", unpack=True)

    print "Start updating worker locations"
    i = 0
    all_workers = []
    for worker in workers:
        i += 1
        if i % 1000 == 0:
            print "Updated ", i, " workers"
        pair = [worker['k'], worker['B']]
        all_workers.append(pair)

    data = np.array(all_workers)
    np.savetxt('../../dataset/update.txt', data, delimiter='\t')

    data = data.transpose()
    Params.NDIM, Params.NDATA = data.shape[0], data.shape[1]
    Params.LOW, Params.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
    print Params.NDIM, Params.NDATA
    print Params.LOW, Params.HIGH

    p = Params(1000)
    print "Creating WorkerPSD..."
    dataset = self.get_argument("dataset", default=Params.DATASET)
    Params.DATASET = dataset
    p.select_dataset()
    print dataset

    tree = Grid_adaptiveM(data, 1, p)
    tree.buildIndex()
    bounds = np.array([[Params.x_min, Params.y_min], [Params.x_max, Params.y_max]])
    print bounds
    all_data[dataset] = (tree, bounds, p.NDATA)

    self.write(json.dumps({"status": "update successfully"}, sort_keys=True))
def initialize(self):
    global boundaries, datasets, MTDs, worker_counts
    print "dataset init"
    if len(boundaries) == 0:
        for i in range(len(datasets)):
            Params.DATASET = datasets[i]
            p = Params(1000)
            data = data_readin(p)
            p.select_dataset()
            MTDs.append(p.MTD)
            worker_counts.append(p.NDATA)
            boundaries.append(
                str(p.x_min) + "," + str(p.y_min) + "," + str(p.x_max) + "," + str(p.y_max))
def expStats():
    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + " START")

    p = Params(1000)
    p.select_dataset()
    p.locs, p.users, p.locDict = readCheckins("dataset/gowalla_NY.txt")
    c_locs, c_users = cellStats(p)
    # for uid in c_users:
    #     print len(c_users.get(uid))
    for lid in c_locs:
        print len(c_locs.get(lid))
class MyTestCase(unittest.TestCase):
    def setUp(self):
        # init parameters
        self.p = Params(1000)
        self.p.select_dataset()
        self.log = logging.getLogger("debug.log")

    def testlocationCount(self):
        locs = []
        with open("dataset/weibo/checkins_filtered.txt") as worker_file:
            reader = csv.reader(worker_file, delimiter='\t')
            for row in reader:
                locs.append((float(row[1]), float(row[2]), int(row[3])))  # lat, lon, id
        count = locationCount(self.p, locs)
        print("number of non-empty cells", len(count))
        print("average value per cell", np.mean(list(count.values())))
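# A minimal sketch of what locationCount presumably computes (an assumption,
# not this repo's implementation): bucket check-ins into a uniform grid over
# the dataset bounds and count points per cell, so "non-empty cells" above
# is just the number of dictionary keys.
def location_count_sketch(p, locs, grid_size=100):
    from collections import defaultdict
    cell_w = (p.x_max - p.x_min) / grid_size
    cell_h = (p.y_max - p.y_min) / grid_size
    count = defaultdict(int)
    for lat, lon, _id in locs:
        cx = min(int((lat - p.x_min) / cell_w), grid_size - 1)
        cy = min(int((lon - p.y_min) / cell_h), grid_size - 1)
        count[(cx, cy)] += 1
    return count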
def data_readin():
    """Read in spatial data and initialize global variables."""
    p = Params(0)
    p.select_dataset()
    data = np.genfromtxt(Params.dataset, unpack=True)
    Params.NDIM, Params.NDATA = data.shape[0], data.shape[1]
    Params.LOW, Params.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
    logging.debug(data.shape)
    logging.debug(Params.LOW)
    logging.debug(Params.HIGH)
    return data


# The block below was unreachable after the return above and reads as a
# separate task-generation routine, so it is split out here. The name comes
# from the 'tasks_gen' debug string; the signature is inferred from the free
# variables it uses, not taken from the source.
def tasks_gen(data, taskNo, seed_list, x1, y1, x2, y2):
    all_points = []
    if os.path.isfile(Params.TASKPATH):
        with open(Params.TASKPATH) as f:
            content = f.readlines()
            for i in range(len(seed_list)):
                ran_points = []
                for j in range(taskNo):
                    ran_points.append(map(float, content[i * taskNo + j].split()))
                all_points.append(ran_points)
    else:
        tasks = ""
        logging.debug('tasks_gen: generating tasks...')
        boundary = np.array([[x1, y1], [x2, y2]])
        for seed in seed_list:
            ran_points = []
            np.random.seed(seed)
            count = 0
            while count < taskNo:
                idx = np.random.randint(0, data.shape[1])
                _ran_point = data[:, idx]
                if is_rect_cover(boundary, _ran_point):
                    ran_points.append(_ran_point)
                    count += 1
            all_points.append(ran_points)
            for item in ran_points:
                tasks += "%s\n" % " ".join(map(str, item))
        outfile = open(Params.TASKPATH, "w")
        outfile.write(tasks)
        outfile.close()
    return all_points
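# A minimal sketch of the containment test tasks_gen relies on, assuming
# is_rect_cover(boundary, point) checks whether a 2-D point lies inside an
# axis-aligned rectangle given as [[x1, y1], [x2, y2]] (the real
# implementation lives elsewhere in this repo).
def is_rect_cover_sketch(boundary, point):
    (x1, y1), (x2, y2) = boundary
    return x1 <= point[0] <= x2 and y1 <= point[1] <= y2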
def expM():
    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + " START")

    p = Params(1000)
    p.select_dataset()
    p.locs, p.users, p.locDict = readCheckins(p)
    E_actual = shannonEntropy(p.locs)
    p.debug()

    # pool = Pool(processes=len(eps_list))
    # params = []
    for M in M_list:
        param = (p, M, E_actual)
        evalLimitM(param)
        # params.append((p, M, E_actual))
    # pool.map(evalLimitM, params)
    # pool.join()

    createGnuData(p, "evalLimitM", M_list)
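# A minimal sketch of the Shannon entropy that shannonEntropy presumably
# computes per location (an assumption: each location is summarized by the
# per-user visit counts observed there).
import math

def shannon_entropy_sketch(visit_counts):
    total = float(sum(visit_counts))
    probs = [c / total for c in visit_counts if c > 0]
    return -sum(q * math.log(q, 2) for q in probs)

# e.g. shannon_entropy_sketch([1, 1, 2]) == 1.5 bits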
def testDifferential():
    p = Params(1000)
    p.select_dataset()
    differ = Differential(1000)
    # RTH = (34.020412, -118.289936)
    TS = (40.758890, -73.985100)
    for i in range(100):
        # (x, y) = differ.getPolarNoise(1000000, p.eps)
        # pp = noisyPoint(TS, (x, y))
        pp = differ.addPolarNoise(1.0, TS, 100)

        # u = distance(p.x_min, p.y_min, p.x_max, p.y_min) * 1000.0 / Params.GRID_SIZE
        # v = distance(p.x_min, p.y_min, p.x_min, p.y_max) * 1000.0 / Params.GRID_SIZE
        # rad = euclideanToRadian((u, v))
        # cell_size = np.array([rad[0], rad[1]])
        # roundedPoint = round2Grid(pp, cell_size, p.x_min, p.y_min)
        roundedPoint = pp
        print (str(roundedPoint[0]) + ',' + str(roundedPoint[1]))
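# A minimal sketch of the planar Laplace mechanism that addPolarNoise
# presumably implements (the standard geo-indistinguishability construction;
# the name planar_laplace_sketch is hypothetical, and the meters-to-degrees
# conversion is omitted).
import math
import random
from scipy.special import lambertw

def planar_laplace_sketch(point, eps):
    theta = random.uniform(0, 2 * math.pi)  # uniform direction
    p = random.uniform(0, 1)
    # inverse CDF of the radius uses the -1 branch of the Lambert W function
    r = -(lambertw((p - 1) / math.e, k=-1).real + 1) / eps
    return (point[0] + r * math.cos(theta), point[1] + r * math.sin(theta))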
# The opening of this snippet was truncated; the loop header and the first
# draw are reconstructed to mirror the second draw below.
for _ in range(N):
    r, theta = [
        math.sqrt(random.uniform(0, 1)) * math.sqrt(1),
        2 * math.pi * random.uniform(0, 1)
    ]
    x = [math.cos(theta) * r, math.sin(theta) * r]
    r, theta = [
        math.sqrt(random.uniform(0, 1)) * math.sqrt(1),
        2 * math.pi * random.uniform(0, 1)
    ]
    y = [math.cos(theta) * r, math.sin(theta) * r]
    d = dist(x, y)
    total += d
# Sanity check: the closed form for the mean distance between two uniform
# points in a unit disk is 128 / (45 * pi) ~= 0.9054.
print("Expected dist: ", total / N)

""" Simulated dataset """
p = Params(1000)
p.select_dataset()
reachable_range = Utils.reachableDistance()
dp = Differential(p.seed)

# Randomly pick a location in a small MBR of the tdrive dataset.
minLat, maxLat = 39.1232147, 40.7225952
minLon, maxLon = 115.3879166, 117.3795395
diffLat = maxLat - minLat
diffLon = maxLon - minLon
maxLat = maxLat - 0.95 * diffLat
maxLon = maxLon - 0.95 * diffLon
# print ("diagonal dist: ", Utils.distance(minLat, minLon, maxLat, maxLon))
rows = grid_data.split("\n")
for row in rows:
    values = row.split(" ")
    print values[0], values[1], values[4]
    if len(values) == 11 and float(values[4]) > 7:
        pass

if False:
    if False:
        # read shakemap file
        read_shakemap_xml()

if True:
    # read video metadata
    param = Params(1000)
    param.select_dataset()
    videos = read_data(os.path.splitext(param.dataset)[0] + ".txt")
    video_locs = np.zeros((2, len(videos)))
    idx = 0
    for v in videos:
        vl = v.location()
        video_locs[0, idx] = vl[0]
        video_locs[1, idx] = vl[1]
        idx = idx + 1
    # print video_locs
    np.savetxt(param.dataset, video_locs.transpose(), fmt='%.4f\t')
    # print sum([v.size for v in videos])
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG, filename='log/debug.log')

    # dataset_list = ['yelp', 'foursquare', 'gowallasf', 'gowallala']
    dataset_list = ['mediaq']
    for dataset in dataset_list:
        param = Params(1000)
        data = data_readin(param)
        param.NDIM, param.NDATA = data.shape[0], data.shape[1]
        param.LOW, param.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
        param.DATASET = dataset
        param.select_dataset()
        param.debug()

        path_data = getPathData(data, param)
        fig, ax = plt.subplots()
        # img = imread("background.png")
        for pd in path_data:  # renamed from `data` to avoid shadowing the dataset array
            path = pd[0]
            codes, verts = zip(*path)
            path = mpath.Path(verts, codes)
            # weight = min(1, (pd[1] + 0.0) / 500)
            weight = 1
            patch = mpatches.PathPatch(path, facecolor='white', alpha=weight)
            ax.add_patch(patch)
class TestFunctions(unittest.TestCase):
    def setUp(self):
        # init parameters
        self.p = Params(1000)
        self.p.select_dataset()
        self.log = logging.getLogger("debug.log")

        # load precomputed smooth sensitivity
        # c_list = range(1, 21)
        # eps_list = [0.1, 0.4, 0.7, 1.0]
        # self.ss = getSmoothSensitivity(c_list, eps_list)
        #
        # if Params.DATASET in ["sparse", "medium", "dense"]:  # synthetic
        #     self.p.locs = readData(self.p.dataset)
        # else:  # real
        #     self.p.locs, self.p.locDict = readCheckins(self.p)
        #     self.p.users = transformDict(self.p.locs)

        # Discretize
        # self.p.locs = cellStats(self.p)
        # self.p.users = transformDict(self.p.locs)
        # distribution_pdf(self.p.locs)

        # self.E_actual = actualEntropy(self.p.locs)  # entropy
        # self.D_actual = actualDiversity(self.p.locs)  # diversity
        # self.C_actual = actualLocationCount(self.p, self.p.locDict)  # count

    @unittest.skip("disabled")
    def testMain(self):
        # Visualization
        # le = sorted(list(self.E_actual.iteritems()), key=lambda x: x[1], reverse=True)  # decreasing entropy
        # locIds = [t[0] for t in le]
        # LEVals = [t[1] for t in le]
        # scatter(LEVals, "Location Id", "Entropy")
        # E_noisy = perturbedLocationEntropy(self.p, self.ss, "SS")
        # perturbedLEVals = [E_noisy.get(id, Params.DEFAULT_ENTROPY) for id in locIds]
        # scatter(perturbedLEVals, "Location Id", "Entropy")

        # div = sorted(list(self.D_actual.iteritems()), key=lambda x: x[1], reverse=True)
        # locIds = [t[0] for t in div]
        # divVals = [t[1] for t in div]
        # scatter(divVals, "Location Id", "Diversity")
        # D_noisy = perturbedDiversity(self.p)
        # perturbedDVals = [D_noisy.get(id, Params.DEFAULT_DIVERSITY) for id in locIds]
        # scatter(perturbedDVals, "Location Id", "Diversity")

        # cells = sorted(list(self.C_actual.iteritems()), key=lambda x: x[1], reverse=True)
        # cellIds = [t[0] for t in cells]
        # counts = [t[1] for t in cells]
        # scatter(counts, "Cell Id", "Locations")
        #
        # C_noisy = perturbeCount(self.p)
        # perturbedCounts = [C_noisy.get(id, Params.DEFAULT_FREQUENCY) for id in cellIds]
        # scatter(perturbedCounts, "Cell Id", "Locations")

        evalEnt(self.p, self.E_actual, self.ss)
        evalDiv(self.p, self.D_actual)
        evalBL(self.p, self.E_actual)
        # evalCountDiff(self.p, self.C_actual)
        # evalCountGeoI(self.p, self.C_actual)
        # evalDivGeoI(self.p, self.D_actual)

    @unittest.skip("disabled")
    def testLEParser(self):
        self.p.locs, self.p.users, self.p.locDict = readCheckins(self.p)
        # distribution_pdf(self.p.locs)
        distribution_pdf(self.p.users)
        self.p.users = samplingUsers(self.p.users, Params.MAX_M)
        distribution_pdf(self.p.users)
        entropyStats(self.p.locs)
        # self.p.maxC, self.p.maxM = otherStats(self.p.locs, self.p.users)

        # discretize
        # cells = cellStats(self.p)
        # entropyStats(cells)
        # self.p.maxC, self.p.maxM = otherStats(cells, transformDict(cells))
        # distribution_pdf(cells)
        # distribution_pdf(transformDict(cells))

    @unittest.skip("disabled")
    def testLEStats(self):
        nx = range(1, 100 + 1)
        C, eps, K = 2, 1.0, 50

        # Baseline sensitivity (max C)
        max_C = 100
        max_gs = globalSensitivy(max_C)
        max_gsy = [max_gs] * len(nx)

        # global sensitivity (limit C)
        gs = globalSensitivy(C)
        gsy = [gs] * len(nx)

        # smooth sensitivity
        ssy = [v * 2 for v in self.ss[CEps2Str(C, eps)][:100]]

        # local sensitivity
        K = 20
        ls = localSensitivity(C, K)
        lsy = [ls] * len(nx)

        # vary n (all bounds)
        ny = [max_gsy, gsy, ssy, lsy]
        markers = ["o", "-", "--", "+"]
        legends = ["Global (Max C)", "Global (Limit C)", "Smooth", "Local"]
        line_graph(nx, ny, markers, legends, "Number of users (n)", "Sensitivity")

        # vary C
        eps_list = [0.1, 0.4, 0.7, 1.0]
        c_list = range(1, 21)
        n = 100
        ss_list = [[self.ss[CEps2Str(c, eps)][n - 1] for c in c_list] for eps in eps_list]
        markers = ["o", "-", "--", "+", "x"]
        legends = ["Eps=" + str(eps) for eps in eps_list]
        line_graph(c_list, ss_list, markers, legends, "C", "Sensitivity")

        # vary n
        c = 10
        ss_list = [[self.ss[CEps2Str(c, eps)][n - 1] for n in nx] for eps in eps_list]
        line_graph(nx, ss_list, markers, legends, "Number of users (n)", "Sensitivity")

        # vary n & C
        c_list = [1, 10, 20]
        legends = ["C=" + str(c) for c in c_list]
        ss_list = [[self.ss[CEps2Str(c, eps)][n - 1] for n in nx] for c in c_list]
        line_graph(nx, ss_list, markers, legends, "Number of users (n)", "Sensitivity")

    @unittest.skip("disabled")
    def testLEBounds(self):
        # precompute smooth sensitivity
        eps_list = [1.0]
        pool = Pool(processes=len(eps_list))
        pool.map(precomputeSmoothSensitivity, eps_list)
        pool.close()  # close() must precede join(), which otherwise raises ValueError
        pool.join()
        for eps in eps_list:
            precomputeSmoothSensitivity(eps)

    @unittest.skip("disabled")
    def testMetrics(self):
        P = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        Q = [1, 2, 4, 8, 7, 6, 5, 8, 9]
        self.assertEqual(True, abs(KLDivergence2(P, Q) - KLDiv(P, Q)) < 1e-6)

        true = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        predicted = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        self.assertEqual(1, DivCatScore(true, predicted))

    @unittest.skip("disabled")
    def testDifferential(self):
        differ = Differential(1000)
        RTH = (34.020412, -118.289936)
        radius = 500.0  # default unit is meters
        eps = np.log(2)
        for i in range(100):
            (x, y) = differ.getPolarNoise(radius, eps)
            print (str(RTH[0] + x * Params.ONE_KM * 0.001) + ',' +
                   str(RTH[1] + y * Params.ONE_KM * 1.2833 * 0.001))

    @unittest.skip("disabled")
    def testUtils(self):
        values1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        values2 = [1, 2, 3, 9, 5, 6, 7, 8, 4]
        topVals1, topVals2 = topKValues(3, values1), topKValues(3, values2)
        indices1 = [t[1] for t in topVals1]
        indices2 = [t[1] for t in topVals2]
        self.assertEqual([8, 7, 6], indices1)
        self.assertEqual([3, 7, 6], indices2)
        self.assertEqual(2.0 / 3, metrics.precision_score(indices1, indices2, average="micro"))

    # @unittest.skip
    def test_filter_gowalla(self):
        filter_gowalla(self.p)
        # filter_yelp(self.p)

    @unittest.skip("disabled")
    def testDataGen(self):
        SPARSE_N = int(self.p.MAX_N / 10)
        MEDIUM_N = int(self.p.MAX_N)
        DENSE_N = int(self.p.MAX_N * 10)
        np.random.seed(self.p.seed)
        # writeData(generateData(1e+3, SPARSE_N, Params.MAX_M, Params.MAX_C, 2), "../dataset/sparse.txt")
        # writeData(generateData(1e+3, MEDIUM_N, Params.MAX_M, Params.MAX_C, 2), "../dataset/medium.txt")
        writeData(generateData(1e+3, DENSE_N, Params.MAX_M, Params.MAX_C, 2), "../dataset/dense.txt")
        # readData("../dataset/sparse.txt")
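# Conventional entry point so the suite can be run directly (not present in
# the original excerpt):
if __name__ == '__main__':
    unittest.main()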