Example 1
def expKM():
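    """For each M in M_list, recompute the global sensitivity and cell statistics,
    measure the actual Shannon entropy, and run evalLimitKM; the results are then
    written out for gnuplot via createGnuData."""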
    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + "  START")

    p = Params(1000)

    p.select_dataset()

    p.locs, p.users, p.locDict = readCheckins(p)
    p.debug()

    copy_locs = copy.deepcopy(p.locs)
    copy_locDict = copy.deepcopy(p.locDict)

    # pool = Pool(processes=len(eps_list))  # only needed for the commented-out parallel path below
    # params = []
    for M in M_list:
        p.M = M
        global_sen = sensitivity_add(p.C, float(p.C)/p.K)[2] * p.M
        p.locs, p.users = cellStats(p, copy_locs, copy_locDict, global_sen)
        E_actual = shannonEntropy(p.locs)

        param = (p, global_sen, E_actual)
        evalLimitKM(param)
        # params.append((p, global_sen, E_actual))
    # pool.map(evalLimitK, params)
    # pool.join()

    createGnuData(p, "evalLimitKM", M_list)
Example 2
def expSensitivity():
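    """Read check-ins for the selected dataset and run evalActualSensitivity on them."""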

    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + "  START")

    p = Params(1000)

    p.select_dataset()

    p.locs, p.users, p.locDict = readCheckins(p)

    evalActualSensitivity(p)
Example 3
    def post(self, param_id):
        """
        Update geocast parameters
        """
        global datasets, tree, all_data
        global eps, percent, com_range, mar, arf, utl, heuristic, subcell, localness, constraint
        workers = tornado.escape.json_decode(self.request.body)

        # print simplejson.dumps(workers)

        # np.fromiter(json.loads(workers),dtype)

        # save data to a file
        # tmp_workers_file = "../../dataset/tmp_workers_file.dat"

        # data = np.genfromtxt("../../dataset/yelp.dat",unpack = True)

        print "Start updating worker locations"
        i = 0
        all_workers = []
        for worker in workers:
            i += 1
            if i % 1000 == 0:
                print "Updated ", i, " workers"
            pair = [worker['k'], worker['B']]
            all_workers.append(pair)

        # build the array and write it out once, after all workers are collected
        data = np.array(all_workers)
        np.savetxt('../../dataset/update.txt', data, delimiter='\t')
        data = data.transpose()
        Params.NDIM, Params.NDATA = data.shape[0], data.shape[1]

        Params.LOW, Params.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
        print Params.NDIM, Params.NDATA
        print Params.LOW, Params.HIGH

        p = Params(1000)
        print "Creating WorkerPSD..."
        dataset = self.get_argument("dataset", default=Params.DATASET)
        Params.DATASET = dataset
        p.select_dataset()
        print dataset
        tree = Grid_adaptive(data, p)
        tree.buildIndex()
        bounds = np.array([[Params.x_min, Params.y_min],
                           [Params.x_max, Params.y_max]])
        print bounds
        all_data[dataset] = (tree, bounds, p.NDATA)

        self.write(
            json.dumps({"status": "update successfully"}, sort_keys=True))
Example 4
    def post(self, param_id):
        """
        Update geocast parameters
        """
        global datasets, tree, all_data
        global eps, percent, com_range, mar, arf, utl, heuristic, subcell, localness, constraint
        workers = tornado.escape.json_decode(self.request.body)

        # print simplejson.dumps(workers)

        # np.fromiter(json.loads(workers),dtype)

        # save data to a file
        # tmp_workers_file = "../../dataset/tmp_workers_file.dat"

        # data = np.genfromtxt("../../dataset/yelp.dat",unpack = True)

        print "Start updating worker locations"
        i = 0
        all_workers = []
        for worker in workers:
            i += 1
            if i % 1000 == 0:
                print "Updated ", i, " workers"
            pair = [worker['k'], worker['B']]
            all_workers.append(pair)

        # build the array and write it out once, after all workers are collected
        data = np.array(all_workers)
        np.savetxt('../../dataset/update.txt', data, delimiter='\t')
        data = data.transpose()
        Params.NDIM, Params.NDATA = data.shape[0], data.shape[1]

        Params.LOW, Params.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
        print Params.NDIM, Params.NDATA
        print Params.LOW, Params.HIGH

        p = Params(1000)
        print "Creating WorkerPSD..."
        dataset = self.get_argument("dataset", default=Params.DATASET)
        Params.DATASET = dataset
        p.select_dataset()
        print dataset
        tree = Grid_adaptiveM(data, 1, p)
        tree.buildIndex()
        bounds = np.array([[Params.x_min, Params.y_min], [Params.x_max, Params.y_max]])
        print bounds
        all_data[dataset] = (tree, bounds, p.NDATA)

        self.write(
            json.dumps({"status": "update successfully"}, sort_keys=True))
Example 5
    def initialize(self):
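        # Lazily populate the per-dataset MTDs, worker counts, and boundary strings
        # the first time this handler is initialized (boundaries starts out empty).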
        global boundaries, datasets, MTDs, worker_counts
        print "dataset init"
        if len(boundaries) == 0:
            for i in range(len(datasets)):
                Params.DATASET = datasets[i]
                p = Params(1000)
                data = data_readin(p)
                p.select_dataset()
                MTDs.append(p.MTD)
                worker_counts.append(p.NDATA)
                boundaries.append(
                    str(p.x_min) + "," + str(p.y_min) + "," + str(p.x_max) + "," + str(p.y_max))

        """
Example 6
def expStats():
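    """Read check-ins, compute per-cell statistics with cellStats, and print the
    number of entries in each cell."""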
    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + "  START")

    p = Params(1000)

    p.select_dataset()

    p.locs, p.users, p.locDict = readCheckins("dataset/gowalla_NY.txt")

    c_locs, c_users = cellStats(p)

    # for uid in c_users:
    #     print len(c_users.get(uid))

    for lid in c_locs:
        print len(c_locs.get(lid))
Example 7
class MyTestCase(unittest.TestCase):
    def setUp(self):
        # init parameters
        self.p = Params(1000)
        self.p.select_dataset()

        self.log = logging.getLogger("debug.log")

    def testlocationCount(self):
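        # Load filtered Weibo check-ins as (lat, lon, id) tuples, then report the
        # number of non-empty cells and the average count per cell via locationCount.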
        locs = []
        with open("dataset/weibo/checkins_filtered.txt") as worker_file:
            reader = csv.reader(worker_file, delimiter='\t')
            for row in reader:
                locs.append((float(row[1]), float(row[2]),
                             int(row[3])))  # lat, lon, id
        count = locationCount(self.p, locs)
        print("number of non-empty cells", len(count))
        print("average value per cell", np.mean(list(count.values())))
Example 8
def data_readin():
    """Read in spatial data and initialize global variables."""
    p = Params(0)
    p.select_dataset()
    data = np.genfromtxt(Params.dataset, unpack=True)
    Params.NDIM, Params.NDATA = data.shape[0], data.shape[1]
    Params.LOW, Params.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
    logging.debug(data.shape)
    logging.debug(Params.LOW)
    logging.debug(Params.HIGH)
    return data

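    # NOTE: everything below is unreachable (it follows the return above);
    # it looks like part of a separate task-generation routine that relies on
    # seed_list, taskNo, and boundary coordinates defined elsewhere.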
    all_points = []
    if os.path.isfile(Params.TASKPATH):
        with open(Params.TASKPATH) as f:
            content = f.readlines()
        for i in range(len(seed_list)):
            ran_points = []
            for j in range(taskNo):
                ran_points.append(map(float, content[i * taskNo + j].split()))
            all_points.append(ran_points)
    else:
        tasks = ""
        logging.debug('tasks_gen: generating tasks...')

        boundary = np.array([[x1, y1], [x2, y2]])
        for seed in seed_list:
            ran_points = []
            np.random.seed(seed)
            count = 0
            while count < taskNo:
                idx = np.random.randint(0, data.shape[1])
                _ran_point = data[:, idx]
                if is_rect_cover(boundary, _ran_point):
                    ran_points.append(_ran_point)
                    count += 1
            all_points.append(ran_points)
            for item in ran_points:
                tasks += "%s\n" % " ".join(map(str, item))
        outfile = open(Params.TASKPATH, "w")
        outfile.write(tasks)
        outfile.close()
    return all_points
Example 9
def expM():
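    """For each M in M_list, evaluate evalLimitM against the actual Shannon entropy
    of the check-in data, then write the results out for gnuplot via createGnuData."""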
    logging.basicConfig(level=logging.DEBUG, filename='./debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + "  START")

    p = Params(1000)

    p.select_dataset()

    p.locs, p.users, p.locDict = readCheckins(p)
    E_actual = shannonEntropy(p.locs)
    p.debug()

    # pool = Pool(processes=len(eps_list))
    # params = []
    for M in M_list:
        param = (p, M, E_actual)
        evalLimitM(param)
        # params.append((p, M, E_actual))
    # pool.map(evalLimitM, params)
    # pool.join()

    createGnuData(p, "evalLimitM", M_list)
Example 10
def testDifferential():
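    # Generate 100 noisy versions of the fixed point TS with Differential.addPolarNoise
    # and print each perturbed point as a "lat,lon" line.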
    p = Params(1000)
    p.select_dataset()
    differ = Differential(1000)
    # RTH = (34.020412, -118.289936)
    TS = (40.758890, -73.985100)

    for i in range(100):
        # (x, y) = differ.getPolarNoise(1000000, p.eps)
        # pp = noisyPoint(TS, (x,y))

        pp = differ.addPolarNoise(1.0, TS, 100)


        # u = distance(p.x_min, p.y_min, p.x_max, p.y_min) * 1000.0 / Params.GRID_SIZE
        # v = distance(p.x_min, p.y_min, p.x_min, p.y_max) * 1000.0 / Params.GRID_SIZE
        # rad = euclideanToRadian((u, v))
        # cell_size = np.array([rad[0], rad[1]])
        # roundedPoint = round2Grid(pp, cell_size, p.x_min, p.y_min)


        roundedPoint = pp
        print (str(roundedPoint[0]) + ',' + str(roundedPoint[1]))
Example 11
        ]
        x = [math.cos(theta) * r, math.sin(theta) * r]
        r, theta = [
            math.sqrt(random.uniform(0, 1)) * math.sqrt(1),
            2 * math.pi * random.uniform(0, 1)
        ]
        y = [math.cos(theta) * r, math.sin(theta) * r]
        d = dist(x, y)
        total += d

    print("Expected dist: ", total / N)
"""
Simulated dataset
"""
p = Params(1000)
p.select_dataset()
reachable_range = Utils.reachableDistance()
dp = Differential(p.seed)

# Randomly picking location in a small MBR of tdrive dataset.
minLat, maxLat = 39.1232147, 40.7225952
minLon, maxLon = 115.3879166, 117.3795395

diffLat = maxLat - minLat
diffLon = maxLon - minLon

maxLat = maxLat - 0.95 * diffLat
maxLon = maxLon - 0.95 * diffLon

# print ("diagonal dist: ", Utils.distance(minLat, minLon, maxLat, maxLon))
Example 12
            rows = grid_data.split("\n")
            for row in rows:
                values = row.split(" ")
                print values[0], values[1], values[4]
                if len(values) == 11 and float(values[4]) > 7:
                    pass


if False:

    if False:    # read shakemap file
        read_shakemap_xml()

    if True:   # read video metadata
        param = Params(1000)
        param.select_dataset()
        videos = read_data(os.path.splitext(param.dataset)[0] + ".txt")

        video_locs = np.zeros((2, len(videos)))
        for idx, v in enumerate(videos):
            vl = v.location()
            video_locs[0, idx] = vl[0]
            video_locs[1, idx] = vl[1]

        # print video_locs

        np.savetxt(param.dataset, video_locs.transpose(), fmt='%.4f\t')

        # print sum([v.size for v in videos])
Example 13

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG, filename='log/debug.log')

    # dataset_list = ['yelp', 'foursquare', 'gowallasf', 'gowallala']
    dataset_list = ['mediaq']

    for dataset in dataset_list:
        param = Params(1000)
        # select the dataset before reading it, so data_readin loads the right file
        param.DATASET = dataset
        param.select_dataset()

        data = data_readin(param)
        param.NDIM, param.NDATA = data.shape[0], data.shape[1]
        param.LOW, param.HIGH = np.amin(data, axis=1), np.amax(data, axis=1)
        param.debug()

        path_data = getPathData(data, param)

        fig, ax = plt.subplots()
        # img = imread("background.png")
        for pdata in path_data:  # avoid shadowing the dataset array `data`
            path = pdata[0]
            codes, verts = zip(*path)
            path = mpath.Path(verts, codes)
            # weight = min(1, (pdata[1] + 0.0) / 500)
            weight = 1
            patch = mpatches.PathPatch(path, facecolor='white', alpha=weight)
            ax.add_patch(patch)
Example 14
class TestFunctions(unittest.TestCase):

    def setUp(self):
        # init parameters
        self.p = Params(1000)
        self.p.select_dataset()

        self.log = logging.getLogger("debug.log")

        # load precomputed smooth sensitivity
        # c_list = range(1, 21)
        # eps_list = [0.1, 0.4, 0.7, 1.0]
        # self.ss = getSmoothSensitivity(c_list, eps_list)
        #
        # if Params.DATASET in ["sparse", "medium", "dense"]: # synthetic
        #     self.p.locs = readData(self.p.dataset)
        # else: # real
        #     self.p.locs, self.p.locDict = readCheckins(self.p)
        # self.p.users = transformDict(self.p.locs)

        # Discretize
        # self.p.locs = cellStats(self.p)
        # self.p.users = transformDict(self.p.locs)
        # distribution_pdf(self.p.locs)

        # self.E_actual = actualEntropy(self.p.locs)      # entropy
        # self.D_actual = actualDiversity(self.p.locs)    # diversity

        # self.C_actual = actualLocationCount(self.p, self.p.locDict) # count


    @unittest.skip
    def testMain(self):
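        # NOTE: relies on self.ss, self.E_actual, and self.D_actual, which are only
        # available if the corresponding setUp code (currently commented out) is re-enabled.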

        # Visualization
        # le = sorted(list(self.E_actual.iteritems()), key=lambda x:x[1], reverse=True)    # decrease entropy
        # locIds = [t[0] for t in le]
        # LEVals = [t[1] for t in le]
        # scatter(LEVals, "Location Id", "Entropy")

        # E_noisy = perturbedLocationEntropy(self.p, self.ss, "SS")
        # perturbedLEVals = [E_noisy.get(id, Params.DEFAULT_ENTROPY) for id in locIds]
        # scatter(perturbedLEVals, "Location Id", "Entropy")

        # div = sorted(list(self.D_actual.iteritems()), key=lambda x:x[1], reverse=True)
        # locIds = [t[0] for t in div]
        # divVals = [t[1] for t in div]
        # scatter(divVals, "Location Id", "Diversity")

        # D_noisy = perturbedDiversity(self.p)
        # perturbedDVals = [D_noisy.get(id, Params.DEFAULT_DIVERSITY) for id in locIds]
        # scatter(perturbedDVals, "Location Id", "Diversity")

        # cells = sorted(list(self.C_actual.iteritems()), key=lambda x:x[1], reverse=True)
        # cellIds = [t[0] for t in cells]
        # counts = [t[1] for t in cells]
        # scatter(counts, "Cell Id", "Locations")
        #
        # C_noisy = perturbeCount(self.p)
        # perturbedCounts = [C_noisy.get(id, Params.DEFAULT_FREQUENCY) for id in cellIds]
        # scatter(perturbedCounts, "Cell Id", "Locations")

        evalEnt(self.p, self.E_actual, self.ss)
        evalDiv(self.p, self.D_actual)
        evalBL(self.p, self.E_actual)

        # evalCountDiff(self.p, self.C_actual)
        # evalCountGeoI(self.p, self.C_actual)
        # evalDivGeoI(self.p, self.D_actual)

    @unittest.skip
    def testLEParser(self):
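        # Read check-ins, plot the user distribution before and after sampling
        # with samplingUsers(..., Params.MAX_M), and print entropy statistics
        # for the location data.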
        self.p.locs, self.p.users, self.p.locDict = readCheckins(self.p)
        # distribution_pdf(self.p.locs)
        distribution_pdf(self.p.users)
        self.p.users = samplingUsers(self.p.users, Params.MAX_M)
        distribution_pdf(self.p.users)
        entropyStats(self.p.locs)


        # self.p.maxC, self.p.maxM = otherStats(self.p.locs, self.p.users)

        # discretize
        # cells = cellStats(self.p)
        # entropyStats(cells)
        # self.p.maxC, self.p.maxM = otherStats(cells, transformDict(cells))
        # distribution_pdf(cells)
        # distribution_pdf(transformDict(cells))

    @unittest.skip
    def testLEStats(self):
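        # Plot the sensitivity bounds (global with max C, global with limited C,
        # smooth, and local) while varying the number of users n and the limit C.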
        nx = range(1,100+1)
        C, eps, K = 2, 1.0, 50

        # Baseline sensitivity (max C)
        max_C = 100
        max_gs = globalSensitivy(max_C)
        max_gsy = [max_gs] * len(nx)

        # global sensitivity (limit C)
        gs = globalSensitivy(C)
        gsy = [gs] * len(nx)

        # smooth sensitivity
        ssy = [v * 2 for v in self.ss[CEps2Str(C, eps)][:100]]

        # local sensitivity
        K = 20
        ls = localSensitivity(C, K)
        lsy = [ls] * len(nx)

        # vary n (all bounds)
        ny = [max_gsy, gsy, ssy, lsy]
        markers = ["o", "-", "--", "+"]
        legends = ["Global (Max C)", "Global (Limit C)", "Smooth", "Local"]
        line_graph(nx, ny, markers, legends, "Number of users (n)", "Sensitivity")

        # vary C
        eps_list = [0.1, 0.4, 0.7, 1.0]
        c_list = range(1, 21)
        n = 100
        ss_list = [[self.ss[CEps2Str(c, eps)][n - 1] for c in c_list] for eps in eps_list]

        markers = ["o", "-", "--", "+", "x"]
        legends = ["Eps=" + str(eps) for eps in eps_list]
        line_graph(c_list, ss_list, markers, legends, "C", "Sensitivity")

        # vary n
        c = 10
        ss_list = [[self.ss[CEps2Str(c, eps)][n - 1] for n in nx] for eps in eps_list]
        line_graph(nx, ss_list, markers, legends, "Number of users (n)", "Sensitivity")

        # vary n & C
        c_list = [1, 10, 20]
        legends = ["C=" + str(c) for c in c_list]
        ss_list = [[self.ss[CEps2Str(c, eps)][n - 1] for n in nx] for c in c_list]
        line_graph(nx, ss_list, markers, legends, "Number of users (n)", "Sensitivity")

    @unittest.skip
    def testLEBounds(self):
        # precompute smooth sensitivity
        eps_list = [1.0]
        pool = Pool(processes=len(eps_list))
        pool.map(precomputeSmoothSensitivity, eps_list)
        pool.close()  # close() must be called before join()
        pool.join()
        for eps in eps_list:
            precomputeSmoothSensitivity(eps)

    @unittest.skip
    def testMetrics(self):
        P = [1,2,3,4,5,6,7,8,9]
        Q = [1,2,4,8,7,6,5,8,9]
        self.assertEqual(True, abs(KLDivergence2(P, Q) - KLDiv(P, Q)) < 1e-6)

        true = [1,2,3,4,5,6,7,8,9]
        predicted = [1,2,3,4,5,6,7,8,9]
        self.assertEqual(1, DivCatScore(true, predicted))

    @unittest.skip
    def testDifferential(self):
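        # Draw 100 polar-noise samples around RTH with eps = ln(2) and print the
        # noisy coordinates as "lat,lon" lines.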
        differ = Differential(1000)
        RTH = (34.020412, -118.289936)
        radius = 500.0  # default unit is meters
        eps = np.log(2)
        for i in range(100):
            (x, y) = differ.getPolarNoise(radius, eps)
            print (str(RTH[0] + x * Params.ONE_KM * 0.001) + ',' + str(RTH[1] + y * Params.ONE_KM*1.2833*0.001))

    @unittest.skip
    def testUtils(self):
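        # topKValues(3, ...) returns (value, index) pairs for the 3 largest values;
        # the two lists share two of their top-3 indices, hence the 2/3 micro-precision check.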
        values1 = [1,2,3,4,5,6,7,8,9]
        values2 = [1, 2, 3, 9, 5, 6, 7, 8, 4]
        topVals1, topVals2 = topKValues(3, values1), topKValues(3, values2)
        indices1 = [t[1] for t in topVals1]
        indices2 = [t[1] for t in topVals2]
        self.assertEqual([8,7,6], indices1)
        self.assertEqual([3, 7, 6], indices2)
        self.assertEqual(2.0 / 3, metrics.precision_score(indices1, indices2, average="micro"))


    # @unittest.skip
    def test_filter_gowalla(self):
        filter_gowalla(self.p)
        # filter_yelp(self.p)

    @unittest.skip
    def testDataGen(self):
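        # Generate synthetic datasets whose sizes are derived from MAX_N
        # (sparse = MAX_N/10, medium = MAX_N, dense = MAX_N*10); only the dense
        # variant is currently written to disk.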
        SPARSE_N = int(self.p.MAX_N / 10)
        MEDIUM_N = int(self.p.MAX_N)
        DENSE_N = int(self.p.MAX_N * 10)

        np.random.seed(self.p.seed)
        # writeData(generateData(1e+3, SPARSE_N, Params.MAX_M, Params.MAX_C, 2), "../dataset/sparse.txt")
        # writeData(generateData(1e+3, MEDIUM_N, Params.MAX_M, Params.MAX_C, 2), "../dataset/medium.txt")
        writeData(generateData(1e+3, DENSE_N, Params.MAX_M, Params.MAX_C, 2), "../dataset/dense.txt")

        # readData("../dataset/sparse.txt")