def query_gen(queryShape, taskNo, seed,
              x1=-124.8193, y1=31.3322, x2=-103.0020, y2=49.0025):
    """Generate queries, each centered on a random data point"""
    np.random.seed(seed)
    querylist = []
    cell_size_x = (x2 - x1) / (2 ** Params.queryUnit[0])
    cell_size_y = (y2 - y1) / (2 ** Params.queryUnit[1])
    x_range = cell_size_x * 2 ** queryShape[0]
    y_range = cell_size_y * 2 ** queryShape[1]
    data = data_readin()
    ran_indices = np.random.randint(0, data.shape[1], taskNo)
    ran_points = data[:, ran_indices]
    x_low = ran_points[0, :] - x_range / 2
    x_high = ran_points[0, :] + x_range / 2
    y_low = ran_points[1, :] - y_range / 2
    y_high = ran_points[1, :] + y_range / 2
    for i in range(taskNo):
        query = [[max(x_low[i], x1), max(y_low[i], y1)],
                 [min(x_high[i], x2), min(y_high[i], y2)]]
        querylist.append(np.array(query))
    return querylist
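# Hypothetical usage sketch for query_gen (not part of the original module);
# the shape, count, and seed values below are illustrative only.
def _demo_query_gen():
    queries = query_gen((4, 4), taskNo=10, seed=1000)
    for q in queries:
        # each query is [[x_low, y_low], [x_high, y_high]], clipped to the bounding rect
        assert q[0][0] <= q[1][0] and q[0][1] <= q[1][1]
    return queries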
def get(self, param_id):
    """Update geocast parameters"""
    global datasets, tree, all_data
    global eps, percent, com_range, mar, arf, utl, heuristic, subcell, localness, constraint

    dataset = self.get_argument("dataset", default=Params.DATASET)
    eps = self.get_argument("eps", default=eps)
    percent = self.get_argument("percent", default=Params.PercentGrid)
    com_range = self.get_argument("range", default=Params.NETWORK_DIAMETER)

    # geocast parameters
    mar = self.get_argument("mar", default=Params.MAR)
    arf = self.get_argument("arf", default=Params.AR_FUNCTION)
    utl = self.get_argument("utl", default=Params.U)
    heuristic = self.get_argument("heuristic", default=Params.COST_FUNCTION)
    subcell = self.get_argument("subcell", default=Params.PARTIAL_CELL_SELECTION)
    localness = self.get_argument("localness", default=Params.CUSTOMIZED_GRANULARITY)
    constraint = self.get_argument("constraint", default=Params.CONSTRAINT_INFERENCE)

    Params.DATASET = dataset
    Params.Eps = float(eps)
    Params.PercentGrid = float(percent)
    Params.NETWORK_DIAMETER = float(com_range) / 1000.0
    Params.MAR = float(mar)
    Params.AR_FUNCTION = arf
    Params.U = float(utl)
    Params.COST_FUNCTION = heuristic
    Params.PARTIAL_CELL_SELECTION = (subcell == "true" or subcell == True)
    Params.CUSTOMIZED_GRANULARITY = (localness == "true" or localness == True)
    # accept a boolean default as well as the string "true", like the flags above
    Params.CONSTRAINT_INFERENCE = (constraint == "true" or constraint == True)

    print "Update parameters ..."
    print Params.DATASET, Params.Eps, Params.PercentGrid, Params.NETWORK_DIAMETER, \
        Params.MAR, Params.AR_FUNCTION, Params.U, Params.COST_FUNCTION, \
        Params.PARTIAL_CELL_SELECTION, Params.CUSTOMIZED_GRANULARITY

    # workerPSD parameters
    rebuild = int(self.get_argument("rebuild", default=0))
    if rebuild == 1:
        print "Reading data ... " + dataset
        data = data_readin()
        p = Params(1000)
        print "Creating WorkerPSD..."
        tree = Grid_adaptive(data, p)
        tree.buildIndex()
        bounds = np.array([[Params.x_min, Params.y_min],
                           [Params.x_max, Params.y_max]])
        all_data[dataset] = (tree, bounds, p.NDATA)
        print "Created WorkerPSD..." + dataset

    self.write(json.dumps({"status": "updated successfully"}, sort_keys=True))
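# Hypothetical request for the handler above. The route is an assumption;
# only the query-string keys come from the get_argument() calls:
#   GET /params/1?dataset=gowallasf&eps=0.5&percent=0.3&range=1000&subcell=true&rebuild=1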
def generate_tasks(seed, time_instance):
    """Generate a set of tasks per time instance"""
    data = data_readin()
    task_locs = task_locs_gen(Params.TASK_NO, seed, Params.x_min, Params.y_min,
                              Params.x_max, Params.y_max)
    filename = "../dataset/taskworker/tasks" + str(time_instance) + ".txt"
    if os.path.exists(filename):
        os.remove(filename)
    with open(filename, "a") as tasks:
        for loc in task_locs:
            tasks.write(str(loc[0]) + ", " + str(loc[1]) + "\n")
def generate_workers(seed, time_instance):
    """Generate a set of workers per time instance"""
    # global x_min, y_min, x_max, y_max
    data = data_readin()
    worker_locs = worker_locs_gen(data, Params.WorkerNo, seed, Params.x_min,
                                  Params.y_min, Params.x_max, Params.y_max)
    filename = "../dataset/taskworker/workers" + str(time_instance) + ".txt"
    if os.path.exists(filename):
        os.remove(filename)
    with open(filename, "a") as workers:
        for loc in worker_locs:
            workers.write(str(loc[0]) + ", " + str(loc[1]) + "\n")
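# Hypothetical driver sketch (not part of the original module): materialize
# task and worker files for several time instances; the instance count and
# seed offsets are illustrative.
def _demo_generate_instances(time_instances=5, base_seed=1000):
    for t in range(time_instances):
        generate_tasks(base_seed + t, t)    # ../dataset/taskworker/tasks<t>.txt
        generate_workers(base_seed + t, t)  # ../dataset/taskworker/workers<t>.txt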
def get(self):
    global tree, eps, all_data, datasets
    print "Reset data"
    all_data = {}
    for dataset in datasets:
        Params.DATASET = dataset
        p = Params(1000)
        data = data_readin(p)
        eps = p.Eps
        tree = Grid_adaptiveM(data, 1, p)
        tree.buildIndex()
        bounds = np.array([[p.x_min, p.y_min], [p.x_max, p.y_max]])
        all_data[dataset] = (tree, bounds, p.NDATA)
def get(self, param_id):
    """Update geocast parameters"""
    global datasets, tree, all_data
    global eps, percent, com_range, mar, arf, utl, heuristic, subcell, localness, constraint

    dataset = self.get_argument("dataset", default=Params.DATASET)
    eps = self.get_argument("eps", default=eps)
    percent = self.get_argument("percent", default=Params.PercentGrid)
    com_range = self.get_argument("range", default=Params.NETWORK_DIAMETER)

    # geocast parameters
    mar = self.get_argument("mar", default=Params.MAR)
    arf = self.get_argument("arf", default=Params.AR_FUNCTION)
    utl = self.get_argument("utl", default=Params.U)
    heuristic = self.get_argument("heuristic", default=Params.COST_FUNCTION)
    subcell = self.get_argument("subcell", default=Params.PARTIAL_CELL_SELECTION)
    localness = self.get_argument("localness", default=Params.CUSTOMIZED_GRANULARITY)
    constraint = self.get_argument("constraint", default=Params.CONSTRAINT_INFERENCE)

    Params.DATASET = dataset
    Params.Eps = float(eps)
    Params.PercentGrid = float(percent)
    Params.NETWORK_DIAMETER = float(com_range) / 1000.0
    Params.MAR = float(mar)
    Params.AR_FUNCTION = arf
    Params.U = float(utl)
    Params.COST_FUNCTION = heuristic
    Params.PARTIAL_CELL_SELECTION = (subcell == "true" or subcell == True)
    Params.CUSTOMIZED_GRANULARITY = (localness == "true" or localness == True)
    # accept a boolean default as well as the string "true", like the flags above
    Params.CONSTRAINT_INFERENCE = (constraint == "true" or constraint == True)

    print "Update parameters ..."
    print Params.DATASET, Params.Eps, Params.PercentGrid, Params.NETWORK_DIAMETER, \
        Params.MAR, Params.AR_FUNCTION, Params.U, Params.COST_FUNCTION, \
        Params.PARTIAL_CELL_SELECTION, Params.CUSTOMIZED_GRANULARITY

    # workerPSD parameters
    rebuild = int(self.get_argument("rebuild", default=0))
    if rebuild == 1:
        print "Reading data ... " + dataset
        p = Params(1000)
        data = data_readin(p)
        print "Creating WorkerPSD..."
        tree = Grid_adaptiveM(data, 1, p)
        tree.buildIndex()
        bounds = np.array([[p.x_min, p.y_min], [p.x_max, p.y_max]])
        all_data[dataset] = (tree, bounds, p.NDATA)
        print "Created WorkerPSD..." + dataset

    self.write(json.dumps({"status": "updated successfully"}, sort_keys=True))
def initialize(self):
    global boundaries, datasets, MTDs, worker_counts
    print "dataset init"
    if len(boundaries) == 0:
        for i in range(len(datasets)):
            Params.DATASET = datasets[i]
            p = Params(1000)
            data = data_readin(p)
            p.select_dataset()
            MTDs.append(p.MTD)
            worker_counts.append(p.NDATA)
            boundaries.append(str(p.x_min) + "," + str(p.y_min) + "," +
                              str(p.x_max) + "," + str(p.y_max))
def initialize(self): """ Hook for subclass initialization A dictionary passed as the third argument of a url spec will be supplied as keyword arguments to initialize(). """ global tree, eps, all_data, datasets if len(all_data) == 0: for dataset in datasets: Params.DATASET = dataset p = Params(1000) data = data_readin(p) eps = p.Eps tree = Grid_adaptiveM(data, 1, p) tree.buildIndex() bounds = np.array([[p.x_min, p.y_min], [p.x_max, p.y_max]]) all_data[dataset] = (tree, bounds, p.NDATA)
def query_init(x1=-124.8193, y1=31.3322, x2=-103.0020, y2=49.0025):
    """Init a random query of a given size within the rect [[x1,y1],[x2,y2]]"""
    x_range = (x2 - x1) * 2 ** initQueryShape[0] / (2 ** Params.queryUnit[0])
    y_range = (y2 - y1) * 2 ** initQueryShape[1] / (2 ** Params.queryUnit[1])
    data = data_readin()
    ran_indices = np.random.randint(0, data.shape[1], 1)
    ran_points = data[:, ran_indices]
    x_low = ran_points[0, :] - x_range / 2
    x_high = ran_points[0, :] + x_range / 2
    y_low = ran_points[1, :] - y_range / 2
    y_high = ran_points[1, :] + y_range / 2
    query = [[max(x_low[0], x1), max(y_low[0], y1)],
             [min(x_high[0], x2), min(y_high[0], y2)]]
    return np.array(query)
def post(self):
    global all_data, datasets, datasets2, boundaries, MTDs, worker_counts, \
        all_datafiles, pearson_skewness, areas, spearman_skewness

    fileinfo = self.request.files['dataset'][0]
    print "fileinfo is", fileinfo
    fname = fileinfo['filename']
    fname = os.path.splitext(fname)[0]
    # cname = str(uuid.uuid4()) + extn
    cname = fname
    # with-block ensures the uploaded payload is flushed and the file is closed
    with open(__UPLOADS__ + cname, 'w') as fh:
        fh.write(fileinfo['body'])

    # update variables
    datasets.append(fname)
    datasets2.append(fname)
    all_datafiles[fname] = fname

    Params.DATASET = fname + '.dat'
    data = data_readin()
    p = Params(1000)
    eps = p.Eps
    tree = Grid_adaptive(data, p)
    tree.buildIndex()
    bounds = np.array([[Params.LOW[0], Params.LOW[1]],
                       [Params.HIGH[0], Params.HIGH[1]]])
    MTDs.append(Params.MTD)
    worker_counts.append(p.NDATA)
    pearson_skewness.append(0)
    areas.append(0)
    spearman_skewness.append(0)
    boundaries.append(str(Params.LOW[0]) + "," + str(Params.LOW[1]) + "," +
                      str(Params.HIGH[0]) + "," + str(Params.HIGH[1]))
    all_data[fname] = (tree, bounds, data)

    self.finish(Params.DATASET + " is uploaded to the server. Its PSD is constructed.")
def post(self):
    global all_data, datasets, datasets2, boundaries, MTDs, worker_counts, \
        all_datafiles, pearson_skewness, areas, spearman_skewness

    fileinfo = self.request.files['dataset'][0]
    print "fileinfo is", fileinfo
    fname = fileinfo['filename']
    fname = os.path.splitext(fname)[0]
    # cname = str(uuid.uuid4()) + extn
    cname = fname
    # with-block ensures the uploaded payload is flushed and the file is closed
    with open(__UPLOADS__ + cname, 'w') as fh:
        fh.write(fileinfo['body'])

    # update variables
    datasets.append(fname)
    datasets2.append(fname)
    all_datafiles[fname] = fname

    Params.DATASET = fname + '.dat'
    p = Params(1000)
    data = data_readin(p)
    eps = p.Eps
    tree = Grid_adaptiveM(data, 1, p)
    tree.buildIndex()
    bounds = np.array([[Params.LOW[0], Params.LOW[1]],
                       [Params.HIGH[0], Params.HIGH[1]]])
    MTDs.append(Params.MTD)
    worker_counts.append(p.NDATA)
    pearson_skewness.append(0)
    areas.append(0)
    spearman_skewness.append(0)
    boundaries.append(str(Params.LOW[0]) + "," + str(Params.LOW[1]) + "," +
                      str(Params.HIGH[0]) + "," + str(Params.HIGH[1]))
    all_data[fname] = (tree, bounds, data)

    self.finish(Params.DATASET + " is uploaded to the server. Its PSD is constructed.")
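# Hypothetical client-side sketch for the upload handler above; the URL, port,
# and route are assumptions, only the 'dataset' multipart field name comes
# from the handler's self.request.files['dataset'] lookup.
def _demo_upload(path='mydata.dat', url='http://localhost:8888/upload'):
    import requests
    with open(path, 'rb') as f:
        return requests.post(url, files={'dataset': f}).text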
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG, filename='../log/debug.log')
    logging.info(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + " START")

    # eps_list = [0.001, 0.004, 0.007, 0.01]
    # dataset_list = ['yelp', 'foursquare', 'gowallasf', 'gowallala']
    eps_list = [0.05, 0.45]
    dataset_list = ['gowallasf']
    for dataset in dataset_list:
        for eps in eps_list:
            param = Params(1000)
            # select the dataset before reading so each iteration loads its own data
            param.DATASET = dataset
            all_workers = data_readin(param)
            param.NDIM, param.NDATA = all_workers.shape[0], all_workers.shape[1]
            param.LOW, param.HIGH = np.amin(all_workers, axis=1), np.amax(all_workers, axis=1)
            param.select_dataset()
            param.Eps = eps
            param.debug()
            path_data = getPathData(all_workers, param)
            # max_count = 0
            # for data in path_data:
            #     if data[1] > max_count:
            #         max_count = data[1]