def distribute_pois_in_queries(dims, nq, npq, seed=None):
    """Place ``nq * npq`` POIs in zones of a grid graph and group them by query.

    Steps, in order (the order of random draws is significant for
    reproducibility with a fixed ``seed``):

    1. Draw ``nq * npq`` samples from a Zipf distribution (``a=2``). Each
       distinct sampled value stands for one zone, and the number of times it
       was drawn is the number of POIs that zone receives.
    2. Ask ``divide_grid_graph`` to split the graph into that many zones; if it
       reports a different zone count, ``merge_two_zones`` is called.
    3. Give the merged zone (key ``"m"``), when present, the largest POI count;
       the remaining zones get the other counts in shuffled order.
    4. Sample the POI nodes of every zone without replacement and hand them to
       queries following the sequence returned by ``assign_query_to_poi``.

    Args:
        dims: Grid description, forwarded unchanged to ``divide_grid_graph``.
        nq: Factor of the total POI count; also forwarded to
            ``assign_query_to_poi`` (presumably the number of queries -
            confirm against that helper).
        npq: Factor of the total POI count; also forwarded to
            ``assign_query_to_poi`` (presumably POIs per query - confirm).
        seed: Seed for the local ``RandomState``; also forwarded to
            ``merge_two_zones`` and ``assign_query_to_poi``. ``None`` lets the
            generator seed itself.

    Returns:
        dict mapping each query taken from ``assign_query_to_poi``'s result to
        the list of nodes selected as its POIs.

    NOTE(review): ``rnd.choice(..., replace=False)`` raises ``ValueError`` when
    a zone holds fewer nodes than its POI count - confirm that the zone helpers
    guarantee enough nodes per zone.
    NOTE(review): assumes the zone mapping ends up with exactly as many entries
    as there are distinct Zipf values - verify against ``merge_two_zones``.
    """
    # RandomState(None) seeds itself, so one call covers both the seeded and
    # the unseeded case.
    rnd = RandomState(seed)

    # Compute number of POIs per zone: sizes of the groups of equal Zipf
    # values, sorted ascending so the largest count sits at the last position.
    total_pois = nq * npq
    samples = rnd.zipf(a=2., size=total_pois)
    pois_per_zone = sorted(
        sum(1 for _ in group) for _, group in itertools.groupby(sorted(samples))
    )
    zone_count = len(pois_per_zone)

    # Divide graph into zones.
    actual_zone_count, np1, np2, zones = divide_grid_graph(dims, zone_count)

    # Merge two zones if the division did not yield the requested zone count.
    if zone_count != actual_zone_count:
        zones_ = merge_two_zones(zones, np1, np2, seed=seed)
    else:
        zones_ = dict(zones)

    # Assign number of POIs to each zone.
    pois_in_zone = {}
    if "m" in zones_:
        # Two zones were merged: the merged zone takes the largest count.
        pois_in_zone["m"] = pois_per_zone[-1]
        zone_keys = [key for key in zones_ if key != "m"]
    else:
        zone_keys = list(zones_.keys())
    rnd.shuffle(zone_keys)
    for idx, key in enumerate(zone_keys):
        pois_in_zone[key] = pois_per_zone[idx]

    # Locate POIs within each zone.
    query_per_poi = assign_query_to_poi(nq, npq, seed=seed)
    pois_per_query = {}
    offset = 0
    # .items() instead of the Python 2-only .iteritems(): same iteration order,
    # but it also runs on Python 3.
    for key, nodes in zones_.items():
        quota = pois_in_zone[key]
        pois = rnd.choice(a=nodes, size=quota, replace=False)
        # Assign which query each POI belongs to: consume the next `quota`
        # entries of the query sequence for this zone.
        for query, poi in zip(query_per_poi[offset:offset + quota], pois):
            pois_per_query.setdefault(query, []).append(poi)
        offset += quota
    return pois_per_query
def zipf(seed, size, a=1.6):
    """Lazily yield ``size`` Zipf-distributed samples from a seeded generator.

    Args:
        seed: Seed passed to ``RandomState``; the same seed reproduces the
            same sequence.
        size: Number of samples to yield; nothing is yielded when it is zero
            or negative.
        a: Exponent handed to ``RandomState.zipf``. Defaults to ``1.6``, the
            value that used to be hard-coded.

    Yields:
        One sample per iteration, drawn individually so the stream is
        consumed only as far as the caller iterates.
    """
    state = RandomState(seed)
    for _ in range(size):
        yield state.zipf(a)