Example #1
0
def distribute_pois_in_queries(dims, nq, npq, seed=None):
    """Spread ``nq * npq`` POIs over zones of a grid graph and group them by query.

    The number of zones and the number of POIs per zone come from a Zipf
    sample: ``nq * npq`` values are drawn, and the multiplicity of each
    distinct value becomes the POI count of one zone.

    Args:
        dims: Grid description, forwarded unchanged to ``divide_grid_graph``.
        nq: Number of queries.
        npq: Number of POIs per query.
        seed: Optional seed for the random generator. The same value is also
            forwarded to ``merge_two_zones`` and ``assign_query_to_poi``.
            ``None`` leaves the generator unseeded.

    Returns:
        dict: Maps each query identifier (as produced by
        ``assign_query_to_poi``) to the list of POI nodes assigned to it.
    """
    # RandomState(None) is the unseeded generator, so one call covers both cases.
    rnd = RandomState(seed)
    # Compute number of POIs per zone: one entry per distinct Zipf value,
    # sorted ascending so the largest count sits at the last position.
    P = nq * npq
    s = rnd.zipf(a=2., size=P)
    ppz = sorted(len(list(v)) for _, v in itertools.groupby(sorted(s)))
    nz = len(ppz)
    # Divide graph into zones.
    nz_, np1, np2, zones = divide_grid_graph(dims, nz)
    # Merge two zones if the division did not yield the requested zone count.
    if nz != nz_:
        zones_ = merge_two_zones(zones, np1, np2, seed=seed)
    else:
        zones_ = dict(zones)
    # Assign number of POIs to each zone.
    npz = dict()
    if "m" in zones_:  # Two zones were merged.
        # The merged zone "m" receives the largest number of POIs.
        npz["m"] = ppz[-1]
        zone_keys = [k for k in zones_ if k != "m"]
    else:
        zone_keys = list(zones_.keys())
    rnd.shuffle(zone_keys)
    for i, k in enumerate(zone_keys):
        npz[k] = ppz[i]
    # Locate POIs within each zone.
    # NOTE(review): assumes qpp holds at least P entries, one query per POI
    # slot -- confirm against assign_query_to_poi.
    qpp = assign_query_to_poi(nq, npq, seed=seed)
    ppq = dict()
    w = 0  # Start of the slice of qpp consumed by the current zone.
    # .items() works on both Python 2 and 3; .iteritems() exists only on 2.
    for k, nodes in zones_.items():
        pois = rnd.choice(a=nodes, size=npz[k], replace=False)
        # Assign which query each POI belongs to.
        for q, p in zip(qpp[w:w + npz[k]], pois):
            ppq.setdefault(q, []).append(p)
        w += npz[k]
    return ppq
Example #2
0
def zipf(seed, size):
    """Lazily yield ``size`` Zipf-distributed samples from a seeded generator.

    A ``RandomState`` is built from ``seed`` once iteration starts; every
    yielded item is a single scalar draw with distribution parameter 1.6.
    """
    exponent = 1.6
    draw = RandomState(seed).zipf

    for _unused in range(size):
        yield draw(exponent)