def optimal_median_cache_placement(topology,
                                   cache_budget,
                                   n_cache_nodes,
                                   hit_ratio,
                                   weight='delay',
                                   **kwargs):
    """Deploy caching nodes in locations that minimize overall latency assuming
    a partitioned strategy (a la Google Global Cache). According to this, in
    the network, a set of caching nodes are deployed and each receiver is
    mapped to one and only one caching node. Requests from this receiver are
    always sent to the designated caching node. In case of cache miss requests
    are forwarded to the original source.

    This placement problem can be mapped to the p-median location-allocation
    problem. This function solves this problem using the vertex substitution
    heuristic, which practically works like the k-medoid PAM algorithms, which
    is also similar to the k-means clustering algorithm. The result is not
    guaranteed to be globally optimal, only locally optimal.

    Notes
    -----
    This placement assumes that all receivers have degree = 1 and are each
    connected to an ICR candidate node. It also assumes that contents are
    uniformly assigned to sources.

    Parameters
    ----------
    topology : Topology
        The topology object
    cache_budget : int
        The cumulative cache budget
    n_cache_nodes : int
        The number of caching nodes to deploy
    hit_ratio : float
        The expected cache hit ratio of a single cache
    weight : str
        The edge attribute to use as link weight when computing shortest paths
    """
    n_cache_nodes = int(n_cache_nodes)
    icr_candidates = topology.graph['icr_candidates']
    if len(icr_candidates) < n_cache_nodes:
        raise ValueError("The number of ICR candidates (%d) is lower than "
                         "the target number of caches (%d)" %
                         (len(icr_candidates), n_cache_nodes))
    elif len(icr_candidates) == n_cache_nodes:
        caches = list(icr_candidates)
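        # Each receiver has degree 1, so its only neighbor is its designated cache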
        cache_assignment = {
            v: list(topology.edge[v].keys())[0]
            for v in topology.receivers()
        }
    else:
        # Need to optimally allocate caching nodes
        distances = nx.all_pairs_dijkstra_path_length(topology, weight=weight)
        sources = topology.sources()
        # d[v][u] is the cost of assigning candidate v to cache u: the distance
        # from v to u plus a penalty proportional to the mean distance from u
        # to the content sources
        d = {u: {} for u in icr_candidates}
        for u in icr_candidates:
            source_dist = sum(distances[u][source]
                              for source in sources) / len(sources)
            for v in icr_candidates:
                if v in d[u]:
                    d[v][u] = d[u][v]
                else:
                    d[v][u] = distances[v][u] + (hit_ratio * source_dist)
        # Solve the p-median problem on the cost matrix d restricted to ICR candidates
        allocation, caches, _ = compute_p_median(d, n_cache_nodes)
        cache_assignment = {
            v: allocation[list(topology.edge[v].keys())[0]]
            for v in topology.receivers()
        }

    cache_size = iround(cache_budget / n_cache_nodes)
    if cache_size == 0:
        raise ValueError(
            "Cache budget (%d) is too small to be split across %d nodes: "
            "each node would get a zero-sized cache. "
            "Set a larger cache budget and try again" %
            (cache_budget, n_cache_nodes))
    for v in caches:
        topology.node[v]['stack'][1]['cache_size'] = cache_size
    topology.graph['cache_assignment'] = cache_assignment
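
# For reference, below is a minimal, self-contained sketch of the vertex
# substitution (Teitz-Bart style) heuristic mentioned in the docstring above.
# It is NOT the compute_p_median implementation used by this function (which is
# not shown here); the name, signature and use of `random` are illustrative
# assumptions only. Starting from an arbitrary set of p medians, it keeps
# swapping a median with a non-median vertex whenever the swap lowers the total
# assignment cost, and stops at a local optimum.
import random


def p_median_vertex_substitution(d, p, seed=None):
    """d: dict of dicts, d[i][j] = cost of assigning demand point i to facility j.
    p: number of medians to select. Returns (allocation, medians, total_cost)."""
    rng = random.Random(seed)
    nodes = list(d)
    medians = set(rng.sample(nodes, p))

    def total_cost(median_set):
        # Each demand point is served by its cheapest median
        return sum(min(d[i][m] for m in median_set) for i in nodes)

    best_cost = total_cost(medians)
    improved = True
    while improved:
        improved = False
        for m in list(medians):
            for candidate in nodes:
                if candidate in medians:
                    continue
                trial = (medians - {m}) | {candidate}
                trial_cost = total_cost(trial)
                if trial_cost < best_cost:
                    medians, best_cost = trial, trial_cost
                    improved = True
                    break
            if improved:
                break

    allocation = {i: min(medians, key=lambda m: d[i][m]) for i in nodes}
    return allocation, medians, best_cost
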
Example #2
def optimal_median_cache_placement(topology, cache_budget, n_cache_nodes,
                                   hit_ratio, weight='delay', **kwargs):
    """Deploy caching nodes in locations that minimize overall latency assuming
    a partitioned strategy (a la Google Global Cache). According to this, in
    the network, a set of caching nodes are deployed and each receiver is
    mapped to one and only one caching node. Requests from this receiver are
    always sent to the designated caching node. In case of cache miss requests
    are forwarded to the original source.

    This placement problem can be mapped to the p-median location-allocation
    problem. This function solves this problem using the vertex substitution
    heuristic, which practically works like the k-medoid PAM algorithms, which
    is also similar to the k-means clustering algorithm. The result is not
    guaranteed to be globally optimal, only locally optimal.

    Notes
    -----
    This placement assumes that all receivers have degree = 1 and are each
    connected to an ICR candidate node. It also assumes that contents are
    uniformly assigned to sources.

    Parameters
    ----------
    topology : Topology
        The topology object
    cache_budget : int
        The cumulative cache budget
    n_cache_nodes : int
        The number of caching nodes to deploy
    hit_ratio : float
        The expected cache hit ratio of a single cache
    weight : str
        The edge attribute to use as link weight when computing shortest paths
    """
    n_cache_nodes = int(n_cache_nodes)
    icr_candidates = topology.graph['icr_candidates']
    if len(icr_candidates) < n_cache_nodes:
        raise ValueError("The number of ICR candidates (%d) is lower than "
                         "the target number of caches (%d)"
                         % (len(icr_candidates), n_cache_nodes))
    elif len(icr_candidates) == n_cache_nodes:
        caches = list(icr_candidates)
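        # Each receiver has degree 1, so its only neighbor is its designated cache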
        cache_assignment = {v: list(topology.adj[v].keys())[0]
                            for v in topology.receivers()}
    else:
        # Need to optimally allocate caching nodes
        distances = dict(nx.all_pairs_dijkstra_path_length(topology, weight=weight))
        sources = topology.sources()
        # d[v][u] is the cost of assigning candidate v to cache u: the distance
        # from v to u plus a penalty proportional to the mean distance from u
        # to the content sources
        d = {u: {} for u in icr_candidates}
        for u in icr_candidates:
            source_dist = sum(distances[u][source] for source in sources) / len(sources)
            for v in icr_candidates:
                if v in d[u]:
                    d[v][u] = d[u][v]
                else:
                    d[v][u] = distances[v][u] + (hit_ratio * source_dist)
        # Solve the p-median problem on the cost matrix d restricted to ICR candidates
        allocation, caches, _ = compute_p_median(d, n_cache_nodes)
        cache_assignment = {v: allocation[list(topology.adj[v].keys())[0]]
                            for v in topology.receivers()}

    cache_size = iround(cache_budget / n_cache_nodes)
    if cache_size == 0:
        raise ValueError("Cache budget is %d but it's too small to deploy it on %d nodes. "
                         "Each node will have a zero-sized cache. "
                         "Set a larger cache budget and try again"
                         % (cache_budget, n_cache_nodes))
    for v in caches:
        topology.node[v]['stack'][1]['cache_size'] = cache_size
    topology.graph['cache_assignment'] = cache_assignment
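
# Toy illustration (assumed data, not from the library) of how the assignment
# cost matrix d built above combines two terms: the delay from a candidate v to
# a cache location u, plus hit_ratio times the mean delay from u to the content
# sources, mirroring d[v][u] = distances[v][u] + hit_ratio * source_dist.
# The graph, node roles and delay values below are invented for the example.
import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from(
    [(0, 1, 2.0), (1, 2, 3.0), (2, 3, 1.0), (1, 3, 5.0)], weight='delay')
icr_candidates = [0, 1, 2]   # nodes eligible to host a cache
sources = [3]                # content source
hit_ratio = 0.5

distances = dict(nx.all_pairs_dijkstra_path_length(G, weight='delay'))
d = {v: {} for v in icr_candidates}
for u in icr_candidates:
    source_dist = sum(distances[u][s] for s in sources) / len(sources)
    for v in icr_candidates:
        d[v][u] = distances[v][u] + hit_ratio * source_dist

print(d)  # d[v][u]: cost of serving receiver-side candidate v from cache u
# The vertex substitution sketch shown earlier could then be run on this
# matrix, e.g. p_median_vertex_substitution(d, 2), to pick the two cache sites.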