def __init__(self, topology, reqs_file, contents_file, beta=0, **kwargs):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Nodes whose stack name
        is "receiver" are used as request receivers.
    reqs_file : str
        Path of the request file. It is only stored here (as
        ``request_file``), not opened.
    contents_file : str
        Path of a tab-separated file with four columns per row. The first
        column is the content identifier and must be an integer.
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to and a
        Zipf distribution over receivers is created.

    Raises
    ------
    ValueError
        If beta is negative or a content identifier is not an integer.
    """
    if beta < 0:
        raise ValueError("beta must be positive")
    self.receivers = [
        v for v in topology.nodes()
        if topology.node[v]["stack"][0] == "receiver"
    ]
    # Content identifiers start at 0, so the number of contents is the
    # largest identifier plus one.
    self.n_contents = 0
    with open(contents_file) as f:
        reader = csv.reader(f, delimiter="\t")
        for content, popularity, size, app_type in reader:
            # csv yields strings: convert before comparing with an int
            self.n_contents = max(self.n_contents, int(content))
    self.n_contents += 1
    self.contents = range(self.n_contents)
    self.request_file = reqs_file
    self.beta = beta
    if beta != 0:
        # Use the constructor argument: self.topology is never assigned
        degree = nx.degree(topology)
        self.receivers = sorted(
            self.receivers,
            # next(iter(...)) works on both Python 2 and Python 3
            key=lambda x: degree[next(iter(topology.adj[x]))],
            reverse=True,
        )
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
def __init__(self, topology, reqs_file, contents_file, n_contents,
             n_warmup, n_measured, rate=1.0, beta=0, **kwargs):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Nodes whose stack name
        is 'receiver' are used as request receivers.
    reqs_file : str
        Path of the requests file (stored, not opened here)
    contents_file : str
        Path of the contents file. Every line is stored verbatim
        (including its line terminator) as one content identifier.
    n_contents : int
        The number of content objects
    n_warmup : int
        The number of warmup requests
    n_measured : int
        The number of logged requests after the warmup
    rate : float, optional
        The mean rate of requests per second
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to.

    Raises
    ------
    ValueError
        If beta is negative
    """
    if beta < 0:
        raise ValueError('beta must be positive')
    # Set high buffering to avoid one-line reads
    self.buffering = 64 * 1024 * 1024
    self.n_contents = n_contents
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    self.reqs_file = reqs_file
    self.rate = rate
    self.receivers = [v for v in topology.nodes_iter()
                      if topology.node[v]['stack'][0] == 'receiver']
    with open(contents_file, 'r', buffering=self.buffering) as f:
        self.contents = [content for content in f]
    self.beta = beta
    if beta != 0:
        degree = nx.degree(topology)
        self.receivers = sorted(
            self.receivers,
            # next(iter(...)) works on both Python 2 and Python 3,
            # whereas iterator.next() exists only on Python 2
            key=lambda x: degree[next(iter(topology.edge[x]))],
            reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
def __init__(self, workload, n_contents, n_warmup, n_measured, alpha=0.99, seed=None, **kwargs):
    """Constructor

    Parameters
    ----------
    workload : str
        Workload identifier. "A", "B" and "C" are accepted; "D" and "E"
        are recognized but not implemented.
    n_contents : int
        Number of content items
    n_warmup : int
        The number of warmup requests (i.e. requests executed to fill
        cache but not logged)
    n_measured : int
        The number of logged requests after the warmup
    alpha : float, optional
        Parameter of Zipf distribution
    seed : int, optional
        The seed for the random generator. The generator is left untouched
        when no seed is given.

    Raises
    ------
    ValueError
        If the workload identifier is not one of A, B, C, D, E
    NotImplementedError
        If the workload identifier is D or E
    """
    recognized = ("A", "B", "C", "D", "E")
    pending = ("D", "E")
    if workload not in recognized:
        raise ValueError("Incorrect workload ID [A-B-C-D-E]")
    if workload in pending:
        raise NotImplementedError("Workloads D and E not yet implemented")
    self.workload = workload
    if seed is not None:
        random.seed(seed)
    self.zipf = TruncatedZipfDist(alpha, n_contents)
    self.n_warmup, self.n_measured = n_warmup, n_measured
def __init__(self, topology, n_contents, alpha, beta=0, rate=1.0,
             n_warmup=10**5, n_measured=4 * 10**5, seed=None, **kwargs):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Nodes whose stack name
        is 'receiver' are used as request receivers.
    n_contents : int
        The number of content objects; contents are numbered 1..n_contents
    alpha : float
        The Zipf alpha parameter of content popularity
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to and a
        Zipf distribution over receivers is created.
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        Seed passed to ``random.seed``

    Raises
    ------
    ValueError
        If alpha or beta is negative
    """
    if alpha < 0:
        raise ValueError('alpha must be positive')
    if beta < 0:
        raise ValueError('beta must be positive')
    self.receivers = [
        v for v in topology.nodes_iter()
        if topology.node[v]['stack'][0] == 'receiver'
    ]
    self.zipf = TruncatedZipfDist(alpha, n_contents)
    self.n_contents = n_contents
    self.contents = range(1, n_contents + 1)
    self.alpha = alpha
    self.rate = rate
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    random.seed(seed)
    self.beta = beta
    if beta != 0:
        # Use the constructor argument: self.topology is never assigned,
        # so reading it raised AttributeError whenever beta was non-zero
        degree = nx.degree(topology)
        self.receivers = sorted(
            self.receivers,
            # next(iter(...)) works on both Python 2 and Python 3
            key=lambda x: degree[next(iter(topology.edge[x]))],
            reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
def __init__(
    self, topology, n_contents, alpha, beta=0, rate=1.0, n_warmup=10 ** 5, n_measured=4 * 10 ** 5, seed=None, **kwargs
):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Nodes whose stack name
        is "receiver" are used as request receivers.
    n_contents : int
        The number of content objects; contents are numbered 1..n_contents
    alpha : float
        The Zipf alpha parameter of content popularity
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to and a
        Zipf distribution over receivers is created.
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        Seed passed to ``random.seed``

    Raises
    ------
    ValueError
        If alpha or beta is negative
    """
    if alpha < 0:
        raise ValueError("alpha must be positive")
    if beta < 0:
        raise ValueError("beta must be positive")
    self.receivers = [v for v in topology.nodes_iter() if topology.node[v]["stack"][0] == "receiver"]
    self.zipf = TruncatedZipfDist(alpha, n_contents)
    self.n_contents = n_contents
    self.contents = range(1, n_contents + 1)
    self.alpha = alpha
    self.rate = rate
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    random.seed(seed)
    self.beta = beta
    if beta != 0:
        # Use the constructor argument: self.topology is never assigned,
        # so reading it raised AttributeError whenever beta was non-zero
        degree = nx.degree(topology)
        # next(iter(...)) works on both Python 2 and Python 3
        self.receivers = sorted(self.receivers, key=lambda x: degree[next(iter(topology.edge[x]))], reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
def __init__(self, topology, n_contents, n_rank, rank_per_group, alpha, beta=0, rate=1.0,
             n_warmup=10**5, n_measured=4*10**5, seed=None, **kwargs):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Each receiver node must
        carry a 'group' attribute; a 'rank' attribute is written on it.
    n_contents : int
        The number of content objects per ranking
    n_rank : int
        The number of rankings
    rank_per_group : float
        The number of rankings available to each group of receivers
    alpha : float
        The Zipf alpha parameter of content popularity
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to.
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        Seed passed to ``random.seed``

    Raises
    ------
    ValueError
        If alpha or beta is negative
    """
    if alpha < 0:
        raise ValueError('alpha must be positive')
    if beta < 0:
        raise ValueError('beta must be positive')
    # Seed before the first random draw so that the rank assignment below
    # is reproducible for a given seed.
    random.seed(seed)
    self.receivers = [v for v in topology.nodes_iter()
                      if topology.node[v]['stack'][0] == 'receiver']
    self.topology = topology
    # Differentiate request distributions between groups: each group draws
    # from its own set of rank_per_group rankings.
    # When num_of_group > N_NODE, multiple groups share a same workload.
    for v in self.receivers:
        g = self.topology.node[v]['group']
        first_rank = int(rank_per_group * g - rank_per_group + 1)
        end_rank = int(math.ceil(rank_per_group * g + 1))
        self.topology.node[v]['rank'] = random.choice(range(first_rank, end_rank))
    self.n_contents = n_contents
    self.contents_range = int(n_contents * 32)
    self.contents = range(1, self.contents_range + 1)
    self.zipf = TruncatedZipfDist(alpha, self.n_contents)
    self.n_rank = int(n_rank)
    self.alpha = alpha
    self.rate = rate
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    self.beta = beta
    if beta != 0:
        degree = nx.degree(self.topology)
        # next(iter(...)) works on both Python 2 and Python 3
        self.receivers = sorted(self.receivers,
                                key=lambda x: degree[next(iter(topology.edge[x]))],
                                reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
def uniform_req_gen(topology,
                    n_contents,
                    alpha,
                    rate=12.0,
                    n_warmup=10**5,
                    n_measured=4 * 10**5,
                    seed=None):
    """This function generates events on the fly, i.e. instead of creating an
    event schedule to be kept in memory, returns an iterator that generates
    events when needed.

    This is useful for running large schedules of events where RAM is limited
    as its memory impact is considerably lower.

    These requests are Poisson-distributed while content popularity is
    Zipf-distributed

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter
    rate : float
        The mean rate of requests per second
    n_warmup : int
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int
        The number of logged requests after the warmup
    seed : int, optional
        Seed passed to ``random.seed`` before the first event is generated

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """
    receivers = [
        v for v in topology.nodes_iter()
        if topology.node[v]['stack'][0] == 'receiver'
    ]
    zipf = TruncatedZipfDist(alpha, n_contents)
    random.seed(seed)

    req_counter = 0
    t_event = 0.0
    while req_counter < n_warmup + n_measured:
        t_event += random.expovariate(rate)
        receiver = random.choice(receivers)
        content = int(zipf.rv())
        log = (req_counter >= n_warmup)
        event = {'receiver': receiver, 'content': content, 'log': log}
        yield (t_event, event)
        req_counter += 1
    # The generator ends by falling off the end: an explicit
    # ``raise StopIteration()`` is turned into RuntimeError by PEP 479.
class DiffrankWorkload(object):
    """Workload with different content rankings sharing the same alpha.

    Every receiver is assigned a rank, drawn at random among the ranks
    available to its group. Requests are Poisson-distributed; the requested
    content is a Zipf draw offset by ``n_contents * (rank - 1)``, so
    receivers with different ranks request from different content ranges.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Each receiver node must
        carry a 'group' attribute; a 'rank' attribute is written on it.
    n_contents : int
        The number of content objects per ranking
    n_rank : int
        The number of rankings
    rank_per_group : float
        The number of rankings available to each group of receivers
    alpha : float
        The Zipf alpha parameter of content popularity
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to.
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        Seed passed to ``random.seed``

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element
        is the timestamp at which the event occurs and the second element
        is a dictionary of event attributes.
    """

    def __init__(self, topology, n_contents, n_rank, rank_per_group, alpha, beta=0, rate=1.0,
                 n_warmup=10**5, n_measured=4*10**5, seed=None, **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        # Seed before the first random draw so that the rank assignment
        # below is reproducible for a given seed.
        random.seed(seed)
        self.receivers = [v for v in topology.nodes_iter()
                          if topology.node[v]['stack'][0] == 'receiver']
        self.topology = topology
        # Differentiate request distributions between groups: each group
        # draws from its own set of rank_per_group rankings.
        # When num_of_group > N_NODE, multiple groups share a same workload.
        for v in self.receivers:
            g = self.topology.node[v]['group']
            first_rank = int(rank_per_group * g - rank_per_group + 1)
            end_rank = int(math.ceil(rank_per_group * g + 1))
            self.topology.node[v]['rank'] = random.choice(range(first_rank, end_rank))
        self.n_contents = n_contents
        self.contents_range = int(n_contents * 32)
        self.contents = range(1, self.contents_range + 1)
        self.zipf = TruncatedZipfDist(alpha, self.n_contents)
        self.n_rank = int(n_rank)
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        self.beta = beta
        if beta != 0:
            degree = nx.degree(self.topology)
            # next(iter(...)) works on both Python 2 and Python 3
            self.receivers = sorted(self.receivers,
                                    key=lambda x: degree[next(iter(topology.edge[x]))],
                                    reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        req_counter = 0
        t_event = 0.0
        while req_counter < self.n_warmup + self.n_measured:
            t_event += random.expovariate(self.rate)
            if self.beta == 0:
                receiver = random.choice(self.receivers)
            else:
                receiver = self.receivers[self.receiver_dist.rv() - 1]
            # The last chosen receiver is kept on the instance, as before
            self.receiver = receiver
            rank_receiver = int(self.topology.node[self.receiver]['rank'] - 1)
            # Shift the Zipf draw into the content range of the receiver's rank
            content = int(self.zipf.rv()) + self.n_contents * rank_receiver
            log = (req_counter >= self.n_warmup)
            event = {'receiver': receiver, 'content': content, 'log': log}
            yield (t_event, event)
            req_counter += 1
        # The generator ends by falling off the end: an explicit
        # ``raise StopIteration()`` is turned into RuntimeError by PEP 479.
def __init__(self, topology, n_contents, alpha, beta=0, rates=[0], rate_dist=[0],
             n_warmup=10**5, n_measured=4 * 10**5, seed=0, n_services=10, **kwargs):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Its ``graph`` dictionary
        must provide 'n_classes' and 'n_edgeRouters'.
    n_contents : int
        The number of content objects; contents are numbered
        0..n_contents-1
    alpha : float
        The Zipf alpha parameter (validated and stored only)
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to.
    rates : list, optional
        Stored as given
    rate_dist : list, optional
        One value per class; its running sum is stored in
        ``rate_cum_dist``. Must have at least 'n_classes' entries.
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        Seed stored on the instance and passed to the receiver distribution
    n_services : int, optional
        The number of services

    Raises
    ------
    ValueError
        If alpha or beta is negative
    """
    # NOTE: the list defaults of rates and rate_dist are never mutated here.
    if alpha < 0:
        raise ValueError('alpha must be positive')
    if beta < 0:
        raise ValueError('beta must be positive')
    self.receivers = [
        v for v in topology.nodes_iter()
        if topology.node[v]['stack'][0] == 'receiver'
    ]
    self.num_classes = topology.graph['n_classes']
    # NOTE: no content popularity distribution (self.zipf) is created here.
    self.n_contents = n_contents
    self.contents = range(0, n_contents)
    self.n_services = n_services
    self.alpha = alpha
    self.rates = rates
    self.n_edgeRouters = topology.graph['n_edgeRouters']
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    self.model = None
    self.beta = beta
    self.topology = topology
    self.rate_cum_dist = [0.0] * self.num_classes
    # Single-argument print calls behave identically on Python 2 and 3
    print("rate_dist=  %s \n" % (rate_dist,))
    print("Number of classes: " + repr(self.num_classes))
    # Running sum: same additions in the same order as summing every
    # prefix from scratch, but linear instead of quadratic.
    running = 0.0
    for c in range(self.num_classes):
        running += rate_dist[c]
        self.rate_cum_dist[c] = running
    print("Cumulative dist: " + repr(self.rate_cum_dist))
    if beta != 0:
        degree = nx.degree(self.topology)
        self.receivers = sorted(
            self.receivers,
            # next(iter(...)) works on both Python 2 and Python 3
            key=lambda x: degree[next(iter(topology.edge[x]))],
            reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers), seed)
    self.seed = seed
    self.first = True
def uniform_req_gen(topology, n_contents, alpha, rate=12.0,
                    n_warmup=10 ** 5, n_measured=4 * 10 ** 5, seed=None):
    """This function generates events on the fly, i.e. instead of creating an
    event schedule to be kept in memory, returns an iterator that generates
    events when needed.

    This is useful for running large schedules of events where RAM is limited
    as its memory impact is considerably lower.

    These requests are Poisson-distributed while content popularity is
    Zipf-distributed

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter
    rate : float
        The mean rate of requests per second
    n_warmup : int
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int
        The number of logged requests after the warmup
    seed : int, optional
        Seed passed to ``random.seed`` before the first event is generated

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """
    receivers = [v for v in topology.nodes_iter()
                 if topology.node[v]['stack'][0] == 'receiver']
    zipf = TruncatedZipfDist(alpha, n_contents)
    random.seed(seed)

    req_counter = 0
    t_event = 0.0
    while req_counter < n_warmup + n_measured:
        t_event += random.expovariate(rate)
        receiver = random.choice(receivers)
        content = int(zipf.rv())
        log = (req_counter >= n_warmup)
        event = {'receiver': receiver, 'content': content, 'log': log}
        yield (t_event, event)
        req_counter += 1
    # The generator ends by falling off the end: an explicit
    # ``raise StopIteration()`` is turned into RuntimeError by PEP 479.
def laoutaris_cache_hit_ratio(alpha, population, cache_size, order=3):
    """Estimate the overall hit ratio of an LRU cache under power-law demand
    with the Laoutaris approximation.

    Parameters
    ----------
    alpha : float
        The coefficient of the demand power-law distribution
    population : int
        The content population
    cache_size : int
        The cache size
    order : int, optional
        The order of the Taylor expansion. Supports only 2 and 3

    Returns
    -------
    cache_hit_ratio : float
        The cache hit ratio

    References
    ----------
    http://arxiv.org/pdf/0705.1970.pdf
    """
    demand = TruncatedZipfDist(alpha, population).pdf
    char_time = laoutaris_characteristic_time(alpha, population, cache_size, order)
    # Per-item miss probability, weighted below by the item's demand
    miss_prob = math.e**-(char_time * demand)
    weighted_hits = demand * (1 - miss_prob)
    return np.sum(weighted_hits)
def __init__(self, topology, reqs_file, contents_file, n_contents,
             n_warmup, n_measured, rate=1.0, beta=0, **kwargs):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Nodes whose stack name
        is 'receiver' are used as request receivers.
    reqs_file : str
        Path of the requests file (stored, not opened here)
    contents_file : str
        Path of the contents file. Every line is stored verbatim
        (including its line terminator) as one content identifier.
    n_contents : int
        The number of content objects
    n_warmup : int
        The number of warmup requests
    n_measured : int
        The number of logged requests after the warmup
    rate : float, optional
        The mean rate of requests per second
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to.

    Raises
    ------
    ValueError
        If beta is negative
    """
    if beta < 0:
        raise ValueError('beta must be positive')
    # Set high buffering to avoid one-line reads
    self.buffering = 64 * 1024 * 1024
    self.n_contents = n_contents
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    self.reqs_file = reqs_file
    self.rate = rate
    self.receivers = [v for v in topology.nodes()
                      if topology.node[v]['stack'][0] == 'receiver']
    with open(contents_file, 'r', buffering=self.buffering) as f:
        self.contents = [content for content in f]
    self.beta = beta
    if beta != 0:
        degree = nx.degree(topology)
        self.receivers = sorted(
            self.receivers,
            # next(iter(...)) works on both Python 2 and Python 3,
            # whereas iterator.next() exists only on Python 2
            key=lambda x: degree[next(iter(topology.adj[x]))],
            reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
def __iter__(self):
    """Generate Poisson-spaced requests split into four traffic classes.

    A uniform draw selects the class of each request, using the cumulative
    thresholds ``self.transit``, ``self.local`` and ``self.ingress``:

    * transit: remote receiver requesting remote content
    * local: local receiver requesting local content
    * ingress: remote receiver requesting local content
    * egress (remainder): local receiver requesting remote content

    Yields
    ------
    event : tuple
        2-tuple (timestamp, attributes) where attributes has the keys
        'receiver', 'content' and 'log'.
    """
    # NOTE(review): the original read bare names ``topology`` and ``alpha``,
    # which are not defined in this method. Instance attributes are assumed
    # to be set by the constructor; confirm against __init__.
    topology = self.topology
    self.local_contents = list(topology.graph['internal_contents'])
    self.remote_contents = list(topology.graph['edge_contents'])
    self.local_receivers = topology.graph['internal_receivers']
    self.remote_receivers = topology.graph['edge_receivers']
    self.zipf_local = TruncatedZipfDist(self.alpha, len(self.local_contents))
    # The remote distribution ranks the remote contents it indexes below
    self.zipf_remote = TruncatedZipfDist(self.alpha, len(self.remote_contents))

    req_counter = 0
    t_event = 0.0
    while req_counter < self.n_warmup + self.n_measured:
        t_event += random.expovariate(self.rate)
        x = random.random()
        # Zipf draws are 1-based ranks (other uses of rv() subtract 1 before
        # indexing), hence the "- 1" when they are used as list indices.
        if x < self.transit:
            # transit traffic
            receiver = random.choice(self.remote_receivers)
            indx = int(self.zipf_remote.rv()) - 1
            content = self.remote_contents[indx]
        elif x < self.transit + self.local:
            # local traffic
            receiver = random.choice(self.local_receivers)
            indx = int(self.zipf_local.rv()) - 1
            content = self.local_contents[indx]
        elif x < self.transit + self.local + self.ingress:
            # ingress traffic
            receiver = random.choice(self.remote_receivers)
            indx = int(self.zipf_local.rv()) - 1
            content = self.local_contents[indx]
        else:
            # egress traffic
            receiver = random.choice(self.local_receivers)
            indx = int(self.zipf_remote.rv()) - 1
            content = self.remote_contents[indx]
        log = (req_counter >= self.n_warmup)
        event = {'receiver': receiver, 'content': content, 'log': log}
        yield (t_event, event)
        req_counter += 1
    # The generator ends by falling off the end: an explicit
    # ``raise StopIteration()`` is turned into RuntimeError by PEP 479.
def test_expected_fit(self):
    """Check that zipf_fit recovers the alpha of a known Zipf distribution
    within tolerance and reports a high p-value, over a range of alphas.
    """
    max_alpha_error = 0.02  # Tolerated alpha estimation error
    lowest_p = 0.99  # Min p
    n_items = 1000  # Number of Zipf distribution items
    # Tested range of Zipf's alpha
    for true_alpha in np.arange(0.2, 5.0, 0.1):
        dist = TruncatedZipfDist(true_alpha, n_items)
        fitted_alpha, p_value = traces.zipf_fit(dist.pdf)
        self.assertLessEqual(np.abs(true_alpha - fitted_alpha), max_alpha_error)
        self.assertGreaterEqual(p_value, lowest_p)
def test_expected_fit_not_sorted(self):
    """Check that zipf_fit recovers the alpha of a known Zipf distribution
    when the frequencies are shuffled and sorting is delegated to zipf_fit.
    """
    max_alpha_error = 0.02  # Tolerated alpha estimation error
    lowest_p = 0.99  # Min p
    n_items = 1000  # Number of Zipf distribution items
    # Tested range of Zipf's alpha
    for true_alpha in np.arange(0.2, 5.0, 0.1):
        freqs = TruncatedZipfDist(true_alpha, n_items).pdf
        np.random.shuffle(freqs)
        fitted_alpha, p_value = traces.zipf_fit(freqs, need_sorting=True)
        assert np.abs(true_alpha - fitted_alpha) <= max_alpha_error
        assert p_value >= lowest_p
def zipf_fit(obs_freqs, need_sorting=False):
    """Returns the value of the Zipf's distribution alpha parameter that best
    fits the data provided and the p-value of the fit test.

    Parameters
    ----------
    obs_freqs : array
        The array of observed frequencies sorted in descending order
    need_sorting : bool, optional
        If True, indicates that obs_freqs is not sorted and this function will
        sort it. If False, assume that the array is already sorted

    Returns
    -------
    alpha : float
        The alpha parameter of the best Zipf fit
    p : float
        The p-value of the test. It is 0 when the best-fit alpha is not
        positive.

    Raises
    ------
    ImportError
        If scipy.optimize.minimize_scalar cannot be imported

    Notes
    -----
    This function uses the method described in
    http://stats.stackexchange.com/questions/6780/how-to-calculate-zipfs-law-coefficient-from-a-set-of-top-frequencies
    """
    try:
        from scipy.optimize import minimize_scalar
    except ImportError:
        raise ImportError("Cannot import scipy.optimize minimize_scalar. "
                          "You either don't have scipy install or you have a "
                          "version too old (required 0.12 onwards)")
    obs_freqs = np.asarray(obs_freqs)
    if need_sorting:
        # Sort in descending order
        obs_freqs = -np.sort(-obs_freqs)
    n = len(obs_freqs)
    # Ranks and their logs do not depend on alpha: compute them once rather
    # than on every evaluation of the objective.
    ranks = np.arange(1.0, n + 1)
    log_ranks = np.log(ranks)

    def log_likelihood(alpha):
        # Negative log-likelihood of the observations (this is minimized)
        return np.sum(obs_freqs * (alpha * log_ranks +
                                   math.log(sum(1.0 / ranks**alpha))))

    # Find optimal alpha
    alpha = minimize_scalar(log_likelihood)['x']
    # Calculate goodness of fit
    if alpha <= 0:
        # Silently report a zero probability of a fit
        return alpha, 0
    exp_freqs = np.sum(obs_freqs) * TruncatedZipfDist(alpha, n).pdf
    # NOTE(review): chisquare is not imported in this function; it is
    # presumably imported at module level (scipy.stats) - confirm.
    p = chisquare(obs_freqs, exp_freqs)[1]
    return alpha, p
def __init__(self, topology, reqs_file, contents_file, beta=0, **kwargs):
    """Constructor

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Nodes whose stack name
        is "receiver" are used as request receivers.
    reqs_file : str
        Path of the request file. It is only stored here (as
        ``request_file``), not opened.
    contents_file : str
        Path of a tab-separated file with four columns per row. The first
        column is the content identifier and must be an integer.
    beta : float, optional
        Spatial skewness of request rates. If non-zero, receivers are
        sorted by decreasing degree of the node they are attached to and a
        Zipf distribution over receivers is created.

    Raises
    ------
    ValueError
        If beta is negative or a content identifier is not an integer.
    """
    if beta < 0:
        raise ValueError("beta must be positive")
    self.receivers = [v for v in topology.nodes_iter() if topology.node[v]["stack"][0] == "receiver"]
    # Content identifiers start at 0, so the number of contents is the
    # largest identifier plus one.
    self.n_contents = 0
    with open(contents_file, "r") as f:
        reader = csv.reader(f, delimiter="\t")
        for content, popularity, size, app_type in reader:
            # csv yields strings: convert before comparing with an int
            self.n_contents = max(self.n_contents, int(content))
    self.n_contents += 1
    self.contents = range(self.n_contents)
    self.request_file = reqs_file
    self.beta = beta
    if beta != 0:
        # Use the constructor argument: self.topology is never assigned
        degree = nx.degree(topology)
        # next(iter(...)) works on both Python 2 and Python 3
        self.receivers = sorted(self.receivers, key=lambda x: degree[next(iter(topology.edge[x]))], reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
def laoutaris_per_content_cache_hit_ratio(alpha, population, cache_size, order=3, target=None):
    """Estimate per-content hit ratios of an LRU cache under power-law demand
    with the Laoutaris approximation.

    Parameters
    ----------
    alpha : float
        The coefficient of the demand power-law distribution
    population : int
        The content population
    cache_size : int
        The cache size
    order : int, optional
        The order of the Taylor expansion. Supports only 2 and 3
    target : int, optional
        The item index [1,N] for which cache hit ratio is requested. If not
        specified, the function calculates the cache hit ratio of all the
        items in the population.

    Returns
    -------
    cache_hit_ratio : list of float or float
        If target is None, the cache hit ratios of all items in the
        population, in rank order. If a target is specified, the cache hit
        ratio of only the specified item.

    References
    ----------
    http://arxiv.org/pdf/0705.1970.pdf
    """
    pdf = TruncatedZipfDist(alpha, population).pdf
    r = laoutaris_characteristic_time(alpha, population, cache_size, order)
    if target is not None:
        # target is 1-based, the pdf array is 0-based
        return 1 - math.exp(-pdf[target - 1] * r)
    return [1 - math.exp(-prob * r) for prob in pdf]
class TraceDrivenWorkload(object):
    """Parse requests from a generic request trace.

    This workload requires two text files:
     * a requests file, where each line corresponds to a string identifying
       the content requested
     * a contents file, which lists all unique content identifiers appearing
       in the requests file.

    Since the trace do not provide timestamps, requests are scheduled according
    to a Poisson process of rate *rate*. All requests are mapped to receivers
    uniformly unless a positive *beta* parameter is specified.

    If a *beta* parameter is specified, then receivers issue requests at
    different rates. The algorithm used to determine the requests rates for
    each receiver is the following:
     * All receiver are sorted in decreasing order of degree of the PoP they
       are attached to. This assumes that all receivers have degree = 1 and are
       attached to a node with degree > 1
     * Rates are then assigned following a Zipf distribution of coefficient
       beta where nodes with higher-degree PoPs have a higher request rate

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    reqs_file : str
        The path to the requests file
    contents_file : str
        The path to the contents file
    n_contents : int
        The number of content object (i.e. the number of lines of contents_file)
    n_warmup : int
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int
        The number of logged requests after the warmup
    rate : float, optional
        The network-wide mean rate of requests per second
    beta : float, optional
        Spatial skewness of requests rates

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """

    def __init__(self, topology, reqs_file, contents_file, n_contents,
                 n_warmup, n_measured, rate=1.0, beta=0, **kwargs):
        """Constructor"""
        if beta < 0:
            raise ValueError('beta must be positive')
        # Set high buffering to avoid one-line reads
        self.buffering = 64 * 1024 * 1024
        self.n_contents = n_contents
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        self.reqs_file = reqs_file
        self.rate = rate
        self.receivers = [
            v for v in topology.nodes_iter()
            if topology.node[v]['stack'][0] == 'receiver'
        ]
        # Lines are kept verbatim (with their terminator), which matches the
        # content strings yielded by __iter__
        with open(contents_file, 'r', buffering=self.buffering) as f:
            self.contents = [content for content in f]
        self.beta = beta
        if beta != 0:
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                # next(iter(...)) works on both Python 2 and Python 3
                key=lambda x: degree[next(iter(topology.edge[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        """Yield (timestamp, event) tuples for the requests of the trace.

        Raises
        ------
        ValueError
            If the trace ends before n_warmup + n_measured requests have
            been generated
        """
        req_counter = 0
        t_event = 0.0
        with open(self.reqs_file, 'r', buffering=self.buffering) as f:
            for content in f:
                t_event += random.expovariate(self.rate)
                if self.beta == 0:
                    receiver = random.choice(self.receivers)
                else:
                    receiver = self.receivers[self.receiver_dist.rv() - 1]
                log = (req_counter >= self.n_warmup)
                event = {'receiver': receiver, 'content': content, 'log': log}
                yield (t_event, event)
                req_counter += 1
                if req_counter >= self.n_warmup + self.n_measured:
                    # End the generator with return: an explicit
                    # ``raise StopIteration()`` is turned into RuntimeError
                    # by PEP 479.
                    return
            raise ValueError("Trace did not contain enough requests")
class GlobetraffWorkload(object):
    """Parse requests from GlobeTraff workload generator

    All requests are mapped to receivers uniformly unless a positive *beta*
    parameter is specified.

    If a *beta* parameter is specified, then receivers issue requests at
    different rates. The algorithm used to determine the requests rates for
    each receiver is the following:
     * All receiver are sorted in decreasing order of degree of the PoP they
       are attached to. This assumes that all receivers have degree = 1 and are
       attached to a node with degree > 1
     * Rates are then assigned following a Zipf distribution of coefficient
       beta where nodes with higher-degree PoPs have a higher request rate

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    reqs_file : str
        The GlobeTraff request file
    contents_file : str
        The GlobeTraff content file
    beta : float, optional
        Spatial skewness of requests rates

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """

    def __init__(self, topology, reqs_file, contents_file, beta=0, **kwargs):
        """Constructor"""
        if beta < 0:
            raise ValueError('beta must be positive')
        self.receivers = [
            v for v in topology.nodes_iter()
            if topology.node[v]['stack'][0] == 'receiver'
        ]
        # Content identifiers start at 0, so the number of contents is the
        # largest identifier plus one.
        self.n_contents = 0
        with open(contents_file, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            for content, popularity, size, app_type in reader:
                # csv yields strings: convert before comparing with an int
                self.n_contents = max(self.n_contents, int(content))
        self.n_contents += 1
        self.contents = range(self.n_contents)
        self.request_file = reqs_file
        self.beta = beta
        if beta != 0:
            # Use the constructor argument: self.topology is never assigned
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                # next(iter(...)) works on both Python 2 and Python 3
                key=lambda x: degree[next(iter(topology.edge[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        """Yield one (timestamp, event) tuple per row of the request file.

        The request file is tab-separated with three columns: timestamp,
        content and size.
        """
        # NOTE(review): timestamp, content and size are yielded as the
        # strings read from the file, while self.contents holds integers -
        # confirm whether consumers expect numeric values.
        with open(self.request_file, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            for timestamp, content, size in reader:
                if self.beta == 0:
                    receiver = random.choice(self.receivers)
                else:
                    receiver = self.receivers[self.receiver_dist.rv() - 1]
                event = {
                    'receiver': receiver,
                    'content': content,
                    'size': size
                }
                yield (timestamp, event)
        # The generator ends by falling off the end: an explicit
        # ``raise StopIteration()`` is turned into RuntimeError by PEP 479.
class StationaryUpdated(object):
    '''Represents a content as a multiple objects, while keeping track of
    the link_delay

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content objects
    alpha : float
        The Zipf alpha parameter of content popularity
    chunks_per_content : sequence of pairs
        (percent, n_chunks) pairs: *percent* percent of the contents are
        made of *n_chunks* chunks. The percents must add up to 100.
    beta : float, optional
        Spatial skewness of request rates
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        Seed passed to ``random.seed``

    Notes
    -----
    ``model`` and ``controller`` are initialized to None and must be
    assigned before the workload is iterated.
    '''

    def __init__(self, topology, n_contents, alpha, chunks_per_content, beta=0, rate=1.0,
                 n_warmup=10**5, n_measured=4 * 10**5, seed=None, **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        self.model = None
        self.controller = None
        self.receivers = [
            v for v in topology.nodes()
            if topology.node[v]['stack'][0] == 'receiver'
        ]
        self.zipf = TruncatedZipfDist(alpha, n_contents)
        self.n_contents = n_contents
        self.chunks_per_content = chunks_per_content
        self.contents = []
        self.n_chunks = []
        if sum(pair[0] for pair in self.chunks_per_content) != 100:
            raise ValueError('The percents should add up to a 100')
        for pair in self.chunks_per_content:
            # Floor division keeps the count an integer on Python 3 too
            share = int(self.n_contents * pair[0] // 100)
            self.n_chunks.extend([pair[1]] * share)
        # Rounding down may leave some contents without a chunk count: they
        # get the last value of the last pair
        for j in range(len(self.n_chunks), self.n_contents):
            self.n_chunks.append(self.chunks_per_content[-1][-1])
        if self.n_contents != len(self.n_chunks):
            raise ValueError('n_chunks must be the same length as n_contents ')
        # define a two-dimensional array which stores the contents for
        # easier index manipulations
        self.content_objects = [[
            Content(i, j) for j in range(self.n_chunks[i])
        ] for i in range(self.n_contents)]
        # create a list of the two-dimensional array
        for i in range(self.n_contents):
            for j in range(self.n_chunks[i]):
                self.content_objects[i][j].chunks = self.n_chunks[i]
                self.contents.append(self.content_objects[i][j])
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        random.seed(seed)
        self.beta = beta
        if beta != 0:
            # Use the constructor argument: self.topology is never assigned
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                # next(iter(...)) works on both Python 2 and Python 3
                key=lambda x: degree[next(iter(topology.adj[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        # NOTE(review): the if/else nesting below was reconstructed from
        # flattened source text - confirm it against the original layout.
        req_counter = 0
        t_content = 0.0
        t_total = 0.0
        t_arrival = 0.01
        while req_counter < self.n_warmup + self.n_measured:
            # check whether the list is empty
            if not (self.model.queue_event):
                t_total = t_content
            if t_total >= t_content:
                # if download of current content interrupted with new request
                t_content += random.expovariate(self.rate)
                req_counter += 1
                if self.beta == 0:
                    receiver = random.choice(self.receivers)
                else:
                    receiver = self.receivers[self.receiver_dist.rv() - 1]
                # choose the content randomly
                new_content = int(self.zipf.rv() - 1)
                log = (req_counter >= self.n_warmup)
                # push new objects into heap
                for t in range(self.n_chunks[new_content]):
                    add_event = Event()
                    packet = Packet()
                    packet.receiver = receiver
                    packet.current_node = receiver
                    packet.content = self.content_objects[new_content][t]
                    t_total += t_arrival
                    add_event.timing = t_total
                    add_event.log = log
                    add_event.packet = packet
                    self.controller.queue_push(add_event)
            else:
                event_dispatched = self.controller.queue_pop()
                t_total = event_dispatched.timing
                event = {
                    'packet': event_dispatched.packet,
                    'log': event_dispatched.log,
                    'type': event_dispatched.type_chunk,
                    'session_id': event_dispatched.session_id
                }
                yield (t_total, event)
        # The generator ends by falling off the end: an explicit
        # ``raise StopIteration()`` is turned into RuntimeError by PEP 479.
class YOUTUBE_TRACE(object):
    """
    YOUTUBE_TRACE

    Replays requests parsed from YouTube trace files, spacing them according
    to a Poisson process of rate *rate*.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter
    beta : float, optional
        Spatial skewness of request rates. If zero, the receiver is derived
        from the client address of each trace record.
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int, optional
        The number of logged requests after the warmup

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """

    def __init__(self, topology, n_contents, alpha, beta=0, rate=1.0,
                 n_warmup=10**3, n_measured=4 * 10**5, seed=None, **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        self.receivers = [
            v for v in topology.nodes_iter()
            if topology.node[v]['stack'][0] == 'receiver'
        ]
        self.zipf = TruncatedZipfDist(alpha, n_contents)
        self.n_contents = n_contents
        self.contents = range(1, n_contents + 1)
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        random.seed(seed)
        self.beta = beta
        if beta != 0:
            # Use the constructor argument: self.topology is never assigned
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                # next(iter(...)) works on both Python 2 and Python 3
                key=lambda x: degree[next(iter(topology.edge[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        req_counter = 0
        t_event = 0.0
        # Raw string: in a plain literal "\U" starts a unicode escape on
        # Python 3, which made this line a SyntaxError. The path itself is
        # unchanged.
        mypath = r"C:\Users\widndows7\Desktop\myresult\youtube.parsed.012908.24.txt"
        for n in my_parse_youtube_umass(mypath):
            t_event += random.expovariate(self.rate)
            if self.beta == 0:
                # Map each client address onto one of the receivers
                receiver = self.receivers[int(n['client_addr']) % len(self.receivers)]
            else:
                receiver = self.receivers[self.receiver_dist.rv() - 1]
            content = n['video_id']
            log = (req_counter >= self.n_warmup)
            event = {'receiver': receiver, 'content': content, 'log': log}
            yield (t_event, event)
            req_counter += 1
        # NOTE(review): mypath1 is not defined in this class; unless it is a
        # module-level name, this second pass raises NameError once the
        # first trace is exhausted - confirm the intended second trace file.
        for n in my_parse_youtube_umass(mypath1):
            t_event += random.expovariate(self.rate)
            if self.beta == 0:
                receiver = self.receivers[int(n['client_addr']) % len(self.receivers)]
            else:
                receiver = self.receivers[self.receiver_dist.rv() - 1]
            content = n['video_id']
            log = (req_counter >= self.n_warmup)
            event = {'receiver': receiver, 'content': content, 'log': log}
            yield (t_event, event)
            req_counter += 1
        # The generator ends by falling off the end: an explicit
        # ``raise StopIteration()`` is turned into RuntimeError by PEP 479.
class My_Workload(object):
    """Stationary workload with content-affine receiver selection.

    Events are generated on the fly, i.e. instead of creating an event
    schedule to be kept in memory, an iterator generates events when needed.
    Request arrivals are Poisson-distributed while content popularity is
    Zipf-distributed.

    For every request a fair coin is tossed: on one outcome the receiver is
    chosen uniformly among all receivers, on the other it is chosen
    uniformly among the receivers ``v`` such that
    ``v % 64 == content % 64``. If no receiver satisfies that condition the
    choice falls back to all receivers.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter
    beta : float, optional
        Zipf coefficient of the receiver distribution built by the
        constructor. The iterator does not currently use it.
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        The seed for the random generator

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """

    def __init__(self, topology, n_contents, alpha, beta=0, rate=1.0,
                 n_warmup=10**5, n_measured=4 * 10**5, seed=None, **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        self.receivers = [
            v for v in topology.nodes_iter()
            if topology.node[v]['stack'][0] == 'receiver'
        ]
        self.zipf = TruncatedZipfDist(alpha, n_contents)
        self.n_contents = n_contents
        self.contents = range(1, n_contents + 1)
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        random.seed(seed)
        self.beta = beta
        if beta != 0:
            # Use the constructor argument: the instance has no ``topology``
            # attribute.
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                key=lambda x: degree[next(iter(topology.edge[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        req_counter = 0
        t_event = 0.0
        n_requests = self.n_warmup + self.n_measured
        # Group receivers by residue once instead of rescanning the whole
        # receiver list for every request.
        by_residue = {}
        for v in self.receivers:
            by_residue.setdefault(v % 64, []).append(v)
        while req_counter < n_requests:
            t_event += random.expovariate(self.rate)
            content = int(self.zipf.rv())
            if random.choice([0, 1]) != 0:
                candidates = self.receivers
            else:
                # An empty bucket would make random.choice raise IndexError
                candidates = by_residue.get(content % 64) or self.receivers
            event = {
                'receiver': random.choice(candidates),
                'content': content,
                'log': req_counter >= self.n_warmup,
            }
            yield (t_event, event)
            req_counter += 1
        # Falling off the end terminates the generator (PEP 479 forbids
        # raising StopIteration from inside it).
def __init__(self, topology, n_contents, alpha, chunks_per_content, beta=0,
             rate=1.0, n_warmup=10**5, n_measured=4 * 10**5, seed=None,
             **kwargs):
    """Build a workload whose contents are split into chunks.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content objects
    alpha : float
        The Zipf alpha parameter of content popularity
    chunks_per_content : sequence of pairs
        ``(percent, n_chunks)`` pairs: *percent* % of the contents are made
        of *n_chunks* chunks. The percentages must add up to 100. Contents
        left unassigned by integer rounding get the chunk count of the last
        pair.
    beta : float, optional
        Zipf coefficient used to skew requests across receivers
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        The seed for the random generator

    Raises
    ------
    ValueError
        If *alpha* or *beta* is negative, if the percentages do not add up
        to 100 or if the chunk counts do not cover every content.
    """
    if alpha < 0:
        raise ValueError('alpha must be positive')
    if beta < 0:
        raise ValueError('beta must be positive')
    self.model = None
    self.controller = None
    self.receivers = [
        v for v in topology.nodes()
        if topology.node[v]['stack'][0] == 'receiver'
    ]
    self.zipf = TruncatedZipfDist(alpha, n_contents)
    self.n_contents = n_contents
    self.chunks_per_content = chunks_per_content
    self.contents = []
    self.n_chunks = []
    if sum(pair[0] for pair in self.chunks_per_content) != 100:
        raise ValueError('The percents should add up to a 100')
    for pair in self.chunks_per_content:
        # Floor division: range() rejects the float that "/" yields on
        # Python 3.
        share = self.n_contents * pair[0] // 100
        self.n_chunks.extend([pair[1]] * share)
    # Rounding may leave some contents uncovered: pad with the last pair.
    missing = self.n_contents - len(self.n_chunks)
    if missing > 0:
        self.n_chunks.extend([self.chunks_per_content[-1][-1]] * missing)
    if self.n_contents != len(self.n_chunks):
        raise ValueError('n_chunks must be the same length as n_contents ')
    # Two-dimensional array (content index, chunk index) for easier index
    # manipulations; self.contents is its flattened, row-major view.
    self.content_objects = []
    for i in range(self.n_contents):
        chunks = [Content(i, j) for j in range(self.n_chunks[i])]
        for chunk in chunks:
            chunk.chunks = self.n_chunks[i]
        self.content_objects.append(chunks)
        self.contents.extend(chunks)
    self.alpha = alpha
    self.rate = rate
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    random.seed(seed)
    self.beta = beta
    if beta != 0:
        # Use the constructor argument: the instance has no ``topology``
        # attribute.
        degree = nx.degree(topology)
        self.receivers = sorted(
            self.receivers,
            key=lambda x: degree[next(iter(topology.adj[x]))],
            reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))
class GlobetraffWorkload(object):
    """Parse requests from GlobeTraff workload generator

    All requests are mapped to receivers uniformly unless a positive *beta*
    parameter is specified.

    If a *beta* parameter is specified, then receivers issue requests at
    different rates. The algorithm used to determine the requests rates for
    each receiver is the following:

     * All receiver are sorted in decreasing order of degree of the PoP they
       are attached to. This assumes that all receivers have degree = 1 and
       are attached to a node with degree > 1
     * Rates are then assigned following a Zipf distribution of coefficient
       beta where nodes with higher-degree PoPs have a higher request rate

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    reqs_file : str
        The GlobeTraff request file
    contents_file : str
        The GlobeTraff content file
    beta : float, optional
        Spatial skewness of requests rates

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes. Timestamp, content and size are
        yielded as the strings read from the request file.
    """

    def __init__(self, topology, reqs_file, contents_file, beta=0, **kwargs):
        """Constructor"""
        if beta < 0:
            raise ValueError('beta must be positive')
        self.receivers = [v for v in topology.nodes()
                          if topology.node[v]['stack'][0] == 'receiver']
        # Contents are numbered from 0, so the population size is the
        # largest identifier found in the contents file plus one.
        max_content = 0
        with open(contents_file, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            for content, popularity, size, app_type in reader:
                # csv yields strings: convert before comparing with an int
                max_content = max(max_content, int(content))
        self.n_contents = max_content + 1
        self.contents = range(self.n_contents)
        self.request_file = reqs_file
        self.beta = beta
        if beta != 0:
            # Use the constructor argument: the instance has no ``topology``
            # attribute.
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                key=lambda x: degree[next(iter(topology.adj[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        with open(self.request_file, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            for timestamp, content, size in reader:
                if self.beta == 0:
                    receiver = random.choice(self.receivers)
                else:
                    # rv() is 1-based, list indices are 0-based
                    receiver = self.receivers[self.receiver_dist.rv() - 1]
                event = {'receiver': receiver, 'content': content,
                         'size': size}
                yield (timestamp, event)
        # Falling off the end terminates the generator (PEP 479 forbids
        # raising StopIteration from inside it).
class StationaryWorkload(object):
    """Stationary workload in which every content is made of segments.

    Events are generated on the fly, i.e. instead of creating an event
    schedule to be kept in memory, an iterator generates events when needed.

    Request arrivals are Poisson-distributed while content popularity is
    Zipf-distributed. Each request targets the first segment of a content;
    the remaining segments of that content are then requested one after the
    other, ``delay`` time units apart, by the same receiver.

    All requests are mapped to receivers uniformly unless a positive *beta*
    parameter is specified. If a *beta* parameter is specified, receivers
    are sorted in decreasing order of degree of the node they are attached
    to and are picked according to a Zipf distribution of coefficient beta.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The total number of segments
    n_segments : int
        The number of segments per content
    time_interval : object
        Value copied verbatim into the ``'time_interval'`` field of events
    alpha : float
        The Zipf alpha parameter
    beta : float, optional
        Zipf coefficient used to skew requests across receivers
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        The seed for the random generator

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """

    def __init__(self, topology, n_contents, n_segments, time_interval,
                 alpha, beta=0, rate=1.0, n_warmup=10**5,
                 n_measured=4 * 10**5, seed=None, **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        self.receivers = [
            v for v in topology.nodes_iter()
            if topology.node[v]['stack'][0] == 'receiver'
        ]
        # Popularity is defined over whole contents, not over segments.
        # Floor division keeps the population an integer on Python 3 too.
        self.zipf = TruncatedZipfDist(alpha, n_contents // n_segments)
        self.time_interval = time_interval
        self.n_contents = n_contents
        self.n_segments = n_segments
        self.contents = range(1, n_contents + 1)  # A list of all segments.
        self.delay = 0.01
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        random.seed(seed)
        self.beta = beta
        if beta != 0:
            # Use the constructor argument: the instance has no ``topology``
            # attribute.
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                key=lambda x: degree[next(iter(topology.edge[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        req_counter = 0
        t_event = 0.0
        n_requests = self.n_warmup + self.n_measured
        # Heap of (time, sequence number, event). The sequence number breaks
        # ties so that events sharing a timestamp are neither lost nor
        # compared as dictionaries.
        pending = []
        sequence = 0
        while req_counter <= n_requests:
            t_event += random.expovariate(self.rate)
            # Emit every follow-up segment due before the next new request
            while pending and pending[0][0] < t_event:
                event_time, _, event = heapq.heappop(pending)
                yield (event_time, event)
                # If it is not the last segment, schedule the next segment
                if event['content'] % self.n_segments != 0:
                    follow_up = copy.copy(event)
                    follow_up['content'] += 1
                    heapq.heappush(
                        pending,
                        (event_time + self.delay, sequence, follow_up))
                    sequence += 1
            if req_counter == n_requests:
                # All requests were sent: keep advancing time until the
                # pending segments are drained, then stop.
                if not pending:
                    break
                continue
            if self.beta == 0:
                receiver = random.choice(self.receivers)
            else:
                receiver = self.receivers[self.receiver_dist.rv() - 1]
            # First segment of the content drawn from the Zipf distribution
            content = (int(self.zipf.rv()) - 1) * self.n_segments + 1
            event = {
                'receiver': receiver,
                'content': content,
                'n_segments': self.n_segments,
                'time_interval': self.time_interval,
                'log': req_counter >= self.n_warmup,
            }
            yield (t_event, event)
            # If it is not the last segment, schedule the next segment
            if event['content'] % self.n_segments != 0:
                follow_up = copy.copy(event)
                follow_up['content'] += 1
                heapq.heappush(
                    pending, (t_event + self.delay, sequence, follow_up))
                sequence += 1
            req_counter += 1
        # Falling off the end terminates the generator (PEP 479 forbids
        # raising StopIteration from inside it).
class StationaryWorkload(object):
    """Stationary workload generating events on the fly.

    Instead of creating an event schedule to be kept in memory, an iterator
    generates events when needed. This is useful for running large schedules
    of events where RAM is limited as its memory impact is considerably
    lower.

    These requests are Poisson-distributed while content popularity is
    Zipf-distributed

    All requests are mapped to receivers uniformly unless a positive *beta*
    parameter is specified.

    If a *beta* parameter is specified, then receivers issue requests at
    different rates. The algorithm used to determine the requests rates for
    each receiver is the following:

     * All receiver are sorted in decreasing order of degree of the PoP they
       are attached to. This assumes that all receivers have degree = 1 and
       are attached to a node with degree > 1
     * Rates are then assigned following a Zipf distribution of coefficient
       beta where nodes with higher-degree PoPs have a higher request rate

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter
    beta : float, optional
        Spatial skewness of requests rates
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        The seed for the random generator

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """

    def __init__(self, topology, n_contents, alpha, beta=0, rate=1.0,
                 n_warmup=10 ** 5, n_measured=4 * 10 ** 5, seed=None,
                 **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        self.receivers = [v for v in topology.nodes()
                          if topology.node[v]['stack'][0] == 'receiver']
        self.zipf = TruncatedZipfDist(alpha, n_contents)
        self.n_contents = n_contents
        self.contents = range(1, n_contents + 1)
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        random.seed(seed)
        self.beta = beta
        if beta != 0:
            # Use the constructor argument: the instance has no ``topology``
            # attribute.
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                key=lambda x: degree[next(iter(topology.adj[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        req_counter = 0
        t_event = 0.0
        n_requests = self.n_warmup + self.n_measured
        while req_counter < n_requests:
            t_event += random.expovariate(self.rate)
            if self.beta == 0:
                receiver = random.choice(self.receivers)
            else:
                # rv() is 1-based, list indices are 0-based
                receiver = self.receivers[self.receiver_dist.rv() - 1]
            content = int(self.zipf.rv())
            log = (req_counter >= self.n_warmup)
            event = {'receiver': receiver, 'content': content, 'log': log}
            yield (t_event, event)
            req_counter += 1
        # Falling off the end terminates the generator (PEP 479 forbids
        # raising StopIteration from inside it).
def __init__(self, topology, n_contents, alpha, beta=0, rates=10,
             rate_dist=[0], n_warmup=10**5, n_measured=4 * 10**5, seed=0,
             n_services=10, **kwargs):
    """Set up a trace-backed service workload.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers. Its ``graph`` attribute
        must provide the ``'n_classes'`` and ``'n_edgeRouters'`` entries.
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter
    beta : float, optional
        Zipf coefficient used to skew requests across receivers
    rates : object, optional
        Stored as ``self.rates``; not otherwise used by the constructor
    rate_dist : sequence of float, optional
        Per-class rate distribution, one entry per class. Its running sum is
        stored as ``self.rate_cum_dist``. The default list is never mutated.
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        The seed passed to the receiver distribution and stored for later use
    n_services : int, optional
        The number of services

    Raises
    ------
    ValueError
        If *alpha* or *beta* is negative or if *rate_dist* has fewer entries
        than the topology has classes.
    SystemExit
        If the workload trace file cannot be opened.
    """
    if alpha < 0:
        raise ValueError('alpha must be positive')
    if beta < 0:
        raise ValueError('beta must be positive')
    self.receivers = [
        v for v in topology.nodes_iter()
        if topology.node[v]['stack'][0] == 'receiver'
    ]
    self.num_classes = topology.graph['n_classes']
    self.n_contents = n_contents
    self.contents = range(0, n_contents)
    self.n_services = n_services
    self.alpha = alpha
    self.rates = rates
    self.n_edgeRouters = topology.graph['n_edgeRouters']
    self.n_warmup = n_warmup
    self.n_measured = n_measured
    self.model = None
    self.beta = beta
    self.topology = topology
    if len(rate_dist) < self.num_classes:
        raise ValueError('rate_dist must have one entry per class')
    # Single-argument print calls emit the same text on Python 2 and 3.
    print("rate_dist=  %s \n" % (rate_dist,))
    # Running sum: entry c is rate_dist[0] + ... + rate_dist[c]
    self.rate_cum_dist = []
    cumulative = 0.0
    for c in range(self.num_classes):
        cumulative += rate_dist[c]
        self.rate_cum_dist.append(cumulative)
    print("Cumulative dist: " + repr(self.rate_cum_dist))
    if beta != 0:
        degree = nx.degree(self.topology)
        self.receivers = sorted(
            self.receivers,
            key=lambda x: degree[next(iter(topology.edge[x]))],
            reverse=True)
        self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers),
                                               seed)
    self.seed = seed
    self.first = True
    self.first_iter = True
    self.aFile = None
    self.end_of_file = False
    fname = './top_n_trace.txt'
    try:
        # Deliberately kept open on the instance after construction
        self.aFile = open(fname, 'r')
    except IOError:
        print("Could not read the workload trace file: %s" % fname)
        # Non-zero status so that the failure is visible to the caller
        sys.exit(1)
class YCSBWorkload(object):
    """Yahoo! Cloud Serving Benchmark (YCSB)

    YCSB is a family of reference workloads for benchmarking databases and,
    more generally, storage and caching systems. It defines five workloads:

    +------------------+------------------------+------------------+
    | Workload         | Operations             | Record selection |
    +------------------+------------------------+------------------+
    | A - Update heavy | Read: 50%, Update: 50% | Zipfian          |
    | B - Read heavy   | Read: 95%, Update: 5%  | Zipfian          |
    | C - Read only    | Read: 100%             | Zipfian          |
    | D - Read latest  | Read: 95%, Insert: 5%  | Latest           |
    | E - Short ranges | Scan: 95%, Insert 5%   | Zipfian/Uniform  |
    +------------------+------------------------+------------------+

    Notes
    -----
    Only workloads A, B and C are implemented at the moment, as they are the
    most relevant ones for caching systems.
    """

    def __init__(self, workload, n_contents, n_warmup, n_measured,
                 alpha=0.99, seed=None, **kwargs):
        """Constructor

        Parameters
        ----------
        workload : str
            Workload identifier. Currently supported: "A", "B", "C"
        n_contents : int
            Number of content items
        n_warmup : int
            The number of warmup requests (i.e. requests executed to fill
            cache but not logged)
        n_measured : int
            The number of logged requests after the warmup
        alpha : float, optional
            Parameter of Zipf distribution
        seed : int, optional
            The seed for the random generator
        """
        known = ("A", "B", "C", "D", "E")
        if workload not in known:
            raise ValueError("Incorrect workload ID [A-B-C-D-E]")
        if workload in ("D", "E"):
            raise NotImplementedError("Workloads D and E not yet implemented")
        self.workload = workload
        if seed is not None:
            random.seed(seed)
        self.zipf = TruncatedZipfDist(alpha, n_contents)
        self.n_warmup = n_warmup
        self.n_measured = n_measured

    def __iter__(self):
        """Return an iterator over the workload"""
        issued = 0
        while issued < self.n_warmup + self.n_measured:
            draw = random.random()
            # Probability that the operation is a read, per workload
            read_share = {"A": 0.5, "B": 0.95, "C": 1.0}[self.workload]
            if draw < read_share:
                operation = "READ"
            else:
                operation = "UPDATE"
            record = int(self.zipf.rv())
            yield {
                'op': operation,
                'item': record,
                'log': issued >= self.n_warmup,
            }
            issued += 1
        return
class TransitLocalWorkload(object):
    """Workload generating transit and local traffic.

    Traffic is generated according to given percentages and content
    popularity. The following traffic patterns are generated:

    transit traffic: traffic that transits through the domain.
    local traffic: both end-points of the traffic is within the domain.
    ingress traffic: consumer is outside and the producer is inside the
        domain.
    egress traffic: consumer is inside and the producer is outside the
        domain.

    The topology ``graph`` attribute must provide the
    ``'internal_contents'``, ``'edge_contents'``, ``'internal_receivers'``
    and ``'edge_receivers'`` entries.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter of content popularity
    beta : float, optional
        Zipf coefficient of the receiver distribution built by the
        constructor. The iterator does not currently use it.
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests
    n_measured : int, optional
        The number of logged requests after the warmup
    transit, local, ingress, egress : float, optional
        Share of each traffic pattern. Requests that fall outside the
        transit, local and ingress shares are egress traffic.
    seed : int, optional
        The seed for the random generator
    """

    def __init__(self, topology, n_contents, alpha, beta=0, rate=1.0,
                 n_warmup=10 ** 5, n_measured=4 * 10 ** 5, transit=0.7,
                 local=0.1, ingress=0.1, egress=0.1, seed=None, **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        # Kept on the instance because __iter__ reads the graph attributes
        self.topology = topology
        self.receivers = [v for v in topology.nodes_iter()
                          if topology.node[v]['stack'][0] == 'receiver']
        self.n_contents = n_contents
        self.contents = range(1, n_contents + 1)
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        random.seed(seed)
        self.beta = beta
        self.local = local
        self.transit = transit
        self.ingress = ingress
        self.egress = egress
        if beta != 0:
            degree = nx.degree(self.topology)
            self.receivers = sorted(
                self.receivers,
                key=lambda x: degree[next(iter(topology.edge[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        graph = self.topology.graph
        self.local_contents = list(graph['internal_contents'])
        self.remote_contents = list(graph['edge_contents'])
        self.local_receivers = graph['internal_receivers']
        self.remote_receivers = graph['edge_receivers']
        self.zipf_local = TruncatedZipfDist(self.alpha,
                                            len(self.local_contents))
        self.zipf_remote = TruncatedZipfDist(self.alpha,
                                             len(self.remote_contents))
        req_counter = 0
        t_event = 0.0
        n_requests = self.n_warmup + self.n_measured
        while req_counter < n_requests:
            t_event += random.expovariate(self.rate)
            x = random.random()
            if x < self.transit:
                # transit traffic
                receivers, zipf, contents = (
                    self.remote_receivers, self.zipf_remote,
                    self.remote_contents)
            elif x < self.transit + self.local:
                # local traffic
                receivers, zipf, contents = (
                    self.local_receivers, self.zipf_local,
                    self.local_contents)
            elif x < self.transit + self.local + self.ingress:
                # ingress traffic
                receivers, zipf, contents = (
                    self.remote_receivers, self.zipf_local,
                    self.local_contents)
            else:
                # egress traffic
                receivers, zipf, contents = (
                    self.local_receivers, self.zipf_remote,
                    self.remote_contents)
            # Receiver first, content second: keeps the order in which
            # random numbers are consumed.
            receiver = random.choice(receivers)
            # rv() is 1-based, list indices are 0-based
            content = contents[int(zipf.rv()) - 1]
            log = (req_counter >= self.n_warmup)
            event = {'receiver': receiver, 'content': content, 'log': log}
            yield (t_event, event)
            req_counter += 1
        # Falling off the end terminates the generator (PEP 479 forbids
        # raising StopIteration from inside it).
class TraceDrivenWorkload(object):
    """Parse requests from a generic request trace.

    This workload requires two text files:
     * a requests file, where each line corresponds to a string identifying
       the content requested
     * a contents file, which lists all unique content identifiers appearing
       in the requests file.

    Since the trace do not provide timestamps, requests are scheduled
    according to a Poisson process of rate *rate*. All requests are mapped to
    receivers uniformly unless a positive *beta* parameter is specified.

    If a *beta* parameter is specified, then receivers issue requests at
    different rates. The algorithm used to determine the requests rates for
    each receiver is the following:

     * All receiver are sorted in decreasing order of degree of the PoP they
       are attached to. This assumes that all receivers have degree = 1 and
       are attached to a node with degree > 1
     * Rates are then assigned following a Zipf distribution of coefficient
       beta where nodes with higher-degree PoPs have a higher request rate

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    reqs_file : str
        The path to the requests file
    contents_file : str
        The path to the contents file
    n_contents : int
        The number of content object (i.e. the number of lines of
        contents_file)
    n_warmup : int
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int
        The number of logged requests after the warmup
    rate : float, optional
        The network-wide mean rate of requests per second
    beta : float, optional
        Spatial skewness of requests rates

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.

    Raises
    ------
    ValueError
        While iterating, if the trace holds fewer than
        ``n_warmup + n_measured`` requests.
    """

    def __init__(self, topology, reqs_file, contents_file, n_contents,
                 n_warmup, n_measured, rate=1.0, beta=0, **kwargs):
        """Constructor"""
        if beta < 0:
            raise ValueError('beta must be positive')
        # Set high buffering to avoid one-line reads
        self.buffering = 64 * 1024 * 1024
        self.n_contents = n_contents
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        self.reqs_file = reqs_file
        self.rate = rate
        self.receivers = [v for v in topology.nodes()
                          if topology.node[v]['stack'][0] == 'receiver']
        # Lines are stored exactly as read (line terminator included), the
        # same form in which __iter__ yields content identifiers.
        with open(contents_file, 'r', buffering=self.buffering) as f:
            self.contents = list(f)
        self.beta = beta
        if beta != 0:
            degree = nx.degree(topology)
            self.receivers = sorted(
                self.receivers,
                key=lambda x: degree[next(iter(topology.adj[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers))

    def __iter__(self):
        req_counter = 0
        t_event = 0.0
        n_requests = self.n_warmup + self.n_measured
        with open(self.reqs_file, 'r', buffering=self.buffering) as f:
            for content in f:
                t_event += random.expovariate(self.rate)
                if self.beta == 0:
                    receiver = random.choice(self.receivers)
                else:
                    # rv() is 1-based, list indices are 0-based
                    receiver = self.receivers[self.receiver_dist.rv() - 1]
                log = (req_counter >= self.n_warmup)
                event = {'receiver': receiver, 'content': content,
                         'log': log}
                yield (t_event, event)
                req_counter += 1
                if req_counter >= n_requests:
                    # A plain return ends the generator; raising
                    # StopIteration here is a RuntimeError under PEP 479.
                    return
            raise ValueError("Trace did not contain enough requests")
class YCSBWorkload(object):
    """Yahoo! Cloud Serving Benchmark (YCSB)

    The YCSB is a set of reference workloads used to benchmark databases and,
    more generally any storage/caching systems. It comprises five workloads:

    +------------------+------------------------+------------------+
    | Workload         | Operations             | Record selection |
    +------------------+------------------------+------------------+
    | A - Update heavy | Read: 50%, Update: 50% | Zipfian          |
    | B - Read heavy   | Read: 95%, Update: 5%  | Zipfian          |
    | C - Read only    | Read: 100%             | Zipfian          |
    | D - Read latest  | Read: 95%, Insert: 5%  | Latest           |
    | E - Short ranges | Scan: 95%, Insert 5%   | Zipfian/Uniform  |
    +------------------+------------------------+------------------+

    Notes
    -----
    At the moment only workloads A, B and C are implemented, since they are
    the most relevant for caching systems.
    """

    def __init__(self, workload, n_contents, n_warmup, n_measured,
                 alpha=0.99, seed=None, **kwargs):
        """Constructor

        Parameters
        ----------
        workload : str
            Workload identifier. Currently supported: "A", "B", "C"
        n_contents : int
            Number of content items
        n_warmup : int
            The number of warmup requests (i.e. requests executed to fill
            cache but not logged)
        n_measured : int
            The number of logged requests after the warmup
        alpha : float, optional
            Parameter of Zipf distribution
        seed : int, optional
            The seed for the random generator

        Raises
        ------
        ValueError
            If *workload* is not one of "A" to "E"
        NotImplementedError
            If *workload* is "D" or "E"
        """
        if workload not in ("A", "B", "C", "D", "E"):
            raise ValueError("Incorrect workload ID [A-B-C-D-E]")
        elif workload in ("D", "E"):
            raise NotImplementedError("Workloads D and E not yet implemented")
        self.workload = workload
        if seed is not None:
            random.seed(seed)
        self.zipf = TruncatedZipfDist(alpha, n_contents)
        self.n_warmup = n_warmup
        self.n_measured = n_measured

    def __iter__(self):
        """Return an iterator over the workload"""
        req_counter = 0
        n_requests = self.n_warmup + self.n_measured
        while req_counter < n_requests:
            rand = random.random()
            op = {
                "A": "READ" if rand < 0.5 else "UPDATE",
                "B": "READ" if rand < 0.95 else "UPDATE",
                "C": "READ"
            }[self.workload]
            item = int(self.zipf.rv())
            log = (req_counter >= self.n_warmup)
            event = {'op': op, 'item': item, 'log': log}
            yield event
            req_counter += 1
        # Falling off the end terminates the generator; raising
        # StopIteration explicitly becomes a RuntimeError under PEP 479.
class StationaryWorkload(object):
    """Stationary service-request workload interleaved with model events.

    Events are generated on the fly, i.e. instead of creating an event
    schedule to be kept in memory, an iterator generates events when needed.

    New requests are Poisson-distributed while service popularity is
    Zipf-distributed. Before each new request, every event queued in
    ``self.model.eventQ`` whose time precedes the request is emitted. The
    ``model`` attribute is None after construction and must be assigned
    before iterating.

    All requests are mapped to receivers uniformly unless a positive *beta*
    parameter is specified. If a *beta* parameter is specified, receivers
    are sorted in decreasing order of degree of the node they are attached
    to and are picked according to a Zipf distribution of coefficient beta.

    Parameters
    ----------
    topology : fnss.Topology
        The topology to which the workload refers
    n_contents : int
        The number of content object
    alpha : float
        The Zipf alpha parameter
    beta : float, optional
        Spatial skewness of requests rates
    rate : float, optional
        The mean rate of requests per second
    n_warmup : int, optional
        The number of warmup requests (i.e. requests executed to fill cache but
        not logged)
    n_measured : int, optional
        The number of logged requests after the warmup
    seed : int, optional
        The seed for the random generators
    n_services : int, optional
        The number of services

    Returns
    -------
    events : iterator
        Iterator of events. Each event is a 2-tuple where the first element is
        the timestamp at which the event occurs and the second element is a
        dictionary of event attributes.
    """

    def __init__(self, topology, n_contents, alpha, beta=0, rate=1.0,
                 n_warmup=10**5, n_measured=4 * 10**5, seed=0,
                 n_services=10, **kwargs):
        if alpha < 0:
            raise ValueError('alpha must be positive')
        if beta < 0:
            raise ValueError('beta must be positive')
        self.receivers = [
            v for v in topology.nodes_iter()
            if topology.node[v]['stack'][0] == 'receiver'
        ]
        self.zipf = TruncatedZipfDist(alpha, n_services - 1, seed)
        self.n_contents = n_contents
        self.contents = range(0, n_contents)
        self.n_services = n_services
        self.alpha = alpha
        self.rate = rate
        self.n_warmup = n_warmup
        self.n_measured = n_measured
        self.model = None
        self.beta = beta
        self.topology = topology
        if beta != 0:
            degree = nx.degree(self.topology)
            self.receivers = sorted(
                self.receivers,
                key=lambda x: degree[next(iter(topology.edge[x]))],
                reverse=True)
            self.receiver_dist = TruncatedZipfDist(beta, len(self.receivers),
                                                   seed)
        self.seed = seed
        self.first = True

    def __iter__(self):
        req_counter = 0
        t_event = 0.0
        flow_id = 0
        n_requests = self.n_warmup + self.n_measured

        if self.first:  # TODO remove this first variable, not necessary here
            random.seed(self.seed)
            self.first = False

        queue = self.model.eventQ
        while req_counter < n_requests or len(queue) > 0:
            t_event += random.expovariate(self.rate)
            # Emit every queued model event due before the next new request
            while len(queue) > 0 and queue[0].time < t_event:
                eventObj = heapq.heappop(queue)
                log = (req_counter >= self.n_warmup)
                event = {
                    'receiver': eventObj.receiver,
                    'content': eventObj.service,
                    'log': log,
                    'node': eventObj.node,
                    'flow_id': eventObj.flow_id,
                    'deadline': eventObj.deadline,
                    'rtt_delay': eventObj.rtt_delay,
                    'status': eventObj.status,
                    'task': eventObj.task
                }
                yield (eventObj.time, event)
            if req_counter >= n_requests:
                # All requests were sent: only drain the event queue
                continue
            if self.beta == 0:
                receiver = random.choice(self.receivers)
            else:
                # rv() is 1-based, list indices are 0-based
                receiver = self.receivers[self.receiver_dist.rv() - 1]
            content = int(self.zipf.rv())
            log = (req_counter >= self.n_warmup)
            flow_id += 1
            deadline = self.model.services[content].deadline + t_event
            event = {
                'receiver': receiver,
                'content': content,
                'log': log,
                'node': receiver,
                'flow_id': flow_id,
                'rtt_delay': 0,
                'deadline': deadline,
                'status': REQUEST
            }
            yield (t_event, event)
            req_counter += 1

        # Single-argument print call: same output on Python 2 and 3
        print("End of iteration: len(eventObj): " + repr(len(queue)))
        # Falling off the end terminates the generator (PEP 479 forbids
        # raising StopIteration from inside it).
def laoutaris_cache_hit_ratio(alpha, population, cache_size, order=3):
    """Estimates the cache hit ratio of an LRU cache under general power-law
    demand using the Laoutaris approximation.

    Parameters
    ----------
    alpha : float
        The coefficient of the demand power-law
    population : int
        The content population
    cache_size : int
        The cache size
    order : int, optional
        The order of the Taylor expansion. Supports only 2 and 3

    Returns
    -------
    cache_hit_ratio : float
        The cache hit ratio

    Raises
    ------
    ValueError
        If ``order`` is neither 2 nor 3, if the 2nd order expansion has no
        real solution, or if no solution greater than the cache size exists

    References
    ----------
    http://arxiv.org/pdf/0705.1970.pdf
    """

    def H(N, alpha):
        """Compute the sum of 1/k**alpha for k = 1, ..., N"""
        return sum(1.0 / k**alpha for k in range(1, N + 1))

    def cubrt(x):
        """Compute cubic root of a number

        Parameters
        ----------
        x : float
            Number whose cubic root is to be calculated

        Returns
        -------
        cubrt : float
            The cubic root
        """
        exp = 1.0 / 3
        # A negative base with a fractional exponent is not a real power,
        # so take the root of the magnitude and restore the sign.
        return x**exp if x >= 0 else -(-x)**exp

    def solve_3rd_order_equation(a, b, c, d):
        """Calculates the real solutions of the 3rd order equations
        a*x**3 + b*x**2 + c*x + d = 0

        Parameters
        ----------
        a : float
            Coefficient of 3rd degree monomial
        b : float
            Coefficient of 2nd degree monomial
        c : float
            Coefficient of 1st degree monomial
        d : float
            Constant

        Returns
        -------
        roots : tuple
            Tuple of real solutions.
            The tuple may comprise either 1 or 3 values

        Notes
        -----
        The method used to calculate roots is described in this paper:
        http://www.nickalls.org/dick/papers/maths/cubic1993.pdf
        """
        # Compute parameters
        x_N = -b / (3 * a)
        y_N = a * x_N**3 + b * x_N**2 + c * x_N + d
        delta_2 = (b**2 - 3 * a * c) / (9 * a**2)
        h_2 = 4 * (a**2) * (delta_2**3)

        # Calculate discriminant and find roots
        discr = y_N**2 - h_2
        if discr > 0:
            r_x = (x_N + cubrt(0.5 / a * (-y_N + math.sqrt(discr)))
                   + cubrt(0.5 / a * (-y_N - math.sqrt(discr))),)
        elif discr == 0:
            # Repeated root. With z = x - x_N the cubic is
            # a*(z - delta)**2 * (z + 2*delta), hence y_N = 2*a*delta**3.
            # delta must keep its sign: sqrt(delta_2) only gives its
            # magnitude and yields wrong roots when y_N / (2*a) < 0.
            delta = cubrt(y_N / (2 * a))
            r1 = r2 = x_N + delta
            r3 = x_N - 2 * delta
            r_x = (r1, r2, r3)
        else:  # discr < 0
            h = math.sqrt(h_2)
            delta = math.sqrt(delta_2)
            Theta = np.arccos(-y_N / h) / 3.0
            r1 = x_N + 2 * delta * np.cos(Theta)
            r2 = x_N + 2 * delta * np.cos(2 * np.pi / 3 - Theta)
            r3 = x_N + 2 * delta * np.cos(2 * np.pi / 3 + Theta)
            r_x = (r1, r2, r3)
        return r_x

    # Get parameters
    pdf = TruncatedZipfDist(alpha, population).pdf
    C = cache_size
    N = population

    # Calculate harmonics
    H_N_a = H(N, alpha)
    H_N_2a = H(N, 2 * alpha)
    H_N_3a = H(N, 3 * alpha)
    H_N_4a = H(N, 4 * alpha)

    Lambda = 1.0 / H_N_a

    # Find values of r
    if order == 2:
        alpha_2 = ((0.5 * Lambda**2 * H_N_2a)
                   - (0.5 * Lambda**3 * C * H_N_3a)
                   + (0.25 * Lambda**4 * C**2 * H_N_4a))
        alpha_1 = (-(Lambda * H_N_a)
                   + (0.5 * Lambda**3 * C**2 * H_N_3a)
                   - (0.5 * Lambda**4 * C**3 * H_N_4a))
        alpha_0 = C + (0.25 * Lambda**4 * C**4 * H_N_4a)
        # Calculate discriminant to verify if there are real solutions
        discr = alpha_1**2 - 4 * alpha_2 * alpha_0
        if discr < 0:
            raise ValueError('Could not find real values for the '
                             'characteristic time. Try using a 3rd order '
                             'expansion')
        # Calculate roots of the 2nd order equation
        r1 = (-alpha_1 + math.sqrt(discr)) / (2 * alpha_2)
        r2 = (-alpha_1 - math.sqrt(discr)) / (2 * alpha_2)
        r_x = (r1, r2)
    elif order == 3:
        # Additional parameters
        H_N_5a = H(N, 5 * alpha)
        H_N_6a = H(N, 6 * alpha)
        # Calculate coefficients of the 3rd order equation
        alpha_3 = (- (Lambda**3/6 * H_N_3a) + (Lambda**4*C/6 * H_N_4a)
                   - (Lambda**5*C**2/12 * H_N_5a)
                   + (Lambda**6*C**3/36 * H_N_6a))
        alpha_2 = ((Lambda**2/2 * H_N_2a) - (Lambda**4*C**2/4 * H_N_4a)
                   + (Lambda**5*C**3/6 * H_N_5a)
                   - (Lambda**6*C**4/12 * H_N_6a))
        alpha_1 = (- Lambda*H_N_a + (Lambda**4*C**3/6 * H_N_4a)
                   - (Lambda**5*C**4/12 * H_N_5a)
                   + (Lambda**6*C**5/12 * H_N_6a))
        alpha_0 = (C - (Lambda**4*C**4/12 * H_N_4a)
                   - (Lambda**6*C**6/36 * H_N_6a))
        # Solve 3rd order equation
        r_x = solve_3rd_order_equation(alpha_3, alpha_2, alpha_1, alpha_0)
    else:
        raise ValueError('Only 2nd and 3rd order solutions are supported')

    # Find actual value of characteristic time (r) if exists
    # We select the minimum positive r greater than C
    r_c = [x for x in r_x if x > C]
    if r_c:
        r = min(r_c)
    else:
        raise ValueError(
            'Cannot compute cache hit ratio using this method. '
            'Could not find positive values of characteristic time'
            ' greater than the cache size.')
    return np.sum(pdf * (1 - math.e**-(r * pdf)))