def process_files(data_dir, percentiles, output_files, ip_to_use=None,
                  replace_timeout=False, cloud=None):
    """Walk data_dir, build per-IP latency series per server, and write them out.

    Directories are visited with os.walk. A directory that contains no files
    and whose last path component is in common_module.SERVER_SET marks the
    start of a new server: whatever was accumulated for the previous server
    is passed to output_to_file and the accumulators are reset. For a
    directory that does contain files, the server name is taken from the
    second-to-last component of its path, and every file whose name starts
    with common_module.FILENAME_PREFIX is parsed line by line.

    Each data line is split on whitespace; the round id, IP address and
    latency are read from the columns given by common_module.ROUND_COL_INDEX,
    IP_COL_INDEX and LATENCY_COL_INDEX. Latencies are stored per IP in a list
    indexed by round (shifted by the running round offset); when a round is
    seen more than once for an IP the minimum latency is kept.

    Args:
        data_dir: Root directory to walk.
        percentiles: Forwarded unchanged to output_to_file.
        output_files: Forwarded unchanged to output_to_file.
        ip_to_use: Forwarded unchanged to output_to_file.
        replace_timeout: Forwarded unchanged to output_to_file.
        cloud: Passed to common_module.check_cloud together with the server
            name; servers for which the check is falsy are neither parsed
            nor written.
    """
    # Value stored for rounds that have no sample for an IP.
    # NOTE(review): presumably the timeout latency -- confirm the unit.
    missing_round_latency = 10000

    # Stays None until a directory with files is seen, so the final flush
    # cannot hit an unbound name when the walk finds no data at all.
    server = None
    probe_round_offset = 0
    server_latencies = dict()  # Maps from the IP address to a list of latencies.

    for path, _, files in os.walk(data_dir):
        print('running path: ' + path)
        path_parts = path.split('/')

        if not files:
            if path_parts[-1] in common_module.SERVER_SET:
                print('From: ' + path)
                # probe_round_offset > 0 implies at least one line was
                # parsed, hence `server` is set at this point.
                if probe_round_offset > 0 and common_module.check_cloud(cloud, server):
                    output_to_file(server, server_latencies, percentiles,
                                   output_files, ip_to_use=ip_to_use,
                                   replace_timeout=replace_timeout)
                # A new server, reset all the variables.
                probe_round_offset = 0
                server_latencies = dict()
                print('reseted the data structures.')
            continue

        # Kept as an explicit index: for a single-component path this wraps
        # around to the component itself instead of raising IndexError.
        server = path_parts[len(path_parts) - 2]
        if not common_module.check_cloud(cloud, server):
            continue

        for filename in files:
            if not filename.startswith(common_module.FILENAME_PREFIX):
                continue

            last_round = None
            with open(os.path.join(path, filename), 'rb') as input_file:
                for raw_line in input_file:
                    fields = raw_line.rstrip().split()
                    if not fields:
                        # Blank line: nothing to parse.
                        continue
                    round_id = int(fields[common_module.ROUND_COL_INDEX])
                    ip_address = fields[common_module.IP_COL_INDEX]
                    latency = float(fields[common_module.LATENCY_COL_INDEX])
                    actual_round = round_id + probe_round_offset

                    latencies = server_latencies.setdefault(ip_address, [])
                    if actual_round < len(latencies):
                        # The round already exists: keep the best sample.
                        latencies[actual_round] = min(latencies[actual_round], latency)
                    else:
                        # Pad every missing round so that the sample lands
                        # exactly at index actual_round, the same index the
                        # branch above reads.
                        gap = actual_round - len(latencies)
                        latencies.extend([missing_round_latency] * gap)
                        latencies.append(latency)
                    last_round = actual_round

            # The next file starts right after the last round seen here.
            # Files without any data line leave the offset untouched.
            # NOTE(review): the offset is assumed to advance once per file;
            # confirm against the intended layout of the data files.
            if last_round is not None:
                probe_round_offset = last_round + 1

    # Flush whatever was accumulated for the last server visited.
    if server is not None and common_module.check_cloud(cloud, server):
        output_to_file(server, server_latencies, percentiles, output_files,
                       ip_to_use=ip_to_use, replace_timeout=replace_timeout)
def get_servers(median_latencies, cloud=None):
    """Collect the distinct servers and nameservers named in median_latencies.

    The keys of median_latencies are indexed as (server, nameserver) pairs;
    the values are not used here. Every nameserver is collected, while a
    server is collected only when common_module.check_cloud(cloud, server)
    is truthy.

    Args:
        median_latencies: Mapping keyed by (server, nameserver) pairs.
        cloud: Passed to common_module.check_cloud to filter servers.

    Returns:
        A (server_set, nameserver_set) tuple of sets.
    """
    print(cloud)

    nameservers = set(key[1] for key in median_latencies)
    servers = set(
        key[0] for key in median_latencies
        if common_module.check_cloud(cloud, key[0])
    )

    summary = '# Server: ' + str(len(servers))
    summary += ' # Nameserver: ' + str(len(nameservers))
    print(summary)
    return servers, nameservers
def find_server_cover_set(median_latencies, server_set, nameserver_set,
                          threshold, output_filename, cloud=None):
    """Greedily pick servers that cover the nameservers and write them out.

    In each pass, every server still in server_set (and accepted by
    common_module.check_cloud) is scored by the number of nameservers still
    in nameserver_set whose latency to it is below threshold. The
    best-scoring server is selected only if it covers at least one
    nameserver and more than 1% of the initial nameserver count. A selected
    server is removed from server_set and the nameservers it covers are
    removed from nameserver_set. The search stops when a pass selects
    nothing or either set becomes empty.

    Both server_set and nameserver_set are modified in place.

    Args:
        median_latencies: Mapping from (server, nameserver) to a latency.
        server_set: Candidate servers; chosen servers are removed from it.
        nameserver_set: Nameservers to cover; covered ones are removed.
        threshold: A nameserver counts as covered when its latency is
            strictly below this value.
        output_filename: File that receives one chosen server per line.
        cloud: Passed to common_module.check_cloud to filter servers.
    """
    initial_nameserver_count = len(nameserver_set)
    chosen = set()
    previous_chosen = None

    while chosen != previous_chosen and server_set and nameserver_set:
        previous_chosen = set(chosen)

        # Candidate server --> uncovered nameservers that are < threshold.
        coverage = dict()
        for (server, nameserver), latency in median_latencies.iteritems():
            # Only consider unchosen servers and uncovered nameservers.
            if server not in server_set or nameserver not in nameserver_set:
                continue
            if not common_module.check_cloud(cloud, server):
                continue
            reachable = coverage.setdefault(server, set())
            if latency < threshold:
                reachable.add(nameserver)

        # Nothing to choose from; the loop condition then ends the search.
        if not coverage:
            continue

        # max() keeps the first of several equally good candidates.
        best_server = max(coverage, key=lambda candidate: len(coverage[candidate]))
        num_nodes_covered = len(coverage[best_server])
        print('best server: ' + best_server + ' num nodes covered: ' + str(num_nodes_covered))

        percentage = num_nodes_covered * 100.0 / initial_nameserver_count
        if num_nodes_covered <= 0 or percentage <= 1:
            continue

        # Commit the choice: retire the server and the nameservers it covers.
        server_set.remove(best_server)
        for covered in coverage[best_server]:
            nameserver_set.remove(covered)
        chosen.add(best_server)

    # Done finding the cover set.
    with open(output_filename, 'wb') as output_file:
        output_file.writelines(str(member) + '\n' for member in chosen)

    print('cover set: ' + str(chosen) + ' remaining nameservers: ' + str(len(nameserver_set)) + ' remaining servers: ' + str(len(server_set)))