def generate(self, pool_period, max_flow_age, threading_mode, shm_size, aeron_lib_path,
             aeron_term_buffer_length, aeron_ipc_term_buffer_length, bw_emulation=True):
    """Print the complete Docker Swarm deployment description for the topology.

    The number of gods handed to the bootstrapper section is the number of
    nodes reported by the Docker API on the local socket. If that query fails,
    a hint about running on a manager node is reported and the generator fails
    with the original exception.

    Every parameter is forwarded unchanged to ``print_bootstrapper``.
    """
    number_of_gods = 0
    try:
        docker_api = docker.APIClient(base_url='unix:/' + DOCKER_SOCK)
        number_of_gods = len(docker_api.nodes())
    except Exception as error:
        hint = ("DockerComposeFileGenerator.py requires special permissions in order to view cluster state.\n"
                "please, generate the .yaml file on a manager node.")
        print_error_named("compose_generator", hint)
        print_and_fail(error)

    # Sections are emitted in a fixed order: header, bootstrapper, services,
    # configs and finally networks.
    self.print_header()
    self.print_bootstrapper(number_of_gods, pool_period, max_flow_age,
                            threading_mode, shm_size, aeron_lib_path,
                            aeron_term_buffer_length,
                            aeron_ipc_term_buffer_length, bw_emulation)
    for hosts in self.graph.services.values():
        self.print_service(hosts)
    self.print_configs()
    self.print_networks()
def calculate_end_to_end_properties(self):
    """Aggregate the properties of ``self.links`` into end-to-end values.

    Sets the following attributes on ``self``:

    * ``max_bandwidth`` - smallest ``bandwidth_bps`` among the links
      (``None`` when there are no links);
    * ``jitter`` - square root of the sum of the squared link jitters;
    * ``latency`` - sum of the link latencies, ``RTT`` is twice that value;
    * ``drop`` - probability that at least one link drops the packet.

    Invalid link data is reported through ``print_and_fail``.
    """
    total_not_drop_probability = 1.0
    self.max_bandwidth = None
    self.latency = 0
    self.jitter = 0
    for link in self.links:
        try:
            # Pick the smallest bandwidth
            if self.max_bandwidth is None:
                self.max_bandwidth = link.bandwidth_bps
            if link.bandwidth_bps < self.max_bandwidth:
                self.max_bandwidth = link.bandwidth_bps
            # Accumulate jitter by summing the variances
            self.jitter = sqrt(
                (self.jitter * self.jitter) + (link.jitter * link.jitter))
            # Latency is just a sum
            self.latency += float(link.latency)
            # Multiply the probabilities of NOT dropping on each link; the
            # complement (taken after the loop) is the chance that at least
            # one link drops, like rolling at least one 6 in several rolls.
            total_not_drop_probability *= (1.0 - float(link.drop))
        except Exception:
            # "except Exception" instead of a bare except so that SystemExit
            # and KeyboardInterrupt are not swallowed; str() on every piece so
            # building the message cannot raise on non-string values.
            print_and_fail("Provided link data is not valid: " +
                           str(link.latency) + "ms " + str(link.drop) +
                           "drop rate " + str(link.bandwidth))
    self.RTT = self.latency * 2
    self.drop = (1.0 - total_not_drop_probability)
def new_bridge(self, name):
    """Create a bridge called ``name`` and register it in ``self.bridges``.

    The bridge is stored as a single-element list under its name. If
    ``get_nodes`` already returns something for that name, the failure is
    reported through ``print_and_fail`` and nothing is registered.

    :param name: name of the new bridge
    :return: the newly created ``NetGraph.Bridge``
    """
    candidate = NetGraph.Bridge(name)
    name_taken = len(self.get_nodes(name)) != 0
    if name_taken:
        print_and_fail("Cant add bridge with name: " + name +
                       ". Another node with the same name already exists")
    else:
        self.bridges[name] = [candidate]
    return candidate
def start_aeron_media_driver(self):
    """Launch the Aeron media driver unless runtime emulation is disabled.

    Nothing happens when the RUNTIME_EMULATION environment variable is set to
    'false'. Otherwise the driver binary is spawned and its process handle is
    kept in ``self.aeron_media_driver``; a failure to spawn it is reported and
    then passed to ``print_and_fail``.
    """
    emulation_disabled = getenv('RUNTIME_EMULATION', 'true') == 'false'
    if emulation_disabled:
        return

    try:
        self.aeron_media_driver = Popen('/usr/bin/Aeron/aeronmd')
        print_named("god", "started aeron_media_driver.")
    except Exception as error:
        print_error("[Py (god)] failed to start aeron media driver.")
        print_and_fail(error)
def bandwidth_in_bps(self, bandwidth_string):
    """Convert a bandwidth description such as ``"10Mbps"`` to bits per second.

    The string is matched against ``self.bandwidth_re``; the first capture is
    taken as the numeric base and the second as the K/M/G multiplier.

    :param bandwidth_string: textual bandwidth, e.g. ``"512Kbps"``
    :return: the bandwidth as an int, or None when the multiplier is not one
             of K, M or G
    """
    pattern = self.bandwidth_re
    if re.match(pattern, bandwidth_string) is None:
        print_and_fail("Bandwidth is not properly specified, accepted values must be: [0-9]+[KMG]bps")

    first_hit = re.findall(pattern, bandwidth_string)[0]
    digits, unit = first_hit[0], first_hit[1]

    scale_by_unit = (
        ('K', 1000),
        ('M', 1000 * 1000),
        ('G', 1000 * 1000 * 1000),
    )
    for prefix, factor in scale_by_unit:
        if unit == prefix:
            return int(digits) * factor
    return None
def main():
    """Entry point of the emulation core running next to a service container.

    Expects at least three command line arguments: the topology file followed
    by two values that are handed to ``setup_container``. The topology is
    parsed, hostnames are resolved, the local instance is located in the graph
    and used as the root for shortest-path computation, the dynamic schedule
    is parsed and the emulation core is initialised. The emulation loop is
    only entered when RUNTIME_EMULATION is not 'false'.
    """
    if len(sys.argv) < 4:
        print_and_fail("Missing arguments. emucore <topology> <container id>")
    else:
        topology_file = sys.argv[1]
    # For future reference: This topology file must not exceed 512KB otherwise docker refuses
    # to copy it as a config file, this has happened with the 2k scale-free topology...

    setup_container(sys.argv[2], sys.argv[3])

    # Because of the bootstrapper hack we cant get output from the emucore through standard docker logs...
    # sys.stdout = open("/var/log/need.log", "w")
    # sys.stderr = sys.stdout

    graph = NetGraph()
    parser = XMLGraphParser(topology_file, graph)
    parser.fill_graph()
    print_message("Done parsing topology")

    print_message("Resolving hostnames...")
    graph.resolve_hostnames()
    print_message("All hosts found!")

    print_message("Determining the root of the tree...")
    # Get our own ip address and set the root of the "tree".
    # .get() is used so that an unknown address reaches the explicit failure
    # message below instead of dying with a KeyError on the lookup itself.
    # NOTE(review): assumes graph.hosts_by_ip is a dict keyed by integer IP.
    own_ip = get_own_ip(graph)
    graph.root = graph.hosts_by_ip.get(ip2int(own_ip))
    if graph.root is None:
        print_and_fail(
            "Failed to identify current service instance in topology!")
    print_message("We are " + graph.root.name + "@" + own_ip)

    print_identified(graph, "Calculating shortest paths...")
    graph.calculate_shortest_paths()

    print_message("Parsing dynamic event schedule...")
    scheduler = parser.parse_schedule(graph.root, graph)

    # Exit cleanly when asked to terminate.
    signal(SIGTERM, lambda signum, frame: exit(0))

    print_message("Initializing network emulation...")
    manager = EmulationCore(graph, scheduler)
    manager.initialize()
    print_identified(graph, "Waiting for command to start experiment")
    sys.stdout.flush()
    sys.stderr.flush()

    if getenv('RUNTIME_EMULATION', 'true') != 'false':
        # Enter the emulation loop
        manager.emulation_loop()
def print_networks(self):
    """Print the ``networks:`` section of the generated compose file.

    The network of the first link is taken as the experiment network; any
    link attached to a different network is reported through
    ``print_and_fail`` since only one network is supported.
    """
    links = self.graph.links
    network = links[0].network
    for candidate in links:
        if candidate.network != network:
            print_and_fail(
                "Multiple network support is not yet implemented!")

    for line in ("networks:", " KollapsNet:", " external:"):
        print(line)
    print(" name: " + network)
    for line in (" outside:", " driver: overlay", ""):
        print(line)
def calculate_shortest_paths(self):
    """Compute hop-count shortest paths from ``self.root`` to every node.

    Dijkstra's algorithm where every link costs one hop. Each time a shorter
    route to a node is found, a new ``NetGraph.Path`` is created with the next
    ``self.path_counter`` value and stored both in ``self.paths`` (keyed by
    node) and in ``self.paths_by_id`` (keyed by path id).
    """
    if self.root is None:
        print_and_fail("Root of the tree has not been defined.")

    infinity = float("inf")
    hops = {}       # node -> best known hop count
    pending = []    # [hop count, node] entries still to be settled

    for hosts in self.services.values():
        for host in hosts:
            initial = infinity if host != self.root else 0
            pending.append([initial, host])
            hops[host] = initial

    for bridge_entry in self.bridges.values():
        bridge = bridge_entry[0]
        pending.append([infinity, bridge])
        hops[bridge] = infinity

    # The root reaches itself through an empty path.
    root_path = NetGraph.Path([], self.path_counter)
    self.paths[self.root] = root_path
    self.paths_by_id[self.path_counter] = root_path
    self.path_counter += 1

    while pending:
        # Stable sort: entries with equal hop counts keep their order.
        pending.sort(key=lambda item: item[0])
        current = pending.pop(0)[1]  # type: NetGraph.Node
        for link in current.links:
            candidate = hops[current] + 1
            target = link.destination
            # if destination is a bridge, it could have been removed
            if target not in hops:
                continue
            if not candidate < hops[target]:
                continue

            hops[target] = candidate
            # extend a copy of the path that leads to the current node
            route = self.paths[current].links[:]
            route.append(link)
            better_path = NetGraph.Path(route, self.path_counter)
            self.paths[target] = better_path
            self.paths_by_id[self.path_counter] = better_path
            self.path_counter += 1

            # update the priority of the target wherever it is still queued
            for item in pending:
                if item[1] == target:
                    item[0] = candidate
def __init__(self, source, destination, latency, jitter, drop, bandwidth, bps, network):
    """Create a link from ``source`` to ``destination``.

    :param source: node the link starts at
    :param destination: node the link ends at
    :param latency: latency, anything ``float()`` accepts
    :param jitter: jitter, anything ``float()`` accepts
    :param drop: drop rate, anything ``float()`` accepts
    :param bandwidth: bandwidth as given in the topology (string form)
    :param bps: the same bandwidth expressed in bits per second
    :param network: name of the network the link belongs to

    Values that cannot be converted to float are reported through
    ``print_and_fail``.
    """
    self.lock = Lock()
    self.index = 0
    self.source = source  # type: NetGraph.Node
    self.destination = destination  # type: NetGraph.Node
    try:
        self.latency = float(latency)
        self.drop = float(drop)
        self.jitter = float(jitter)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are handled here. Every value goes through
        # str() because the inputs that make float() fail (None, lists, ...)
        # would otherwise make the message concatenation fail as well.
        print_and_fail("Provided link data is not valid: " + str(latency) +
                       "ms " + str(drop) + "drop rate " + str(bandwidth))
    self.bandwidth = bandwidth  # type: str
    self.bandwidth_bps = bps  # type: int
    self.flows = []  # type: List[Tuple[int, int]]  # (RTT, Bandwidth)
    self.last_flows_count = 0
    self.network = network
def parse_services(self, experiment, services):
    """Create the graph services declared inside the ``<services>`` block.

    :param experiment: root ``<experiment>`` element, passed on to
                       ``calulate_required_replicas``
    :param services: the ``<services>`` element whose children are parsed

    For each ``<service>`` the optional attributes ``command``, ``share``,
    ``supervisor``/``port``, ``reuse`` and ``replicas`` are read, the replica
    count is adjusted by ``calulate_required_replicas`` and one graph service
    is created per replica. Services created for a supervisor are appended to
    ``self.supervisors``.
    """
    for service in services:
        if service.tag != 'service':
            print_and_fail('Invalid tag inside <services>: ' + service.tag)
        attrib = service.attrib
        if 'name' not in attrib or 'image' not in attrib:
            print_and_fail(
                'A service needs a name and an image attribute.')
        if not attrib['name'] or not attrib['image']:
            print_and_fail(
                'A service needs a name and an image attribute.')

        command = attrib.get('command')
        shared = attrib.get('share') == "true"

        # The port is only looked at for supervisors.
        supervisor = 'supervisor' in attrib
        supervisor_port = 0
        if supervisor and 'port' in attrib:
            supervisor_port = int(attrib['port'])

        # Reuse defaults to True when the attribute is absent.
        reuse = attrib.get('reuse', "true") == "true"

        replicas = 1
        if 'replicas' in attrib:
            try:
                replicas = int(attrib['replicas'])
            except ValueError:
                # int() of an attribute string can only fail with ValueError;
                # a bare except would also trap SystemExit/KeyboardInterrupt.
                print_and_fail(
                    'replicas attribute must be a valid integer.')
        replicas = self.calulate_required_replicas(attrib['name'], replicas,
                                                   experiment, reuse)

        for _ in range(replicas):
            srv = self.graph.new_service(attrib['name'], attrib['image'],
                                         command, shared, reuse, replicas)
            if supervisor:
                self.supervisors.append(srv)
                srv.supervisor_port = supervisor_port
                srv.supervisor = True
def generate(self, pool_period, max_flow_age, threading_mode, shm_size, aeron_lib_path,
             aeron_term_buffer_length, aeron_ipc_term_buffer_length, bw_emulation=True):
    """Print the complete Kubernetes manifest for the parsed topology.

    The cluster configuration is loaded in-cluster when the
    KUBERNETES_SERVICE_HOST environment variable is set and from the local
    kube config otherwise. The number of gods handed to the bootstrapper
    section is the number of nodes the API lists. Any failure while talking
    to the cluster is passed to ``print_and_fail``.

    Roles, bootstrapper and every service are each followed by a ``---``
    document separator; the topology section is printed last.
    """
    separator = "---"
    number_of_gods = 0
    try:
        inside_cluster = os.getenv('KUBERNETES_SERVICE_HOST')
        load_configuration = (config.load_incluster_config
                              if inside_cluster else config.load_kube_config)
        load_configuration()
        node_listing = client.CoreV1Api().list_node().to_dict()
        number_of_gods = len(node_listing["items"])
    except Exception as error:
        print_and_fail(error)

    self.print_roles()
    print(separator)
    self.print_bootstrapper(number_of_gods, pool_period, max_flow_age,
                            threading_mode, shm_size, aeron_lib_path,
                            aeron_term_buffer_length,
                            aeron_ipc_term_buffer_length, bw_emulation)
    print(separator)
    for hosts in self.graph.services.values():
        self.print_service(hosts)
        print(separator)
    self.print_topology()
def parse_bridges(self, root):
    """Register every ``<bridge>`` child of ``root`` in the graph.

    Each child must be tagged ``bridge`` and carry a non-empty ``name``
    attribute; violations are reported through ``print_and_fail``.

    :param root: the ``<bridges>`` element
    """
    missing_name = 'A bridge needs to have a name.'
    for element in root:
        attributes = element.attrib
        if element.tag != 'bridge':
            print_and_fail('Invalid tag inside <bridges>: ' + element.tag)
        if 'name' not in attributes:
            print_and_fail(missing_name)
        if not attributes['name']:
            print_and_fail(missing_name)
        self.graph.new_bridge(attributes['name'])
def main():
    """Entry point of the bootstrapper.

    Needs at least a mode and a label on the command line; an optional third
    argument is the bootstrapper id. The bootstrapper class is chosen from
    the KOLLAPS_ORCHESTRATOR environment variable ('swarm' when unset), with
    Docker Swarm as the fallback for unknown values. Any exception is
    printed, followed by a 20 second sleep.
    """
    try:
        arguments = sys.argv
        if len(arguments) < 3:
            warning = "".join((
                "If you are calling " + arguments[0] + " from your workstation stop.",
                "This should only be used inside containers.",
            ))
            sleep(20)
            print_and_fail(warning)

        mode = arguments[1]
        label = arguments[2]
        bootstrapper_id = None
        if len(arguments) > 3:
            bootstrapper_id = arguments[3]

        orchestrator = os.getenv('KOLLAPS_ORCHESTRATOR', 'swarm')
        if orchestrator == 'kubernetes':
            bootstrapper = KubernetesBootstrapper()
        # insert here any other bootstrappping class required by new orchestrators
        else:
            if orchestrator != 'swarm':
                print_named(
                    "bootstrapper",
                    "Unrecognized orchestrator. Using default: Docker Swarm.")
            bootstrapper = SwarmBootstrapper()

        bootstrapper.bootstrap(mode, label, bootstrapper_id)
    except Exception as error:
        sys.stdout.flush()
        print_error(error)
        sleep(20)
def __init__(self, flow_collector, graph, event_scheduler, ip=None):
    """Set up flow sharing through the Aeron shared library.

    :param flow_collector: stored as ``self.flow_collector``
    :param graph: the topology graph (NetGraph)
    :param event_scheduler: stored as ``self.scheduler`` (EventScheduler)
    :param ip: optional IP string used as the Aeron id; when None the IP of
               the graph root is used

    Besides initialising counters, this loads the library at AERON_LIB_PATH,
    registers ``self.receive_flow`` as its callback, starts the dashboard
    command thread and subscribes to every IP listed in LOCAL_IPS_FILE and
    REMOTE_IPS_FILE before polling is started.
    """
    self.graph = graph  # type: NetGraph
    self.scheduler = event_scheduler  # type: EventScheduler
    self.flow_collector = flow_collector
    self.produced = 0
    self.received = 0
    self.consumed = 0
    self.largest_produced_gap = -1
    self.stop_lock = Lock()
    self.aeron_lib = None
    self.aeron_id = None
    self.local_ips = {}
    self.remote_ips = {}

    # Choose the struct format used for a link identifier from the number of
    # links: "1B" up to BYTE_LIMIT links, "1H" up to SHORT_LIMIT links.
    link_count = len(self.graph.links)
    if link_count <= BYTE_LIMIT:
        self.link_unit = "1B"
    elif link_count <= SHORT_LIMIT:
        self.link_unit = "1H"
    else:
        print_and_fail("Topology has too many links: " + str(link_count))
    self.link_size = struct.calcsize("<" + self.link_unit)

    self.supervisor_count = 0
    self.peer_count = 0

    if ip is None:
        self.aeron_id = self.graph.root.ip
    else:
        self.aeron_id = ip2int(ip)
    # self.aeron_id = ip2int(socket.gethostbyname(socket.gethostname()))

    # Peers are all hosts other than the root, minus the supervisors.
    for service in self.graph.services:
        hosts = self.graph.services[service]
        for host in hosts:
            if host != self.graph.root:
                self.peer_count += 1
            if host.supervisor:
                self.supervisor_count += 1
    self.peer_count -= self.supervisor_count

    # setup python callback
    self.aeron_lib = ctypes.CDLL(AERON_LIB_PATH)

    # The second init() argument is True only above BYTE_LIMIT links and is
    # paired with addFlow16 instead of addFlow8.
    # NOTE(review): presumably selects 16-bit link ids - confirm in the C library.
    if link_count <= BYTE_LIMIT:
        self.aeron_lib.init(self.aeron_id, False)
        self.flow_adding_func = self.aeron_lib.addFlow8
    else:
        self.aeron_lib.init(self.aeron_id, True)
        self.flow_adding_func = self.aeron_lib.addFlow16

    CALLBACKTYPE = CFUNCTYPE(c_voidp, c_ulong, c_uint, POINTER(c_uint))
    c_callback = CALLBACKTYPE(self.receive_flow)
    self.callback = c_callback  # keep reference so it does not get garbage collected
    self.aeron_lib.registerCallback(self.callback)

    # The dashboard socket is only bound here; it is not put into listening
    # mode in this method (presumably done by receive_dashboard_commands).
    self.dashboard_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.dashboard_socket.bind(('0.0.0.0', CommunicationsManager.TCP_PORT))

    self.dashboard_thread = Thread(target=self.receive_dashboard_commands)
    self.dashboard_thread.daemon = True
    self.dashboard_thread.start()

    # TODO PG run through this again, rename variables to match new god logs functionality
    # Collect, without duplicates, the index of the first link of every
    # non-empty path.
    my_starting_links = []
    for key, path in self.graph.paths_by_id.items():
        if len(path.links) > 0 and path.links[0].index not in my_starting_links:
            my_starting_links.append(path.links[0].index)

    # Each line of the IP files holds one integer; local subscriptions also
    # receive the starting link indices as a C array of unsigned ints.
    with open(LOCAL_IPS_FILE, 'r') as file:
        for line in file.readlines():
            self.aeron_lib.addLocalSubs(
                int(line), len(my_starting_links),
                (c_uint * len(my_starting_links))(*my_starting_links))

    with open(REMOTE_IPS_FILE, 'r') as file:
        for line in file.readlines():
            self.aeron_lib.addRemoteSubs(int(line))

    self.aeron_lib.startPolling()
def parse_schedule(self, service, graph):
    """
    Build the scheduler holding the dynamic events that concern ``service``.

    The ``<dynamic>`` block of the topology file is walked in document order.
    Bridge and link events are scheduled on the graph; join, leave, crash,
    disconnect and reconnect events naming our service pick replicas at
    random and are scheduled only when the picked replica is this one. The
    random generator is seeded with a fixed value, so the picks are
    reproducible (presumably so that all instances agree on them).

    :param service: NetGraph.Service this instance represents
    :param graph: the topology graph
    :return: the populated EventScheduler
    """
    XMLtree = ET.parse(self.file)
    root = XMLtree.getroot()
    if root.tag != 'experiment':
        print_and_fail(
            'Not a valid Kollaps topology file, root is not <experiment>')

    dynamic = None
    for child in root:
        if child.tag == 'dynamic':
            if dynamic is not None:
                print_and_fail("Only one <dynamic> block is allowed.")
            dynamic = child

    scheduler = EventScheduler()
    first_join = -1.0
    first_leave = float('inf')

    # if there is no dynamic block than this instance joins straight away
    if dynamic is None:
        scheduler.schedule_join(0.0)
        return scheduler

    seed(12345)
    replicas = []
    for _ in range(service.replica_count):
        replicas.append(
            [False, False, False])  # Joined = False, Disconnected = False, Used = False

    # indexes for replicas entries
    JOINED = 0
    DISCONNECTED = 1
    USED = 2

    # there is a dynamic block, so check if there is anything scheduled for us
    for event in dynamic:
        if event.tag != 'schedule':
            print_and_fail("Only <schedule> is allowed inside <dynamic>")

        # parse time of event. A missing time attribute is not an error at
        # this point: such an event matches neither branch below and ends in
        # the final "<schedule> must have ..." failure instead of a KeyError.
        event_time = 0.0
        if 'time' in event.attrib:
            try:
                event_time = float(event.attrib['time'])
                if event_time < 0.0:
                    print_and_fail("time attribute must be a positive number")
            except ValueError:
                print_and_fail("time attribute must be a valid real number")

        if 'name' in event.attrib and 'time' in event.attrib and 'action' in event.attrib:
            node_name = event.attrib['name']
            action = event.attrib['action']
            bridge_names = list(graph.bridges.keys()) + list(
                graph.removed_bridges.keys())

            # if a bridge is scheduled
            if node_name in bridge_names:
                if action == 'join':
                    scheduler.schedule_bridge_join(event_time, graph, node_name)
                elif action == 'leave':
                    scheduler.schedule_bridge_leave(event_time, graph, node_name)
                continue

            # parse name of service. only process actions that target us
            if node_name != service.name:
                continue

            # parse amount of replicas affected
            amount = 1
            if 'amount' in event.attrib:
                amount = int(event.attrib['amount'])

            # parse action
            if action == 'join':
                for _ in range(amount):
                    available = False
                    chosen = 0
                    # Pick a random replica
                    while not available:
                        chosen = randrange(0, service.replica_count)
                        available = not replicas[chosen][JOINED]
                        if not service.reuse_ip:
                            available = available and not replicas[chosen][USED]

                    # Mark the state
                    replicas[chosen][JOINED] = True
                    if not service.reuse_ip:
                        replicas[chosen][USED] = True

                    # if its us, schedule the action
                    if service.replica_id == chosen:
                        scheduler.schedule_join(event_time)
                        print_message(service.name + " replica " +
                                      str(service.replica_id) +
                                      " scheduled to join at " + str(event_time))
                    if first_join < 0.0:
                        first_join = event_time

            elif action == 'leave' or action == 'crash':
                for _ in range(amount):
                    up = False
                    chosen = 0
                    # Pick a random replica
                    while not up:
                        chosen = randrange(0, service.replica_count)
                        up = replicas[chosen][JOINED]

                    # Mark the state
                    replicas[chosen][JOINED] = False

                    # if its us, schedule the action
                    if service.replica_id == chosen:
                        if action == 'leave':
                            scheduler.schedule_leave(event_time)
                            print_message(service.name + " replica " +
                                          str(service.replica_id) +
                                          " scheduled to leave at " + str(event_time))
                        elif action == 'crash':
                            scheduler.schedule_crash(event_time)
                            print_message(service.name + " replica " +
                                          str(service.replica_id) +
                                          " scheduled to crash at " + str(event_time))
                    if first_leave > event_time:
                        first_leave = event_time

            elif action == 'reconnect':
                for _ in range(amount):
                    disconnected = False
                    chosen = 0
                    # Pick a random replica
                    while not disconnected:
                        chosen = randrange(0, service.replica_count)
                        disconnected = replicas[chosen][DISCONNECTED]

                    # Mark the state
                    replicas[chosen][DISCONNECTED] = False

                    # if its us, schedule the action
                    if service.replica_id == chosen:
                        print_message(service.name + " replica " +
                                      str(service.replica_id) +
                                      " scheduled to reconnect at " + str(event_time))
                        scheduler.schedule_reconnect(event_time)

            elif action == 'disconnect':
                for _ in range(amount):
                    connected = False
                    chosen = 0
                    # Pick a random replica
                    while not connected:
                        chosen = randrange(0, service.replica_count)
                        connected = replicas[chosen][
                            JOINED] and not replicas[chosen][DISCONNECTED]

                    # Mark the state
                    replicas[chosen][DISCONNECTED] = True

                    # if its us, schedule the action
                    if service.replica_id == chosen:
                        print_message(service.name + " replica " +
                                      str(service.replica_id) +
                                      " scheduled to disconnect at " + str(event_time))
                        scheduler.schedule_disconnect(event_time)

            else:
                print_and_fail(
                    "Unrecognized action: " + action +
                    " , allowed actions are join, leave, crash, disconnect, reconnect"
                )

        # Do something dynamically with a link
        elif 'origin' in event.attrib and 'dest' in event.attrib and 'time' in event.attrib:
            # parse origin and destination
            origin = event.attrib['origin']
            destination = event.attrib['dest']

            if 'action' in event.attrib:
                # link is joining or leaving
                if event.attrib['action'] == 'leave':
                    scheduler.schedule_link_leave(event_time, graph, origin,
                                                  destination)
                elif event.attrib['action'] == 'join':
                    # Link is already defined but has been removed before
                    if 'upload' not in event.attrib or 'latency' not in event.attrib:
                        scheduler.schedule_link_join(
                            event_time, graph, origin, destination)
                    # A completely new link with defined properties joins.
                    # upload and latency are known to be present here, so the
                    # only attribute that can still be missing is network; the
                    # previous three-way "and" test could never be true.
                    elif 'network' not in event.attrib:
                        print_and_fail(
                            "Link description incomplete. For a new link, you must provide at least latency, upload, and network attributes."
                        )
                    else:
                        bandwidth = event.attrib['upload']
                        latency = float(event.attrib['latency'])
                        drop = 0
                        if 'drop' in event.attrib:
                            drop = float(event.attrib['drop'])
                        jitter = 0
                        if 'jitter' in event.attrib:
                            jitter = float(event.attrib['jitter'])
                        network = event.attrib['network']

                        scheduler.schedule_new_link(
                            event_time, graph, origin, destination, latency,
                            jitter, drop, bandwidth, network)
                        # a download attribute adds the reverse direction
                        if 'download' in event.attrib:
                            bandwidth = event.attrib['download']
                            scheduler.schedule_new_link(
                                event_time, graph, destination, origin, latency,
                                jitter, drop, bandwidth, network)
                else:
                    print_and_fail("Unrecognized action for link: " +
                                   event.attrib['action'] +
                                   ", allowed are join and leave")
            else:
                # properties of link are changing; -1 marks "not given"
                bandwidth = -1
                if 'upload' in event.attrib:
                    bandwidth = graph.bandwidth_in_bps(
                        event.attrib['upload'])
                latency = -1
                if 'latency' in event.attrib:
                    latency = float(event.attrib['latency'])
                drop = -1
                if 'drop' in event.attrib:
                    drop = float(event.attrib['drop'])
                jitter = -1
                if 'jitter' in event.attrib:
                    jitter = float(event.attrib['jitter'])

                scheduler.schedule_link_change(event_time, graph, origin,
                                               destination, bandwidth,
                                               latency, jitter, drop)
        else:
            print_and_fail(
                '<schedule> must have either name, time and action attributes,'
                + ' or link origin dest and properties attributes')

    # deal with auto join
    if first_join < 0.0:
        print_message(service.name + " scheduled to join at " + str(0.0))
        scheduler.schedule_join(0.0)
    if first_leave < first_join:
        print_and_fail("Dynamic: service " + service.name +
                       " leaves before having joined")

    scheduler.schedule_graph_changes()
    return scheduler
def fill_graph(self):
    """Parse the topology file in ``self.file`` and populate ``self.graph``.

    The root element must be ``<experiment>`` with a ``boot`` attribute. At
    most one ``<services>``, ``<bridges>`` and ``<links>`` block is accepted;
    ``<dynamic>`` is ignored here and any other tag is rejected. Services are
    parsed first, then bridges (optional), then links, and finally every
    collected supervisor is registered on the graph.
    """
    root = ET.parse(self.file).getroot()
    if root.tag != 'experiment':
        print_and_fail(
            'Not a valid Kollaps topology file, root is not <experiment>')
    if 'boot' not in root.attrib:
        print_and_fail(
            '<experiment boot="?"> The experiment needs a valid boostrapper image name'
        )
    self.graph.bootstrapper = root.attrib['boot']

    # One slot per top-level block that may appear at most once.
    blocks = {'services': None, 'bridges': None, 'links': None}
    for child in root:
        tag = child.tag
        if tag in blocks:
            if blocks[tag] is not None:
                print_and_fail("Only one <" + tag + "> block is allowed.")
            blocks[tag] = child
        elif tag != 'dynamic':
            print_and_fail('Unknown tag: ' + tag)

    # Links must be parsed last
    if blocks['services'] is None:
        print_and_fail("No services declared in topology description")
    self.parse_services(root, blocks['services'])

    if blocks['bridges'] is not None:
        self.parse_bridges(blocks['bridges'])

    if blocks['links'] is None:
        print_and_fail("No links declared in topology descritpion")
    self.parse_links(blocks['links'])

    for supervisor in self.supervisors:
        self.graph.set_supervisor(supervisor)
def calulate_required_replicas(self, service, hardcoded_count, root, reuse):
    """Work out how many replicas ``service`` needs given the dynamic events.

    :param service: name of the service
    :param hardcoded_count: replica count declared on the service itself
    :param root: the ``<experiment>`` element
    :param reuse: whether replicas that left may be used again
    :return: ``hardcoded_count`` when there is no ``<dynamic>`` block or it
             holds no join for the service; otherwise the peak number of
             simultaneously joined replicas when ``reuse`` is true, or the
             total number of joins when it is false

    The events are also checked for consistency (negative replica counts,
    disconnecting more than are joined, reconnecting more than are
    disconnected); violations are reported through ``print_and_fail``.
    """
    dynamic = None
    for child in root:
        if child.tag == 'dynamic':
            if dynamic is not None:
                print_and_fail("Only one <dynamic> block is allowed.")
            dynamic = child

    if dynamic is None:
        return hardcoded_count

    JOIN = 1
    LEAVE = 2
    CRASH = 3
    DISCONNECT = 4
    RECONNECT = 5
    kind_by_action = {
        'join': JOIN,
        'leave': LEAVE,
        'crash': CRASH,
        'disconnect': DISCONNECT,
        'reconnect': RECONNECT,
    }

    # first we collect the join/leave/crash/disconnect/reconnect events
    # so we can later sort them and calculate the required replicas
    events = []  # type: List[Tuple[float, int, int]]  # (time, amount, kind)
    has_joins = False
    for event in dynamic:
        if event.tag != 'schedule':
            print_and_fail("Only <schedule> is allowed inside <dynamic>")
        attrib = event.attrib
        if not ('name' in attrib and 'time' in attrib and 'action' in attrib):
            continue
        # only events naming this service are of interest
        if attrib['name'] != service:
            continue

        # parse time of event
        event_time = 0.0
        try:
            event_time = float(attrib['time'])
            if event_time < 0.0:
                print_and_fail(
                    "time attribute must be a positive number")
        except ValueError:
            print_and_fail(
                "time attribute must be a valid real number")

        # parse amount
        amount = 1
        if 'amount' in attrib:
            try:
                amount = int(attrib['amount'])
                if amount < 1:
                    print_and_fail(
                        "amount attribute must be an integer >= 1")
            except ValueError:
                print_and_fail(
                    "amount attribute must be an integer >= 1")

        # parse action; unknown actions are ignored here
        kind = kind_by_action.get(attrib['action'])
        if kind is not None:
            events.append((event_time, amount, kind))
            if kind == JOIN:
                has_joins = True

    if not has_joins:
        return hardcoded_count

    # Stable sort: events with the same time keep their document order.
    events.sort(key=lambda entry: entry[0])

    # Calculate required replicas (and perform semantic checking)
    max_replicas = 0
    cumulative_replicas = 0
    disconnected = 0
    current_replicas = 0
    for event_time, amount, kind in events:
        if kind == JOIN:
            current_replicas += amount
            cumulative_replicas += amount
        elif kind == LEAVE or kind == CRASH:
            current_replicas -= amount
        elif kind == DISCONNECT:
            disconnected += amount
            if amount > current_replicas:
                print_and_fail(
                    "Dynamic section for " + service +
                    " disconnects more replicas than are joined at second "
                    + str(event_time))
        elif kind == RECONNECT:
            # Compare against the count BEFORE decrementing; checking after
            # the decrement rejected reconnecting exactly the replicas that
            # were disconnected.
            if amount > disconnected:
                print_and_fail(
                    "Dynamic section for " + service +
                    " reconnects more replicas than are disconnected at second "
                    + str(event_time))
            disconnected -= amount

        if current_replicas < 0:
            print_and_fail(
                "Dynamic section for " + service +
                " causes a negative number of replicas at second " +
                str(event_time))
        if current_replicas > max_replicas:
            max_replicas = current_replicas

    if reuse:
        return max_replicas
    return cumulative_replicas
def parse_links(self, root):
    """Create the graph links described by the ``<link>`` children of ``root``.

    Every link needs ``origin``, ``dest``, ``latency``, ``upload`` and
    ``network``; ``jitter`` and ``drop`` default to 0. A ``download``
    attribute makes the link bidirectional: each forward link (using the
    upload bandwidth) is immediately followed by its reverse (using the
    download bandwidth).

    When the first node of an endpoint has ``shared_link`` set, a meta bridge
    is inserted next to that endpoint. The link carrying the declared
    latency, jitter and drop is created first, followed by property-free
    links (0, 0, 0.0) attaching the endpoint(s) to the meta bridge(s).
    """
    for link in root:
        if link.tag != 'link':
            print_and_fail('Invalid tag inside <links>: ' + link.tag)

        attrs = link.attrib
        required = ('origin', 'dest', 'latency', 'upload', 'network')
        if any(key not in attrs for key in required):
            print_and_fail("Incomplete link description.")

        origin = attrs['origin']
        source_nodes = self.graph.get_nodes(origin)
        dest = attrs['dest']
        destination_nodes = self.graph.get_nodes(dest)

        jitter = attrs['jitter'] if 'jitter' in attrs else 0
        drop = attrs['drop'] if 'drop' in attrs else 0
        bidirectional = 'download' in attrs

        def connect(start, end, with_properties):
            # Forward link with the upload bandwidth and, for bidirectional
            # links, the reverse link with the download bandwidth.
            if with_properties:
                self.graph.new_link(start, end, attrs['latency'], jitter,
                                    drop, attrs['upload'], attrs['network'])
                if bidirectional:
                    self.graph.new_link(end, start, attrs['latency'], jitter,
                                        drop, attrs['download'],
                                        attrs['network'])
            else:
                self.graph.new_link(start, end, 0, 0, 0.0, attrs['upload'],
                                    attrs['network'])
                if bidirectional:
                    self.graph.new_link(end, start, 0, 0, 0.0,
                                        attrs['download'], attrs['network'])

        source_shared = source_nodes[0].shared_link
        destination_shared = destination_nodes[0].shared_link

        if source_shared and destination_shared:
            src_meta_bridge = self.create_meta_bridge()
            dst_meta_bridge = self.create_meta_bridge()
            # the declared properties live between both meta bridges
            connect(src_meta_bridge, dst_meta_bridge, True)
            connect(origin, src_meta_bridge, False)
            connect(dst_meta_bridge, dest, False)
        elif source_shared:
            meta_bridge = self.create_meta_bridge()
            # the declared properties live between meta bridge and destination
            connect(meta_bridge, dest, True)
            connect(origin, meta_bridge, False)
        elif destination_shared:
            meta_bridge = self.create_meta_bridge()
            # the declared properties live between origin and meta bridge
            connect(origin, meta_bridge, True)
            connect(meta_bridge, dest, False)
        else:
            # Regular case: a direct link between origin and destination
            connect(origin, dest, True)
def main():
    """Run the emulation manager locally against a mocked environment.

    The topology given as first argument is parsed, hostname resolution is
    skipped, a random non-supervisor host is chosen as root, all shortest
    paths are printed and the emulation loop is entered.
    """
    gc.set_debug(gc.DEBUG_STATS)
    setup_mocking()

    topology_file = sys.argv[1]
    graph = NetGraph()
    parser = XMLGraphParser(topology_file, graph)
    parser.fill_graph()
    print("Done parsing topology")

    # Hostname resolution (graph.resolve_hostnames()) is deliberately not
    # performed in this harness.
    print("Skipping Resolving hostnames...")

    seed(None)
    print("Randomly Determining the root of the tree...")
    # Keep drawing a service and one of its hosts until a host that is not a
    # supervisor comes up. (A deterministic alternative would be to take the
    # first host of the first non-supervisor service.)
    service_index = randrange(0, len(graph.services))
    while True:
        hosts = list(graph.services.values())[service_index]
        host_index = randrange(0, len(hosts))
        candidate = list(graph.services.values())[service_index][host_index]
        if not candidate.supervisor:
            graph.root = candidate
            break
        service_index = randrange(0, len(graph.services))

    if graph.root is None:
        print_and_fail(
            "Failed to identify current service instance in topology!")

    print("Calculating shortest paths...")
    graph.calculate_shortest_paths()

    for node, path in graph.paths.items():
        print("##############################")
        print(graph.root.name + " -> " + node.name + ":" +
              str(node.__hash__()))
        print("latency: " + str(path.latency))
        print("drop: " + str(path.drop))
        print("bandwidth: " + str(path.max_bandwidth))
        print("------------------------------")
        for hop in path.links:
            print(" " + hop.source.name + " hop " + hop.destination.name +
                  " i:" + str(hop.index))

    print("Initializing network emulation conditions...")
    scheduler = parser.parse_schedule(graph.root, graph)
    manager = EmulationManager(graph, scheduler)
    manager.initialize()

    print("Starting experiment!")
    # Enter the emulation loop
    manager.emulation_loop()
def resolve_ips(self, number_of_gods):
    """Discover the IPs of all gods via UDP and record them in the IP files.

    :param number_of_gods: how many gods (this one included) must be heard
                           from before each phase completes
    :return: ``self.gods`` with this instance's own entry removed; any
             exception is handed to ``print_and_fail``

    Two phases: first wait until a HELLO was received from
    ``number_of_gods`` distinct addresses, then until a READY was received
    from as many. The own address is the one whose HELLO payload equals the
    random number generated here.
    """
    try:
        # Placeholders until our own address is recognised further down.
        own_ip = "(not yet known)"
        own_ip_int = ip2int("127.0.0.1")

        if number_of_gods > 0:
            print_named(
                "god", "ip: " + str(own_ip) + ", nr. of gods: " +
                str(number_of_gods))
        else:
            print_and_fail('there are no nodes on this "cluster".')

        # listen for msgs from other gods
        recv_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        recv_sock.bind(('', GOD_IPS_SHARE_PORT))

        # setup broadcast
        # NOTE(review): SO_REUSEADDR is set after bind() - confirm this
        # ordering is intended.
        sender_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sender_sock.bind(('', GOD_IPS_SHARE_PORT + 1))
        sender_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        sender_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sender_sock.setblocking(False)

        # broadcast local IPs
        # The random number is what lets us recognise our own message later;
        # presumably broadcast_ips sends it as the HELLO payload.
        random_number = random.getrandbits(128)
        ip_broadcast = Process(target=self.broadcast_ips,
                               args=(
                                   sender_sock,
                                   random_number,
                               ))
        ip_broadcast.start()

        # Phase 1: collect a HELLO from every god. READY messages arriving
        # early are already recorded.
        while len(self.gods) < number_of_gods:
            data, addr = recv_sock.recvfrom(self.BUFFER_LEN)
            msg = data.decode("utf-8").split()
            print_named("god1", f"{addr[0]} :: {msg}")

            ip_as_int = ip2int(addr[0])
            if msg[0] == "READY" and ip_as_int not in self.ready_gods:
                self.ready_gods.append(ip_as_int)
            elif msg[0] == "HELLO" and ip_as_int not in self.gods:
                self.gods[ip_as_int] = msg[1]

        # broadcast ready msgs
        ready_broadcast = Process(target=self.broadcast_ready,
                                  args=(sender_sock, ))
        ready_broadcast.start()

        # Phase 2: wait until every god has announced READY.
        while len(self.ready_gods) < number_of_gods:
            data, addr = recv_sock.recvfrom(self.BUFFER_LEN)
            msg = data.decode("utf-8").split()
            print_named("god2", f"{addr[0]} :: {msg[0]}")

            ipAsInt = ip2int(addr[0])
            if msg[0] == "READY" and ipAsInt not in self.ready_gods:
                self.ready_gods.append(ipAsInt)

        # terminate all broadcasts
        ip_broadcast.terminate()
        ready_broadcast.terminate()
        ip_broadcast.join()
        ready_broadcast.join()

        # find our own IP by matching our random_number
        # and delete ourselves from the list of other gods
        # (deleting while iterating is safe only because of the break)
        for key, value in self.gods.items():
            if str(random_number) == value:
                own_ip_int = key
                own_ip = int2ip(own_ip_int)
                del self.gods[own_ip_int]
                break

        print_named(
            "god", "ip: " + own_ip + ", nr. of gods: " + str(number_of_gods))

        # write all known IPs to a file to be read from c++ lib if necessary
        # NOTE(review): both files are opened in append mode and the local
        # entry is written without a trailing newline - confirm the file is
        # always empty before this runs.
        with open(LOCAL_IPS_FILE, 'a') as locals_file:
            locals_file.write(str(own_ip_int))

        with open(REMOTE_IPS_FILE, 'a') as remotes_file:
            for god in self.gods:
                remotes_file.write(str(god) + "\n")

        # Read both files back to log the addresses in dotted form.
        known_ips = ""
        with open(LOCAL_IPS_FILE, 'r') as file:
            known_ips += "local IP: "
            for line in file.readlines():
                known_ips += int2ip(int(line.strip())) + ", "
        known_ips += "\n "

        with open(REMOTE_IPS_FILE, 'r') as file:
            known_ips += "remote IPs: "
            for line in file.readlines():
                known_ips += int2ip(int(line.strip())) + ", "

        print_named("god", known_ips)

        return self.gods

    except Exception as e:
        print_and_fail(e)
def main():
    """Generate the deployment description for a topology file.

    Usage: ``deploymentGenerator.py <input topology> <orchestrator> [-d]``.
    ``-k`` selects Kubernetes, anything else Docker Swarm; ``-d`` as third
    argument turns bandwidth emulation off. The generated description goes
    to standard output (printed by the generator), the summary and the
    experiment UUID go to standard error.
    """
    if not (len(sys.argv) == 3 or len(sys.argv) == 4):
        msg = "Usage: deploymentGenerator.py <input topology> <orchestrator> > <output compose file>\n" \
            + " <orchestrator> can be -s for Docker Swarm or -k for Kubernetes" \
            + " optionally use -d to deactivate bandwidth emulation at runtime."
        print_and_fail(msg)

    shm_size = 8000000000
    # NOTE(review): absolute path on a developer machine - confirm it is
    # valid in the environment where the deployment runs.
    aeron_lib_path = "/home/daedalus/Documents/aeron4need/cppbuild/Release/lib/libaeronlib.so"
    aeron_term_buffer_length = 64 * 1024 * 1024  # must be multiple of 64*1024
    aeron_ipc_term_buffer_length = 64 * 1024 * 1024  # must be multiple of 64*1024

    threading_mode = 'SHARED'  # aeron uses 1 thread
    # threading_mode = 'SHARED_NETWORK'  # aeron uses 2 threads
    # threading_mode = 'DEDICATED'  # aeron uses 3 threads

    pool_period = 0.05
    max_flow_age = 2

    output = ""

    # TODO use argparse to check for flags and arguments properly
    topology_file = sys.argv[1]
    orchestrator = "kubernetes" if sys.argv[2] == "-k" else "swarm"
    bw_emulation = not (len(sys.argv) > 3 and sys.argv[3] == "-d")

    graph = NetGraph()
    XMLGraphParser(topology_file, graph).fill_graph()
    output += "Graph has " + str(len(graph.links)) + " links.\n"

    service_count = 0
    for hosts in graph.services:
        for host in graph.services[hosts]:
            service_count += 1
    output += " has " + str(service_count) + " hosts.\n"

    if len(graph.links) > SHORT_LIMIT:
        print_and_fail("Topology has too many links: " + str(len(graph.links)))

    # graph.paths maps a node to its Path, so the hop limit must be checked
    # on the values; iterating the mapping itself yields nodes, whose .links
    # are their outgoing links rather than the hops of a path.
    for path in graph.paths.values():
        if len(path.links) > 249:
            msg = "Path from " + path.links[0].source.name + " to " \
                + path.links[-1].destination.name + " is too long (over 249 hops)"
            print_and_fail(msg)

    generator = None
    if orchestrator == "kubernetes":
        generator = KubernetesManifestGenerator(
            os.getcwd() + "/" + topology_file, graph)
    elif orchestrator == 'swarm':
        generator = DockerComposeFileGenerator(topology_file, graph)
    # insert here any other generators required by new orchestrators

    if generator is not None:
        generator.generate(pool_period, max_flow_age, threading_mode,
                           shm_size, aeron_lib_path,
                           aeron_term_buffer_length,
                           aeron_ipc_term_buffer_length, bw_emulation)
        output += "Experiment UUID: " + generator.experiment_UUID
        print(output, file=sys.stderr)
    else:
        print("Failed to find a suitable generator.", file=sys.stderr)