def make_cloudlab_topology(config, headless=False):
    """Fill ``config['topology']`` with the hosts of a CloudLab experiment.

    A driver is created via ``init_driver`` using the username and password
    stored under ``config['topology']['cloudlab']``. If
    ``check_existing_experiment`` returns ``None`` a new experiment is started
    with ``launch``. Every returned connection string containing
    ``'cloudlab.us'`` is parsed with ``cloudlab_conn_rgx``.

    The parsed hosts are mapped onto roles by position: host 0 is the sender,
    host 1 the inbox and host 2 serves as both outbox and receiver. Whatever
    the config already holds for a role overrides the parsed fields.

    Args:
        config: Parsed configuration; must contain a ``'topology'`` mapping
            with ``'cloudlab'``, ``'sender'``, ``'inbox'``, ``'outbox'`` and
            ``'receiver'`` entries.
        headless: Forwarded to ``init_driver`` (presumably runs the browser
            without a window -- confirm against ``init_driver``).

    Returns:
        The same ``config`` object, modified in place.

    Raises:
        IndexError: If fewer than three CloudLab hosts were found.
        AttributeError: If a connection string does not match
            ``cloudlab_conn_rgx`` (``match`` returns ``None``).
    """
    agenda.section("Setup Cloudlab topology")
    agenda.subtask(f"headless: {headless}")

    credentials = config['topology']['cloudlab']
    driver = init_driver(
        credentials['username'],
        credentials['password'],
        headless=headless,
    )

    machines = check_existing_experiment(driver)
    if machines is None:
        machines = launch(driver)

    machines = [
        cloudlab_conn_rgx.match(m).groupdict()
        for m in machines
        if 'cloudlab.us' in m
    ]

    role_to_host = zip(['sender', 'inbox', 'outbox', 'receiver'], [0, 1, 2, 2])
    for name, host_id in role_to_host:
        overrides = config['topology'][name]
        # Copy the parsed host: outbox and receiver share host 2, and without
        # a copy they would share one dict, so the overrides of one role
        # would silently show up in (and overwrite those of) the other.
        node = dict(machines[host_id])
        node.update(overrides)
        config['topology'][name] = node

    return config
def enumerate_experiments(config):
    """Return every experiment combination from the config in random order.

    ``config['experiment']`` maps parameter names to lists of values. The
    cartesian product of those lists is built, each combination is turned into
    a dict, the dicts are passed through ``flatten(..., 'alg')`` and the
    results are wrapped in an ``Experiment`` namedtuple whose fields are the
    parameter names. The final list is shuffled in place before returning.
    """
    agenda.section("Starting experiments")

    params = config['experiment']
    field_names = params.keys()

    def skip_condition(candidate):
        # Not called at the moment: the filter that used it is disabled.
        alg_name = candidate.alg['name']
        if alg_name == 'nimbus' and candidate.sch == 'fifo':
            return True
        return alg_name == 'nobundler' and candidate.sch != 'fifo'

    combos = []
    for values in itertools.product(*params.values()):
        combos.append(dict(zip(field_names, values)))

    Experiment = namedtuple("Experiment", field_names)
    experiments = []
    for flat in flatten(combos, 'alg'):
        experiments.append(Experiment(**flat))

    # Filtering with skip_condition is intentionally left disabled here.
    random.shuffle(experiments)
    return experiments
def init_repo(config, machines):
    """Clone or update the evaluation repo on every remote node and build it.

    For each entry of ``machines`` except ``'self'``: if the path
    ``config['structure']['bundler_root']`` does not exist on the node, the
    repository is cloned there (with submodules); otherwise the ``cloudlab``
    branch is pulled and the submodules are updated. Afterwards ``make`` is
    run in that directory, with stdout and stderr passed as
    ``<root>/<node>.out.mk`` and ``<root>/<node>.err.mk``.
    """
    agenda.section("Init nodes")

    root = config['structure']['bundler_root']
    clone_cmd = f'git clone --recurse-submodules https://github.com/bundler-project/evaluation {root}'
    # Commands used when the checkout already exists on the node.
    refresh_cmds = (
        f"cd {root} && git pull origin cloudlab",
        f"cd {root} && git submodule update --init --recursive",
    )

    for name in machines:
        if name == 'self':
            continue

        node = machines[name]
        agenda.task(f"init {name}: {node.addr}")

        agenda.subtask("cloning eval repo")
        if node.file_exists(root):
            # previously cloned, update to latest commit
            for cmd in refresh_cmds:
                node.run(cmd)
        else:
            node.run(clone_cmd)

        agenda.subtask("compiling experiment tools")
        node.run(
            f"make -C {root}",
            stdout=f"{root}/{name}.out.mk",
            stderr=f"{root}/{name}.err.mk",
        )
def get_interfaces(config, machines):
    """Discover the IPv4 interfaces of every node and store them in config.

    Nodes whose topology entry already has an ``'ifaces'`` key are only
    logged. The ``'self'`` entry is never probed; it is logged when interfaces
    are configured for it and skipped silently otherwise. For every other node
    ``ip -4 -o addr`` is run over its connection, each output line is matched
    against ``ip_addr_rgx``, and all matches whose ``dev`` group is not
    ``"lo"`` are stored as ``config['topology'][node]['ifaces']``.

    Args:
        config: Parsed configuration with a ``'topology'`` mapping.
        machines: Mapping of node name to connection object (must provide
            ``addr`` and ``run``).

    Returns:
        The same ``config`` object, modified in place.

    Raises:
        Exception: If no non-loopback interface could be parsed for a node.
    """
    agenda.section("Get node interfaces")
    topology = config['topology']

    for m in machines:
        conn = machines[m]

        if m == 'self':
            # The local machine may have no topology entry (or no 'ifaces')
            # at all; indexing it unconditionally raised KeyError.
            local = topology['self'] if 'self' in topology else {}
            if 'ifaces' in local:
                agenda.subtask(f"{conn.addr}: {local['ifaces']}")
            continue

        if 'ifaces' in topology[m]:
            agenda.subtask(f"{conn.addr}: {topology[m]['ifaces']}")
            continue

        agenda.task(conn.addr)
        ifaces_raw = conn.run("ip -4 -o addr").stdout.strip().split("\n")
        matches = (ip_addr_rgx.match(line) for line in ifaces_raw)
        ifaces = [
            found.groupdict()
            for found in matches
            if found is not None and found["dev"] != "lo"
        ]
        if not ifaces:
            raise Exception(
                f"Could not find ifaces on {conn.addr}: {ifaces_raw}")

        topology[m]['ifaces'] = ifaces
        agenda.subtask(f"{conn.addr}: {topology[m]['ifaces']}")

    return config
warn("Stopping interactive mode", exit=False) for _name, m in machines.items(): m.interact = False m.verbose = False ################################################################################################### # Setup ################################################################################################### if __name__ == "__main__": args = parser.parse_args() if args.interact: warn("Running in interactive mode. Each command is printed before it's run.\nPress any key to continue executing the command or control-c to stop.", exit=False) agenda.section("Read config") config = read_config(args) config['args'] = args if config['args'].verbose and config['args'].verbose >= 2: logging.basicConfig(level=logging.DEBUG) if 'cloudlab' in config['topology']: config = make_cloudlab_topology(config, headless=args.headless) topo = MahimahiTopo(config) topo.setup_routing(config) machines = topo.machines conns = topo.conns disable_tcp_offloads(config, machines)
#!/usr/bin/env python3 import agenda agenda.section("Set up network") agenda.task("Create Virtual Private Cloud") agenda.task("Attach internet gateway") agenda.task("Allocate subnet #1") agenda.subtask("Hook in internet-enabled route table") agenda.task("Allocate subnet #2") agenda.task("Generate VPC key-pair") agenda.subfailure("Could not create key-pair") agenda.subtask("Attempting to delete old key-pair") agenda.subtask("Attempting to generate new key-pair") agenda.section("Launch instances") agenda.task("Launch instances in cluster #1") agenda.task("Launch instances in cluster #2") agenda.task("Wait for HQ to start running") agenda.subtask("Still in 'pending' state") agenda.subtask("Still in 'pending' state") agenda.task("Wait for workers to reach 'running' state") agenda.task("Wait for HQ to become pingable") print("54.84.179.156 | UNREACHABLE!") print("54.84.179.156 | UNREACHABLE!") print('54.84.179.156 | SUCCESS => {"changed": false, "ping": "pong"}') agenda.task("Wait for workers to become pingable") print('10.0.1.237 | SUCCESS => {"changed": false, "ping": "pong"}') agenda.section("Deploy application") print("""\ PLAY [ansible-playbook]
def main(argv=None):
    """The main entry-point to salvo.

    Parses the command line, loads the topology, and prepends an extra
    ``hq`` cluster that exposes port 22. It then creates a VPC with an
    internet gateway, an internet-enabled route table and one subnet plus
    security group per cluster, tags those resources, creates a key pair and
    launches the instances of every cluster. Once all instances are running
    and answer ``Deployer.test``, the deployment is run. Regardless of the
    outcome, the instances are terminated and the network resources deleted.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Returns:
        ``1`` if an exception was raised while waiting for the instances or
        deploying, otherwise the value returned by ``Deployer.deploy()``.
    """
    import traceback

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description='Provision a new salvo.')
    parser.add_argument('config',
                        type=argparse.FileType('r'),
                        help='salvo configuration file to run')
    parser.add_argument('--playbook',
                        '-p',
                        type=argparse.FileType('r'),
                        default='./deploy/playbook.yml',
                        help='directory where playbooks reside')
    parser.add_argument('--wait',
                        '-w',
                        default=False,
                        action='store_true',
                        help='wait for [Enter] before cleaning up')
    parser.add_argument('--deployment',
                        '-d',
                        type=str,
                        default='salvo',
                        help='deployment name for this salvo')
    parser.add_argument('--set',
                        '-s',
                        nargs='*',
                        type=str,
                        help='key:value pair to set for this salvo execution')
    parser.add_argument('--dry-run',
                        '-n',
                        action='store_true',
                        default=False,
                        help='only print what actions would be taken')
    args = parser.parse_args(argv)

    # Turn ["key:value", ...] into {"key": "value", ...}; only the first ':'
    # separates key from value.
    if args.set is not None:
        args.set = dict(item.split(":", maxsplit=1) for item in args.set)
    else:
        args.set = {}

    topology = Topology.load_file(args.config, args.set)
    hq_cluster = Cluster('hq', {
        'expose': [22],
    }, {})
    # The HQ cluster always comes first, so clusters[0][0] below is the HQ
    # instance.
    topology.clusters = [hq_cluster] + topology.clusters

    agenda.section("Set up network")

    client = boto3.client('ec2')
    ec2 = boto3.resource('ec2')

    # Set up VPC
    agenda.task("Create VPC")
    vpc = client.create_vpc(DryRun=args.dry_run, CidrBlock='10.0.0.0/16')
    vpc = ec2.Vpc(vpc['Vpc']['VpcId'])

    agenda.task("Attach VPC internet gateway")
    gateway = client.create_internet_gateway(DryRun=args.dry_run)
    gateway = ec2.InternetGateway(
        gateway['InternetGateway']['InternetGatewayId'])
    gateway.attach_to_vpc(DryRun=args.dry_run, VpcId=vpc.id)

    agenda.task("Create internet-enabled route table")
    iroutable = vpc.create_route_table(DryRun=args.dry_run)
    iroutable.create_route(DryRun=args.dry_run,
                           DestinationCidrBlock='0.0.0.0/0',
                           GatewayId=gateway.id)

    subnets = []
    secs = []
    for i, c in enumerate(topology.clusters):
        agenda.task("Allocate subnet #{}".format(i + 1))
        subnet = vpc.create_subnet(DryRun=args.dry_run,
                                   CidrBlock='10.0.{}.0/24'.format(i))

        if c.internet:
            agenda.subtask("Hook in internet-enable route table")
            iroutable.associate_with_subnet(DryRun=args.dry_run,
                                            SubnetId=subnet.id)

        # set up security groups
        agenda.subtask("Create network security group")
        sec = vpc.create_security_group(
            DryRun=args.dry_run,
            GroupName='{}-cluster-{}'.format(args.deployment, i + 1),
            Description='Ingress rules for cluster {}-{}'.format(
                args.deployment, c.name))

        # allow all internal traffic
        sec.authorize_ingress(DryRun=args.dry_run,
                              IpProtocol='tcp',
                              FromPort=1,
                              ToPort=65535,
                              CidrIp='10.0.0.0/16')

        if c.expose is not False:
            for p in c.expose:
                agenda.subtask("Allow ingress traffic on port {}".format(p))
                sec.authorize_ingress(DryRun=args.dry_run,
                                      IpProtocol='tcp',
                                      FromPort=p,
                                      ToPort=p,
                                      CidrIp='0.0.0.0/0')

        secs.append(sec)
        subnets.append(subnet)

    # Tag all our VPC resources
    agenda.task("Tag all VPC resources")
    ec2.create_tags(DryRun=args.dry_run,
                    Resources=[
                        vpc.id,
                        gateway.id,
                        iroutable.id,
                    ] + [sn.id for sn in subnets] + [sg.id for sg in secs],
                    Tags=[{
                        'Key': 'salvo',
                        'Value': args.deployment,
                    }])

    # Create access keys
    agenda.task("Generate VPC key pair")
    try:
        keys = client.create_key_pair(DryRun=args.dry_run,
                                      KeyName=args.deployment)
    except botocore.exceptions.ClientError:
        # Key probably already exists. Delete and re-create.
        agenda.subfailure("Could not create key pair")
        agenda.subtask("Attempting to delete old key pair")
        client.delete_key_pair(DryRun=args.dry_run, KeyName=args.deployment)
        agenda.subtask("Attempting to generate new key pair")
        keys = client.create_key_pair(DryRun=args.dry_run,
                                      KeyName=args.deployment)

    keymat = keys['KeyMaterial']
    keys = ec2.KeyPair(keys['KeyName'])

    agenda.section("Launch instances")

    # Launch instances
    clusters = []
    for i, c in enumerate(topology.clusters):
        nics = [{
            "DeviceIndex": 0,
            "Groups": [secs[i].id],
            "SubnetId": subnets[i].id,
            "DeleteOnTermination": True,
            "AssociatePublicIpAddress": c.internet,
        }]

        agenda.task("Launching {} instances in cluster {}".format(
            c.attrs['count'], c.name))
        launched = client.run_instances(
            DryRun=args.dry_run,
            KeyName=keys.name,
            NetworkInterfaces=nics,
            ImageId=c.attrs['image'],
            MinCount=c.attrs['count'],
            MaxCount=c.attrs['count'],
            InstanceType=c.attrs['itype'],
            InstanceInitiatedShutdownBehavior='terminate')['Instances']
        clusters.append(
            [ec2.Instance(inst['InstanceId']) for inst in launched])

    # Pessimistic default: only a completed deployment overwrites it.
    exit_code = 1
    try:
        agenda.task("Wait for HQ to start running")

        hq = clusters[0][0]
        while hq.state['Name'] == 'pending':
            agenda.subtask("Still in 'pending' state")
            sleep(3)
            hq.load()

        if hq.state['Name'] != 'running':
            agenda.failure(hq.state_reason['Message'])
            raise ChildProcessError(hq.state_reason['Message'])

        def prepare(ci, instance):
            # `hq` is a local of main(), so it is captured as a closure
            # variable; declaring it `global` made this raise NameError.
            print("instance {} in {} now available through {}".format(
                instance.private_ip_address, topology.clusters[ci].name,
                hq.public_ip_address))

        agenda.task("Wait for workers to reach 'running' state")

        done = []
        # NOTE(review): if Pool is multiprocessing.Pool, a nested function
        # cannot be pickled and that failure stays hidden in the AsyncResult
        # nobody reads -- confirm which Pool this file imports.
        p = Pool(5)
        pending = True
        while pending:
            pending = False
            for i, cluster in enumerate(clusters):
                for ii, instance in enumerate(cluster):
                    if instance.state['Name'] == 'pending':
                        agenda.subtask(
                            "Instance {}.{} is still pending".format(
                                i + 1, ii + 1))
                        pending = True
                        instance.load()
                        break
                    elif instance.state['Name'] != 'running':
                        agenda.subfailure("Instance {}.{} failed: {}".format(
                            i + 1, ii + 1, instance.state_reason['Message']))
                        raise ChildProcessError(
                            instance.state_reason['Message'])
                    else:
                        # State is now 'running'
                        tag = (i, ii)
                        if tag not in done:
                            # State hasn't been 'running' before
                            done.append(tag)
                            p.apply_async(prepare, [i, instance])
                if pending:
                    break
            sleep(3)
        p.close()
        p.join()

        agenda.task("Wait for HQ to become pingable")

        # Wait for hq to be pingable
        deployment = Deployer(args.playbook.name, topology, keymat, clusters)
        while not deployment.test(hq.public_ip_address):
            sleep(1)

        agenda.task("Wait for workers to become pingable")

        # Wait for workers to be pingable
        for cluster in clusters:
            for instance in cluster:
                while not deployment.test(instance.private_ip_address):
                    sleep(1)

        # Deploy!
        agenda.section("Deploy application")
        exit_code = deployment.deploy()
    except BaseException:
        # Deliberately as broad as the original bare `except:`: even on
        # Ctrl-C the error is reported, cleanup runs and the failure exit
        # code is returned instead of propagating.
        traceback.print_exc()
    finally:
        agenda.section("Clean up VPC")

        if args.wait:
            agenda.prompt("Press [Enter] when you are ready to clean")
            input()

        # Terminate instances and delete VPC resources
        agenda.task("Terminate all instances")
        instances = list(vpc.instances.all())
        vpc.instances.terminate(DryRun=args.dry_run)
        still_running = True
        while still_running:
            still_running = False
            for inst in instances:
                inst.load()
                if inst.state['Name'] != 'terminated':
                    agenda.subtask("At least one instance still shutting down")
                    still_running = True
                    sleep(3)
                    break

        agenda.task("Delete network resources")
        agenda.subtask("key pair")
        keys.delete(DryRun=args.dry_run)
        agenda.subtask("internet-enabled route associations")
        for r in iroutable.associations.all():
            r.delete(DryRun=args.dry_run)
        agenda.subtask("internet-enabled route table")
        iroutable.delete(DryRun=args.dry_run)
        agenda.subtask("internet gateway")
        gateway.detach_from_vpc(DryRun=args.dry_run, VpcId=vpc.id)
        gateway.delete(DryRun=args.dry_run)
        agenda.subtask("subnets")
        try:
            for sn in subnets:
                sn.delete(DryRun=args.dry_run)
        except Exception:
            # Best effort: report the failure and keep cleaning up the rest.
            agenda.subfailure("failed to delete subnet:")
            traceback.print_exc()
        agenda.subtask("security groups")
        for sg in secs:
            # Honour --dry-run like every other cleanup call does.
            sg.delete(DryRun=args.dry_run)
        agenda.subtask("network interfaces")
        for nic in vpc.network_interfaces.all():
            nic.delete(DryRun=args.dry_run)

        agenda.task("Delete the VPC")
        vpc.delete(DryRun=args.dry_run)

    return exit_code